upstash-vector

Name	upstash-vector JSON
Version	0.8.0 JSON
	download
home_page	https://github.com/upstash/vector-py
Summary	Serverless Vector SDK from Upstash
upload_time	2025-02-27 11:52:38
maintainer	Upstash
docs_url	None
author	Upstash
requires_python	<4.0,>=3.8
license	MIT
keywords	upstash vector serverless vector
VCS
bugtrack_url
requirements	No requirements were recorded.
Travis-CI	No Travis.
coveralls test coverage	No coveralls.

            # Upstash Vector Python SDK

The Upstash Vector Python client

> [!NOTE]  
> **This project is in GA Stage.**
>
> The Upstash Professional Support fully covers this project. It receives regular updates and bug fixes.
> The Upstash team is committed to maintaining and improving its functionality.

## Installation

Install a released version from pip:

```shell
pip3 install upstash-vector
```

## Usage

In order to use this client, head out to [Upstash Console](https://console.upstash.com) and create a vector database.
There, get the `UPSTASH_VECTOR_REST_URL` and the `UPSTASH_VECTOR_REST_TOKEN` from the dashboard.

### Initializing the Index

```python
from upstash_vector import Index

index = Index(url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN)
```

or alternatively, initialize from the environment variables

```bash
export UPSTASH_VECTOR_REST_URL=[URL]
export UPSTASH_VECTOR_REST_TOKEN=[TOKEN]
```

```python
from upstash_vector import Index

index = Index.from_env()
```

### Upsert Vectors

Vectors can be upserted (inserted or updated) into a namespace of an index
to be later queried or fetched.

There are a couple of ways of doing upserts:

```python
# - dense indexes
#   - (id, vector, metadata, data)
#   - (id, vector, metadata)
#   - (id, vector)
index.upsert(
    vectors=[
        ("id1", [0.1, 0.2], {"metadata_field": "metadata_value"}, "data-value"),
        ("id2", [0.2, 0.2], {"metadata_field": "metadata_value"}),
        ("id3", [0.3, 0.4]),
    ]
)

# - sparse indexes
#   - (id, sparse_vector, metadata, data)
#   - (id, sparse_vector, metadata)
#   - (id, sparse_vector)
index.upsert(
    vectors=[
        ("id1", ([0, 1], [0.1, 0.2]), {"metadata_field": "metadata_value"}, "data-value"),
        ("id2", ([1, 2], [0.2, 0.2]), {"metadata_field": "metadata_value"}),
        ("id3", ([2, 3, 4], [0.3, 0.4, 0.5])),
    ]
)

# - hybrid indexes
#   - (id, vector, sparse_vector, metadata, data)
#   - (id, vector, sparse_vector, metadata)
#   - (id, vector, sparse_vector)
index.upsert(
    vectors=[
        ("id1", [0.1, 0.2], ([0, 1], [0.1, 0.2]), {"metadata_field": "metadata_value"}, "data-value"),
        ("id2", [0.2, 0.2], ([1, 2], [0.2, 0.2]), {"metadata_field": "metadata_value"}),
        ("id3", [0.3, 0.4], ([2, 3, 4], [0.3, 0.4, 0.5])),
    ]
)
```

```python
# - dense indexes
#   - {"id": id, "vector": vector, "metadata": metadata, "data": data}
#   - {"id": id, "vector": vector, "metadata": metadata}
#   - {"id": id, "vector": vector, "data": data}
#   - {"id": id, "vector": vector} 
index.upsert(
    vectors=[
        {"id": "id4", "vector": [0.1, 0.2], "metadata": {"field": "value"}, "data": "value"},
        {"id": "id5", "vector": [0.1, 0.2], "metadata": {"field": "value"}},
        {"id": "id6", "vector": [0.1, 0.2], "data": "value"},
        {"id": "id7", "vector": [0.5, 0.6]},
    ]
)

# - sparse indexes
#   - {"id": id, "sparse_vector": sparse_vector, "metadata": metadata, "data": data}
#   - {"id": id, "sparse_vector": sparse_vector, "metadata": metadata}
#   - {"id": id, "sparse_vector": sparse_vector, "data": data}
#   - {"id": id, "sparse_vector": sparse_vector} 
index.upsert(
    vectors=[
        {"id": "id4", "sparse_vector": ([0, 1], [0.1, 0.2]), "metadata": {"field": "value"}, "data": "value"},
        {"id": "id5", "sparse_vector": ([1, 2], [0.2, 0.2]), "metadata": {"field": "value"}},
        {"id": "id6", "sparse_vector": ([2, 3, 4], [0.3, 0.4, 0.5]), "data": "value"},
        {"id": "id7", "sparse_vector": ([4], [0.3])},
    ]
)

# - hybrid indexes
#   - {"id": id, "vector": vector, "sparse_vector": sparse_vector, "metadata": metadata, "data": data}
#   - {"id": id, "vector": vector, "sparse_vector": sparse_vector, "metadata": metadata}
#   - {"id": id, "vector": vector, "sparse_vector": sparse_vector, "data": data}
#   - {"id": id, "vector": vector, "sparse_vector": sparse_vector} 
index.upsert(
    vectors=[
        {"id": "id4", "vector": [0.1, 0.2], "sparse_vector": ([0], [0.1]), "metadata": {"field": "value"},
         "data": "value"},
        {"id": "id5", "vector": [0.1, 0.2], "sparse_vector": ([1, 2], [0.2, 0.2]), "metadata": {"field": "value"}},
        {"id": "id6", "vector": [0.1, 0.2], "sparse_vector": ([2, 3, 4], [0.3, 0.4, 0.5]), "data": "value"},
        {"id": "id7", "vector": [0.5, 0.6], "sparse_vector": ([4], [0.3])},
    ]
)
```

```python
from upstash_vector import Vector
from upstash_vector.types import SparseVector

# dense indexes
index.upsert(
    vectors=[
        Vector(id="id5", vector=[1, 2], metadata={"field": "value"}, data="value"),
        Vector(id="id6", vector=[1, 2], metadata={"field": "value"}),
        Vector(id="id7", vector=[1, 2], data="value"),
        Vector(id="id8", vector=[6, 7]),
    ]
)

# sparse indexes
index.upsert(
    vectors=[
        Vector(id="id5", sparse_vector=SparseVector([1], [0.1]), metadata={"field": "value"}, data="value"),
        Vector(id="id6", sparse_vector=SparseVector([1, 2], [0.1, 0.2]), metadata={"field": "value"}),
        Vector(id="id7", sparse_vector=SparseVector([3, 5], [0.3, 0.3]), data="value"),
        Vector(id="id8", sparse_vector=SparseVector([4], [0.2])),
    ]
)

# hybrid indexes
index.upsert(
    vectors=[
        Vector(id="id5", vector=[1, 2], sparse_vector=SparseVector([1], [0.1]), metadata={"field": "value"},
               data="value"),
        Vector(id="id6", vector=[1, 2], sparse_vector=SparseVector([1, 2], [0.1, 0.2]), metadata={"field": "value"}),
        Vector(id="id7", vector=[1, 2], sparse_vector=SparseVector([3, 5], [0.3, 0.3]), data="value"),
        Vector(id="id8", vector=[6, 7], sparse_vector=SparseVector([4], [0.2])),
    ]
)
```

If the index is created with an embedding model, raw string data can be upserted.
In this case, the `data` field of the vector will also be set to the `data` passed
below, so that it can be accessed later.

```python
from upstash_vector import Data

res = index.upsert(
    vectors=[
        Data(id="id5", data="Goodbye World", metadata={"field": "value"}),
        Data(id="id6", data="Hello World"),
    ]
)
```

Also, a namespace can be specified to upsert vectors into it.
When no namespace is provided, the default namespace is used.

```python
index.upsert(
    vectors=[
        ("id1", [0.1, 0.2]),
        ("id2", [0.3, 0.4]),
    ],
    namespace="ns",
)
```

### Query Vectors

Some number of vectors that are approximately most similar to a given
query vector can be requested from a namespace of an index.

```python
res = index.query(
    vector=[0.6, 0.9],  # for dense and hybrid indexes
    sparse_vector=([0, 1], [0.1, 0.1]),  # for sparse and hybrid indexes 
    top_k=5,
    include_vectors=False,
    include_metadata=True,
    include_data=True,
    filter="metadata_f = 'metadata_v'"
)

# List of query results, sorted in the descending order of similarity
for r in res:
    print(
        r.id,  # The id used while upserting the vector
        r.score,  # The similarity score of this vector to the query vector. Higher is more similar.
        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).
        r.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).
        r.metadata,  # The metadata of the vector, if requested and present.
        r.data,  # The data of the vector, if requested and present.
    )
```

If the index is created with an embedding model, raw string data can be queried.

```python
res = index.query(
    data="hello",
    top_k=5,
    include_vectors=False,
    include_metadata=True,
    include_data=True,
)
```

When a filter is provided, query results are further narrowed down based
on the vectors whose metadata matches with it.

See [Metadata Filtering](https://upstash.com/docs/vector/features/filtering) documentation
for more information regarding the filter syntax.

Also, a namespace can be specified to query from.
When no namespace is provided, the default namespace is used.

```python
res = index.query(
    vector=[0.6, 0.9],
    top_k=5,
    namespace="ns",
)
```

### Fetch Vectors

A set of vectors can be fetched from a namespace of an index.

```python
res = index.fetch(
    ids=["id3", "id4"],
    include_vectors=False,
    include_metadata=True,
    include_data=True,
)

# List of fetch results, one for each id passed
for r in res:
    if not r:  # Can be None, if there is no such vector with the given id
        continue

    print(
        r.id,  # The id used while upserting the vector
        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).
        r.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).
        r.metadata,  # The metadata of the vector, if requested and present.
        r.data,  # The data of the vector, if requested and present.
    )
```

or, for singular fetch:

```python
res = index.fetch(
    "id1",
    include_vectors=True,
    include_metadata=True,
    include_data=False,
)

r = res[0]
if r:  # Can be None, if there is no such vector with the given id
    print(
        r.id,  # The id used while upserting the vector
        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).
        r.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).        
        r.metadata,  # The metadata of the vector, if requested and present.
        r.data,  # The data of the vector, if requested and present.
    )
```

Apart from the vector ids, vectors can also be fetched with an id prefix.

```python
# Fetch all the vectors whose id starts with `id-1`
res = index.fetch(
    prefix="id-1",
    include_vectors=False,
    include_metadata=True,
    include_data=True,
)
```

Also, a namespace can be specified to fetch from.
When no namespace is provided, the default namespace is used.

```python
res = index.fetch(
    ids=["id3", "id4"],
    namespace="ns",
)
```

### Range Over Vectors

The vectors upserted into a namespace of an index can be scanned
in a page by page fashion.

```python
# Scans the vectors, 100 vectors at a time.
res = index.range(
    cursor="",  # Start the scan from the beginning 
    limit=100,
    include_vectors=False,
    include_metadata=True,
    include_data=True,
)

while res.next_cursor != "":
    res = index.range(
        cursor=res.next_cursor,
        limit=100,
        include_vectors=False,
        include_metadata=True,
        include_data=True,
    )

    for v in res.vectors:
        print(
            v.id,  # The id used while upserting the vector
            v.vector,  # The value of the vector, if requested (for dense and hybrid indexes).
            v.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).
            v.metadata,  # The metadata of the vector, if requested and present.
            v.data,  # The data of the vector, if requested and present.
        )
```

Apart from that, vectors can also be ranged with an id prefix.

```python
# Range over all the vectors whose id starts with `id-1`
res = index.range(
    cursor="",
    prefix="id-1",
    limit=100,
    include_vectors=False,
    include_metadata=True,
    include_data=True,
)

while res.next_cursor != "":
    res = index.range(
        cursor=res.next_cursor,
        prefix="id-1",
        limit=100,
        include_vectors=False,
        include_metadata=True,
        include_data=True,
    )

    for v in res.vectors:
        print(v)
```

Also, a namespace can be specified to range from.
When no namespace is provided, the default namespace is used.

```python
res = index.range(
    cursor="",
    limit=100,
    namespace="ns",
)
```

### Delete Vectors

A list of vectors can be deleted from a namespace of an index.
If no vectors with the given ids exist, this is a no-op.

```python
res = index.delete(
    ids=["id1", "id2"],
)

# How many vectors are deleted out of the given ids.
print(res.deleted)
```

or, for singular deletion:

```python
res = index.delete(
    "id1",
)

# 1 if the vector is deleted, 0 otherwise.
print(res.deleted)
```

Apart from the vector ids, vectors can also be deleted with an id prefix
or metadata filter.

```python
# Delete all the vectors whose id starts with `id-0`
index.delete(
    prefix="id-0",
)

# Delete all the vectors whose metadata matches with the filter
index.delete(
    filter="salary < 3000",
)
```

Also, a namespace can be specified to delete from.
When no namespace is provided, the default namespace is used.

```python
res = index.delete(
    ids=["id1", "id2"],
    namespace="ns",
)
```

### Update a Vector

Any combination of vector value, sparse vector value, data, or metadata can be updated.

```python
res = index.update(
    "id1",
    metadata={"new_field": "new_value"},
)

print(res)  # A boolean indicating whether the vector is updated or not.
```

Also, a namespace can be specified to update from.
When no namespace is provided, the default namespace is used.

```python
res = index.update(
    "id1",
    metadata={"new_field": "new_value"},
    namespace="ns",
)
```

### Reset the Namespace

All vectors can be removed from a namespace of an index.

```python
index.reset() 
```

Also, a namespace can be specified to reset.
When no namespace is provided, the default namespace is used.

```python
index.reset(
    namespace="ns",
) 
```

All namespaces under the index can be reset with a single call
as well.

```python
index.reset(
    all=True,
)
```

### Index Info

Some information regarding the status and type of the index can be requested.
This information also contains per-namespace status.

```python
info = index.info()
print(
    info.vector_count,  # Total number of vectors across all namespaces
    info.pending_vector_count,  # Total number of vectors waiting to be indexed across all namespaces
    info.index_size,  # Total size of the index on disk in bytes
    info.dimension,  # Vector dimension
    info.similarity_function,  # Similarity function used
)

for ns, ns_info in info.namespaces.items():
    print(
        ns,  # Name of the namespace
        ns_info.vector_count,  # Total number of vectors in this namespace
        ns_info.pending_vector_count,  # Total number of vectors waiting to be indexed in this namespace
    )
```

### List Namespaces

All the names of active namespaces can be listed.

```python
namespaces = index.list_namespaces()
for ns in namespaces:
    print(ns)  # name of the namespace
```

### Delete a Namespace

A namespace can be deleted entirely.
If no such namespace exists, an exception is raised.
The default namespace cannot be deleted.

```python
index.delete_namespace(namespace="ns")
```

# Contributing

## Preparing the environment

This project uses [Poetry](https://python-poetry.org) for packaging and dependency management. Make sure you are able to
create the poetry shell with relevant dependencies.

You will also need a vector database on [Upstash](https://console.upstash.com/).

```commandline
poetry install 
```

## Code Formatting

```bash 
poetry run ruff format .
```

## Running tests

To run all the tests, make sure the poetry virtual environment is activated with all
the necessary dependencies.

Create five Vector Stores on Upstash with the configurations listed below (for example, the first one should have
2 dimensions and the second one should use an embedding model). Then set the necessary environment variables:

- A dense index with 2 dimensions, with cosine similarity
- A dense index with an embedding model
- A sparse index
- A hybrid index with 2 dimensions, with cosine similarity for the dense component.
- A hybrid index with embedding models

```
URL=****
TOKEN=****
EMBEDDING_URL=****
EMBEDDING_TOKEN=****
SPARSE_URL=****
SPARSE_TOKEN=****
HYBRID_URL=****
HYBRID_TOKEN=****
HYBRID_EMBEDDING_URL=****
HYBRID_EMBEDDING_TOKEN=****
```

Then, run the following command to run tests:

```bash
poetry run pytest
```

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/upstash/vector-py",
    "name": "upstash-vector",
    "maintainer": "Upstash",
    "docs_url": null,
    "requires_python": "<4.0,>=3.8",
    "maintainer_email": "support@upstash.com",
    "keywords": "Upstash Vector, Serverless Vector",
    "author": "Upstash",
    "author_email": "support@upstash.com",
    "download_url": "https://files.pythonhosted.org/packages/65/22/1b9161b82ef52addc2b71ffca9498cb745b34b2e43e77ef1c921d96fb3f1/upstash_vector-0.8.0.tar.gz",
    "platform": null,
    "description": "# Upstash Vector Python SDK\n\nThe Upstash Vector Python client\n\n> [!NOTE]  \n> **This project is in GA Stage.**\n>\n> The Upstash Professional Support fully covers this project. It receives regular updates, and bug fixes.\n> The Upstash team is committed to maintaining and improving its functionality.\n\n## Installation\n\nInstall a released version from pip:\n\n```shell\npip3 install upstash-vector\n```\n\n## Usage\n\nIn order to use this client, head out to [Upstash Console](https://console.upstash.com) and create a vector database.\nThere, get the `UPSTASH_VECTOR_REST_URL` and the `UPSTASH_VECTOR_REST_TOKEN` from the dashboard.\n\n### Initializing the Index\n\n```python\nfrom upstash_vector import Index\n\nindex = Index(url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN)\n```\n\nor alternatively, initialize from the environment variables\n\n```bash\nexport UPSTASH_VECTOR_REST_URL [URL]\nexport UPSTASH_VECTOR_REST_TOKEN [TOKEN]\n```\n\n```python\nfrom upstash_vector import Index\n\nindex = Index.from_env()\n```\n\n### Upsert Vectors\n\nVectors can be upserted(inserted or updated) into a namespace of an index\nto be later queried or fetched.\n\nThere are a couple of ways of doing upserts:\n\n```python\n# - dense indexes\n#   - (id, vector, metadata, data)\n#   - (id, vector, metadata)\n#   - (id, vector)\nindex.upsert(\n    vectors=[\n        (\"id1\", [0.1, 0.2], {\"metadata_field\": \"metadata_value\"}, \"data-value\"),\n        (\"id2\", [0.2, 0.2], {\"metadata_field\": \"metadata_value\"}),\n        (\"id3\", [0.3, 0.4]),\n    ]\n)\n\n# - sparse indexes\n#   - (id, sparse_vector, metadata, data)\n#   - (id, sparse_vector, metadata)\n#   - (id, sparse_vector)\nindex.upsert(\n    vectors=[\n        (\"id1\", ([0, 1], [0.1, 0.2]), {\"metadata_field\": \"metadata_value\"}, \"data-value\"),\n        (\"id2\", ([1, 2], [0.2, 0.2]), {\"metadata_field\": \"metadata_value\"}),\n        (\"id3\", ([2, 3, 4], [0.3, 0.4, 0.5])),\n    
]\n)\n\n# - hybrid indexes\n#   - (id, vector, sparse_vector, metadata, data)\n#   - (id, vector, sparse_vector, metadata)\n#   - (id, vector, sparse_vector)\nindex.upsert(\n    vectors=[\n        (\"id1\", [0.1, 0.2], ([0, 1], [0.1, 0.2]), {\"metadata_field\": \"metadata_value\"}, \"data-value\"),\n        (\"id2\", [0.2, 0.2], ([1, 2], [0.2, 0.2]), {\"metadata_field\": \"metadata_value\"}),\n        (\"id3\", [0.3, 0.4], ([2, 3, 4], [0.3, 0.4, 0.5])),\n    ]\n)\n```\n\n```python\n# - dense indexes\n#   - {\"id\": id, \"vector\": vector, \"metadata\": metadata, \"data\": data)\n#   - {\"id\": id, \"vector\": vector, \"metadata\": metadata)\n#   - {\"id\": id, \"vector\": vector, \"data\": data)\n#   - {\"id\": id, \"vector\": vector} \nindex.upsert(\n    vectors=[\n        {\"id\": \"id4\", \"vector\": [0.1, 0.2], \"metadata\": {\"field\": \"value\"}, \"data\": \"value\"},\n        {\"id\": \"id5\", \"vector\": [0.1, 0.2], \"metadata\": {\"field\": \"value\"}},\n        {\"id\": \"id6\", \"vector\": [0.1, 0.2], \"data\": \"value\"},\n        {\"id\": \"id7\", \"vector\": [0.5, 0.6]},\n    ]\n)\n\n# - sparse indexes\n#   - {\"id\": id, \"sparse_vector\": sparse_vector, \"metadata\": metadata, \"data\": data)\n#   - {\"id\": id, \"sparse_vector\": sparse_vector, \"metadata\": metadata)\n#   - {\"id\": id, \"sparse_vector\": sparse_vector, \"data\": data)\n#   - {\"id\": id, \"sparse_vector\": sparse_vector} \nindex.upsert(\n    vectors=[\n        {\"id\": \"id4\", \"sparse_vector\": ([0, 1], [0.1, 0.2]), \"metadata\": {\"field\": \"value\"}, \"data\": \"value\"},\n        {\"id\": \"id5\", \"sparse_vector\": ([1, 2], [0.2, 0.2]), \"metadata\": {\"field\": \"value\"}},\n        {\"id\": \"id6\", \"sparse_vector\": ([2, 3, 4], [0.3, 0.4, 0.5]), \"data\": \"value\"},\n        {\"id\": \"id7\", \"sparse_vector\": ([4], [0.3])},\n    ]\n)\n\n# - hybrid indexes\n#   - {\"id\": id, \"vector\": vector, \"sparse_vector\": sparse_vector, \"metadata\": metadata, \"data\": 
data)\n#   - {\"id\": id, \"vector\": vector, \"sparse_vector\": sparse_vector, \"metadata\": metadata)\n#   - {\"id\": id, \"vector\": vector, \"sparse_vector\": sparse_vector, \"data\": data)\n#   - {\"id\": id, \"vector\": vector, \"sparse_vector\": sparse_vector} \nindex.upsert(\n    vectors=[\n        {\"id\": \"id4\", \"vector\": [0.1, 0.2], \"sparse_vector\": ([0], [0.1]), \"metadata\": {\"field\": \"value\"},\n         \"data\": \"value\"},\n        {\"id\": \"id5\", \"vector\": [0.1, 0.2], \"sparse_vector\": ([1, 2], [0.2, 0.2]), \"metadata\": {\"field\": \"value\"}},\n        {\"id\": \"id6\", \"vector\": [0.1, 0.2], \"sparse_vector\": ([2, 3, 4], [0.3, 0.4, 0.5]), \"data\": \"value\"},\n        {\"id\": \"id7\", \"vector\": [0.5, 0.6], \"sparse_vector\": ([4], [0.3])},\n    ]\n)\n```\n\n```python\nfrom upstash_vector import Vector\nfrom upstash_vector.types import SparseVector\n\n# dense indexes\nindex.upsert(\n    vectors=[\n        Vector(id=\"id5\", vector=[1, 2], metadata={\"field\": \"value\"}, data=\"value\"),\n        Vector(id=\"id6\", vector=[1, 2], metadata={\"field\": \"value\"}),\n        Vector(id=\"id7\", vector=[1, 2], data=\"value\"),\n        Vector(id=\"id8\", vector=[6, 7]),\n    ]\n)\n\n# sparse indexes\nindex.upsert(\n    vectors=[\n        Vector(id=\"id5\", sparse_vector=SparseVector([1], [0.1]), metadata={\"field\": \"value\"}, data=\"value\"),\n        Vector(id=\"id6\", sparse_vector=SparseVector([1, 2], [0.1, 0.2]), metadata={\"field\": \"value\"}),\n        Vector(id=\"id7\", sparse_vector=SparseVector([3, 5], [0.3, 0.3]), data=\"value\"),\n        Vector(id=\"id8\", sparse_vector=SparseVector([4], [0.2])),\n    ]\n)\n\n# hybrid indexes\nindex.upsert(\n    vectors=[\n        Vector(id=\"id5\", vector=[1, 2], sparse_vector=SparseVector([1], [0.1]), metadata={\"field\": \"value\"},\n               data=\"value\"),\n        Vector(id=\"id6\", vector=[1, 2], sparse_vector=SparseVector([1, 2], [0.1, 0.2]), metadata={\"field\": 
\"value\"}),\n        Vector(id=\"id7\", vector=[1, 2], sparse_vector=SparseVector([3, 5], [0.3, 0.3]), data=\"value\"),\n        Vector(id=\"id8\", vector=[6, 7], sparse_vector=SparseVector([4], [0.2])),\n    ]\n)\n```\n\nIf the index is created with an embedding model, raw string data can be upserted.\nIn this case, the `data` field of the vector will also be set to the `data` passed\nbelow, so that it can be accessed later.\n\n```python\nfrom upstash_vector import Data\n\nres = index.upsert(\n    vectors=[\n        Data(id=\"id5\", data=\"Goodbye World\", metadata={\"field\": \"value\"}),\n        Data(id=\"id6\", data=\"Hello World\"),\n    ]\n)\n```\n\nAlso, a namespace can be specified to upsert vectors into it.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nindex.upsert(\n    vectors=[\n        (\"id1\", [0.1, 0.2]),\n        (\"id2\", [0.3, 0.4]),\n    ],\n    namespace=\"ns\",\n)\n```\n\n### Query Vectors\n\nSome number of vectors that are approximately most similar to a given\nquery vector can be requested from a namespace of an index.\n\n```python\nres = index.query(\n    vector=[0.6, 0.9],  # for dense and hybrid indexes\n    sparse_vector=([0, 1], [0.1, 0.1]),  # for sparse and hybrid indexes \n    top_k=5,\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n    filter=\"metadata_f = 'metadata_v'\"\n)\n\n# List of query results, sorted in the descending order of similarity\nfor r in res:\n    print(\n        r.id,  # The id used while upserting the vector\n        r.score,  # The similarity score of this vector to the query vector. 
Higher is more similar.\n        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).\n        r.sparse,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).\n        r.metadata,  # The metadata of the vector, if requested and present.\n        r.data,  # The data of the vector, if requested and present.\n    )\n```\n\nIf the index is created with an embedding model, raw string data can be queried.\n\n```python\nres = index.query(\n    data=\"hello\",\n    top_k=5,\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n)\n```\n\nWhen a filter is provided, query results are further narrowed down based\non the vectors whose metadata matches with it.\n\nSee [Metadata Filtering](https://upstash.com/docs/vector/features/filtering) documentation\nfor more information regarding the filter syntax.\n\nAlso, a namespace can be specified to query from.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nres = index.query(\n    vector=[0.6, 0.9],\n    top_k=5,\n    namespace=\"ns\",\n)\n```\n\n### Fetch Vectors\n\nA set of vectors can be fetched from a namespace of an index.\n\n```python\nres = index.fetch(\n    ids=[\"id3\", \"id4\"],\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n)\n\n# List of fetch results, one for each id passed\nfor r in res:\n    if not r:  # Can be None, if there is no such vector with the given id\n        continue\n\n    print(\n        r.id,  # The id used while upserting the vector\n        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).\n        r.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).\n        r.metadata,  # The metadata of the vector, if requested and present.\n        r.data,  # The metadata of the vector, if requested and present.\n    )\n```\n\nor, for singular fetch:\n\n```python\nres = index.fetch(\n    
\"id1\",\n    include_vectors=True,\n    include_metadata=True,\n    include_data=False,\n)\n\nr = res[0]\nif r:  # Can be None, if there is no such vector with the given id\n    print(\n        r.id,  # The id used while upserting the vector\n        r.vector,  # The value of the vector, if requested (for dense and hybrid indexes).\n        r.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).        \n        r.metadata,  # The metadata of the vector, if requested and present.\n        r.data,  # The metadata of the vector, if requested and present.\n    )\n```\n\nApart from the vector ids, vectors can also be fetched with an id prefix.\n\n```python\n# Fetch all the vectors whose id starts with `id-1`\nres = index.fetch(\n    prefix=\"id-1\",\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n)\n```\n\nAlso, a namespace can be specified to fetch from.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nres = index.fetch(\n    ids=[\"id3\", \"id4\"],\n    namespace=\"ns\",\n)\n```\n\n### Range Over Vectors\n\nThe vectors upserted into a namespace of an index can be scanned\nin a page by page fashion.\n\n```python\n# Scans the vectors 100 vector at a time,\nres = index.range(\n    cursor=\"\",  # Start the scan from the beginning \n    limit=100,\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n)\n\nwhile res.next_cursor != \"\":\n    res = index.range(\n        cursor=res.next_cursor,\n        limit=100,\n        include_vectors=False,\n        include_metadata=True,\n        include_data=True,\n    )\n\n    for v in res.vectors:\n        print(\n            v.id,  # The id used while upserting the vector\n            v.vector,  # The value of the vector, if requested (for dense and hybrid indexes).\n            v.sparse_vector,  # The value of the sparse vector, if requested (for sparse and hybrid indexes).\n            v.metadata,  # 
The metadata of the vector, if requested and present.\n            v.data,  # The data of the vector, if requested and present.\n        )\n```\n\nApart from that, vectors can also be ranged with an id prefix.\n\n```python\n# Range over all the vectors whose id starts with `id-1`\nres = index.range(\n    cursor=\"\",\n    prefix=\"id-1\",\n    limit=100,\n    include_vectors=False,\n    include_metadata=True,\n    include_data=True,\n)\n\nwhile res.next_cursor != \"\":\n    res = index.range(\n        cursor=res.next_cursor,\n        prefix=\"id-1\",\n        limit=100,\n        include_vectors=False,\n        include_metadata=True,\n        include_data=True,\n    )\n\n    for v in res.vectors:\n        print(v)\n```\n\nAlso, a namespace can be specified to range from.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nres = index.range(\n    cursor=\"\",\n    limit=100,\n    namespace=\"ns\",\n)\n```\n\n### Delete Vectors\n\nA list of vectors can be deleted from a namespace of index.\nIf no such vectors with the given ids exist, this is no-op.\n\n```python\nres = index.delete(\n    ids=[\"id1\", \"id2\"],\n)\n\n# How many vectors are deleted out of the given ids.\nprint(res.deleted)\n```\n\nor, for singular deletion:\n\n```python\nres = index.delete(\n    \"id1\",\n)\n\n# 1 if the vector is deleted, 0 otherwise.\nprint(res.deleted)\n```\n\nApart from the vector ids, vectors can also be deleted with an id prefix\nor metadata filter.\n\n```python\n# Delete all the vectors whose id starts with `id-0`\nindex.delete(\n    prefix=\"id-0\",\n)\n\n# Delete all the vectors whose metadata matches with the filter\nindex.delete(\n    filter=\"salary < 3000\",\n)\n```\n\nAlso, a namespace can be specified to delete from.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nres = index.delete(\n    ids=[\"id1\", \"id2\"],\n    namespace=\"ns\",\n)\n```\n\n### Update a Vector\n\nAny combination of vector value, sparse vector 
value, data, or metadata can be updated.\n\n```python\nres = index.update(\n    \"id1\",\n    metadata={\"new_field\": \"new_value\"},\n)\n\nprint(res)  # A boolean indicating whether the vector is updated or not.\n```\n\nAlso, a namespace can be specified to update from.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nres = index.update(\n    \"id1\",\n    metadata={\"new_field\": \"new_value\"},\n    namespace=\"ns\",\n)\n```\n\n### Reset the Namespace\n\nAll vectors can be removed from a namespace of an index.\n\n```python\nindex.reset() \n```\n\nAlso, a namespace can be specified to reset.\nWhen no namespace is provided, the default namespace is used.\n\n```python\nindex.reset(\n    namespace=\"ns\",\n) \n```\n\nAll namespaces under the index can be reset with a single call\nas well.\n\n```python\nindex.reset(\n    all=True,\n)\n```\n\n### Index Info\n\nSome information regarding the status and type of the index can be requested.\nThis information also contains per-namespace status.\n\n```python\ninfo = index.info()\nprint(\n    info.vector_count,  # Total number of vectors across all namespaces\n    info.pending_vector_count,  # Total number of vectors waiting to be indexed across all namespaces\n    info.index_size,  # Total size of the index on disk in bytes\n    info.dimension,  # Vector dimension\n    info.similarity_function,  # Similarity function used\n)\n\nfor ns, ns_info in info.namespaces.items():\n    print(\n        ns,  # Name of the namespace\n        ns_info.vector_count,  # Total number of vectors in this namespaces\n        ns_info.pending_vector_count,  # Total number of vectors waiting to be indexed in this namespaces\n    )\n```\n\n### List Namespaces\n\nAll the names of active namespaces can be listed.\n\n```python\nnamespaces = index.list_namespaces()\nfor ns in namespaces:\n    print(ns)  # name of the namespace\n```\n\n### Delete a Namespace\n\nA namespace can be deleted entirely.\nIf no such namespace 
exists, an exception is raised.\nThe default namespace cannot be deleted.\n\n```python\nindex.delete_namespace(namespace=\"ns\")\n```\n\n# Contributing\n\n## Preparing the environment\n\nThis project uses [Poetry](https://python-poetry.org) for packaging and dependency management. Make sure you are able to\ncreate the poetry shell with relevant dependencies.\n\nYou will also need a vector database on [Upstash](https://console.upstash.com/).\n\n```commandline\npoetry install \n```\n\n## Code Formatting\n\n```bash \npoetry run ruff format .\n```\n\n## Running tests\n\nTo run all the tests, make sure the poetry virtual environment is activated with all\nthe necessary dependencies.\n\nCreate the five Vector Stores listed below on Upstash. The first one should have 2 dimensions. The second one should use an embedding model. Set\nthe necessary environment variables:\n\n- A dense index with 2 dimensions, with cosine similarity\n- A dense index with an embedding model\n- A sparse index\n- A hybrid index with 2 dimensions, with cosine similarity for the dense component.\n- A hybrid index with embedding models\n\n```\nURL=****\nTOKEN=****\nEMBEDDING_URL=****\nEMBEDDING_TOKEN=****\nSPARSE_URL=****\nSPARSE_TOKEN=****\nHYBRID_URL=****\nHYBRID_TOKEN=****\nHYBRID_EMBEDDING_URL=****\nHYBRID_EMBEDDING_TOKEN=****\n```\n\nThen, run the following command to run tests:\n\n```bash\npoetry run pytest\n```\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Serverless Vector SDK from Upstash",
    "version": "0.8.0",
    "project_urls": {
        "Homepage": "https://github.com/upstash/vector-py",
        "Repository": "https://github.com/upstash/vector-py"
    },
    "split_keywords": [
        "upstash vector",
        " serverless vector"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "abce1528e6e37d4a1ba7a333ebca7191b638986f4ba9f73ba17458b45c4d36e2",
                "md5": "01c23bc236ca9282f7320b3e20cceb57",
                "sha256": "e8a7560e6e80e22ff2a4d95ff0b08723b22bafaae7dab38eddce51feb30c5785"
            },
            "downloads": -1,
            "filename": "upstash_vector-0.8.0-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "01c23bc236ca9282f7320b3e20cceb57",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": "<4.0,>=3.8",
            "size": 18480,
            "upload_time": "2025-02-27T11:52:36",
            "upload_time_iso_8601": "2025-02-27T11:52:36.189265Z",
            "url": "https://files.pythonhosted.org/packages/ab/ce/1528e6e37d4a1ba7a333ebca7191b638986f4ba9f73ba17458b45c4d36e2/upstash_vector-0.8.0-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "65221b9161b82ef52addc2b71ffca9498cb745b34b2e43e77ef1c921d96fb3f1",
                "md5": "62bfd57a20dc82aa69703a961b56dcca",
                "sha256": "cdeeeeabe08c813f0f525d9b6ceefbf17abb720bd30190cd6df88b9f2c318334"
            },
            "downloads": -1,
            "filename": "upstash_vector-0.8.0.tar.gz",
            "has_sig": false,
            "md5_digest": "62bfd57a20dc82aa69703a961b56dcca",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": "<4.0,>=3.8",
            "size": 18565,
            "upload_time": "2025-02-27T11:52:38",
            "upload_time_iso_8601": "2025-02-27T11:52:38.140109Z",
            "url": "https://files.pythonhosted.org/packages/65/22/1b9161b82ef52addc2b71ffca9498cb745b34b2e43e77ef1c921d96fb3f1/upstash_vector-0.8.0.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2025-02-27 11:52:38",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "upstash",
    "github_project": "vector-py",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": true,
    "lcname": "upstash-vector"
}

Upstash