llama-index-vector-stores-oracledb


Name: llama-index-vector-stores-oracledb (JSON)
Version: 0.1.4 (PyPI version JSON)
download
home_page: None
Summary: llama-index vector_stores oracle database integration
upload_time: 2024-10-15 01:55:49
maintainer: None
docs_url: None
author: Your Name
requires_python: <3.13,>=3.8.1
license: MIT
keywords
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
# LlamaIndex VectorStore Integration for Oracle

This is a very basic example of how to use Oracle as a vector store with LlamaIndex. For a detailed guide, see https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb

`pip install llama-index-vector-stores-oracledb`

# Example

```python
from typing import TYPE_CHECKING
import sys
from llama_index.core.schema import Document, TextNode
from llama_index.readers.oracleai import OracleReader, OracleTextSplitter
from llama_index.embeddings.oracleai import OracleEmbeddings
from llama_index.utils.oracleai import OracleSummary
from llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy
from llama_index.vector_stores.oracledb import base as orallamavs

# NOTE: oracledb must be imported at runtime. An import guarded by
# TYPE_CHECKING is skipped when the script actually runs, so the call to
# oracledb.connect() below would raise NameError and be misreported as
# "Connection failed!".
import oracledb

# In this sample example, we will use the 'database' provider for both summary
# and embeddings. So, we don't need to do the following:
#     - set proxy for 3rd party providers
#     - create credential for 3rd party providers
#
# If you choose to use a 3rd party provider,
# please follow the necessary steps for proxy and credential.

# Oracle connection
# please update with your username, password, hostname, and service_name
username = "testuser"
password = "testuser"
dsn = "<hostname/service_name>"

try:
    conn = oracledb.connect(user=username, password=password, dsn=dsn)
except Exception as exc:
    # Print the underlying error so the failure can actually be diagnosed.
    print(f"Connection failed! {exc}")
    sys.exit(1)
else:
    print("Connection successful!")


# load onnx model
# please update with your related information
onnx_dir = "DEMO_PY_DIR"
onnx_file = "tinybert.onnx"
model_name = "demo_model"
try:
    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)
except Exception as exc:
    # Print the underlying error so the failure can actually be diagnosed.
    print(f"ONNX model loading failed! {exc}")
    sys.exit(1)
else:
    print("ONNX model loaded.")


# params
# please update necessary fields with related information
loader_params = {
    "owner": "testuser",
    "tablename": "demo_tab",
    "colname": "data",
}
summary_params = {
    "provider": "database",
    "glevel": "S",
    "numParagraphs": 1,
    "language": "english",
}
splitter_params = {"normalize": "all"}
embedder_params = {"provider": "database", "model": "demo_model"}

# instantiate loader, summary, splitter, and embedder (each exactly once)
loader = OracleReader(conn=conn, params=loader_params)
summary = OracleSummary(conn=conn, params=summary_params)
splitter = OracleTextSplitter(conn=conn, params=splitter_params)
embedder = OracleEmbeddings(conn=conn, params=embedder_params)

# process the documents
docs = loader.load()

# Build one TextNode per chunk, carrying the parent document's metadata plus
# a per-chunk id, the document number, and the document summary.
chunks_with_mdata = []
for doc_idx, doc in enumerate(docs, start=1):
    summ = summary.get_summary(doc.text)
    chunks = splitter.split_text(doc.text)
    for chunk_idx, chunk in enumerate(chunks, start=1):
        # Copy so every chunk gets its own metadata dict.
        chunk_metadata = doc.metadata.copy()
        oid = chunk_metadata["_oid"]
        # Chunk id format: <_oid>$<document number>$<chunk number>
        chunk_metadata["id"] = f"{oid}${doc_idx}${chunk_idx}"
        chunk_metadata["document_id"] = str(doc_idx)
        chunk_metadata["document_summary"] = str(summ[0])
        textnode = TextNode(
            text=chunk,
            id_=chunk_metadata["id"],
            embedding=embedder._get_text_embedding(chunk),
            metadata=chunk_metadata,
        )
        chunks_with_mdata.append(textnode)

# verify
print(f"Number of total chunks with metadata: {len(chunks_with_mdata)}")


# create Oracle AI Vector Store
vectorstore = OraLlamaVS.from_documents(
    client=conn,
    docs=chunks_with_mdata,
    table_name="oravs",
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
)

# verify
print(f"Vector Store Table: {vectorstore.table_name}")

# Create Index
orallamavs.create_index(
    conn, vectorstore, params={"idx_name": "hnsw_oravs", "idx_type": "HNSW"}
)

print("Index created.")


# Perform Semantic Search
query = "What is Oracle AI Vector Store?"
# Named search_filter so the builtin filter() is not shadowed; it restricts
# results to chunks whose "document_id" metadata is "1".
search_filter = {"document_id": ["1"]}

# Similarity search without a filter
print(vectorstore.similarity_search(query, 1))

# Similarity search with a filter
print(vectorstore.similarity_search(query, 1, filter=search_filter))

# Similarity search with relevance score
print(vectorstore.similarity_search_with_score(query, 1))

# Similarity search with relevance score with filter
print(vectorstore.similarity_search_with_score(query, 1, filter=search_filter))

# Max marginal relevance search
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5
    )
)

# Max marginal relevance search with filter
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5, filter=search_filter
    )
)
```

            

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "llama-index-vector-stores-oracledb",
    "maintainer": null,
    "docs_url": null,
    "requires_python": "<3.13,>=3.8.1",
    "maintainer_email": null,
    "keywords": null,
    "author": "Your Name",
    "author_email": "you@example.com",
    "download_url": "https://files.pythonhosted.org/packages/6f/3b/70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3/llama_index_vector_stores_oracledb-0.1.4.tar.gz",
    "platform": null,
    "description": "# LlamaIndex VectorStore Integration for Oracle\n\nThis is a very basic example on how to use Oracle as a vector store with llamaindex. For a detailed guide look at https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb\n\n`pip install llama-index-vector-stores-oracledb`\n\n# A sample example\n\n```python\nfrom typing import TYPE_CHECKING\nimport sys\nfrom llama_index.core.schema import Document, TextNode\nfrom llama_index.readers.oracleai import OracleReader, OracleTextSplitter\nfrom llama_index.embeddings.oracleai import OracleEmbeddings\nfrom llama_index.utils.oracleai import OracleSummary\nfrom llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy\nfrom llama_index.vector_stores.oracledb import base as orallamavs\n\nif TYPE_CHECKING:\n    import oracledb\n\n\"\"\"\nIn this sample example, we will use 'database' provider for both summary and embeddings.\nSo, we don't need to do the following:\n    - set proxy for 3rd party providers\n    - create credential for 3rd party providers\n\nIf you choose to use 3rd party provider,\nplease follow the necessary steps for proxy and credential.\n\"\"\"\n\n# oracle connection\n# please update with your username, password, hostname, and service_name\nusername = \"testuser\"\npassword = \"testuser\"\ndsn = \"<hostname/service_name>\"\n\ntry:\n    conn = oracledb.connect(user=username, password=password, dsn=dsn)\n    print(\"Connection successful!\")\nexcept Exception as e:\n    print(\"Connection failed!\")\n    sys.exit(1)\n\n\n# load onnx model\n# please update with your related information\nonnx_dir = \"DEMO_PY_DIR\"\nonnx_file = \"tinybert.onnx\"\nmodel_name = \"demo_model\"\ntry:\n    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)\n    print(\"ONNX model loaded.\")\nexcept Exception as e:\n    print(\"ONNX model loading failed!\")\n    sys.exit(1)\n\n\n# params\n# please update necessary fields with related 
information\nloader_params = {\n    \"owner\": \"testuser\",\n    \"tablename\": \"demo_tab\",\n    \"colname\": \"data\",\n}\nsummary_params = {\n    \"provider\": \"database\",\n    \"glevel\": \"S\",\n    \"numParagraphs\": 1,\n    \"language\": \"english\",\n}\nsplitter_params = {\"normalize\": \"all\"}\nembedder_params = {\"provider\": \"database\", \"model\": \"demo_model\"}\n\n# instantiate loader, summary, splitter, and embedder\nloader = OracleReader(conn=conn, params=loader_params)\nsummary = OracleSummary(conn=conn, params=summary_params)\nsplitter = OracleTextSplitter(conn=conn, params=splitter_params)\nembedder = OracleEmbeddings(conn=conn, params=embedder_params)\n\n# process the documents\nloader = OracleReader(conn=conn, params=loader_params)\ndocs = loader.load()\n\nchunks_with_mdata = []\nfor id, doc in enumerate(docs, start=1):\n    summ = summary.get_summary(doc.text)\n    chunks = splitter.split_text(doc.text)\n    for ic, chunk in enumerate(chunks, start=1):\n        chunk_metadata = doc.metadata.copy()\n        chunk_metadata[\"id\"] = (\n            chunk_metadata[\"_oid\"] + \"$\" + str(id) + \"$\" + str(ic)\n        )\n        chunk_metadata[\"document_id\"] = str(id)\n        chunk_metadata[\"document_summary\"] = str(summ[0])\n        textnode = TextNode(\n            text=chunk,\n            id_=chunk_metadata[\"id\"],\n            embedding=embedder._get_text_embedding(chunk),\n            metadata=chunk_metadata,\n        )\n        chunks_with_mdata.append(textnode)\n\n\"\"\" verify \"\"\"\nprint(f\"Number of total chunks with metadata: {len(chunks_with_mdata)}\")\n\n\n# create Oracle AI Vector Store\nvectorstore = OraLlamaVS.from_documents(\n    client=conn,\n    docs=chunks_with_mdata,\n    table_name=\"oravs\",\n    distance_strategy=DistanceStrategy.DOT_PRODUCT,\n)\n\n\"\"\" verify \"\"\"\nprint(f\"Vector Store Table: {vectorstore.table_name}\")\n\n# Create Index\norallamavs.create_index(\n    conn, vectorstore, 
params={\"idx_name\": \"hnsw_oravs\", \"idx_type\": \"HNSW\"}\n)\n\nprint(\"Index created.\")\n\n\n# Perform Semantic Search\nquery = \"What is Oracle AI Vector Store?\"\nfilter = {\"document_id\": [\"1\"]}\n\n# Similarity search without a filter\nprint(vectorstore.similarity_search(query, 1))\n\n# Similarity search with a filter\nprint(vectorstore.similarity_search(query, 1, filter=filter))\n\n# Similarity search with relevance score\nprint(vectorstore.similarity_search_with_score(query, 1))\n\n# Similarity search with relevance score with filter\nprint(vectorstore.similarity_search_with_score(query, 1, filter=filter))\n\n# Max marginal relevance search\nprint(\n    vectorstore.max_marginal_relevance_search(\n        query, 1, fetch_k=20, lambda_mult=0.5\n    )\n)\n\n# Max marginal relevance search with filter\nprint(\n    vectorstore.max_marginal_relevance_search(\n        query, 1, fetch_k=20, lambda_mult=0.5, filter=filter\n    )\n)\n```\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "llama-index vector_stores oracle database integration",
    "version": "0.1.4",
    "project_urls": null,
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "86746c0b96b8a2f6306e6abc419118d4bf8308aae931d846db017785061b3c5d",
                "md5": "a3356477a5db7f3923c1ac746ecee804",
                "sha256": "acc2678f9d8f4aa01f9e966349044d67d0c1ca1d226f4adbe21fddf771e038b6"
            },
            "downloads": -1,
            "filename": "llama_index_vector_stores_oracledb-0.1.4-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "a3356477a5db7f3923c1ac746ecee804",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": "<3.13,>=3.8.1",
            "size": 9205,
            "upload_time": "2024-10-15T01:55:48",
            "upload_time_iso_8601": "2024-10-15T01:55:48.886549Z",
            "url": "https://files.pythonhosted.org/packages/86/74/6c0b96b8a2f6306e6abc419118d4bf8308aae931d846db017785061b3c5d/llama_index_vector_stores_oracledb-0.1.4-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "6f3b70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3",
                "md5": "0b3e2cba2dc089c173c4199f0aeff2c1",
                "sha256": "5aef9d5c6a5778d9f47386070b50abe17a0fcc9c0f2db017055ce4589eaa463e"
            },
            "downloads": -1,
            "filename": "llama_index_vector_stores_oracledb-0.1.4.tar.gz",
            "has_sig": false,
            "md5_digest": "0b3e2cba2dc089c173c4199f0aeff2c1",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": "<3.13,>=3.8.1",
            "size": 10078,
            "upload_time": "2024-10-15T01:55:49",
            "upload_time_iso_8601": "2024-10-15T01:55:49.994380Z",
            "url": "https://files.pythonhosted.org/packages/6f/3b/70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3/llama_index_vector_stores_oracledb-0.1.4.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-10-15 01:55:49",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "llama-index-vector-stores-oracledb"
}
        
Elapsed time: 0.35636s