Name | llama-index-vector-stores-oracledb JSON |
Version |
0.1.4
JSON |
| download |
home_page | None |
Summary | llama-index vector_stores oracle database integration |
upload_time | 2024-10-15 01:55:49 |
maintainer | None |
docs_url | None |
author | Your Name |
requires_python | <3.13,>=3.8.1 |
license | MIT |
keywords |
|
VCS |
|
bugtrack_url |
|
requirements |
No requirements were recorded.
|
Travis-CI |
No Travis.
|
coveralls test coverage |
No coveralls.
|
# LlamaIndex VectorStore Integration for Oracle
This is a very basic example of how to use Oracle as a vector store with LlamaIndex. For a detailed guide, see https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb
`pip install llama-index-vector-stores-oracledb`
# A sample example
```python
from typing import TYPE_CHECKING
import sys
from llama_index.core.schema import Document, TextNode
from llama_index.readers.oracleai import OracleReader, OracleTextSplitter
from llama_index.embeddings.oracleai import OracleEmbeddings
from llama_index.utils.oracleai import OracleSummary
from llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy
from llama_index.vector_stores.oracledb import base as orallamavs

if TYPE_CHECKING:
    import oracledb

"""
In this sample example, we will use 'database' provider for both summary and embeddings.
So, we don't need to do the following:
    - set proxy for 3rd party providers
    - create credential for 3rd party providers

If you choose to use 3rd party provider,
please follow the necessary steps for proxy and credential.
"""

# oracle connection
# please update with your username, password, hostname, and service_name
username = "testuser"
password = "testuser"
dsn = "<hostname/service_name>"

try:
    conn = oracledb.connect(user=username, password=password, dsn=dsn)
    print("Connection successful!")
except Exception:
    # No usable connection: nothing else in the demo can run.
    print("Connection failed!")
    sys.exit(1)


# load onnx model
# please update with your related information
onnx_dir = "DEMO_PY_DIR"
onnx_file = "tinybert.onnx"
model_name = "demo_model"
try:
    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)
    print("ONNX model loaded.")
except Exception:
    print("ONNX model loading failed!")
    sys.exit(1)


# params
# please update necessary fields with related information
loader_params = {
    "owner": "testuser",
    "tablename": "demo_tab",
    "colname": "data",
}
summary_params = {
    "provider": "database",
    "glevel": "S",
    "numParagraphs": 1,
    "language": "english",
}
splitter_params = {"normalize": "all"}
embedder_params = {"provider": "database", "model": "demo_model"}

# instantiate loader, summary, splitter, and embedder
loader = OracleReader(conn=conn, params=loader_params)
summary = OracleSummary(conn=conn, params=summary_params)
splitter = OracleTextSplitter(conn=conn, params=splitter_params)
embedder = OracleEmbeddings(conn=conn, params=embedder_params)

# process the documents
docs = loader.load()

chunks_with_mdata = []
# NOTE: renamed from `id`/`ic` shadowing the `id` builtin in the original.
for doc_id, doc in enumerate(docs, start=1):
    summ = summary.get_summary(doc.text)
    chunks = splitter.split_text(doc.text)
    for chunk_id, chunk in enumerate(chunks, start=1):
        chunk_metadata = doc.metadata.copy()
        # Compose a unique chunk id: <source oid>$<doc index>$<chunk index>
        chunk_metadata["id"] = (
            chunk_metadata["_oid"] + "$" + str(doc_id) + "$" + str(chunk_id)
        )
        chunk_metadata["document_id"] = str(doc_id)
        chunk_metadata["document_summary"] = str(summ[0])
        textnode = TextNode(
            text=chunk,
            id_=chunk_metadata["id"],
            # NOTE(review): _get_text_embedding is a private API of
            # OracleEmbeddings; prefer a public embedding method if one exists.
            embedding=embedder._get_text_embedding(chunk),
            metadata=chunk_metadata,
        )
        chunks_with_mdata.append(textnode)

""" verify """
print(f"Number of total chunks with metadata: {len(chunks_with_mdata)}")


# create Oracle AI Vector Store
vectorstore = OraLlamaVS.from_documents(
    client=conn,
    docs=chunks_with_mdata,
    table_name="oravs",
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
)

""" verify """
print(f"Vector Store Table: {vectorstore.table_name}")

# Create Index
orallamavs.create_index(
    conn, vectorstore, params={"idx_name": "hnsw_oravs", "idx_type": "HNSW"}
)

print("Index created.")


# Perform Semantic Search
query = "What is Oracle AI Vector Store?"
# Renamed from `filter` to avoid shadowing the builtin; still passed as the
# `filter=` keyword argument expected by the search APIs.
doc_filter = {"document_id": ["1"]}

# Similarity search without a filter
print(vectorstore.similarity_search(query, 1))

# Similarity search with a filter
print(vectorstore.similarity_search(query, 1, filter=doc_filter))

# Similarity search with relevance score
print(vectorstore.similarity_search_with_score(query, 1))

# Similarity search with relevance score with filter
print(vectorstore.similarity_search_with_score(query, 1, filter=doc_filter))

# Max marginal relevance search
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5
    )
)

# Max marginal relevance search with filter
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5, filter=doc_filter
    )
)
```
Raw data
{
"_id": null,
"home_page": null,
"name": "llama-index-vector-stores-oracledb",
"maintainer": null,
"docs_url": null,
"requires_python": "<3.13,>=3.8.1",
"maintainer_email": null,
"keywords": null,
"author": "Your Name",
"author_email": "you@example.com",
"download_url": "https://files.pythonhosted.org/packages/6f/3b/70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3/llama_index_vector_stores_oracledb-0.1.4.tar.gz",
"platform": null,
"description": "# LlamaIndex VectorStore Integration for Oracle\n\nThis is a very basic example on how to use Oracle as a vector store with llamaindex. For a detailed guide look at https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb\n\n`pip install llama-index-vector-stores-oracledb`\n\n# A sample example\n\n```python\nfrom typing import TYPE_CHECKING\nimport sys\nfrom llama_index.core.schema import Document, TextNode\nfrom llama_index.readers.oracleai import OracleReader, OracleTextSplitter\nfrom llama_index.embeddings.oracleai import OracleEmbeddings\nfrom llama_index.utils.oracleai import OracleSummary\nfrom llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy\nfrom llama_index.vector_stores.oracledb import base as orallamavs\n\nif TYPE_CHECKING:\n import oracledb\n\n\"\"\"\nIn this sample example, we will use 'database' provider for both summary and embeddings.\nSo, we don't need to do the following:\n - set proxy for 3rd party providers\n - create credential for 3rd party providers\n\nIf you choose to use 3rd party provider,\nplease follow the necessary steps for proxy and credential.\n\"\"\"\n\n# oracle connection\n# please update with your username, password, hostname, and service_name\nusername = \"testuser\"\npassword = \"testuser\"\ndsn = \"<hostname/service_name>\"\n\ntry:\n conn = oracledb.connect(user=username, password=password, dsn=dsn)\n print(\"Connection successful!\")\nexcept Exception as e:\n print(\"Connection failed!\")\n sys.exit(1)\n\n\n# load onnx model\n# please update with your related information\nonnx_dir = \"DEMO_PY_DIR\"\nonnx_file = \"tinybert.onnx\"\nmodel_name = \"demo_model\"\ntry:\n OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)\n print(\"ONNX model loaded.\")\nexcept Exception as e:\n print(\"ONNX model loading failed!\")\n sys.exit(1)\n\n\n# params\n# please update necessary fields with related information\nloader_params = {\n \"owner\": 
\"testuser\",\n \"tablename\": \"demo_tab\",\n \"colname\": \"data\",\n}\nsummary_params = {\n \"provider\": \"database\",\n \"glevel\": \"S\",\n \"numParagraphs\": 1,\n \"language\": \"english\",\n}\nsplitter_params = {\"normalize\": \"all\"}\nembedder_params = {\"provider\": \"database\", \"model\": \"demo_model\"}\n\n# instantiate loader, summary, splitter, and embedder\nloader = OracleReader(conn=conn, params=loader_params)\nsummary = OracleSummary(conn=conn, params=summary_params)\nsplitter = OracleTextSplitter(conn=conn, params=splitter_params)\nembedder = OracleEmbeddings(conn=conn, params=embedder_params)\n\n# process the documents\nloader = OracleReader(conn=conn, params=loader_params)\ndocs = loader.load()\n\nchunks_with_mdata = []\nfor id, doc in enumerate(docs, start=1):\n summ = summary.get_summary(doc.text)\n chunks = splitter.split_text(doc.text)\n for ic, chunk in enumerate(chunks, start=1):\n chunk_metadata = doc.metadata.copy()\n chunk_metadata[\"id\"] = (\n chunk_metadata[\"_oid\"] + \"$\" + str(id) + \"$\" + str(ic)\n )\n chunk_metadata[\"document_id\"] = str(id)\n chunk_metadata[\"document_summary\"] = str(summ[0])\n textnode = TextNode(\n text=chunk,\n id_=chunk_metadata[\"id\"],\n embedding=embedder._get_text_embedding(chunk),\n metadata=chunk_metadata,\n )\n chunks_with_mdata.append(textnode)\n\n\"\"\" verify \"\"\"\nprint(f\"Number of total chunks with metadata: {len(chunks_with_mdata)}\")\n\n\n# create Oracle AI Vector Store\nvectorstore = OraLlamaVS.from_documents(\n client=conn,\n docs=chunks_with_mdata,\n table_name=\"oravs\",\n distance_strategy=DistanceStrategy.DOT_PRODUCT,\n)\n\n\"\"\" verify \"\"\"\nprint(f\"Vector Store Table: {vectorstore.table_name}\")\n\n# Create Index\norallamavs.create_index(\n conn, vectorstore, params={\"idx_name\": \"hnsw_oravs\", \"idx_type\": \"HNSW\"}\n)\n\nprint(\"Index created.\")\n\n\n# Perform Semantic Search\nquery = \"What is Oracle AI Vector Store?\"\nfilter = {\"document_id\": [\"1\"]}\n\n# 
Similarity search without a filter\nprint(vectorstore.similarity_search(query, 1))\n\n# Similarity search with a filter\nprint(vectorstore.similarity_search(query, 1, filter=filter))\n\n# Similarity search with relevance score\nprint(vectorstore.similarity_search_with_score(query, 1))\n\n# Similarity search with relevance score with filter\nprint(vectorstore.similarity_search_with_score(query, 1, filter=filter))\n\n# Max marginal relevance search\nprint(\n vectorstore.max_marginal_relevance_search(\n query, 1, fetch_k=20, lambda_mult=0.5\n )\n)\n\n# Max marginal relevance search with filter\nprint(\n vectorstore.max_marginal_relevance_search(\n query, 1, fetch_k=20, lambda_mult=0.5, filter=filter\n )\n)\n```\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "llama-index vector_stores oracle database integration",
"version": "0.1.4",
"project_urls": null,
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "86746c0b96b8a2f6306e6abc419118d4bf8308aae931d846db017785061b3c5d",
"md5": "a3356477a5db7f3923c1ac746ecee804",
"sha256": "acc2678f9d8f4aa01f9e966349044d67d0c1ca1d226f4adbe21fddf771e038b6"
},
"downloads": -1,
"filename": "llama_index_vector_stores_oracledb-0.1.4-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a3356477a5db7f3923c1ac746ecee804",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<3.13,>=3.8.1",
"size": 9205,
"upload_time": "2024-10-15T01:55:48",
"upload_time_iso_8601": "2024-10-15T01:55:48.886549Z",
"url": "https://files.pythonhosted.org/packages/86/74/6c0b96b8a2f6306e6abc419118d4bf8308aae931d846db017785061b3c5d/llama_index_vector_stores_oracledb-0.1.4-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "6f3b70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3",
"md5": "0b3e2cba2dc089c173c4199f0aeff2c1",
"sha256": "5aef9d5c6a5778d9f47386070b50abe17a0fcc9c0f2db017055ce4589eaa463e"
},
"downloads": -1,
"filename": "llama_index_vector_stores_oracledb-0.1.4.tar.gz",
"has_sig": false,
"md5_digest": "0b3e2cba2dc089c173c4199f0aeff2c1",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<3.13,>=3.8.1",
"size": 10078,
"upload_time": "2024-10-15T01:55:49",
"upload_time_iso_8601": "2024-10-15T01:55:49.994380Z",
"url": "https://files.pythonhosted.org/packages/6f/3b/70e4ed6d0f9a78d5439ad9cb8395d82bc7790bfc9d128abdfba4381402d3/llama_index_vector_stores_oracledb-0.1.4.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-10-15 01:55:49",
"github": false,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"lcname": "llama-index-vector-stores-oracledb"
}