<p align="center">
<b>Find hallucination-prone prompts and use them to fine-tune / ground your LLM</b>
</p>
<p align="center">
<img alt="Python versions" src="https://img.shields.io/pypi/pyversions/anchor-gpt">
<a href="https://pypi.org/project/anchor-gpt/"><img alt="PyPI" src="https://img.shields.io/pypi/v/anchor-gpt"></a>
<img alt="License" src="https://img.shields.io/github/license/dioptra-ai/anchor-gpt">
</p>
## Why Anchor GPT?
Because you can't get ground-truth answers for every prompt, and fine-tuning / grounding with the right data gives much better results. We compared fine-tuning side by side with prompts sampled randomly and with CoreSet (the core algorithm of anchor-gpt), and the results speak for themselves 👇
<p align="center">
<img src="./img/accuracy_results.png" width="400"/>
<br>
<i>Accuracy on a sample of the MMLU test dataset of a fine-tuned LLaMA with 1,000 data points sampled from the Alpaca dataset, using either random sampling or CoreSet</i>
</p>
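For context, CoreSet sampling here means greedy k-center selection over prompt embeddings: repeatedly pick the point farthest from everything selected so far, so the sample covers the embedding space instead of clustering around common prompts. Below is a minimal numpy sketch of that generic algorithm, not necessarily anchor-gpt's exact implementation:

```python
import numpy as np

def coreset_sample(embeddings: np.ndarray, k: int) -> list[int]:
    # Greedy k-center selection: a generic sketch of CoreSet sampling,
    # not necessarily anchor-gpt's exact implementation.
    # Seed with the point farthest from the dataset centroid.
    center = embeddings.mean(axis=0)
    selected = [int(np.argmax(np.linalg.norm(embeddings - center, axis=1)))]
    # min_dist[i] = distance from point i to its nearest selected point.
    min_dist = np.linalg.norm(embeddings - embeddings[selected[0]], axis=1)
    for _ in range(k - 1):
        # Pick the point farthest from the current selection...
        next_idx = int(np.argmax(min_dist))
        selected.append(next_idx)
        # ...and refresh every point's distance to its nearest selected point.
        min_dist = np.minimum(
            min_dist, np.linalg.norm(embeddings - embeddings[next_idx], axis=1)
        )
    return selected
```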
## Installation
```bash
pip install anchor-gpt
```
## Step by Step
1. Use the prompt logger to log your prompts along with their grounding scores
```python
from anchor_gpt import PromptLogger, Prompt

# Your regular grounding process
prompt_embeddings = embedding_model.encode(prompt)
index_response = my_index_endpoint.find_neighbors(
    queries=prompt_embeddings,
    num_neighbors=10,
)

grounding_data = []
grounding_distances = []
for grounding_index, grounding_distance in index_response:
    grounding_data.append(my_index_endpoint.get(grounding_index))
    grounding_distances.append(grounding_distance)

grounded_prompt = build_prompt(prompt, grounding_data)

# Call your LLM
chat_response = my_llm.chat(grounded_prompt, temperature=0.1)

# Log the prompt with its response, grounding distances and embeddings
prompt_logger = PromptLogger()
my_prompt = prompt_logger.log(Prompt(
    text=prompt,
    response=chat_response,
    scores={'grounding_distances': grounding_distances},
    embeddings=prompt_embeddings,
))
```
2. Add additional scores like user feedback asynchronously
```python
my_prompt.update_scores({'user_feedback': 0.8})
```
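In practice, user feedback arrives after the original request has returned, so you need a way to find the logged prompt again. A minimal sketch, assuming a Flask service that keeps the `Prompt` objects returned by `prompt_logger.log()` in an in-memory map keyed by a request id (`prompts_by_id` and the `/feedback` route are illustrative, not part of anchor-gpt):

```python
from flask import Flask, request, jsonify

app = Flask(__name__)

# Hypothetical lookup from request id to the Prompt returned by
# prompt_logger.log(); any persistent store would work just as well.
prompts_by_id = {}

@app.route("/feedback", methods=["POST"])
def feedback():
    # Find the prompt the user is rating and attach their score to it.
    logged_prompt = prompts_by_id[request.json["request_id"]]
    logged_prompt.update_scores({'user_feedback': request.json["score"]})
    return jsonify({'status': 'ok'})
```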
3. Retrieve the worst-performing prompts to fine-tune your model or improve your grounding database
```python
# Define a custom prompt scoring method
def retriever(store, threshold):
    def prompt_average_score(prompt):
        return 0.2 * prompt.scores['grounding_distances'][0] + 0.8 * prompt.scores['user_feedback']

    return list(filter(lambda x: prompt_average_score(x) > threshold, store.select_prompts()))

# Retrieve the ones above the threshold
worst_prompts = prompt_logger.retrieve(retriever, 0.5)

# Remove near duplicates to only keep what matters
deduped_prompts = prompt_logger.deduplicate(worst_prompts, 100)

# Add the right answers to your grounding DB to better answer those prompts next time
```
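What "add the right answers" looks like depends on your vector store. A minimal sketch, assuming a hypothetical `vector_store.upsert()` method and a `curated_answers` mapping of prompt text to human-vetted answers (neither is part of anchor-gpt):

```python
# curated_answers: hypothetical map from prompt text to a vetted answer,
# e.g. written by a reviewer after going through deduped_prompts.
for p in deduped_prompts:
    answer = curated_answers.get(p.text)
    if answer is None:
        continue  # nothing curated yet for this prompt
    # Index the vetted answer under the prompt's embedding so similar
    # prompts ground against it next time (upsert() is assumed here).
    vector_store.upsert(embedding=p.embeddings, document=answer)
```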
## Example in a chat service
```python
from flask import Flask, request, jsonify
from anchor_gpt import PromptLogger, Prompt

app = Flask(__name__)
prompt_logger = PromptLogger()

# Your regular chat endpoint with logging enabled
@app.route("/chat", methods=["POST"])
def chat():
    # Do your grounding as normal:
    prompt_embeddings = model.encode(request.json["prompt"])
    vector_store_results = vector_store.query(prompt_embeddings, top_k=10)

    grounded_prompt = build_prompt(request.json["prompt"], vector_store_results)
    chat_response = my_llm.chat(grounded_prompt, temperature=0.1)

    # Then log the prompt with the response, scores and embeddings.
    # Prompts are stored locally in a SQLite database.
    prompt_logger.log(Prompt(
        text=request.json["prompt"],
        response=chat_response,
        scores={'grounding_distances': [r.distance for r in vector_store_results]},
        embeddings=prompt_embeddings,
    ))

    return chat_response

# A new hallucination retrieval endpoint to get the worst prompts from your LLM
@app.route("/hallucinations", methods=["GET"])
def hallucinations():
    def retriever(store, threshold):
        def prompt_average_score(prompt):
            return prompt.scores['grounding_distances'][0]
        return list(filter(lambda x: prompt_average_score(x) > threshold, store.select_prompts()))

    # Retrieve the prompts with the greatest distance from your grounding data
    worst_prompts = prompt_logger.retrieve(retriever, 0.5)

    # Remove near duplicates and only keep 10 prompts
    deduped_prompts = prompt_logger.deduplicate(worst_prompts, 10)

    # Clean up the store
    prompt_logger.store.purge()

    return jsonify([{'text': p.text, 'response': p.response} for p in deduped_prompts])
```
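To exercise both endpoints, something like the following would work (a sketch assuming the Flask app above is served at http://localhost:5000):

```python
import requests

# Ask a question through the grounded chat endpoint.
chat = requests.post(
    "http://localhost:5000/chat",
    json={"prompt": "What is the capital of Australia?"},
)
print(chat.text)

# Periodically pull the most hallucination-prone prompts for review.
for item in requests.get("http://localhost:5000/hallucinations").json():
    print(item["text"], "->", item["response"])
```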