# Text Generation
The Hugging Face Text Generation Python library provides a convenient way of interfacing with a
`text-generation-inference` instance running on
[Hugging Face Inference Endpoints](https://huggingface.co/inference-endpoints) or on the Hugging Face Hub.
## Get Started
### Install
```shell
pip install text-generation
```
### Inference API Usage
```python
from text_generation import InferenceAPIClient

client = InferenceAPIClient("bigscience/bloomz")
text = client.generate("Why is the sky blue?").generated_text
print(text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
or with the asynchronous client:
```python
from text_generation import InferenceAPIAsyncClient

client = InferenceAPIAsyncClient("bigscience/bloomz")
response = await client.generate("Why is the sky blue?")
print(response.generated_text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
async for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
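Both clients also accept generation parameters as keyword arguments. A minimal sketch with sampling enabled — the parameter values are illustrative only, and the keyword names mirror the `Parameters` fields documented under Types below:
```python
from text_generation import InferenceAPIClient

client = InferenceAPIClient("bigscience/bloomz")

# Keyword arguments mirror the Parameters fields documented under Types;
# the values here are illustrative, not recommendations
response = client.generate(
    "Why is the sky blue?",
    max_new_tokens=20,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    seed=42,
)
print(response.generated_text)
```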
Check all models currently deployed on the Hugging Face Inference API with `Text Generation` support:
```python
from text_generation.inference_api import deployed_models

print(deployed_models())
```
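Each entry in the returned list is a `DeployedModel` (documented under Types below). A small sketch, assuming you want just the model IDs:
```python
from text_generation.inference_api import deployed_models

# Print only the model IDs; each entry exposes `model_id` and `sha`
for model in deployed_models():
    print(model.model_id)
```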
### Hugging Face Inference Endpoint Usage
```python
from text_generation import Client

endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"

client = Client(endpoint_url)
text = client.generate("Why is the sky blue?").generated_text
print(text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
or with the asynchronous client:
```python
from text_generation import AsyncClient

endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"

client = AsyncClient(endpoint_url)
response = await client.generate("Why is the sky blue?")
print(response.generated_text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
async for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
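For a private endpoint, the clients accept extra request headers. A minimal sketch assuming a Hugging Face access token stored in a (hypothetical) `HF_TOKEN` environment variable:
```python
import os

from text_generation import Client

# Pass a bearer token for a protected endpoint via the `headers` argument
# (HF_TOKEN is a hypothetical environment variable holding your token)
client = Client(
    "https://YOUR_ENDPOINT.endpoints.huggingface.cloud",
    headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"},
)
print(client.generate("Why is the sky blue?").generated_text)
```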
### Types
```python
from enum import Enum
from typing import List, Optional, Union


# Enum for grammar type
class GrammarType(Enum):
    Json = "json"
    Regex = "regex"


# Grammar type and value
class Grammar:
    # Grammar type
    type: GrammarType
    # Grammar value
    value: Union[str, dict]


class Parameters:
    # Activate logits sampling
    do_sample: bool
    # Maximum number of generated tokens
    max_new_tokens: int
    # The parameter for repetition penalty. 1.0 means no penalty.
    # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    repetition_penalty: Optional[float]
    # The parameter for frequency penalty. 1.0 means no penalty.
    # Penalizes new tokens based on their existing frequency in the text so far,
    # decreasing the model's likelihood to repeat the same line verbatim.
    frequency_penalty: Optional[float]
    # Whether to prepend the prompt to the generated text
    return_full_text: bool
    # Stop generating tokens if a member of `stop_sequences` is generated
    stop: List[str]
    # Random sampling seed
    seed: Optional[int]
    # The value used to modulate the logits distribution
    temperature: Optional[float]
    # The number of highest-probability vocabulary tokens to keep for top-k filtering
    top_k: Optional[int]
    # If set to < 1, only the smallest set of most probable tokens with probabilities
    # that add up to `top_p` or higher are kept for generation
    top_p: Optional[float]
    # Truncate input tokens to the given size
    truncate: Optional[int]
    # Typical decoding mass
    # See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
    typical_p: Optional[float]
    # Generate `best_of` sequences and return the one with the highest token logprobs
    best_of: Optional[int]
    # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
    watermark: bool
    # Get generation details
    details: bool
    # Get decoder input token logprobs and ids
    decoder_input_details: bool
    # Return the N most likely tokens at each step
    top_n_tokens: Optional[int]
    # Grammar to use for generation
    grammar: Optional[Grammar]


class Request:
    # Prompt
    inputs: str
    # Generation parameters
    parameters: Optional[Parameters]
    # Whether to stream output tokens
    stream: bool


# Decoder input tokens
class InputToken:
    # Token ID from the model tokenizer
    id: int
    # Token text
    text: str
    # Logprob
    # Optional since the logprob of the first token cannot be computed
    logprob: Optional[float]


# Generated tokens
class Token:
    # Token ID from the model tokenizer
    id: int
    # Token text
    text: str
    # Logprob
    logprob: Optional[float]
    # Is the token a special token
    # Can be used to ignore tokens when concatenating
    special: bool


# Generation finish reason
class FinishReason(Enum):
    # Number of generated tokens == `max_new_tokens`
    Length = "length"
    # The model generated its end-of-sequence token
    EndOfSequenceToken = "eos_token"
    # The model generated a text included in `stop_sequences`
    StopSequence = "stop_sequence"


# Additional sequences when using the `best_of` parameter
class BestOfSequence:
    # Generated text
    generated_text: str
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]
    # Decoder input tokens, empty if decoder_input_details is False
    prefill: List[InputToken]
    # Generated tokens
    tokens: List[Token]
    # Most likely tokens
    top_tokens: Optional[List[List[Token]]]


# `generate` details
class Details:
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]
    # Decoder input tokens, empty if decoder_input_details is False
    prefill: List[InputToken]
    # Generated tokens
    tokens: List[Token]
    # Most likely tokens
    top_tokens: Optional[List[List[Token]]]
    # Additional sequences when using the `best_of` parameter
    best_of_sequences: Optional[List[BestOfSequence]]


# `generate` return value
class Response:
    # Generated text
    generated_text: str
    # Generation details
    details: Details


# `generate_stream` details
class StreamDetails:
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]


# `generate_stream` return value
class StreamResponse:
    # Generated token
    token: Token
    # Most likely tokens
    top_tokens: Optional[List[Token]]
    # Complete generated text
    # Only available when the generation is finished
    generated_text: Optional[str]
    # Generation details
    # Only available when the generation is finished
    details: Optional[StreamDetails]


# Inference API currently deployed model
class DeployedModel:
    model_id: str
    sha: str
```
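As an illustration of the types above, the `Response` returned by `generate` carries a `Details` object with the finish reason and the generated `Token` list. A sketch — the endpoint URL is a placeholder, and it assumes the server populates these fields as documented:
```python
from text_generation import Client

client = Client("https://YOUR_ENDPOINT.endpoints.huggingface.cloud")
response = client.generate("Why is the sky blue?", max_new_tokens=20)

# Inspect the Details object attached to the Response
print(response.details.finish_reason)     # e.g. FinishReason.Length
print(response.details.generated_tokens)  # number of tokens produced
for token in response.details.tokens:     # per-token id, text, and logprob
    print(token.id, repr(token.text), token.logprob)
```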