# Text Generation
The Hugging Face Text Generation Python library provides a convenient way of interfacing with a
`text-generation-inference` instance running on
[Hugging Face Inference Endpoints](https://huggingface.co/inference-endpoints) or on the Hugging Face Hub.
## Get Started
### Install
```shell
pip install text-generation
```
### Inference API Usage
```python
from text_generation import InferenceAPIClient

client = InferenceAPIClient("bigscience/bloomz")
text = client.generate("Why is the sky blue?").generated_text
print(text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
or with the asynchronous client:
```python
from text_generation import InferenceAPIAsyncClient

client = InferenceAPIAsyncClient("bigscience/bloomz")
response = await client.generate("Why is the sky blue?")
print(response.generated_text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
async for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
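Both clients also accept generation parameters as keyword arguments. A minimal sketch with sampling enabled — the parameter values are illustrative only, and the keyword names mirror the `Parameters` fields documented under Types below:
```python
from text_generation import InferenceAPIClient

client = InferenceAPIClient("bigscience/bloomz")

# Keyword arguments mirror the Parameters fields documented under Types;
# the values here are illustrative, not recommendations
response = client.generate(
    "Why is the sky blue?",
    max_new_tokens=20,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    seed=42,
)
print(response.generated_text)
```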
Check all models currently deployed on the Hugging Face Inference API with `Text Generation` support:
```python
from text_generation.inference_api import deployed_models

print(deployed_models())
```
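Each entry in the returned list is a `DeployedModel` (documented under Types below). A small sketch, assuming you want just the model IDs:
```python
from text_generation.inference_api import deployed_models

# Print only the model IDs; each entry exposes `model_id` and `sha`
for model in deployed_models():
    print(model.model_id)
```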
### Hugging Face Inference Endpoint Usage
```python
from text_generation import Client

endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"

client = Client(endpoint_url)
text = client.generate("Why is the sky blue?").generated_text
print(text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
or with the asynchronous client:
```python
from text_generation import AsyncClient

endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"

client = AsyncClient(endpoint_url)
response = await client.generate("Why is the sky blue?")
print(response.generated_text)
# ' Rayleigh scattering'

# Token Streaming
text = ""
async for response in client.generate_stream("Why is the sky blue?"):
    if not response.token.special:
        text += response.token.text

print(text)
# ' Rayleigh scattering'
```
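For a private endpoint, the clients accept extra request headers. A minimal sketch assuming a Hugging Face access token stored in a (hypothetical) `HF_TOKEN` environment variable:
```python
import os

from text_generation import Client

# Pass a bearer token for a protected endpoint via the `headers` argument
# (HF_TOKEN is a hypothetical environment variable holding your token)
client = Client(
    "https://YOUR_ENDPOINT.endpoints.huggingface.cloud",
    headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"},
)
print(client.generate("Why is the sky blue?").generated_text)
```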
### Types
```python
from enum import Enum
from typing import List, Optional, Union


# Enum for grammar type
class GrammarType(Enum):
    Json = "json"
    Regex = "regex"


# Grammar type and value
class Grammar:
    # Grammar type
    type: GrammarType
    # Grammar value
    value: Union[str, dict]


class Parameters:
    # Activate logits sampling
    do_sample: bool
    # Maximum number of generated tokens
    max_new_tokens: int
    # The parameter for repetition penalty. 1.0 means no penalty.
    # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    repetition_penalty: Optional[float]
    # The parameter for frequency penalty. 1.0 means no penalty.
    # Penalizes new tokens based on their existing frequency in the text so far,
    # decreasing the model's likelihood to repeat the same line verbatim.
    frequency_penalty: Optional[float]
    # Whether to prepend the prompt to the generated text
    return_full_text: bool
    # Stop generating tokens if a member of `stop_sequences` is generated
    stop: List[str]
    # Random sampling seed
    seed: Optional[int]
    # The value used to modulate the logits distribution
    temperature: Optional[float]
    # The number of highest-probability vocabulary tokens to keep for top-k filtering
    top_k: Optional[int]
    # If set to < 1, only the smallest set of most probable tokens with probabilities
    # that add up to `top_p` or higher are kept for generation
    top_p: Optional[float]
    # Truncate input tokens to the given size
    truncate: Optional[int]
    # Typical decoding mass
    # See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
    typical_p: Optional[float]
    # Generate `best_of` sequences and return the one with the highest token logprobs
    best_of: Optional[int]
    # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
    watermark: bool
    # Get generation details
    details: bool
    # Get decoder input token logprobs and ids
    decoder_input_details: bool
    # Return the N most likely tokens at each step
    top_n_tokens: Optional[int]
    # Grammar to use for generation
    grammar: Optional[Grammar]


class Request:
    # Prompt
    inputs: str
    # Generation parameters
    parameters: Optional[Parameters]
    # Whether to stream output tokens
    stream: bool


# Decoder input tokens
class InputToken:
    # Token ID from the model tokenizer
    id: int
    # Token text
    text: str
    # Logprob
    # Optional since the logprob of the first token cannot be computed
    logprob: Optional[float]


# Generated tokens
class Token:
    # Token ID from the model tokenizer
    id: int
    # Token text
    text: str
    # Logprob
    logprob: Optional[float]
    # Is the token a special token
    # Can be used to ignore tokens when concatenating
    special: bool


# Generation finish reason
class FinishReason(Enum):
    # Number of generated tokens == `max_new_tokens`
    Length = "length"
    # The model generated its end-of-sequence token
    EndOfSequenceToken = "eos_token"
    # The model generated a text included in `stop_sequences`
    StopSequence = "stop_sequence"


# Additional sequences when using the `best_of` parameter
class BestOfSequence:
    # Generated text
    generated_text: str
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]
    # Decoder input tokens, empty if decoder_input_details is False
    prefill: List[InputToken]
    # Generated tokens
    tokens: List[Token]
    # Most likely tokens
    top_tokens: Optional[List[List[Token]]]


# `generate` details
class Details:
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]
    # Decoder input tokens, empty if decoder_input_details is False
    prefill: List[InputToken]
    # Generated tokens
    tokens: List[Token]
    # Most likely tokens
    top_tokens: Optional[List[List[Token]]]
    # Additional sequences when using the `best_of` parameter
    best_of_sequences: Optional[List[BestOfSequence]]


# `generate` return value
class Response:
    # Generated text
    generated_text: str
    # Generation details
    details: Details


# `generate_stream` details
class StreamDetails:
    # Generation finish reason
    finish_reason: FinishReason
    # Number of generated tokens
    generated_tokens: int
    # Sampling seed if sampling was activated
    seed: Optional[int]


# `generate_stream` return value
class StreamResponse:
    # Generated token
    token: Token
    # Most likely tokens
    top_tokens: Optional[List[Token]]
    # Complete generated text
    # Only available when the generation is finished
    generated_text: Optional[str]
    # Generation details
    # Only available when the generation is finished
    details: Optional[StreamDetails]


# Inference API currently deployed model
class DeployedModel:
    model_id: str
    sha: str
```
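As an illustration of the types above, the `Response` returned by `generate` carries a `Details` object with the finish reason and the generated `Token` list. A sketch — the endpoint URL is a placeholder, and it assumes the server populates these fields as documented:
```python
from text_generation import Client

client = Client("https://YOUR_ENDPOINT.endpoints.huggingface.cloud")
response = client.generate("Why is the sky blue?", max_new_tokens=20)

# Inspect the Details object attached to the Response
print(response.details.finish_reason)     # e.g. FinishReason.Length
print(response.details.generated_tokens)  # number of tokens produced
for token in response.details.tokens:     # per-token id, text, and logprob
    print(token.id, repr(token.text), token.logprob)
```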