# gjdutils
A collection of useful utility functions (strings, dates, data science/AI, web development, types, etc).
This is a smorgasbord of utility functions, patterns and convenient wrappers that I've found myself rewriting and reusing across multiple projects, gathered into one place.
Probably many of these exist elsewhere in libraries - if so, please let me know, because I'd probably rather use something cleaner and better-maintained.
Caveat emptor: some of these are old, and haven't been tested in a while.
## Highlights
### Audio: convenient microphone voice recognition with Whisper, and text-to-speech using ElevenLabs
```python
from gjdutils.outloud_text_to_speech import outloud
from gjdutils.voice_speechrecognition import recognise_speech
# Record speech and play it back in a different voice
text = recognise_speech("Say something!") # Records from microphone until you press ENTER
outloud(text, prog="elevenlabs", mp3_filen="recording.mp3", should_play=True) # Plays back what you said
```
### Run shell commands clearly & conveniently
```python
from gjdutils.cmd import run_cmd
from pathlib import Path
# Get Python version and capture the output
retcode, stdout, extra = run_cmd(
"python --version", # you can also provide as a list-of-strings
before_msg="Checking Python version...",
fatal_msg="Some problem running Python", # will show up in red, sys.exit(1)
verbose=0, # Run silently unless there's an error
**{"timeout": 5} # Pass additional arguments to subprocess
)
print(f"Python version: {stdout}") # e.g. "Python 3.9.7"
print(f"Ran command: {extra['cmd_str']}") # plus lots of other stuff stored
```
### Environment variables with type validation and helpful error messages
```bash
$ python -m gjdutils.scripts.export_envs .env
```
```python
from gjdutils.env import get_env_var
api_key = get_env_var("OPENAI_API_KEY") # Ensures non-empty by default
num_workers = get_env_var("NUM_WORKERS", typ=int) # Validates and converts to int
```
### Strict Jinja templating that catches both undefined and unused variables
```python
from gjdutils.strings import jinja_render
template = "{{name}} is {{age}} years old"
context = {"name": "Bob", "unused": True}
text = jinja_render(template, context) # will fail both because `age` is missing and `unused` is superfluous
```
### Set random seeds across Python, NumPy, PyTorch for reproducibility
```python
from gjdutils.rand import set_seeds
set_seeds(42) # Sets seeds for random, numpy, torch if available
```
### Call Claude/OpenAI APIs with function calling, image analysis & JSON support
```python
from gjdutils.llms_claude import call_claude_gpt
from gjdutils.llm_utils import image_to_base64
response, extra = call_claude_gpt(
"What's in this image?",
image_filens=["path/to/image.jpg"],
temperature=0.001
)
```
### Translate text between languages with Google Translate
```python
from gjdutils.google_translate import translate_text, detect_language
# First detect the language
text = "Bonjour le monde"
lang, confidence = detect_language(text) # Returns ("fr", 0.98)
# Then translate to English
english_text, _ = translate_text(text, lang_src_code=lang, lang_tgt_code="en") # Returns "Hello world"
```
### Calculate text similarity using longest common substring analysis
```python
from gjdutils.strings import calc_proportion_longest_common_substring
similarity = calc_proportion_longest_common_substring(["hello world", "hello there"]) # Returns ~0.45 for "hello" match
```
### Measure data uniformity & distribution with simple proportion analysis
```python
from gjdutils.dsci import calc_proportion_identical
uniformity = calc_proportion_identical(['a', 'a', 'a', 'b']) # Returns 0.75 (75% are 'a')
```
### Generate deterministic cache keys for complex Python objects
```python
from gjdutils.caching import generate_mckey
cache_key = generate_mckey("myprefix", {"a": 100, "b": "foo"}) # Creates deterministic cache key
```
### Generate consistent hashes for caching/comparison
```python
from gjdutils.hashing import hash_readable
# Same input always produces same hash, even across sessions
config = {"foo": "bar"}
cache_key = hash_readable(config) # e.g. "8f4e5d3..."
```
### Pretty-print and process HTML with customizable indentation
```python
from gjdutils.html import prettify_html
# Prettify a string of HTML (also useful for testing two HTML strings are identical without caring about whitespace)
html = "<div><p>Hello</p><p>World</p></div>" # Also works with BeautifulSoup elements
pretty = prettify_html(html, indent=4) # Custom indentation
print(pretty)
# <div>
#     <p>Hello</p>
#     <p>World</p>
# </div>
```
### Debug by printing local variables, excluding noise
```python
from gjdutils.misc import print_locals
def my_function(x, y):
    z = x + y
    some_func = lambda x: x * 2
    _internal = "temp"
    # Print all local vars except functions and _prefixed
    print_locals(locals(), ignore_functions=True, ignore_underscores=True)
    # Output: {'x': 1, 'y': 2, 'z': 3}
```
### Generate readable random IDs (no confusing characters)
```python
from gjdutils.rand import gen_readable_rand_id
# Generate random ID without confusing chars (0/O, 1/I/l, etc)
uid = gen_readable_rand_id(n=7) # e.g. "k8m5p3h"
```
----
## Installation
```bash
pip install gjdutils
```
For optional features:
```bash
pip install "gjdutils[dt]" # Date/time utilities
pip install "gjdutils[llm]" # AI/LLM integrations
pip install "gjdutils[audio_lang]" # Speech/translation, language-related
pip install "gjdutils[html_web]" # Web scraping
pip install "gjdutils[dev]" # Development tools (for tweaking `gjdutils` itself, e.g. pytest)
# Install all optional dependencies at once (except `dev`, which is used for developing `gjdutils` itself)
pip install "gjdutils[all_no_dev]"
```
### Development Setup
If you're developing `gjdutils` itself, install in editable mode:
```bash
# (Assumes you have already set up your virtualenv)
# from the gjdutils root directory
pip install -e ".[all_no_dev, dev]" # Install all optional dependencies
```
Or if you're feeling lazy and can't remember that command, just use:
```bash
python -m gjdutils.scripts.install_all_dev_dependencies
```
### Adding to requirements.txt
To add `gjdutils` to your `requirements.txt` in editable mode, e.g. to install all optional dependencies:
```text
-e "git+https://github.com/gregdetre/gjdutils.git#egg=gjdutils[all_no_dev]"
```
## For gjdutils Developers
### Deployment Process
When you're ready to deploy a new version of `gjdutils`, follow these steps:
- Update the version number in `src/gjdutils/__version__.py`
- Ensure your Git working directory is clean (no uncommitted changes)
- Run the deployment scripts in this order:
```bash
# Test locally first (builds package and runs tests in a fresh venv)
python -m scripts.check_locally
# Deploy to PyPI Test and verify
python -m scripts.deploy_pypitest
python -m scripts.check_pypitest
# Deploy to PyPI Production and verify
python -m scripts.deploy_pypiprod
python -m scripts.check_pypiprod
# Or use the all-in-one deployment script that runs all of the above
python -m scripts.deploy_all
```
Each script will:
- Verify prerequisites (git status, version numbers, etc.)
- Ask for confirmation before making changes
- Clean build directories
- Build and test the package
- Upload to the appropriate PyPI repository
- Verify the upload was successful
Note: The deployment scripts require you to have appropriate PyPI credentials configured.
### Setting Up Environment Variables
If you need to set environment variables for development or testing, you can use the provided `export_envs.sh` script:
1. Create a `.env` file with your environment variables:
```bash
# .env example
OPENAI_API_KEY=your-key-here
NUM_WORKERS=4
```
2. Source the script to load the variables:
```bash
source scripts/export_envs.sh .env
```
This will export all non-commented variables from your `.env` file into your current shell session. You can verify it worked by checking any variable:
```bash
echo $OPENAI_API_KEY
```
Raw data
{
"_id": null,
"home_page": null,
"name": "GJDutils",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": null,
"keywords": "ai, data science, dates, llm, strings, utilities, web development",
"author": null,
"author_email": "Greg Detre <greg@gregdetre.com>",
"download_url": "https://files.pythonhosted.org/packages/41/0a/03497427db56a9abfc12422463915c6825ae8326187779794043894b73ab/gjdutils-0.3.9.tar.gz",
"platform": null,
"description": "# gjdutils\n\nA collection of useful utility functions (strings, dates, data science/AI, web development, types, etc).\n\nThis is a smorgasbord of utility functions, patterns and convenient wrappers that I've found myself rewriting and reusing across multiple projects, gathered into one place.\n\nProbably many of these exist elsewhere in libraries - if so, please let me know, because I'd probably rather use something cleaner and better-maintained.\n\nCaveat emptor: some of these are old, and haven't been tested in a while.\n\n\n## Highlights\n\n### Audio: convenient microphone voice recognition with Whisper, and text-to-speech using ElevenLabs\n```python\nfrom gjdutils.outloud_text_to_speech import outloud\nfrom gjdutils.voice_speechrecognition import recognise_speech\n\n# Record speech and play it back in a different voice\ntext = recognise_speech(\"Say something!\") # Records from microphone until you press ENTER\noutloud(text, prog=\"elevenlabs\", mp3_filen=\"recording.mp3\", should_play=True) # Plays back what you said\n```\n\n\n### Run shell commands clearly & conveniently\n```python\nfrom gjdutils.cmd import run_cmd\nfrom pathlib import Path\n\n# Get Python version and capture the output\nretcode, stdout, extra = run_cmd(\n \"python --version\", # you can also provide as a list-of-strings\n before_msg=\"Checking Python version...\",\n fatal_msg=\"Some problem running Python\", # will show up in red, sys.exit(1)\n verbose=0, # Run silently unless there's an error\n **{\"timeout\": 5} # Pass additional arguments to subprocess\n)\nprint(f\"Python version: {stdout}\") # e.g. 
\"Python 3.9.7\"\nprint(f\"Ran command: {extra['cmd_str']}\") # plus lots of other stuff stored\n```\n\n\n### Environment variables with type validation and helpful error messages\n```bash\n$ python -m gjdutils.scripts.export_envs .env\n```\n\n```python\nfrom gjdutils.env import get_env_var\n\napi_key = get_env_var(\"OPENAI_API_KEY\") # Ensures non-empty by default\nnum_workers = get_env_var(\"NUM_WORKERS\", typ=int) # Validates and converts to int\n```\n\n### Strict Jinja templating that catches both undefined and unused variables\n```python\nfrom gjdutils.strings import jinja_render\n\ntemplate = \"{{name}} is {{age}} years old\"\ncontext = {\"name\": \"Bob\", \"unused\": True}\ntext = jinja_render(template, context) # will fail both because `age` is missing and `unused` is superfluous\n```\n\n\n### Set random seeds across Python, NumPy, PyTorch for reproducibility\n```python\nfrom gjdutils.rand import set_seeds\n\nset_seeds(42) # Sets seeds for random, numpy, torch if available\n```\n\n\n### Call Claude/OpenAI APIs with function calling, image analysis & JSON support\n```python\nfrom gjdutils.llms_claude import call_claude_gpt\nfrom gjdutils.llm_utils import image_to_base64\n\nresponse, extra = call_claude_gpt(\n \"What's in this image?\",\n image_filens=[\"path/to/image.jpg\"],\n temperature=0.001\n)\n```\n\n\n### Translate text between languages with Google Translate\n```python\nfrom gjdutils.google_translate import translate_text, detect_language\n\n# First detect the language\ntext = \"Bonjour le monde\"\nlang, confidence = detect_language(text) # Returns (\"fr\", 0.98)\n\n# Then translate to English\nenglish_text, _ = translate_text(text, lang_src_code=lang, lang_tgt_code=\"en\") # Returns \"Hello world\"\n```\n\n\n### Calculate text similarity using longest common substring analysis\n```python\nfrom gjdutils.strings import calc_proportion_longest_common_substring\nsimilarity = calc_proportion_longest_common_substring([\"hello world\", \"hello there\"]) # 
Returns ~0.45 for \"hello\" match\n```\n\n\n### Measure data uniformity & distribution with simple proportion analysis\n```python\nfrom gjdutils.dsci import calc_proportion_identical\nuniformity = calc_proportion_identical(['a', 'a', 'a', 'b']) # Returns 0.75 (75% are 'a')\n```\n\n\n### Generate deterministic cache keys for complex Python objects\n```python\nfrom gjdutils.caching import generate_mckey\ncache_key = generate_mckey(\"myprefix\", {\"a\": 100, \"b\": \"foo\"}) # Creates deterministic cache key\n```\n\n\n### Generate consistent hashes for caching/comparison\n```python\nfrom gjdutils.hashing import hash_readable\n\n# Same input always produces same hash, even across sessions\nconfig = {\"foo\": \"bar\"}\ncache_key = hash_readable(config) # e.g. \"8f4e5d3...\"\n```\n\n\n### Pretty-print and process HTML with customizable indentation\n```python\nfrom gjdutils.html import prettify_html\n\n# Prettify a string of HTML (also useful for testing two HTML strings are identical without caring about whitespace)\nhtml = \"<div><p>Hello</p><p>World</p></div>\" # Also works with BeautifulSoup elements\npretty = prettify_html(html, indent=4) # Custom indentation\nprint(pretty)\n# <div>\n# <p>Hello</p>\n# <p>World</p>\n# </div>\n```\n\n\n### Debug by printing local variables, excluding noise\n```python\nfrom gjdutils.misc import print_locals\n\ndef my_function(x, y):\n z = x + y\n some_func = lambda x: x * 2\n _internal = \"temp\"\n # Print all local vars except functions and _prefixed\n print_locals(locals(), ignore_functions=True, ignore_underscores=True)\n # Output: {'x': 1, 'y': 2, 'z': 3}\n```\n\n\n### Generate readable random IDs (no confusing characters)\n```python\nfrom gjdutils.rand import gen_readable_rand_id\n\n# Generate random ID without confusing chars (0/O, 1/I/l, etc)\nuid = gen_readable_rand_id(n=7) # e.g. 
\"k8m5p3h\"\n```\n\n----\n\n## Installation\n\n```bash\npip install gjdutils\n```\n\nFor optional features:\n```bash\npip install \"gjdutils[dt]\" # Date/time utilities\npip install \"gjdutils[llm]\" # AI/LLM integrations\npip install \"gjdutils[audio_lang]\" # Speech/translation, language-related\npip install \"gjdutils[html_web]\" # Web scraping\n\npip install \"gjdutils[dev]\" # Development tools (for tweaking `gjdutils` itself, e.g. pytest)\n\n# Install all optional dependencies at once (except `dev`, which is used for developing `gjdutils` itself)\npip install \"gjdutils[all_no_dev]\"\n```\n\n### Development Setup\n\nIf you're developing `gjdutils` itself, install in editable mode:\n```bash\n# (Assumes you have already setup your virtualenv)\n# from the gjdutils root directory\npip install -e \".[all_no_dev, dev]\" # Install all optional dependencies\n```\n\nOr if you're feeling lazy and can't remember that command, just use:\n\n```bash\npython -m gjdutils.scripts.install_all_dev_dependencies\n```\n\n\n### Adding to requirements.txt\n\nTo add to your `requirements.txt` in editable mode, e.g. 
to install all optional dependencies:\n```text\n-e \"git+https://github.com/gregdetre/gjdutils.git#egg=gjdutils[all_no_dev]\"\n```\n\n## For gjdutils Developers\n\n### Deployment Process\n\nWhen you're ready to deploy a new version of `gjdutils`, follow these steps:\n\n- Update the version number in `src/gjdutils/__version__.py`\n- Ensure your Git working directory is clean (no uncommitted changes)\n- Run the deployment scripts in this order:\n\n```bash\n# Test locally first (builds package and runs tests in a fresh venv)\npython -m scripts.check_locally\n\n# Deploy to PyPI Test and verify\npython -m scripts.deploy_pypitest\npython -m scripts.check_pypitest\n\n# Deploy to PyPI Production and verify\npython -m scripts.deploy_pypiprod\npython -m scripts.check_pypiprod\n\n# Or use the all-in-one deployment script that runs all the above\npython -m scripts.deploy_all\n```\n\nEach script will:\n- Verify prerequisites (git status, version numbers, etc.)\n- Ask for confirmation before making changes\n- Clean build directories\n- Build and test the package\n- Upload to the appropriate PyPI repository\n- Verify the upload was successful\n\nNote: The deployment scripts require you to have appropriate PyPI credentials configured.\n\n### Setting Up Environment Variables\n\nIf you need to set environment variables for development or testing, you can use the provided `export_envs.sh` script:\n\n1. Create a `.env` file with your environment variables:\n```bash\n# .env example\nOPENAI_API_KEY=your-key-here\nNUM_WORKERS=4\n```\n\n2. Source the script to load the variables:\n```bash\nsource scripts/export_envs.sh .env\n```\n\nThis will export all non-commented variables from your `.env` file into your current shell session. You can verify it worked by checking any variable:\n```bash\necho $OPENAI_API_KEY\n```\n",
"bugtrack_url": null,
"license": null,
"summary": "A collection of useful utility functions (basics, data science/AI, web development, etc)",
"version": "0.3.9",
"project_urls": {
"Homepage": "https://github.com/gregdetre/gjdutils",
"Repository": "https://github.com/gregdetre/gjdutils"
},
"split_keywords": [
"ai",
" data science",
" dates",
" llm",
" strings",
" utilities",
" web development"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "964012b83acca15694709d718b7d0105f06bc3fafc8bb4fa435c02b380861cf9",
"md5": "569b814bc2c35ab329abc3d181c2f240",
"sha256": "c45035f39965377dfaf8e94559e0eb20bb07559a04403e159828f9ca2e986ddf"
},
"downloads": -1,
"filename": "gjdutils-0.3.9-py3-none-any.whl",
"has_sig": false,
"md5_digest": "569b814bc2c35ab329abc3d181c2f240",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 63298,
"upload_time": "2025-02-21T12:11:28",
"upload_time_iso_8601": "2025-02-21T12:11:28.421964Z",
"url": "https://files.pythonhosted.org/packages/96/40/12b83acca15694709d718b7d0105f06bc3fafc8bb4fa435c02b380861cf9/gjdutils-0.3.9-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "410a03497427db56a9abfc12422463915c6825ae8326187779794043894b73ab",
"md5": "83ff1b00b60b4077603abf2d377e70e1",
"sha256": "70939d672798de620d809b147b9b7a1062ad623d50f018fc2a6f9b815221ad76"
},
"downloads": -1,
"filename": "gjdutils-0.3.9.tar.gz",
"has_sig": false,
"md5_digest": "83ff1b00b60b4077603abf2d377e70e1",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.10",
"size": 56471,
"upload_time": "2025-02-21T12:11:31",
"upload_time_iso_8601": "2025-02-21T12:11:31.055739Z",
"url": "https://files.pythonhosted.org/packages/41/0a/03497427db56a9abfc12422463915c6825ae8326187779794043894b73ab/gjdutils-0.3.9.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-02-21 12:11:31",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "gregdetre",
"github_project": "gjdutils",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "gjdutils"
}