# Streamlit Mic Recorder
Streamlit component that allows you to record mono audio from the user's microphone, and/or perform speech recognition
directly.
## Installation instructions
```sh
pip install streamlit-mic-recorder
```
## Usage instructions
Two functions are provided (with the same front-end):
### 1. Mic Recorder
```python
from streamlit_mic_recorder import mic_recorder
audio = mic_recorder(
start_prompt="Start recording",
stop_prompt="Stop recording",
just_once=False,
use_container_width=False,
callback=None,
args=(),
kwargs={},
key=None
)
```
Renders a button. Click to start recording, click to stop. Returns None or a dictionary with the following structure:
```python
{
"bytes": audio_bytes, # wav audio bytes mono signal, can be processed directly by st.audio
"sample_rate": sample_rate, # depends on your browser's audio configuration
"sample_width": sample_width, # 2
"id": id # A unique timestamp identifier of the audio
}
```
sample_rate and sample_width are provided in case you need them for further audio processing.
Arguments:
- 'start/stop_prompt', the prompts appearing on the button depending on its recording state.
- 'just_once' determines if the widget returns the audio only once just after it has been recorded (and then None), or
on every rerun of the app. Useful to avoid reprocessing the same audio twice.
- 'use_container_width' just like for st.button, determines if the button fills its container width or not.
- 'callback': an optional callback being called when a new audio is received
- 'args/kwargs': optional args and kwargs passed to the callback when triggered
Remark:
When using a key for the widget, due to how streamlit's component API works, the associated state variable will only
contain the raw unprocessed output from the React frontend, which is not very practical.
For convenience, I added a special state variable to be able to access the output in the expected format (the dictionary
described above) more easily. If `key` is the key you gave to the widget, you can access the properly formatted output
via `key+'_output'` in the session state.
Here is an example on how it can be used within a callback:
```python
from streamlit_mic_recorder import mic_recorder
import streamlit as st
def callback():
if st.session_state.my_recorder_output:
audio_bytes = st.session_state.my_recorder_output['bytes']
st.audio(audio_bytes)
mic_recorder(key='my_recorder', callback=callback)
```
### 2. Speech recognition with Google API
```python
from streamlit_mic_recorder import speech_to_text
text = speech_to_text(
language='en',
start_prompt="Start recording",
stop_prompt="Stop recording",
just_once=False,
use_container_width=False,
callback=None,
args=(),
kwargs={},
key=None
)
```
Renders a button. Click to start recording, click to stop. Returns None or a text transcription of the recorded speech
in the chosen language.
Similarly to the mic_recorder function, you can pass a callback that will trigger when a new text transcription is
received, and access this transcription directly in the session state by adding an '_output' suffix to the key you chose
for the widget.
```python
import streamlit as st
from streamlit_mic_recorder import speech_to_text
def callback():
if st.session_state.my_stt_output:
st.write(st.session_state.my_stt_output)
speech_to_text(key='my_stt', callback=callback)
```
## Example
```python
import streamlit as st
from streamlit_mic_recorder import mic_recorder, speech_to_text
state = st.session_state
if 'text_received' not in state:
state.text_received = []
c1, c2 = st.columns(2)
with c1:
st.write("Convert speech to text:")
with c2:
text = speech_to_text(language='en', use_container_width=True, just_once=True, key='STT')
if text:
state.text_received.append(text)
for text in state.text_received:
st.text(text)
st.write("Record your voice, and play the recorded audio:")
audio = mic_recorder(start_prompt="⏺️", stop_prompt="⏹️", key='recorder')
if audio:
st.audio(audio['bytes'])
```
## Using it with OpenAI Whisper API
For those interested in using the mic recorder component with Whisper here is the script I'm using, working just fine
for me.
```python
# whisper.py
from streamlit_mic_recorder import mic_recorder
import streamlit as st
import io
from openai import OpenAI
import dotenv
import os
def whisper_stt(openai_api_key=None, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
if not 'openai_client' in st.session_state:
dotenv.load_dotenv()
st.session_state.openai_client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))
if not '_last_speech_to_text_transcript_id' in st.session_state:
st.session_state._last_speech_to_text_transcript_id = 0
if not '_last_speech_to_text_transcript' in st.session_state:
st.session_state._last_speech_to_text_transcript = None
if key and not key + '_output' in st.session_state:
st.session_state[key + '_output'] = None
audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
use_container_width=use_container_width, key=key)
new_output = False
if audio is None:
output = None
else:
id = audio['id']
new_output = (id > st.session_state._last_speech_to_text_transcript_id)
if new_output:
output = None
st.session_state._last_speech_to_text_transcript_id = id
audio_bio = io.BytesIO(audio['bytes'])
audio_bio.name = 'audio.mp3'
success = False
err = 0
while not success and err < 3: # Retry up to 3 times in case of OpenAI server error.
try:
transcript = st.session_state.openai_client.audio.transcriptions.create(
model="whisper-1",
file=audio_bio,
language=language
)
except Exception as e:
print(str(e)) # log the exception in the terminal
err += 1
else:
success = True
output = transcript.text
st.session_state._last_speech_to_text_transcript = output
elif not just_once:
output = st.session_state._last_speech_to_text_transcript
else:
output = None
if key:
st.session_state[key + '_output'] = output
if new_output and callback:
callback(*args, **(kwargs or {}))
return output
```
Usage:
```python
import streamlit as st
from whisper import whisper_stt
text = whisper_stt(
    openai_api_key="<your_api_key>", language='en')  # If you don't pass an API key, the function will attempt to load a .env file in the current directory and retrieve it from the environment variable 'OPENAI_API_KEY'.
if text:
st.write(text)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/B4PT0R/streamlit-mic-recorder",
"name": "streamlit-mic-recorder",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.7",
"maintainer_email": "",
"keywords": "",
"author": "Baptiste Ferrand",
"author_email": "bferrand.math@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/97/1b/8dc0c547691abb4f98ce42da7767f49ee6e3257a576c7a94e7941003d5dc/streamlit_mic_recorder-0.0.8.tar.gz",
"platform": null,
"description": "# Streamlit Mic Recorder\n\nStreamlit component that allows to record mono audio from the user's microphone, and/or perform speech recognition\ndirectly.\n\n## Installation instructions\n\n```sh\npip install streamlit-mic-recorder\n```\n\n## Usage instructions\n\nTwo functions are provided (with the same front-end):\n\n### 1. Mic Recorder\n\n```python\nfrom streamlit_mic_recorder import mic_recorder\naudio = mic_recorder(\n start_prompt=\"Start recording\",\n stop_prompt=\"Stop recording\",\n just_once=False,\n use_container_width=False,\n callback=None,\n args=(),\n kwargs={},\n key=None\n)\n```\n\nRenders a button. Click to start recording, click to stop. Returns None or a dictionary with the following structure:\n\n```python\n{\n \"bytes\": audio_bytes, # wav audio bytes mono signal, can be processed directly by st.audio\n \"sample_rate\": sample_rate, # depends on your browser's audio configuration\n \"sample_width\": sample_width, # 2\n \"id\": id # A unique timestamp identifier of the audio\n}\n```\n\nsample_rate and sample_width are provided in case you need them for further audio processing.\n\nArguments:\n\n- 'start/stop_prompt', the prompts appearing on the button depending on its recording state.\n- 'just_once' determines if the widget returns the audio only once just after it has been recorded (and then None), or\n on every rerun of the app. 
Useful to avoid reprocessing the same audio twice.\n- 'use_container_width' just like for st.button, determines if the button fills its container width or not.\n- 'callback': an optional callback being called when a new audio is received\n- 'args/kwargs': optional args and kwargs passed to the callback when triggered\n\nRemark:\nWhen using a key for the widget, due to how streamlit's component API works, the associated state variable will only\ncontain the raw unprocessed output from the React frontend, which was not very practical.\nFor convenience, I added a special state variable to be able to access the output in the expected format (the dictionary\ndescribed above) more easily. If `key` is the key you gave to the widget, you can acces the properly formatted output\nvia `key+'_output'` in the session state.\nHere is an example on how it can be used within a callback:\n\n```python\nfrom streamlit_mic_recorder import mic_recorder\nimport streamlit as st\n\ndef callback():\n if st.session_state.my_recorder_output:\n audio_bytes = st.session_state.my_recorder_output['bytes']\n st.audio(audio_bytes)\n\n\nmic_recorder(key='my_recorder', callback=callback)\n```\n\n### 2. Speech recognition with Google API\n\n```python\nfrom streamlit_mic_recorder import speech_to_text\ntext = speech_to_text(\n language='en',\n start_prompt=\"Start recording\",\n stop_prompt=\"Stop recording\",\n just_once=False,\n use_container_width=False,\n callback=None,\n args=(),\n kwargs={},\n key=None\n)\n```\n\nRenders a button. Click to start recording, click to stop. 
Returns None or a text transcription of the recorded speech\nin the chosen language.\nSimilarly to the mic_recorder function, you can pass a callback that will trigger when a new text transcription is\nreceived, and access this transcription directly in the session state by adding an '_output' suffix to the key you chose\nfor the widget.\n\n```python\nimport streamlit as st\nfrom streamlit_mic_recorder import speech_to_text\ndef callback():\n if st.session_state.my_stt_output:\n st.write(st.session_state.my_stt_output)\n\n\nspeech_to_text(key='my_stt', callback=callback)\n```\n\n## Example\n\n```python\nimport streamlit as st\nfrom streamlit_mic_recorder import mic_recorder, speech_to_text\n\nstate = st.session_state\n\nif 'text_received' not in state:\n state.text_received = []\n\nc1, c2 = st.columns(2)\nwith c1:\n st.write(\"Convert speech to text:\")\nwith c2:\n text = speech_to_text(language='en', use_container_width=True, just_once=True, key='STT')\n\nif text:\n state.text_received.append(text)\n\nfor text in state.text_received:\n st.text(text)\n\nst.write(\"Record your voice, and play the recorded audio:\")\naudio = mic_recorder(start_prompt=\"\u23fa\ufe0f\", stop_prompt=\"\u23f9\ufe0f\", key='recorder')\n\nif audio:\n st.audio(audio['bytes'])\n```\n\n## Using it with OpenAI Whisper API\n\nFor those interested in using the mic recorder component with Whisper here is the script I'm using, working just fine\nfor me.\n\n```python\n# whisper.py\n\nfrom streamlit_mic_recorder import mic_recorder\nimport streamlit as st\nimport io\nfrom openai import OpenAI\nimport dotenv\nimport os\n\n\ndef whisper_stt(openai_api_key=None, start_prompt=\"Start recording\", stop_prompt=\"Stop recording\", just_once=False,\n use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):\n if not 'openai_client' in st.session_state:\n dotenv.load_dotenv()\n st.session_state.openai_client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))\n if 
not '_last_speech_to_text_transcript_id' in st.session_state:\n st.session_state._last_speech_to_text_transcript_id = 0\n if not '_last_speech_to_text_transcript' in st.session_state:\n st.session_state._last_speech_to_text_transcript = None\n if key and not key + '_output' in st.session_state:\n st.session_state[key + '_output'] = None\n audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,\n use_container_width=use_container_width, key=key)\n new_output = False\n if audio is None:\n output = None\n else:\n id = audio['id']\n new_output = (id > st.session_state._last_speech_to_text_transcript_id)\n if new_output:\n output = None\n st.session_state._last_speech_to_text_transcript_id = id\n audio_bio = io.BytesIO(audio['bytes'])\n audio_bio.name = 'audio.mp3'\n success = False\n err = 0\n while not success and err < 3: # Retry up to 3 times in case of OpenAI server error.\n try:\n transcript = st.session_state.openai_client.audio.transcriptions.create(\n model=\"whisper-1\",\n file=audio_bio,\n language=language\n )\n except Exception as e:\n print(str(e)) # log the exception in the terminal\n err += 1\n else:\n success = True\n output = transcript.text\n st.session_state._last_speech_to_text_transcript = output\n elif not just_once:\n output = st.session_state._last_speech_to_text_transcript\n else:\n output = None\n\n if key:\n st.session_state[key + '_output'] = output\n if new_output and callback:\n callback(*args, **(kwargs or {}))\n return output\n```\n\nUsage:\n\n```python\nimport streamlit as st\nfrom whisper import whisper_stt\n\ntext = whisper_stt(\n openai_api_key=\"<your_api_key>\", language = 'en') # If you don't pass an API key, the function will attempt to load a .env file in the current directory and retrieve it as an environment variable : 'OPENAI_API_KEY'.\nif text:\n st.write(text)\n```\n",
"bugtrack_url": null,
"license": "",
"summary": "Streamlit component that allows to record mono audio from the user's microphone, and/or perform speech recognition directly.",
"version": "0.0.8",
"project_urls": {
"Homepage": "https://github.com/B4PT0R/streamlit-mic-recorder"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "29452883ea5ac05aab014399f996255d7a84fc764c39b892fe68d0835135e641",
"md5": "4cb7d724b38066baf9c72474b1134eb6",
"sha256": "acbd5ed868dba083d567341c85f1740ae42bd03259c2780dce7f69d5bc109ac8"
},
"downloads": -1,
"filename": "streamlit_mic_recorder-0.0.8-py3-none-any.whl",
"has_sig": false,
"md5_digest": "4cb7d724b38066baf9c72474b1134eb6",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.7",
"size": 2246945,
"upload_time": "2024-03-05T19:14:01",
"upload_time_iso_8601": "2024-03-05T19:14:01.239477Z",
"url": "https://files.pythonhosted.org/packages/29/45/2883ea5ac05aab014399f996255d7a84fc764c39b892fe68d0835135e641/streamlit_mic_recorder-0.0.8-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "971b8dc0c547691abb4f98ce42da7767f49ee6e3257a576c7a94e7941003d5dc",
"md5": "4ab9d081c16fec19c1f368c4081f869e",
"sha256": "5a29a98f3bd1582f9d5d90911ef498b32244863d646759c2f5ceec515befb6cf"
},
"downloads": -1,
"filename": "streamlit_mic_recorder-0.0.8.tar.gz",
"has_sig": false,
"md5_digest": "4ab9d081c16fec19c1f368c4081f869e",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.7",
"size": 473935,
"upload_time": "2024-03-05T19:14:04",
"upload_time_iso_8601": "2024-03-05T19:14:04.457183Z",
"url": "https://files.pythonhosted.org/packages/97/1b/8dc0c547691abb4f98ce42da7767f49ee6e3257a576c7a94e7941003d5dc/streamlit_mic_recorder-0.0.8.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-03-05 19:14:04",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "B4PT0R",
"github_project": "streamlit-mic-recorder",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "streamlit-mic-recorder"
}