Repository: realpython/python-speech-recognition Branch: master Commit: 0c07b810808c Files: 4 Total size: 7.7 KB Directory structure: gitextract_ancsn7q2/ ├── .gitignore ├── LICENSE ├── README.md └── guessing_game.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Real Python Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Speech Recognition with Python This repository contains resources from [The Ultimate Guide to Speech Recognition with Python](https://realpython.com/python-speech-recognition/) tutorial on Real Python. Audio files for the examples in the *Working With Audio Files* section of the post can be found in the `audio_files` directory. To download them, use the green "Clone or download" button at the top right corner of this page. The `guessing_game.py` file contains the full source code for the "Guess a Word" game example. > **NOTE**: You will need to install the [SpeechRecognition](https://github.com/Uberi/speech_recognition) and [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) packages in order to run the example. Please see the [tutorial](https://realpython.com/python-speech-recognition/) for step-by-step instructions. 
"""Voice-driven "Guess a Word" game built on the SpeechRecognition package.

Running this file as a script starts the game. The transcription helper can
also be used on its own, e.g. to check a SpeechRecognition/PyAudio install
from a REPL. Import it by module name, without the ``.py`` suffix:

    import speech_recognition as sr
    from guessing_game import recognize_speech_from_mic
    recognize_speech_from_mic(sr.Recognizer(), sr.Microphone())
"""

import random
import time

import speech_recognition as sr

# Game configuration: the candidate words, the maximum number of guesses,
# and how many times the player is re-prompted for a single guess when
# nothing could be transcribed.
WORDS = ["apple", "banana", "grape", "orange", "mango", "lemon"]
NUM_GUESSES = 3
PROMPT_LIMIT = 5


def recognize_speech_from_mic(recognizer, microphone):
    """Transcribe speech recorded from `microphone`.

    Args:
        recognizer: an `sr.Recognizer` instance.
        microphone: an `sr.Microphone` instance.

    Returns:
        A dictionary with three keys:
        "success": a boolean indicating whether or not the API request was
                   successful
        "error":   `None` if no error occurred, otherwise a string
                   containing an error message if the API could not be
                   reached or speech was unrecognizable
        "transcription": `None` if speech could not be transcribed,
                   otherwise a string containing the transcribed text

    Raises:
        TypeError: if either argument is not of the expected type.
    """
    # check that recognizer and microphone arguments are appropriate type
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")

    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # adjust the recognizer sensitivity to ambient noise and record audio
    # from the microphone
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    # set up the response object
    response = {
        "success": True,
        "error": None,
        "transcription": None,
    }

    # try recognizing the speech in the recording; if a RequestError or
    # UnknownValueError is caught, update the response object accordingly
    try:
        response["transcription"] = recognizer.recognize_google(audio)
    except sr.RequestError:
        # API was unreachable or unresponsive
        response["success"] = False
        response["error"] = "API unavailable"
    except sr.UnknownValueError:
        # speech was unintelligible; the request itself still succeeded,
        # so "success" stays True
        response["error"] = "Unable to recognize speech"

    return response


def _prompt_for_guess(recognizer, microphone, guess_number):
    """Ask the player for guess `guess_number`, re-prompting if needed.

    Listens up to PROMPT_LIMIT times. Stops early as soon as a transcription
    is returned or the API request fails; otherwise tells the player the
    guess was not caught and listens again.

    Returns the response dictionary of the last listening attempt.
    """
    for _ in range(PROMPT_LIMIT):
        print('Guess {}. Speak!'.format(guess_number))
        guess = recognize_speech_from_mic(recognizer, microphone)
        if guess["transcription"]:
            break
        if not guess["success"]:
            break
        print("I didn't catch that. What did you say?\n")
    return guess


def main():
    """Play one round of the word-guessing game on the console."""
    # create recognizer and mic instances
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    # get a random word from the list
    word = random.choice(WORDS)

    # format the instructions string
    instructions = (
        "I'm thinking of one of these words:\n"
        "{words}\n"
        "You have {n} tries to guess which one.\n"
    ).format(words=', '.join(WORDS), n=NUM_GUESSES)

    # show instructions and wait 3 seconds before starting the game
    print(instructions)
    time.sleep(3)

    for i in range(NUM_GUESSES):
        guess = _prompt_for_guess(recognizer, microphone, i + 1)

        # if there was an error, stop the game
        if guess["error"]:
            print("ERROR: {}".format(guess["error"]))
            break

        # show the user the transcription
        print("You said: {}".format(guess["transcription"]))

        # determine if guess is correct and if any attempts remain
        guess_is_correct = guess["transcription"].lower() == word.lower()
        user_has_more_attempts = i < NUM_GUESSES - 1

        # determine if the user has won the game
        # if not, repeat the loop if user has more attempts
        # if no attempts left, the user loses the game
        if guess_is_correct:
            # plain literal: the message has no placeholder to format
            print("Correct! You win!")
            break
        elif user_has_more_attempts:
            print("Incorrect. Try again.\n")
        else:
            print("Sorry, you lose!\nI was thinking of '{}'.".format(word))
            break


if __name__ == "__main__":
    main()