diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py
index 7806023f..d4c36b0e 100644
--- a/examples/audio_transcribe.py
+++ b/examples/audio_transcribe.py
@@ -50,6 +50,15 @@
 except sr.RequestError as e:
     print("Could not request results from Wit.ai service; {0}".format(e))
 
+# recognize speech using the new Wit.ai
+WIT_AI_KEY_NEW = "INSERT WIT.AI API KEY HERE"  # Wit.ai keys are 32-character uppercase alphanumeric strings
+try:
+    print("Wit.ai thinks you said " + r.recognize_wit_new(audio, key=WIT_AI_KEY_NEW, api="dictation"))  # or api="speech"
+except sr.UnknownValueError:
+    print("Wit.ai could not understand audio")
+except sr.RequestError as e:
+    print("Could not request results from Wit.ai service; {0}".format(e))
+
 # recognize speech using Microsoft Azure Speech
 AZURE_SPEECH_KEY = "INSERT AZURE SPEECH API KEY HERE"  # Microsoft Speech API keys 32-character lowercase hexadecimal strings
 try:
diff --git a/examples/extended_results.py b/examples/extended_results.py
index 599c67f2..b1d2168f 100644
--- a/examples/extended_results.py
+++ b/examples/extended_results.py
@@ -56,6 +56,16 @@
 except sr.RequestError as e:
     print("Could not request results from Wit.ai service; {0}".format(e))
 
+# recognize speech using the new Wit.ai
+WIT_AI_KEY_NEW = "INSERT WIT.AI API KEY HERE"  # Wit.ai keys are 32-character uppercase alphanumeric strings
+try:
+    print("Wit.ai recognition results:")
+    pprint(r.recognize_wit_new(audio, key=WIT_AI_KEY_NEW, show_all=True, api="dictation"))  # or api="speech"
+except sr.UnknownValueError:
+    print("Wit.ai could not understand audio")
+except sr.RequestError as e:
+    print("Could not request results from Wit.ai service; {0}".format(e))
+
 # recognize speech using Microsoft Bing Voice Recognition
 BING_KEY = "INSERT BING API KEY HERE"  # Microsoft Bing Voice Recognition API keys 32-character lowercase hexadecimal strings
 try:
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 8e68c30d..acb261e6 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -19,6 +19,7 @@
 import tempfile
 import threading
 import time
+import re
 import uuid
 import wave
 from urllib.error import HTTPError, URLError
@@ -763,7 +764,7 @@ def recognize_wit(self, audio_data, key, show_all=False):
             convert_rate=None if audio_data.sample_rate >= 8000 else 8000,  # audio samples must be at least 8 kHz
             convert_width=2  # audio samples should be 16-bit
         )
-        url = "https://api.wit.ai/speech?v=20170307"
+        url = "https://api.wit.ai/speech?v=20210926"  # The last version of Wit.AI API that doesn't return multiple json responses (and it's not deprecated)
         request = Request(url, data=wav_data, headers={"Authorization": "Bearer {}".format(key), "Content-Type": "audio/wav"})
         try:
             response = urlopen(request, timeout=self.operation_timeout)
@@ -776,8 +777,57 @@
 
         # return results
         if show_all: return result
-        if "_text" not in result or result["_text"] is None: raise UnknownValueError()
-        return result["_text"]
+        if "text" not in result or result["text"] is None: raise UnknownValueError()
+        return result["text"]
+
+    def recognize_wit_new(self, audio_data, key, show_all=False, api="dictation"):
+        """
+        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Wit.ai API.
+
+        The Wit.ai API key is specified by ``key``. Unfortunately, these are not available without `signing up for an account <https://wit.ai/>`__ and creating an app. You will need to add at least one intent to the app before you can see the API key, though the actual intent settings don't matter.
+
+        To get the API key for a Wit.ai app, go to the app's overview page, go to the section titled "Make an API request", and look for something along the lines of ``Authorization: Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX``; ``XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`` is the API key. Wit.ai API keys are 32-character uppercase alphanumeric strings.
+
+        The recognition language is configured in the Wit.ai app settings.
+
+        Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response <https://wit.ai/docs/http>`__ as a JSON dictionary.
+
+        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
+
+        Two possible strings can be passed as the ``api`` argument: ``"dictation"`` (the default) and ``"speech"``. A version can be appended
+        (e.g. ``speech?v=20240304``); otherwise Wit.ai will use the latest automatically.
+        """
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+        assert isinstance(key, str), "``key`` must be a string"
+        assert isinstance(api, str), "``api`` must be a string"
+
+        wav_data = audio_data.get_wav_data(
+            convert_rate=None if audio_data.sample_rate >= 8000 else 8000,  # audio samples must be at least 8 kHz
+            convert_width=2  # audio samples should be 16-bit
+        )
+
+        url = "https://api.wit.ai/" + api
+        request = Request(url, data=wav_data, headers={"Authorization": "Bearer {}".format(key), "Content-Type": "audio/wav"})
+        try:
+            response = urlopen(request, timeout=self.operation_timeout)
+        except HTTPError as e:
+            raise RequestError("recognition request failed: {}".format(e.reason))
+        except URLError as e:
+            raise RequestError("recognition connection failed: {}".format(e.reason))
+
+        response_text = response.read().decode("utf-8")
+        concat_json = re.sub("\n}\r\n{\n", "\n},\n{\n", response_text)
+        concat_json_str = f"[{concat_json}]"
+        results = json.loads(concat_json_str)
+
+        # return results
+        if show_all: return results
+        for result in results:
+            if result["type"] == "FINAL_TRANSCRIPTION":
+                if "text" not in result or result["text"] is None or result["text"] == '': raise UnknownValueError()
+                return result["text"]
+
+        return None  # If you reach here, there is a problem with the API response
 
     def recognize_azure(self, audio_data, key, language="en-US", profanity="masked", location="westus", show_all=False):
         """