Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Wit.AI API request #750

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/audio_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@
except sr.RequestError as e:
print("Could not request results from Wit.ai service; {0}".format(e))

# Transcribe with the newer Wit.ai endpoints; see <https://wit.ai/docs/http/20240304/#post__dictation_link>
WIT_AI_KEY_NEW = "INSERT WIT.AI API KEY HERE"  # Wit.ai keys are 32-character uppercase alphanumeric strings
try:
    # ``api`` selects the endpoint: "dictation" here, or api="speech"
    wit_new_transcript = r.recognize_wit_new(audio, key=WIT_AI_KEY_NEW, api="dictation")
except sr.UnknownValueError:
    print("Wit.ai could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Wit.ai service; {0}".format(e))
else:
    # Only reached when the request succeeded and produced a transcription.
    print("Wit.ai thinks you said " + wit_new_transcript)

# recognize speech using Microsoft Azure Speech
AZURE_SPEECH_KEY = "INSERT AZURE SPEECH API KEY HERE" # Microsoft Speech API keys 32-character lowercase hexadecimal strings
try:
Expand Down
10 changes: 10 additions & 0 deletions examples/extended_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@
except sr.RequestError as e:
print("Could not request results from Wit.ai service; {0}".format(e))

# Fetch the full result list from the newer Wit.ai endpoints; see <https://wit.ai/docs/http/20240304/#post__dictation_link>
WIT_AI_KEY_NEW = "INSERT WIT.AI API KEY HERE"  # Wit.ai keys are 32-character uppercase alphanumeric strings
try:
    # The header is printed before the request is made, so it appears even if the request fails.
    print("Wit.ai recognition results:")
    # ``api`` selects the endpoint: "dictation" here, or api="speech"
    wit_new_response = r.recognize_wit_new(audio, key=WIT_AI_KEY_NEW, show_all=True, api="dictation")
except sr.UnknownValueError:
    print("Wit.ai could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Wit.ai service; {0}".format(e))
else:
    pprint(wit_new_response)

# recognize speech using Microsoft Bing Voice Recognition
BING_KEY = "INSERT BING API KEY HERE" # Microsoft Bing Voice Recognition API keys 32-character lowercase hexadecimal strings
try:
Expand Down
56 changes: 53 additions & 3 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import tempfile
import threading
import time
import re
import uuid
import wave
from urllib.error import HTTPError, URLError
Expand Down Expand Up @@ -763,7 +764,7 @@ def recognize_wit(self, audio_data, key, show_all=False):
convert_rate=None if audio_data.sample_rate >= 8000 else 8000, # audio samples must be at least 8 kHz
convert_width=2 # audio samples should be 16-bit
)
url = "https://api.wit.ai/speech?v=20170307"
url = "https://api.wit.ai/speech?v=20210926" # The last version of Wit.AI API that doesn't return multiple json responses (and it's not deprecated)
request = Request(url, data=wav_data, headers={"Authorization": "Bearer {}".format(key), "Content-Type": "audio/wav"})
try:
response = urlopen(request, timeout=self.operation_timeout)
Expand All @@ -776,8 +777,57 @@ def recognize_wit(self, audio_data, key, show_all=False):

# return results
if show_all: return result
if "_text" not in result or result["_text"] is None: raise UnknownValueError()
return result["_text"]
if "text" not in result or result["text"] is None: raise UnknownValueError()
return result["text"]

def recognize_wit_new(self, audio_data, key, show_all=False, api="dictation"):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Wit.ai API.

    The Wit.ai API key is specified by ``key``. Unfortunately, these are not available without `signing up for an account <https://wit.ai/>`__ and creating an app. You will need to add at least one intent to the app before you can see the API key, though the actual intent settings don't matter.

    To get the API key for a Wit.ai app, go to the app's overview page, go to the section titled "Make an API request", and look for something along the lines of ``Authorization: Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX``; ``XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`` is the API key. Wit.ai API keys are 32-character uppercase alphanumeric strings.

    The recognition language is configured in the Wit.ai app settings.

    The endpoint is selected by ``api``, which is appended verbatim to ``https://api.wit.ai/``. Two values are intended: ``"dictation"`` (the default) and ``"speech"``. An API version may be appended as a query string (for example ``"speech?v=20240304"``); otherwise Wit.ai uses the latest version automatically.

    The response body is a sequence of concatenated JSON objects. Returns the text of the first object whose ``"type"`` is ``"FINAL_TRANSCRIPTION"`` if ``show_all`` is false (the default). Otherwise, returns the `raw API response <https://wit.ai/docs/http/20240304/#post__dictation_link>`__ as a list of the decoded JSON objects, in the order they were received.

    Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible, i.e. the final transcription is missing or empty. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, if the response is not valid JSON, or if there is no internet connection.
    """
    assert isinstance(audio_data, AudioData), "Data must be audio data"
    assert isinstance(key, str), "``key`` must be a string"
    assert isinstance(api, str), "``api`` must be a string"

    wav_data = audio_data.get_wav_data(
        convert_rate=None if audio_data.sample_rate >= 8000 else 8000,  # audio samples must be at least 8 kHz
        convert_width=2  # audio samples should be 16-bit
    )

    url = "https://api.wit.ai/" + api
    request = Request(url, data=wav_data, headers={"Authorization": "Bearer {}".format(key), "Content-Type": "audio/wav"})
    try:
        response = urlopen(request, timeout=self.operation_timeout)
    except HTTPError as e:
        raise RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise RequestError("recognition connection failed: {}".format(e.reason))
    try:
        response_text = response.read().decode("utf-8")
    finally:
        response.close()  # release the connection even if reading or decoding fails

    # The body holds several JSON objects back to back rather than one JSON document.
    # Decode them one at a time instead of relying on the exact bytes that separate them.
    decoder = json.JSONDecoder()
    results = []
    position, end = 0, len(response_text)
    while True:
        # ``raw_decode`` does not skip leading whitespace, so step over the separators manually
        while position < end and response_text[position] in " \t\r\n":
            position += 1
        if position >= end:
            break
        try:
            result, position = decoder.raw_decode(response_text, position)
        except ValueError as e:
            raise RequestError("recognition request failed: invalid JSON in response: {}".format(e))
        results.append(result)

    # return results
    if show_all: return results
    for result in results:
        # chunks without a "type" field (or that aren't objects at all) are simply skipped
        if isinstance(result, dict) and result.get("type") == "FINAL_TRANSCRIPTION":
            text = result.get("text")
            if text is None or text == "": raise UnknownValueError()
            return text

    # no final transcription in the response: report it as unintelligible rather than returning ``None``
    raise UnknownValueError()

def recognize_azure(self, audio_data, key, language="en-US", profanity="masked", location="westus", show_all=False):
"""
Expand Down