How do I manage speech recognition directly with the recognizer?
Last reviewed: 7/8/2022
HOW Article ID: H072202
The information in this article applies to:
- SpeechKit 11
Summary
Optimize speech recognition apps by managing the recognizer directly in applications.
More Information
A speech recognizer converts speech to text for transcription, data entry, or command and control. In addition, events are generated to return recognized speech and indicate processing states.
The Microsoft Speech API (SAPI5) runtime is part of Windows and provides application control of the listening context, along with events for recognized speech and the processing states of a recognizer. Microsoft includes a speech recognizer in many Windows SKUs.
Recognizers from other speech technology vendors do not support Microsoft APIs and event processing and provide their own proprietary speech API with SDK and runtimes.
SpeechKit provides common speech recognition management for multiple application scenarios across the various speech technology APIs by managing speech recognition directly with the recognizer.
SpeechKit includes libraries for the following Speech APIs for speech recognition:
Speech API | Platforms |
---|---|
Apple Speech | ARM, x64, x86 |
Google android.speech | ARM |
Microsoft SAPI 5 | x64, x86 |
Microsoft Speech Platform | x64, x86 |
Microsoft .NET System.Speech | x64, x86 |
Microsoft .NET Microsoft.Speech | x64, x86 |
Microsoft WindowsMedia (UWP) | ARM, x64, x86 |
Microsoft WindowsMedia (WinRT) | x64, x86 |
Libraries for the most popular recognizer speech APIs are included in Chant Developer Workbench. For additional libraries that support different APIs, runtimes, versions, and vendors contact Chant Support.
SpeechKit supports speech recognition with a single request.
// Create Recognizer (Android: wraps the android.speech recognizer)
_Recognizer = _SpeechKit.createAndroidRecognizer();
// Start speech recognition from microphone audio source
_Recognizer.startRecognition();
// Stop speech recognition from microphone audio source
_Recognizer.stopRecognition();
// Create Nuance Dragon NaturallySpeaking Recognizer
// (use exactly one of the Create* calls below for the target speech API)
_Recognizer = _SpeechKit.CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.CreateWindowsMediaRecognizer();
// Start speech recognition from microphone audio source
_Recognizer.StartRecognition();
// Stop speech recognition from microphone audio source
_Recognizer.StopRecognition();
// Transcribe from audio file
_Recognizer.TranscribeAudio("myaudio.wav");
// Free the recognizer and release its engine resources
_Recognizer.Dispose();
// Create Nuance Dragon NaturallySpeaking Recognizer
// (use exactly one of the Create* calls below for the target speech API)
_Recognizer = _SpeechKit->CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit->CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit->CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit->CreateWindowsMediaRecognizer();
// Start speech recognition from microphone audio source
_Recognizer->StartRecognition();
// Stop speech recognition from microphone audio source
_Recognizer->StopRecognition();
// Transcribe from audio file (wide-character path for the C++ API)
_Recognizer->TranscribeAudio(L"myaudio.wav");
// Free the recognizer (it was heap-allocated by the SpeechKit factory)
delete _Recognizer;
// Create Nuance Dragon NaturallySpeaking Recognizer
// (use exactly one of the Create* calls below for the target speech API)
_Recognizer = _SpeechKit->CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit->CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit->CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit->CreateWindowsMediaRecognizer();
// Start speech recognition from microphone audio source
_Recognizer->StartRecognition();
// Stop speech recognition from microphone audio source
_Recognizer->StopRecognition();
// Transcribe from audio file
_Recognizer->TranscribeAudio("myaudio.wav");
// Free the recognizer (it was heap-allocated by the SpeechKit factory)
delete _Recognizer;
// Create Nuance Dragon NaturallySpeaking Recognizer
// (use exactly one of the Create* calls below for the target speech API)
_Recognizer := _SpeechKit.CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer := _SpeechKit.CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer := _SpeechKit.CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer := _SpeechKit.CreateWindowsMediaRecognizer();
// Start speech recognition from microphone audio source
_Recognizer.StartRecognition();
// Stop speech recognition from microphone audio source
_Recognizer.StopRecognition();
// Transcribe from audio file
_Recognizer.TranscribeAudio('myaudio.wav');
// Free the recognizer
// NOTE(review): this destroys _SpeechKit, whereas the other language
// samples dispose _Recognizer here — confirm the intended target.
_SpeechKit.Destroy();
// Create Nuance Dragon NaturallySpeaking Recognizer
// (use exactly one of the create* calls below for the target speech API)
_Recognizer = _SpeechKit.createDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.createMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.createSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.createWindowsMediaRecognizer();
// Start speech recognition from microphone audio source
_Recognizer.startRecognition();
// Stop speech recognition from microphone audio source
_Recognizer.stopRecognition();
// Transcribe from audio file
_Recognizer.transcribeAudio("myaudio.wav");
// Free the recognizer and release its engine resources
_Recognizer.dispose();
// Create iOS Recognizer
_recognizer = [_speechKit createiOSRecognizer];
// Start speech recognition from microphone audio source
[_recognizer startRecognition];
// Free the recognizer and release its engine resources
[_recognizer dispose];
' Create Nuance Dragon NaturallySpeaking Recognizer
' (use exactly one of the Create* calls below for the target speech API)
_Recognizer = _SpeechKit.CreateDgnRecognizer()
' Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.CreateMSPRecognizer()
' Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.CreateSAPI5Recognizer()
' Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.CreateWindowsMediaRecognizer()
' Start speech recognition from microphone audio source
_Recognizer.StartRecognition()
' Stop speech recognition from microphone audio source
_Recognizer.StopRecognition()
' Transcribe from audio file
_Recognizer.TranscribeAudio("myaudio.wav")
' Free the recognizer and release its engine resources
_Recognizer.Dispose()
To know the progress or state of speech recognition and process the recognized speech, the application processes event callbacks.
// Android Activity that receives SpeechKit callbacks by implementing
// the JChantSpeechKitEvents interface.
public class MainActivity extends AppCompatActivity implements com.speechkit.JChantSpeechKitEvents
{
...
// Set the callback
_Recognizer.setChantSpeechKitEvents(this);
// Register callback for recognized dictation text
_Recognizer.registerCallback(ChantSpeechKitCallback.CCSRRecognitionDictation);
...
// Invoked with recognized dictation text; appends it to the EditText
@Override
public void recognitionDictation(Object o, RecognitionDictationEventArgs recognitionDictationEventArgs)
{
// Display recognized speech
final EditText textBox1 = (EditText) findViewById(R.id.textbox1);
if ((textBox1 != null) && (recognitionDictationEventArgs.getText() != null)) {
textBox1.append( recognitionDictationEventArgs.getText() + "\n" );
}
...
}
}
// Register Event Handler
_Recognizer.RecognitionDictation += Recognizer_RecognitionDictation;

// Handles recognized dictation text: appends the text plus a separating
// space to the text box and moves the caret so the newest text is visible.
private void Recognizer_RecognitionDictation(object sender, RecognitionDictationEventArgs e)
{
    // IsNullOrEmpty also rejects a null Text; the original
    // (e.Text != string.Empty) let null through and still ran the
    // caret update for no appended text.
    if ((e != null) && !string.IsNullOrEmpty(e.Text))
    {
        textBox1.Text += e.Text;
        textBox1.Text += " ";
        // Make Visible
        textBox1.SelectionStart = textBox1.Text.Length;
    }
}
// Register Event Handler
_Recognizer->SetRecognitionDictation(RecognitionDictation);

// Callback invoked by the recognizer with recognized dictation text.
// Appends the text to the dialog's edit control and moves the caret
// to the end so the newest text is visible.
void CALLBACK RecognitionDictation(void* Sender, CRecognitionDictationEventArgs* Args)
{
    CDictationDlg* dlg = (CDictationDlg*)AfxGetApp()->GetMainWnd();
    if (dlg != NULL)
    {
        // Guard the text pointer before wcslen: wcslen(NULL) is
        // undefined behavior.
        if ((Args != NULL) && (Args->GetText() != NULL) && (wcslen(Args->GetText()) > 0))
        {
            // GetDlgItem can return NULL if the control is not found
            CEdit* pEdit = (CEdit*)dlg->GetDlgItem(IDC_EDIT1);
            if (pEdit != NULL)
            {
                // Add text in the text box
                CString sText;
                pEdit->GetWindowText(sText);
                sText += Args->GetText();
                pEdit->SetWindowText(sText);
                // Make Visible
                pEdit->SetSel(sText.GetLength(), sText.GetLength());
            }
        }
    }
}
// Register Event Handler
_Recognizer->SetRecognitionDictation(RecognitionDictation);
// Callback invoked by the recognizer with recognized dictation text;
// appends it to the memo control (C++Builder/VCL).
void CALLBACK RecognitionDictation(void* Sender, CRecognitionDictationEventArgs* Args)
{
// Add text in the text box (skip null events and empty text)
if ((Args != NULL) && (Args->GetText().Length() > 0))
{
Form1->Memo1->Text = Form1->Memo1->Text + Args->GetText();
}
}
// Register event handler
_Recognizer.RecognitionDictation := RecognitionDictation;
// Event handler invoked by the recognizer with recognized dictation
// text; appends it to the memo control.
procedure TForm1.RecognitionDictation(Sender: TObject; Args: TRecognitionDictationEventArgs);
begin
// Add text in the text box (skip nil events and empty text)
If ((Args <> nil) and (Length(Args.Text) > 0)) then
begin
Form1.Memo1.Text := Form1.Memo1.Text + Args.Text;
end;
end;
// Swing frame that receives SpeechKit callbacks by implementing
// the JChantSpeechKitEvents interface.
public class Frame1 extends JFrame implements com.speechkit.JChantSpeechKitEvents
// Set the callback
_Recognizer.setChantSpeechKitEvents(this);
// Register Callbacks for receiving recognized speech.
_Recognizer.registerCallback(ChantSpeechKitCallback.CCSRRecognitionDictation);
// Invoked with recognized dictation text; appends it to the text area
public void recognitionDictation(Object sender, RecognitionDictationEventArgs args)
{
if ((args != null) && (args.getText() != null))
{
jTextArea1.append(args.getText());
// Make Visible
jTextArea1.setCaretPosition(jTextArea1.getText().length());
}
}
// Set the callback
[_recognizer setDelegate:(id<SPChantRecognizerDelegate>)self];

// Delegate method invoked by the recognizer with recognized dictation
// text; appends the text (plus a trailing space) to the text view.
- (void)recognitionDictation:(NSObject *)sender args:(SPRecognitionDictationEventArgs *)args
{
    // %@ is the object format specifier; the original "%&@%& " was garbled
    NSString *newText = [NSString stringWithFormat:@"%@%@ ", [_textView1 text], [args text]];
    [_textView1 setText:newText];
}
Dim WithEvents _Recognizer As NSAPI5Recognizer = Nothing

' Handles recognized dictation text: appends the text plus a separating
' space to the text box and moves the caret so the newest text is visible.
Private Sub Recognizer_RecognitionDictation(ByVal sender As System.Object, ByVal e As RecognitionDictationEventArgs) Handles _Recognizer.RecognitionDictation
    ' Guard e for Nothing (the C# sample checks e != null) and skip
    ' null/empty text in one test via IsNullOrEmpty
    If (e IsNot Nothing) AndAlso (Not String.IsNullOrEmpty(e.Text)) Then
        textBox1.Text += e.Text
        textBox1.Text += " "
        ' Make Visible
        textBox1.SelectionStart = textBox1.Text.Length
    End If
End Sub