How do I manage speech recognition directly with the recognizer?

Last reviewed: 7/8/2022

HOW Article ID: H072202

The information in this article applies to:

  • SpeechKit 11

Summary

Optimize speech recognition apps by managing the recognizer directly in applications.

More Information

A speech recognizer converts speech to text for transcription, data entry, or command and control. In addition, events are generated to return recognized speech and indicate processing states.

The Microsoft Speech API (SAPI5) runtime is a part of Windows that gives applications control of the listening context and delivers events for recognized speech and the processing states of a recognizer. Microsoft includes a speech recognizer in many Windows SKUs.

Recognizers from other speech technology vendors do not support Microsoft APIs and event processing and provide their own proprietary speech API with SDK and runtimes.

SpeechKit provides common speech recognition management for multiple application scenarios across the various speech technology APIs by managing speech recognition directly with the recognizer.

SpeechKit includes libraries for the following Speech APIs for speech recognition:

Speech API | Platforms
Apple Speech | ARM, x64, x86
Google android.speech | ARM
Microsoft SAPI 5 | x64, x86
Microsoft Speech Platform | x64, x86
Microsoft .NET System.Speech | x64, x86
Microsoft .NET Microsoft.Speech | x64, x86
Microsoft WindowsMedia (UWP) | ARM, x64, x86
Microsoft WindowsMedia (WinRT) | x86, x64

Libraries for the most popular recognizer speech APIs are included in Chant Developer Workbench. For additional libraries that support different APIs, runtimes, versions, and vendors, contact Chant Support.

SpeechKit supports speech recognition with a single request.


// Java (Android) sample: create the Android recognizer, then start and
// stop recognition.

// Create Recognizer
_Recognizer = _SpeechKit.createAndroidRecognizer();

// Start speech recognition from microphone audio source
_Recognizer.startRecognition();

// Stop speech recognition from microphone audio source
_Recognizer.stopRecognition();

// NOTE(review): appears to be the C# sample (PascalCase members, Dispose) — confirm.
// The four Create* calls are alternatives; use the one matching the
// installed recognizer.

// Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer = _SpeechKit.CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.CreateWindowsMediaRecognizer();

// Start speech recognition from microphone audio source
_Recognizer.StartRecognition();

// Stop speech recognition from microphone audio source
_Recognizer.StopRecognition();

// Transcribe from audio file
_Recognizer.TranscribeAudio("myaudio.wav");

// Free the recognizer
_Recognizer.Dispose();

// C++ sample (the file name is passed as a wide-character string literal).
// The four Create* calls are alternatives; use the one matching the
// installed recognizer.

// Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer = _SpeechKit->CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit->CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit->CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit->CreateWindowsMediaRecognizer();

// Start speech recognition from microphone audio source
_Recognizer->StartRecognition();

// Stop speech recognition from microphone audio source
_Recognizer->StopRecognition();

// Transcribe from audio file
_Recognizer->TranscribeAudio(L"myaudio.wav");

// Free the recognizer
delete _Recognizer;
    

// C++ sample (the file name is passed as a narrow string literal).
// NOTE(review): presumably the C++Builder variant — confirm.
// The four Create* calls are alternatives; use the one matching the
// installed recognizer.

// Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer = _SpeechKit->CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit->CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit->CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit->CreateWindowsMediaRecognizer();

// Start speech recognition from microphone audio source
_Recognizer->StartRecognition();

// Stop speech recognition from microphone audio source
_Recognizer->StopRecognition();

// Transcribe from audio file
_Recognizer->TranscribeAudio("myaudio.wav");

// Free the recognizer
delete _Recognizer;

// Delphi sample.
// The four Create* calls are alternatives; use the one matching the
// installed recognizer.

// Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer := _SpeechKit.CreateDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer := _SpeechKit.CreateMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer := _SpeechKit.CreateSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer := _SpeechKit.CreateWindowsMediaRecognizer();

// Start speech recognition from microphone audio source
_Recognizer.StartRecognition();

// Stop speech recognition from microphone audio source
_Recognizer.StopRecognition();

// Transcribe from audio file
_Recognizer.TranscribeAudio('myaudio.wav');

// Free the recognizer
// NOTE(review): this call targets _SpeechKit, whereas the other language
// samples release _Recognizer — confirm which object is intended.
_SpeechKit.Destroy();
    

// Java sample.
// The four create* calls are alternatives; use the one matching the
// installed recognizer.

// Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer = _SpeechKit.createDgnRecognizer();
// Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.createMSPRecognizer();
// Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.createSAPI5Recognizer();
// Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.createWindowsMediaRecognizer();

// Start speech recognition from microphone audio source
_Recognizer.startRecognition();

// Stop speech recognition from microphone audio source
_Recognizer.stopRecognition();

// Transcribe from audio file
_Recognizer.transcribeAudio("myaudio.wav");

// Free the recognizer
_Recognizer.dispose();

// Objective-C (iOS) sample: create the iOS recognizer, start recognition,
// and release the recognizer when finished.

// Create iOS Recognizer
_recognizer = [_speechKit createiOSRecognizer];

// Start speech recognition from microphone audio source
[_recognizer startRecognition];

// Free the recognizer
[_recognizer dispose];

' Visual Basic sample.
' The four Create* calls are alternatives; use the one matching the
' installed recognizer.

' Create Nuance Dragon NaturallySpeaking Recognizer
_Recognizer = _SpeechKit.CreateDgnRecognizer()
' Or, Create Microsoft Speech Platform Recognizer
_Recognizer = _SpeechKit.CreateMSPRecognizer()
' Or, Create Microsoft SAPI5 (Desktop) Recognizer
_Recognizer = _SpeechKit.CreateSAPI5Recognizer()
' Or, Create Microsoft WindowsMedia Recognizer
_Recognizer = _SpeechKit.CreateWindowsMediaRecognizer()

' Start speech recognition from microphone audio source
_Recognizer.StartRecognition()

' Stop speech recognition from microphone audio source
_Recognizer.StopRecognition()

' Transcribe from audio file
_Recognizer.TranscribeAudio("myaudio.wav")

' Free the recognizer
_Recognizer.Dispose()

To know the progress or state of speech recognition and process the recognized speech, the application processes event callbacks.


/**
 * Android activity that receives SpeechKit events through the
 * com.speechkit.JChantSpeechKitEvents interface it implements.
 */
public class MainActivity extends AppCompatActivity implements com.speechkit.JChantSpeechKitEvents
{
        ...
        // Set the callback
        _Recognizer.setChantSpeechKitEvents(this);
        // Register callback for receiving recognized speech (dictation)
        _Recognizer.registerCallback(ChantSpeechKitCallback.CCSRRecognitionDictation);
        ...
    /**
     * Appends the recognized text, followed by a newline, to the textbox1
     * view when both the view and the text are available.
     */
    @Override
    public void recognitionDictation(Object o, RecognitionDictationEventArgs recognitionDictationEventArgs)
    {
        // Display recognized speech
        final EditText textBox1 = (EditText) findViewById(R.id.textbox1);
        if ((textBox1 != null) && (recognitionDictationEventArgs.getText() != null)) {
            textBox1.append( recognitionDictationEventArgs.getText() + "\n" );
        }
        ...
   }
}

// Register Event Handler
_Recognizer.RecognitionDictation += Recognizer_RecognitionDictation;

/// <summary>
/// Appends each dictation result, followed by a space, to textBox1.
/// </summary>
/// <param name="sender">Object that raised the event (not used).</param>
/// <param name="e">Event data; its Text property holds the recognized speech.</param>
private void Recognizer_RecognitionDictation(object sender, RecognitionDictationEventArgs e)
{
    // Skip missing event data and null or empty text; a null Text would
    // otherwise pass the check and append only the separator.
    if ((e == null) || string.IsNullOrEmpty(e.Text))
    {
        return;
    }

    textBox1.Text += e.Text + " ";
    // Make Visible
    textBox1.SelectionStart = textBox1.Text.Length;
}

// Register Event Handler
_Recognizer->SetRecognitionDictation(RecognitionDictation);

// Dictation callback: appends the recognized text to the IDC_EDIT1 edit
// control of the application's main window.
//   Sender - not used by this handler
//   Args   - event data; GetText() supplies the recognized text
void CALLBACK RecognitionDictation(void* Sender, CRecognitionDictationEventArgs* Args)
{
	CDictationDlg* dlg = (CDictationDlg*)AfxGetApp()->GetMainWnd();
	if (dlg == NULL)
	{
		return;
	}
	// Nothing to add when there is no event data or the text is empty
	if ((Args == NULL) || (wcslen(Args->GetText()) == 0))
	{
		return;
	}
	// GetDlgItem returns NULL when the control cannot be found
	CEdit* pEdit = (CEdit*)dlg->GetDlgItem(IDC_EDIT1);
	if (pEdit == NULL)
	{
		return;
	}
	// Add text in the text box
	CString sText;
	pEdit->GetWindowText(sText);
	sText += Args->GetText();
	pEdit->SetWindowText(sText);
	// Make Visible
	pEdit->SetSel(sText.GetLength(), sText.GetLength());
}
    

// Register Event Handler
_Recognizer->SetRecognitionDictation(RecognitionDictation);

// Dictation callback: appends the recognized text to Form1's Memo1.
//   Sender - not used by this handler
//   Args   - event data; GetText() supplies the recognized text
void CALLBACK RecognitionDictation(void* Sender, CRecognitionDictationEventArgs* Args)
{
	// Leave the memo untouched when there is no event data or no text
	if ((Args == NULL) || (Args->GetText().Length() <= 0))
	{
		return;
	}

	// Add text in the text box
	Form1->Memo1->Text = Form1->Memo1->Text + Args->GetText();
}

// Register event handler
_Recognizer.RecognitionDictation := RecognitionDictation;

// Dictation event handler: appends the recognized text to Form1.Memo1.
// Sender is not used; Args.Text supplies the recognized text.
procedure TForm1.RecognitionDictation(Sender: TObject; Args: TRecognitionDictationEventArgs);
begin
    // Leave the memo untouched when there is no event data or no text
    if (Args = nil) or (Args.Text = '') then
      Exit;

    // Add text in the text box
    Form1.Memo1.Text := Form1.Memo1.Text + Args.Text;
end;
    

public class Frame1 extends JFrame implements com.speechkit.JChantSpeechKitEvents

// Set the callback
_Recognizer.setChantSpeechKitEvents(this);
// Register Callbacks for receiving recognized speech.
_Recognizer.registerCallback(ChantSpeechKitCallback.CCSRRecognitionDictation);

/**
 * Appends the recognized text to jTextArea1 and moves the caret to the end
 * of the text area.
 *
 * @param sender not used by this handler
 * @param args   event data; getText() supplies the recognized text
 */
public void recognitionDictation(Object sender, RecognitionDictationEventArgs args)
{
    // Nothing to append without event data
    if (args == null)
    {
        return;
    }

    final String recognized = args.getText();
    if (recognized == null)
    {
        return;
    }

    jTextArea1.append(recognized);
    // Make Visible
    jTextArea1.setCaretPosition(jTextArea1.getText().length());
}

// Set the callback
[_recognizer setDelegate:(id<SPChantRecognizerDelegate>)self];

/// Appends each dictation result, followed by a space, to _textView1.
/// @param sender The object reporting the event (not used).
/// @param args Event data; its text is the recognized speech.
-(void)recognitionDictation:(NSObject *)sender args:(SPRecognitionDictationEventArgs *)args
{
    NSString *recognizedText = [args text];
    // Messaging nil returns 0, so this also covers nil args and nil text
    if ([recognizedText length] == 0) {
        return;
    }
    // A nil current text would otherwise be formatted as "(null)"
    NSString *currentText = [_textView1 text] ?: @"";
    // %@ is the object format specifier
    NSString *newText = [NSString stringWithFormat:@"%@%@ ", currentText, recognizedText];
    [_textView1 setText:newText];
}

Dim WithEvents _Recognizer As NSAPI5Recognizer = Nothing

''' <summary>
''' Appends each dictation result, followed by a space, to textBox1.
''' </summary>
''' <param name="sender">Object that raised the event (not used).</param>
''' <param name="e">Event data; its Text property holds the recognized speech.</param>
Private Sub Recognizer_RecognitionDictation(ByVal sender As System.Object, ByVal e As RecognitionDictationEventArgs) Handles _Recognizer.RecognitionDictation
    ' Skip missing event data and empty text
    If (e Is Nothing) OrElse String.IsNullOrEmpty(e.Text) Then
        Return
    End If

    textBox1.Text &= e.Text & " "
    ' Make Visible
    textBox1.SelectionStart = textBox1.Text.Length
End Sub