How do I search audio for specific words, phrases, and subphrases?

Last reviewed: 7/8/2022

HOW Article ID: H072216

The information in this article applies to:

  • Audio Search 3

Summary

Search an audio recording for specific words, phrases, and subphrases to obtain their offset positions in the audio file.

More Information

Chant Developer Workbench 2022 includes the Audio Search application helper to simplify the process of searching recorded speech for specific words, phrases, and subphrases.

There are two types of audio searches supported: transcription and word spotting.

With transcription searches, an audio recording is transcribed first. Then one or more searches may be performed with simple string or Regular Expression pattern search terms. Matches are returned as offsets in seconds.

With word spotting searches, the audio is word spotted with a list of keywords (i.e., single words or phrases) first. Then one or more searches may be performed to match a keyword originally used in the word spotting. Matches are returned as offsets in seconds.


// C# example
// Instantiate AudioSearch
NAudioSearch _AudioSearch = new NAudioSearch();
if (_AudioSearch != null)
{
    // Set credentials
    _AudioSearch.SetCredentials("Credentials");
    // Create recognizer
    NSAPI5Recognizer _Recognizer = _AudioSearch.CreateSAPI5Recognizer();
    if (_Recognizer != null)
    {
        // Transcription search: transcribe the recording first, then search the transcript
        _Recognizer.Transcribe("myaudio.wav");
        int type = (int)ChantAudioSearchType.ASTPhrase; 
        int options = (int)ChantAudioSearchOption.ASOExactMatch; 
        // Get time offsets in seconds ss.mmm
        List<double> results = _Recognizer.Find("grocery", type, options);
        ...
        // Word spotting search: spot the keyword list first, then search for one of those keywords
        _Recognizer.WordSpot("myaudio.wav", "grocery, gas station, post office, bank");
        // Get time offsets in seconds ss.mmm
        results = _Recognizer.Find("grocery");
    }
}

// C++ example (wide-character strings)
// Instantiate AudioSearch object
CAudioSearch* _AudioSearch = new CAudioSearch();
// Compare against NULL with != (the original "=!" assigned !NULL to the pointer)
if (_AudioSearch != NULL)
{
    // Set credentials
    _AudioSearch->SetCredentials(L"Credentials");
    // Create recognizer
    CSAPI5Recognizer* _Recognizer = _AudioSearch->CreateSAPI5Recognizer();
    if (_Recognizer != NULL)
    {
        // Transcription search: transcribe the recording first, then search the transcript
        _Recognizer->Transcribe(L"myaudio.wav");
        int type = ASTPhrase;
        int options = ASOExactMatch;
        // Get time offsets in seconds ss.mmm
        vector<double> results = _Recognizer->Find(L"grocery", type, options);
        ...
        // Word spotting search: spot the keyword list first, then search for one of those keywords
        _Recognizer->WordSpot(L"myaudio.wav", L"grocery, gas station, post office, bank");
        // Get time offsets in seconds ss.mmm
        results = _Recognizer->Find(L"grocery");
    }
}

// C++ example (narrow-character strings)
// Instantiate AudioSearch object
CAudioSearch* _AudioSearch = new CAudioSearch();
// Compare against NULL with != (the original "=!" assigned !NULL to the pointer)
if (_AudioSearch != NULL)
{
    // Set credentials
    _AudioSearch->SetCredentials("Credentials");
    // Create recognizer
    CSAPI5Recognizer* _Recognizer = _AudioSearch->CreateSAPI5Recognizer();
    if (_Recognizer != NULL)
    {
        // Transcription search: transcribe the recording first, then search the transcript
        _Recognizer->Transcribe("myaudio.wav");
        int type = ASTPhrase;
        int options = ASOExactMatch;
        // Get time offsets in seconds ss.mmm
        vector<double> results = _Recognizer->Find("grocery", type, options);
        ...
        // Word spotting search: spot the keyword list first, then search for one of those keywords
        _Recognizer->WordSpot("myaudio.wav", "grocery, gas station, post office, bank");
        // Get time offsets in seconds ss.mmm
        results = _Recognizer->Find("grocery");
    }
}

// Delphi example
var
  _AudioSearch: TAudioSearch;
  _Recognizer: TSAPI5Recognizer;
  results: TList<Double>;
  findType: Integer;
  findOptions: Integer;
begin
    // Instantiate AudioSearch object
    _AudioSearch := TAudioSearch.Create();
    if (_AudioSearch <> nil) then
    begin
        // Set credentials
        _AudioSearch.SetCredentials('Credentials');
        // Create recognizer
        _Recognizer := _AudioSearch.CreateSAPI5Recognizer();
        if (_Recognizer <> nil) then
        begin
            // Transcription search: transcribe the recording first, then search the transcript
            _Recognizer.Transcribe('myaudio.wav');
            // Assignment in Delphi is ":=" ("=" is the equality comparison)
            findType := ASTPhrase;
            findOptions := ASOExactMatch;
            // Get time offsets in seconds ss.mmm
            results := _Recognizer.Find('grocery', findType, findOptions);
            ...
            // Word spotting search: spot the keyword list first, then search for one of those keywords
            _Recognizer.WordSpot('myaudio.wav', 'grocery, gas station, post office, bank');
            // Get time offsets in seconds ss.mmm
            results := _Recognizer.Find('grocery');
        end;
    end;
end;

// Java example
// Create AudioSearch object
JAudioSearch _AudioSearch = new JAudioSearch();
// Set credentials
_AudioSearch.setCredentials("Credentials");
// Create recognizer
JSAPI5Recognizer _Recognizer = _AudioSearch.createSAPI5Recognizer();
if (_Recognizer != null)
{
        // Transcription search: transcribe the recording first, then search the transcript
        _Recognizer.transcribe("myaudio.wav");
        int type = ChantAudioSearchType.ASTPhrase; 
        int options = ChantAudioSearchOption.ASOExactMatch; 
        // Get time offsets in seconds ss.mmm
        ArrayList<Double> results = _Recognizer.find("grocery", type, options);
        ...
        // Word spotting search: spot the keyword list first, then search for one of those keywords
        _Recognizer.wordSpot("myaudio.wav", "grocery, gas station, post office, bank");
        // Get time offsets in seconds ss.mmm
        results = _Recognizer.find("grocery");
}

' VB .NET example
Dim _AudioSearch As NAudioSearch
Dim _Recognizer As NSAPI5Recognizer
Dim results As List(Of Double)
Dim type As Integer
Dim options As Integer
    ' Instantiate AudioSearch
    _AudioSearch = New NAudioSearch()
    If (_AudioSearch IsNot Nothing) Then
        ' Set credentials
        _AudioSearch.SetCredentials("Credentials")
        ' Create recognizer
        _Recognizer = _AudioSearch.CreateSAPI5Recognizer()
        If (_Recognizer IsNot Nothing) Then
            ' Transcription search: transcribe the recording first, then search the transcript
            _Recognizer.Transcribe("myaudio.wav")
            type = ChantAudioSearchType.ASTPhrase
            options = ChantAudioSearchOption.ASOExactMatch
            ' Get time offsets in seconds ss.mmm
            results = _Recognizer.Find("grocery", type, options)
            ...
            ' Word spotting search: spot the keyword list first, then search for one of those keywords
            _Recognizer.WordSpot("myaudio.wav", "grocery, gas station, post office, bank")
            ' Get time offsets in seconds ss.mmm
            results = _Recognizer.Find("grocery")
        End If
    End If

Syntax Helpers

The search type value may be specified with one of ChantAudioSearchType constants:

  • ASTPhrase
  • ASTRegEx

The search options may be specified with one or more ChantAudioSearchOption constants:

  • ASOPartialMatch
  • ASOExactMatch
  • ASOCaseSensitive

Development and Deployment

Audio Search applications require the Audio Search library and the applicable SpeechKit Speech API library:

  • C++Builder, C++, and Delphi applications require the Audio Search library (CAudioSearch.dll or CAudioSearchX64.dll) and the applicable SpeechKit Speech API library in the same directory as the application .exe.
  • Java applications require audiosearch.jar and chant.shared.jar in the target system Java JRE lib directory, and/or the classpath must include the path where the audiosearch.jar and chant.shared.jar libraries are placed on the target system. The Audio Search library (JAudioSearch.dll or JAudioSearchX64.dll) and the applicable SpeechKit Speech API library must be in the target system Java JRE bin directory.
  • C# and VB .NET applications require the assembly library (Chant.AudioSearch.Windows) embedded in the application or located in the same directory as the application .exe. The Audio Search library (NAudioSearch.dll or NAudioSearchX64.dll) must be registered as a COM library on the target system, and the applicable SpeechKit Speech API library must be in the same directory as the application .exe.

Speech API | SpeechKit Speech API class | SpeechKit Speech API library
Microsoft SAPI 5 | xSAPI5Recognizer | CSpeechKit.SAPI5.dll or CSpeechKitX64.SAPI5.dll
Microsoft Speech Platform | xMSPRecognizer | CSpeechKit.MSP.dll or CSpeechKitX64.MSP.dll
Nuance Dragon NaturallySpeaking | xDgnRecognizer | CSpeechKit.Dgn.dll or CSpeechKitX64.Dgn.dll

where x is C for C++Builder and C++; T for Delphi; J for Java; and N for .NET.