Last reviewed: 3/23/2024 10:11:25 AM

Event Handling

To track synthesis operations, applications receive event notifications. Event availability varies among Speech APIs.

Event | Event Arguments | Description
ApiError | ChantAPIErrorEventArgs | Notifies the application of an API error
AudioDestStart | AudioEventArgs | Notifies the application the synthesizer has started audio playback or writing to the file
AudioDestStop | AudioEventArgs | Notifies the application the synthesizer has stopped audio playback or writing to the file
BeginSynthesis | BeginSynthesisEventArgs | Notifies the application that speech synthesis has begun
Done | TTSEventArgs | Notifies the application the synthesizer has completed synthesizing speech audio from text
InitComplete | TTSEventArgs | Notifies the application that speech engine enumeration is complete
Pause | TTSEventArgs | Notifies the application the synthesizer request was paused.
Phoneme | PhonemeEventArgs | Notifies the application of a phoneme in the current synthesized speech audio
PlaybackFailed | TTSEventArgs | Notifies the application that a synthesis error occurred
RangeStart | RangeStartEventArgs | Notifies the application the synthesizer is processing a text range
Resume | TTSEventArgs | Notifies the application the synthesizer request was resumed.
SentenceBoundary | SentenceBoundaryEventArgs | Notifies the application the synthesizer has generated a sentence boundary in the current synthesized speech audio
Started | TTSEventArgs | Notifies the application the synthesizer has started processing
Stopped | TTSEventArgs | Notifies the application the synthesizer has stopped processing
TTSBookMark | TTSBookMarkEventArgs | Notifies the application the synthesizer has detected a bookmark in the current audio
TTSCancel | CancelEventArgs | Notifies the application the synthesis request was canceled
Viseme | VisemeEventArgs | Notifies the application the synthesizer has generated a Viseme for the current audio
Visual | VisualEventArgs | Notifies the application the synthesizer has generated a LIPSYNC for the current audio
VoiceChange | TTSEventArgs | Notifies the application the synthesizer has detected a voice change
WordPosition | WordPositionEventArgs | Notifies the application of the word position in the source text for the current audio

Some events provide data values that are returned in argument objects. Argument data availability varies among Speech APIs.

  • AudioEventArgs
    • File - File name
    • AndroidAudioEventArgs
    • AVFAudioEventArgs
    • MCSAudioDestEventArgs
      • ResultId - Result identifier
    • WindowsMediaAudioEventArgs
    • WindowsAudioEventArgs
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
  • BeginSynthesisEventArgs
    • UtteranceId - Utterance identifier
    • SampleRateInHz - Sample rate in hertz
    • AudioFormat - Audio format
    • ChannelCount - Channel count
  • CancelEventArgs
    • AVFCancelEventArgs
      • Text - Synthesized speech text
    • MCSCancelEventArgs
      • ErrorCode - Cancel error code
      • ErrorDetails - Cancel error details
      • Reason - Cancel reason
      • MCSTTSCancelEventArgs
        • ResultId - Result identifier
  • ChantAPIErrorEventArgs
    • Function - API function name
    • Message - API error message
    • RC - API error return code
  • PhonemeEventArgs
    • CurrentPhonemeID - Phoneme ID of the current audio
    • CurrentPhoneme - Phoneme of the current audio
    • NextPhonemeID - Phoneme ID of the upcoming audio
    • NextPhoneme - Phoneme of the upcoming audio
    • Duration - Playback duration of the current phoneme
    • Hints - Hints of the current phoneme
    • AudioStreamOffset - Audio stream offset
    • AudioTimeOffset - Audio time offset
  • PlaybackEventArgs
    • AndroidPlaybackEventArgs
      • Text - Synthesized speech text
      • UtteranceId - Utterance identifier
    • WindowsMediaPlaybackEventArgs
      • Error - Playback error
      • ErrorMessage - Playback error message
      • ExtendedErrorCode - Playback extended error
  • RangeStartEventArgs
    • AndroidRangeStartEventArgs
      • UtteranceId - Utterance identifier
      • Start - Range start
      • End - Range end
      • Frame - Range frame
    • AVFRangeStartEventArgs
      • Utterance - Utterance identifier
      • Location - Range start
      • Length - Range length
  • SentenceBoundaryEventArgs
    • Length - Synthesized text length
    • Position - Synthesized text position
    • MCSSentenceBoundaryEventArgs
      • Text - Word text
      • ResultId - Result identifier
      • AudioOffset - Audio stream offset
      • Duration - Audio time length
    • WindowsSentenceBoundaryEventArgs
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
    • WindowsMediaSentenceBoundaryEventArgs
      • StartTime - Audio time offset
  • TTSBookMarkEventArgs
    • Text - Bookmark text
    • MCSTTSBookMarkEventArgs
      • AudioOffset - Audio stream offset
      • ResultId - Result identifier
    • WindowsMediaTTSBookMarkEventArgs
      • MarkID - Bookmark identifier
      • MarkString - Bookmark value
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
    • WindowsTTSBookMarkEventArgs
      • StartTime - Audio time offset
  • TTSEventArgs
    • Text - Synthesized speech text
    • AndroidTTSEventArgs
      • UtteranceId - Utterance Id
      • Interrupted - Synthesis interrupted when stopped
      • ErrorCode - Error code
    • AVFTTSEventArgs
    • MCSTTSEventArgs
      • AudioDuration - Audio duration
      • ResultId - Result identifier
    • WindowsTTSEventArgs
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
    • WindowsMediaTTSEventArgs
  • VisemeEventArgs
    • MCSVisemeEventArgs
      • Animation - Viseme animation
      • AudioOffset - Audio offset
      • ResultId - Result identifier
      • VisemeId - Viseme identifier
    • WindowsVisemeEventArgs
      • CurrentSAPIViseme - SAPI Viseme for the current audio
      • NextSAPIViseme - SAPI Viseme for upcoming audio
      • CurrentDisneyViseme - Disney Viseme for the current audio
      • NextDisneyViseme - Disney Viseme for upcoming audio
      • Duration - Playback duration of the current Viseme
      • Hints - Hints for the current Viseme
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
  • VisualEventArgs
    • JawOpen - The JawOpen position
    • TeethUpVisible - The TeethUpVisible position
    • TeethLoVisible - The TeethLoVisible position
    • MouthHeight - The MouthHeight position
    • MouthWidth - The MouthWidth position
    • MouthUpturn - The MouthUpturn position
    • TonguePos - The TonguePos position
    • LipTension - The LipTension position
    • AudioStreamOffset - Audio stream offset
    • AudioTimeOffset - Audio time offset
  • WordPositionEventArgs
    • Length - Word length in the current audio
    • Position - Word position in the current audio
    • MCSWordPositionEventArgs
      • ResultId - Result identifier
      • Text - Word text
      • AudioOffset - Word offset in the current audio
      • BoundaryType - Word boundary type
      • Duration - Word duration
    • WindowsWordPositionEventArgs
      • AudioStreamOffset - Audio stream offset
      • AudioTimeOffset - Audio time offset
    • WindowsMediaWordPositionEventArgs
      • StartTime - Audio time offset

Event notifications are received in callback routines as follows:


// Create the synthesizer; continue only if a non-null object was returned
_Synthesizer = _SpeechKit.createChantSynthesizer();
if (_Synthesizer != null)
{
    // Set the callback object (this instance is passed as the events receiver)
    _Synthesizer.setChantSpeechKitEvents(this);
    // Register for the InitComplete callback
    _Synthesizer.registerCallback(ChantSpeechKitCallback.CCTTSInitComplete);
}

// Create the synthesizer; continue only if a non-null object was returned
_Synthesizer = _SpeechKit.CreateChantSynthesizer();
if (_Synthesizer != null)
{
    // Subscribe Synthesizer_WordPosition to the WordPosition event
    _Synthesizer.WordPosition += Synthesizer_WordPosition;
}

// Create the synthesizer; continue only if a non-NULL pointer was returned
_Synthesizer = _SpeechKit->CreateChantSynthesizer();
if (_Synthesizer != NULL)
{
    // Register Event Handlers: pass the WordPosition callback to the synthesizer
    _Synthesizer->SetWordPosition(WordPosition);
}
    

// Create the synthesizer; continue only if a non-NULL pointer was returned
_Synthesizer = _SpeechKit->CreateChantSynthesizer();
if (_Synthesizer != NULL)
{
    // Register Event Handlers: pass the WordPosition callback to the synthesizer
    _Synthesizer->SetWordPosition(WordPosition);
}

// Create the synthesizer; continue only if a non-nil object was returned
_Synthesizer := _SpeechKit.CreateChantSynthesizer();
if (_Synthesizer <> nil) then
begin
    // Register Event Handlers: assign the WordPosition routine to the WordPosition event
    _Synthesizer.WordPosition := WordPosition;
end;
    

// Create the synthesizer; continue only if a non-null object was returned
_Synthesizer = _SpeechKit.createChantSynthesizer();
if (_Synthesizer != null)
{
    // Set the callback object (this instance is passed as the events receiver)
    _Synthesizer.setChantSpeechKitEvents(this);
    // Register for the WordPosition callback
    _Synthesizer.registerCallback(ChantSpeechKitCallback.CCTTSWordPosition);
}

// Create the synthesizer; continue only if a non-nil object was returned
_synthesizer = [_speechKit createChantSynthesizer];
if (_synthesizer != nil)
{
    // Set self as the synthesizer's delegate, cast to the SPChantSynthesizerDelegate protocol type
    [_synthesizer setDelegate:(id<SPChantSynthesizerDelegate>)self];
}

// Create the synthesizer; continue only if a non-nil object was returned
_Synthesizer = _SpeechKit!.createChantSynthesizer()
if (_Synthesizer != nil)
{
    // Set self as the synthesizer's delegate
    _Synthesizer!.delegate = self
}

' Create the synthesizer
_Synthesizer = _SpeechKit.CreateChantSynthesizer()
' Declaring the event handler routines with a Handles clause in VB automatically registers for the event notifications
Private Sub Synthesizer_WordPosition(ByVal sender As System.Object, ByVal e As WordPositionEventArgs) Handles _Synthesizer.WordPosition
    

The synthesizer object sends all notifications to the event handlers. All event data is contained in an arguments object.


                @Override
public void initComplete(Object o, TTSEventArgs ttsEventArgs)
{
    // InitComplete callback: collect the name of each engine the synthesizer reports
    // Skip the loop when no engine collection is returned
    if (_Synthesizer.getChantEngines() != null)
    {
        for (JChantEngine engine : _Synthesizer.getChantEngines())
        {
            // Add name to list
            _Engines.add(engine.getName());
        }
    }
    ...
}
    

// WordPosition event handler; the event data arrives in e
private void Synthesizer_WordPosition(object sender, WordPositionEventArgs e)
{
    // Ignore notifications that carry no arguments object
    if (e != null)
    {
        // Read the word position and length from the event arguments
        int startPosition = e.Position;
        int wordLength = e.Length;
        ...
    }
}
    

// WordPosition event callback; the event data arrives in Args
void CALLBACK WordPosition(void* Sender, CWordPositionEventArgs* Args)
{
    // Ignore notifications that carry no arguments object
    if (Args != NULL) 
    {
        // Read the word position and length from the event arguments
        int startPosition = Args->GetPosition();
        int wordLength = Args->GetLength();
        ...
    }
}
    

// WordPosition event callback; the event data arrives in Args
void WordPosition(void* Sender, CWordPositionEventArgs* Args)
{
    // Ignore notifications that carry no arguments object
    if (Args != NULL) 
    {
        // Read the word position and length from the event arguments
        int startPosition = Args->GetPosition();
        int wordLength = Args->GetLength();
        ...
    }
}
    

// WordPosition event handler; the event data arrives in Args
procedure TForm1.WordPosition(Sender: TObject; Args: TWordPositionEventArgs);
var
  startPosition: Integer;
  wordLength: Integer;
begin
    // Ignore notifications that carry no arguments object
    If (Args <> nil) then
    begin
      // Read the word position and length from the event arguments
      startPosition := args.Position;
      wordLength := args.Length;
      ...
    end;
end;
    

// WordPosition callback; the event data arrives in args
public void wordPosition(Object sender, WordPositionEventArgs args)
{
    // Ignore notifications that carry no arguments object
    if (args != null)
    {
        // Read the word position and length from the event arguments
        int startPosition = args.getPosition();
        int wordLength = args.getLength();
        ...
    }
}

-(void)rangeStart:(NSObject*)sender args:(SPRangeStartEventArgs*)args
{
    // Build the range from the location and length carried by the event arguments,
    // then select that range in the text view.
    NSRange textRange = NSMakeRange([args location], [args length]);
    [_textView1 setSelectedRange:textRange];
}

func rangeStart(sender: SPChantSynthesizer, args: SPRangeStartEventArgs)
{
    // Build the range from the location and length carried by the event arguments,
    // then select that range in the text view.
    let textRange = NSRange(location: args.location, length: args.length)
    self.textView1.selectedRange = textRange
}
    

' WordPosition event handler (bound through the Handles clause); the event data arrives in e
Private Sub Synthesizer_WordPosition(ByVal sender As System.Object, ByVal e As WordPositionEventArgs) Handles _Synthesizer.WordPosition
    Dim startPosition As Integer
    Dim wordLength As Integer
    ' Ignore notifications that carry no arguments object
    If (e IsNot Nothing) Then
        ' Read the word position and length from the event arguments
        startPosition = e.Position
        wordLength = e.Length
        ...
    End If
End Sub