James 9 speech-recognition objective-c ios
I have an iOS app developed in Xcode/Objective-C. It uses the iOS Speech API for continuous speech recognition. It works, but I want to turn the microphone icon red as soon as speech starts, and I also want to detect when speech ends.

I implemented the SFSpeechRecognitionTaskDelegate protocol, which provides the callbacks speechRecognitionTaskDidDetectSpeech: and speechRecognitionTask:didHypothesizeTranscription:, but these do not fire until the end of the first word has been processed, not at the very start of speech.

I want to detect the start of speech (or of any noise). It seems this should be possible from the AVAudioPCMBuffer delivered by installTapOnBus:, but I don't know how to tell silence apart from noise that might be speech.

Also, the Speech API gives no event when the speaker stops talking, i.e. there is no silence detection; it just keeps recording until it times out. I detect silence by checking the time elapsed since the last event, but I am not sure whether there is a better way.
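For context, the timestamp that the tap block below checks is refreshed from the delegate. A minimal sketch, assuming lastSpeechDetected is a long long instance variable (as in the code that follows):

    - (void)speechRecognitionTask: (SFSpeechRecognitionTask *)task
        didHypothesizeTranscription: (SFTranscription *)transcription {
        // Each partial result means speech was just heard; record the time in ms.
        lastSpeechDetected = [[NSDate date] timeIntervalSince1970] * 1000;
    }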
Here is the code:
NSError *outError;

AVAudioSession *audioSession = [AVAudioSession sharedInstance];
[audioSession setCategory: AVAudioSessionCategoryPlayAndRecord withOptions: AVAudioSessionCategoryOptionDefaultToSpeaker error: &outError];
[audioSession setMode: AVAudioSessionModeMeasurement error: &outError];
[audioSession setActive: true withOptions: AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error: &outError];

SFSpeechAudioBufferRecognitionRequest *speechRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
if (speechRequest == nil) {
    NSLog(@"Unable to create SFSpeechAudioBufferRecognitionRequest.");
    return;
}

audioEngine = [[AVAudioEngine alloc] init];
AVAudioInputNode *inputNode = [audioEngine inputNode];
speechRequest.shouldReportPartialResults = true;

// iOS speech does not detect end of speech, so must track silence.
lastSpeechDetected = -1;

speechTask = [speechRecognizer recognitionTaskWithRequest: speechRequest delegate: self];

[inputNode installTapOnBus: 0 bufferSize: 4096 format: [inputNode outputFormatForBus: 0] block: ^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
    // Use long long: epoch milliseconds overflow a 32-bit long.
    long long millis = [[NSDate date] timeIntervalSince1970] * 1000;
    if (lastSpeechDetected != -1 && ((millis - lastSpeechDetected) > 1000)) {
        // Over a second since the last recognition event: treat as silence.
        lastSpeechDetected = -1;
        [speechTask finish];
        return;
    }
    [speechRequest appendAudioPCMBuffer: buffer];
}];

[audioEngine prepare];
[audioEngine startAndReturnError: &outError];
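This assumes speechRecognizer has already been created and that the user has granted permission; otherwise the task never produces results. A minimal setup sketch (the en-US locale is an assumption; NSMicrophoneUsageDescription and NSSpeechRecognitionUsageDescription must also be present in Info.plist):

    // #import <Speech/Speech.h>
    // One-time setup: create the recognizer and request permission.
    speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:
                           [NSLocale localeWithLocaleIdentifier: @"en-US"]];
    [SFSpeechRecognizer requestAuthorization:
            ^(SFSpeechRecognizerAuthorizationStatus status) {
        if (status != SFSpeechRecognizerAuthorizationStatusAuthorized) {
            NSLog(@"Speech recognition not authorized (%ld)", (long) status);
        }
    }];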
Here is the working code we eventually ended up with.

The key is installTapOnBus:, plus the "magic" line that measures the input volume:

float volume = fabsf(*buffer.floatChannelData[0]);
-(void) doActualRecording {
    NSLog(@"doActualRecording");
    @try {
        //if (!recording) {
        if (audioEngine != nil) {
            [audioEngine stop];
            [speechTask cancel];
            AVAudioInputNode *inputNode = [audioEngine inputNode];
            [inputNode removeTapOnBus: 0];
        }

        recording = YES;
        micButton.selected = YES;

        //NSLog(@"Starting recording... SFSpeechRecognizer Available? %d", [speechRecognizer isAvailable]);
        NSError *outError;
        //NSLog(@"AUDIO SESSION CATEGORY0: %@", [[AVAudioSession sharedInstance] category]);
        AVAudioSession *audioSession = [AVAudioSession sharedInstance];
        [audioSession setCategory: AVAudioSessionCategoryPlayAndRecord withOptions: AVAudioSessionCategoryOptionDefaultToSpeaker error: &outError];
        [audioSession setMode: AVAudioSessionModeMeasurement error: &outError];
        [audioSession setActive: true withOptions: AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error: &outError];

        SFSpeechAudioBufferRecognitionRequest *speechRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
        //NSLog(@"AUDIO SESSION CATEGORY1: %@", [[AVAudioSession sharedInstance] category]);
        if (speechRequest == nil) {
            NSLog(@"Unable to create SFSpeechAudioBufferRecognitionRequest.");
            return;
        }

        speechDetectionSamples = 0;

        // Keeping the old engine alive somehow fixes a crash on iPhone 7;
        // seems like a bug in iOS ARC / lack of GC.
        AVAudioEngine *temp = audioEngine;
        audioEngine = [[AVAudioEngine alloc] init];
        AVAudioInputNode *inputNode = [audioEngine inputNode];

        speechRequest.shouldReportPartialResults = true;

        // iOS speech does not detect end of speech, so must track silence.
        lastSpeechDetected = -1;

        speechTask = [speechRecognizer recognitionTaskWithRequest: speechRequest delegate: self];

        [inputNode installTapOnBus: 0 bufferSize: 4096 format: [inputNode outputFormatForBus: 0] block: ^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
            @try {
                long long millis = [[NSDate date] timeIntervalSince1970] * 1000;
                if (lastSpeechDetected != -1 && ((millis - lastSpeechDetected) > 1000)) {
                    // Over a second since the last recognition event: treat as silence.
                    lastSpeechDetected = -1;
                    [speechTask finish];
                    return;
                }
                [speechRequest appendAudioPCMBuffer: buffer];

                // Calculate volume level (peak of the first sample in channel 0).
                if ([buffer floatChannelData] != nil) {
                    float volume = fabsf(*buffer.floatChannelData[0]);
                    if (volume >= speechDetectionThreshold) {
                        speechDetectionSamples++;
                        if (speechDetectionSamples >= speechDetectionSamplesNeeded) {
                            // Need to change the mic button image on the main thread.
                            [[NSOperationQueue mainQueue] addOperationWithBlock: ^{
                                [micButton setImage: [UIImage imageNamed: @"micRecording"] forState: UIControlStateSelected];
                            }];
                        }
                    } else {
                        speechDetectionSamples = 0;
                    }
                }
            }
            @catch (NSException *e) {
                NSLog(@"Exception: %@", e);
            }
        }];

        [audioEngine prepare];
        [audioEngine startAndReturnError: &outError];
        NSLog(@"Error %@", outError);
        //}
    }
    @catch (NSException *e) {
        NSLog(@"Exception: %@", e);
    }
}
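One caveat worth noting (our observation, not part of the original answer): fabsf(*buffer.floatChannelData[0]) reads only the first sample of each 4096-frame buffer, so speechDetectionThreshold is effectively tuned to instantaneous peaks. Averaging the energy over the whole buffer (RMS) gives a steadier level, at the cost of a re-tuned threshold. A sketch:

    // RMS sketch: average energy across all frames of channel 0.
    // Requires <math.h> for sqrtf().
    static float rmsLevel(AVAudioPCMBuffer *buffer) {
        if (buffer.floatChannelData == NULL || buffer.frameLength == 0) {
            return 0.0f;
        }
        const float *samples = buffer.floatChannelData[0];
        float sum = 0.0f;
        for (AVAudioFrameCount i = 0; i < buffer.frameLength; i++) {
            sum += samples[i] * samples[i];
        }
        return sqrtf(sum / buffer.frameLength);
    }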
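Since [speechTask finish] ends the task once silence is detected, continuous listening needs the whole cycle kicked off again. One way is to restart from the delegate; a sketch (the direct call to doActualRecording is our assumption about how the rest of the app is wired up):

    - (void)speechRecognitionTask: (SFSpeechRecognitionTask *)task
            didFinishSuccessfully: (BOOL)successfully {
        // Restart the engine/tap/task cycle for the next utterance.
        [[NSOperationQueue mainQueue] addOperationWithBlock: ^{
            [self doActualRecording];
        }];
    }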