NoB*_*ugs 5 c# speech-recognition naudio sound-recognition naudio-framework
Microsoft语音系统有一个很好的示例代码,但是添加回送以记录正在播放的内容而不是通过麦克风记录的内容时,我遇到了问题。提供视频的文字说明,例如不在扬声器上播放时。似乎是要执行此操作的库,但是我在将其推到识别器的音频流时遇到类型错误:
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
captureStream.Flush();
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
recognizer.SetInputToAudioStream(captureStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond, AudioBitsPerSample.Sixteen, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
Run Code Online (Sandbox Code Playgroud)
已更新如您在修订后的代码中所见,它至少现在正在编译,但不能识别任何内部或外部语音。实际上,它输出:
384000
32
Run Code Online (Sandbox Code Playgroud)
因此,由于AudioBitsPerSample上没有“ thirtytwo”,也许我什至不能使用NAudio类来获取系统音频?
更新根据另一个答案,这似乎有些奏效,但并没有收到太多,我想它可能正在发送慢速或快速音频?
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
//recognizer.SetInputToDefaultAudioDevice();
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond/4, AudioBitsPerSample.Eight, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
Run Code Online (Sandbox Code Playgroud)
更新3-尝试将音频流重新编码为语音识别将执行的操作:很遗憾,如您所见,它无法获取捕获的重新编码音频...
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
//Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
var newFormat = new WaveFormat(8000, 16, 1);
//using (var conversionStream = new WaveFormatConversionStream(newFormat, capture)
var resampler = new MediaFoundationResampler(new NAudio.Wave.RawSourceWaveStream(captureStream,capture.WaveFormat), newFormat);
Stream captureConvertStream = new System.IO.MemoryStream();
resampler.ResamplerQuality = 60;
//WaveFileWriter.WriteWavFileToStream(captureConvertStream, resampler);
//recognizer.SetInputToDefaultAudioDevice();
Stream buffStream = new FakeStreamer(captureConvertStream);
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
8000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
capture.StartRecording();
var arr = new byte[128];
while (resampler.Read(arr, 0, arr.Length) > 0)
{
captureConvertStream.Write(arr, 0, arr.Length);
Console.WriteLine("Never getting here");
}
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
Run Code Online (Sandbox Code Playgroud)