为捕获过滤器添加音频功能

Mar*_*ark 3 windows audio directshow visual-c++ audio-capture

我正在尝试向捕获源过滤器添加音频功能,以便制作带有音频的虚拟摄像头。从TMHrdp的代码开始,我用另一个引脚对其进行了扩展,称为“音频”:

// Factory function referenced from g_Templates: allocates a new CVCam filter
// and hands it back as a CUnknown pointer.
//
// lpunk - forwarded unchanged to the CVCam constructor.
// phr   - forwarded unchanged to the CVCam constructor; asserted non-null.
CUnknown * WINAPI CVCam::CreateInstance(LPUNKNOWN lpunk, HRESULT *phr)
{
    ASSERT(phr);
    return new CVCam(lpunk, phr);
}

// Constructs the filter and creates its two output pins, "Video" and "Audio".
//
// lpunk - forwarded to the CSource base constructor.
// phr   - forwarded to both pin constructors; asserted non-null.
CVCam::CVCam(LPUNKNOWN lpunk, HRESULT *phr) : CSource(LPCSTR(FILTER_NAME), lpunk, CLSID_VirtualCam)
{
    ASSERT(phr);
    // Hold the filter state lock for the rest of the constructor.
    CAutoLock cAutoLock(&m_cStateLock);
    // NOTE(review): the array is allocated as CVCamStream*[2] and cast to
    // CSourceStream**, although slot 1 receives a CVAudioStream.
    // NOTE(review): the CSourceStream base constructor presumably registers
    // each pin with its parent filter on its own — confirm whether this manual
    // allocation is still required and who frees it.
    m_paStreams = (CSourceStream **) new CVCamStream*[2];
    m_paStreams[0] = new CVCamStream(phr, this, L"Video");   // pin 0: video output
    m_paStreams[1] = new CVAudioStream(phr, this, L"Audio"); // pin 1: audio output
}

// Filter-level QueryInterface.
//
// Requests for IAMStreamConfig and IKsPropertySet are forwarded to the pins:
// the video pin (index 0) must answer S_OK first, and the pointer finally
// handed to the caller is the one produced by the audio pin (index 1).
// Every other interface is resolved by CSource.
//
// riid - interface being requested.
// ppv  - receives the interface pointer; E_POINTER is returned if it is NULL.
//
// Returns the failing pin's HRESULT if either pin rejects the request,
// otherwise the result of the audio pin's QueryInterface.
//
// NOTE(review): whether the filter should expose the video or the audio pin's
// implementation here is a design decision; the original outcome is kept.
HRESULT CVCam::QueryInterface(REFIID riid, void **ppv)
{
    CheckPointer(ppv, E_POINTER);

    if (riid == _uuidof(IAMStreamConfig) || riid == _uuidof(IKsPropertySet))
    {
        HRESULT hr = m_paStreams[0]->QueryInterface(riid, ppv);
        if (hr != S_OK) return hr;

        // The video pin's pointer is about to be overwritten by the audio
        // pin's answer, so drop the reference it was returned with; otherwise
        // that reference can never be released by anyone.
        if (*ppv != NULL)
        {
            static_cast<IUnknown *>(*ppv)->Release();
            *ppv = NULL;
        }

        return m_paStreams[1]->QueryInterface(riid, ppv);
    }

    return CSource::QueryInterface(riid, ppv);
}

// Builds the audio output pin.
//
// phr      - forwarded to the CSourceStream base constructor.
// pParent  - owning CVCam filter; forwarded to the base and cached in m_pParent.
// pPinName - pin name; forwarded to the base both as the wide pin name and,
//            cast to LPCSTR, as the object name.
CVAudioStream::CVAudioStream(HRESULT *phr, CVCam *pParent, LPCWSTR pPinName)
    : CSourceStream(LPCSTR(pPinName), phr, pParent, pPinName),
      m_pParent(pParent)
{
    // Ask GetMediaType for entry 0 and store the answer in m_mt; the returned
    // HRESULT is not inspected.
    this->GetMediaType(0, &m_mt);
}

// Destructor: the body performs no work of its own.
CVAudioStream::~CVAudioStream() {}

// Pin-level QueryInterface.
//
// Hands out this object for IAMStreamConfig, IKsPropertySet and
// IAMBufferNegotiation (adding one reference); every other request is
// delegated to CSourceStream.
//
// NOTE(review): advertising IAMBufferNegotiation obliges the pin to implement
// it correctly; no implementation of that interface is visible in this section.
HRESULT CVAudioStream::QueryInterface(REFIID riid, void **ppv)
{
    void *pInterface;

    if (riid == _uuidof(IAMStreamConfig))
    {
        pInterface = (IAMStreamConfig*)this;
    }
    else if (riid == _uuidof(IKsPropertySet))
    {
        pInterface = (IKsPropertySet*)this;
    }
    else if (riid == _uuidof(IAMBufferNegotiation))
    {
        pInterface = (IAMBufferNegotiation*)this;
    }
    else
    {
        return CSourceStream::QueryInterface(riid, ppv);
    }

    *ppv = pInterface;
    AddRef();
    return S_OK;
}

// Placeholder for producing one audio sample.
//
// pms - media sample to fill; currently left untouched.
//
// Always reports success without writing any data.
HRESULT CVAudioStream::FillBuffer(IMediaSample *pms)
{
    // TODO: fill buffer with Windows audio samples
    return S_OK;
}

// Quality-control notification handler; not implemented.
//
// pSender - ignored.
// q       - ignored.
//
// Always returns E_NOTIMPL.
STDMETHODIMP CVAudioStream::Notify(IBaseFilter * pSender, Quality q)
{
    (void)pSender;
    (void)q;
    return E_NOTIMPL;
}

// Accepts the media type agreed for the connection by delegating straight to
// CSourceStream::SetMediaType and returning its result unchanged.
HRESULT CVAudioStream::SetMediaType(const CMediaType *pmt)
{
    return CSourceStream::SetMediaType(pmt);
}

// Fills *pwfex with the one fixed format this pin offers (PCM, 2 channels,
// 11025 Hz, 16 bits per sample) and then lets CreateAudioMediaType populate
// *pmt from it.
//
// pwfex - wave format structure to overwrite; must not be NULL.
// pmt   - media type to populate; must not be NULL.
//
// Returns E_POINTER if either argument is NULL, otherwise the result of
// CreateAudioMediaType.
HRESULT setupPwfex(WAVEFORMATEX *pwfex, AM_MEDIA_TYPE *pmt) {
    // Callers pass the result of AllocFormatBuffer, which can be NULL; refuse
    // to write through a null pointer.
    if (pwfex == NULL || pmt == NULL) return E_POINTER;

    pwfex->wFormatTag = WAVE_FORMAT_PCM;
    pwfex->cbSize = 0;
    pwfex->nChannels = 2;
    pwfex->nSamplesPerSec = 11025;
    pwfex->wBitsPerSample = 16;

    // Bytes per sample frame, and bytes per second derived from it.
    pwfex->nBlockAlign = (WORD)((pwfex->wBitsPerSample * pwfex->nChannels) / 8);
    pwfex->nAvgBytesPerSec = pwfex->nSamplesPerSec * pwfex->nBlockAlign;

    // NOTE(review): the third argument is FALSE; presumably this tells the
    // helper not to (re)allocate the format block — confirm against its docs.
    return ::CreateAudioMediaType(pwfex, pmt, FALSE);
}

/*HRESULT CVAudioStream::setAsNormal(CMediaType *pmt) 
{
    WAVEFORMATEX *pwfex;
    pwfex = (WAVEFORMATEX *)pmt->AllocFormatBuffer(sizeof(WAVEFORMATEX));
    ZeroMemory(pwfex, sizeof(WAVEFORMATEX));
    if (NULL == pwfex) return E_OUTOFMEMORY;
    return setupPwfex(pwfex, pmt);
}*/

// Supplies the pin's media type for the requested index.
//
// iPosition - zero-based index of the requested type.
// pmt       - receives the media type.
//
// Returns E_INVALIDARG for a negative index, VFW_S_NO_MORE_ITEMS for any index
// above 0, and for index 0 copies m_mt into *pmt and returns S_OK.
HRESULT CVAudioStream::GetMediaType(int iPosition, CMediaType *pmt)
{
    if (iPosition < 0) return E_INVALIDARG;
    if (iPosition > 0) return VFW_S_NO_MORE_ITEMS;

    // NOTE(review): the constructor calls GetMediaType(0, &m_mt), so on that
    // call this branch assigns m_mt to itself and m_mt is never given an audio
    // format by this function.
    if (iPosition == 0)
    {
        *pmt = m_mt;
        return S_OK;
    }

    // NOTE(review): unreachable — every value of iPosition has already
    // returned above, so the format block below is never built.
    WAVEFORMATEX *pwfex = (WAVEFORMATEX *)pmt->AllocFormatBuffer(sizeof(WAVEFORMATEX));
    setupPwfex(pwfex, pmt);
    return S_OK;
}

// Accepts a proposed media type only if it compares equal to the pin's
// current type m_mt.
//
// pMediaType - proposed type; E_POINTER is returned if it is NULL.
//
// Returns S_OK on a match and E_INVALIDARG otherwise.
HRESULT CVAudioStream::CheckMediaType(const CMediaType *pMediaType)
{
    CheckPointer(pMediaType, E_POINTER);

    if (*pMediaType != m_mt) return E_INVALIDARG;
    return S_OK;
}

// 16 * 1024 = 16384; presumably a byte count for audio buffers.
// NOTE(review): not referenced by any code visible in this section.
const int WaveBufferChunkSize = 16 * 1024;

// Negotiates the allocator properties for this pin: one buffer of
// expectedMaxBufferSize bytes.
//
// pAlloc      - allocator to configure; E_POINTER if NULL.
// pProperties - requested properties, updated in place; E_POINTER if NULL.
//
// Returns the allocator's failure code if SetProperties fails, E_FAIL if the
// allocator granted a smaller buffer than requested, and NOERROR otherwise.
//
// NOTE(review): expectedMaxBufferSize is not defined in the visible code;
// it is assumed to be declared elsewhere in the project.
HRESULT CVAudioStream::DecideBufferSize(IMemAllocator *pAlloc, ALLOCATOR_PROPERTIES *pProperties)
{
    CheckPointer(pAlloc, E_POINTER);
    CheckPointer(pProperties, E_POINTER);

    pProperties->cBuffers = 1;
    pProperties->cbBuffer = expectedMaxBufferSize;

    ALLOCATOR_PROPERTIES Actual;
    HRESULT hr = pAlloc->SetProperties(pProperties, &Actual);
    if (FAILED(hr)) return hr;

    // The allocator may grant less than was asked for; that is not usable.
    if (Actual.cbBuffer < pProperties->cbBuffer) return E_FAIL;
    return NOERROR;
}

// Hook for streaming-thread start-up. Currently a no-op that reports success.
//
// Two steps are disabled here and kept for reference:
//   GetMediaType(0, &m_mt);
//   HRESULT hr = LoopbackCaptureSetup();
//   if (FAILED(hr)) return hr;
HRESULT CVAudioStream::OnThreadCreate()
{
    return S_OK;
}

// IAMStreamConfig::SetFormat: adopts the given media type as the pin's
// current type and, if the pin is connected, asks the graph to reconnect it.
//
// pmt - requested format. A NULL pointer is accepted and treated as "nothing
//       to do".
//
// Returns S_OK on success (or when pmt is NULL) and E_FAIL when
// CheckMediaType does not accept the format.
HRESULT STDMETHODCALLTYPE CVAudioStream::SetFormat(AM_MEDIA_TYPE *pmt)
{
    if (!pmt) return S_OK;
    if (CheckMediaType((CMediaType *)pmt) != S_OK) return E_FAIL;
    m_mt = *pmt;

    // Initialise to NULL so the test below is well-defined even if
    // ConnectedTo fails without writing to the out parameter.
    IPin *pin = NULL;
    ConnectedTo(&pin);
    if (pin)
    {
        // ConnectedTo hands back a referenced pin; it is only needed as a
        // "connected?" flag, so release it straight away.
        pin->Release();

        IFilterGraph *pGraph = m_pParent->GetGraph();
        if (pGraph) pGraph->Reconnect(this);
    }

    return S_OK;
}

// IAMStreamConfig::GetFormat: returns a newly allocated copy of the pin's
// current media type m_mt.
//
// ppmt - receives the copy; E_POINTER is returned if it is NULL.
//
// Returns S_OK on success and E_OUTOFMEMORY if the copy could not be created.
HRESULT STDMETHODCALLTYPE CVAudioStream::GetFormat(AM_MEDIA_TYPE **ppmt)
{
    CheckPointer(ppmt, E_POINTER);

    *ppmt = CreateMediaType(&m_mt);
    if (*ppmt == NULL) return E_OUTOFMEMORY;
    return S_OK;
}

// IAMStreamConfig::GetNumberOfCapabilities: reports a single capability whose
// descriptor is an AUDIO_STREAM_CONFIG_CAPS structure.
//
// piCount - receives 1; E_POINTER if NULL.
// piSize  - receives sizeof(AUDIO_STREAM_CONFIG_CAPS); E_POINTER if NULL.
HRESULT STDMETHODCALLTYPE CVAudioStream::GetNumberOfCapabilities(int *piCount, int *piSize)
{
    CheckPointer(piCount, E_POINTER);
    CheckPointer(piSize, E_POINTER);

    *piCount = 1;
    *piSize = sizeof(AUDIO_STREAM_CONFIG_CAPS);
    return S_OK;
}

// IAMStreamConfig::GetStreamCaps: returns the single supported format together
// with an AUDIO_STREAM_CONFIG_CAPS block whose minimum and maximum values are
// both pinned to that format.
//
// iIndex - capability index; only 0 is valid.
// pmt    - receives a newly allocated media type (caller frees it).
// pSCC   - caller buffer that receives the AUDIO_STREAM_CONFIG_CAPS.
//
// Returns E_INVALIDARG for a negative index, S_FALSE for an index above 0,
// E_POINTER for a NULL output pointer, E_OUTOFMEMORY if the media type cannot
// be copied, E_UNEXPECTED if the copied type carries no WAVEFORMATEX-sized
// format block, the setupPwfex failure code if that helper fails, and S_OK
// otherwise. On every failure after allocation *pmt is freed and set to NULL.
HRESULT STDMETHODCALLTYPE CVAudioStream::GetStreamCaps(int iIndex, AM_MEDIA_TYPE **pmt, BYTE *pSCC)
{
    if (iIndex < 0) return E_INVALIDARG;
    if (iIndex > 0) return S_FALSE;
    if (pSCC == NULL) return E_POINTER;
    if (pmt == NULL) return E_POINTER;

    *pmt = CreateMediaType(&m_mt);
    if (*pmt == NULL) return E_OUTOFMEMORY;

    // The format block is about to be written as a WAVEFORMATEX; if m_mt had
    // no (or a too small) format block this would be a wild write.
    if ((*pmt)->pbFormat == NULL || (*pmt)->cbFormat < sizeof(WAVEFORMATEX))
    {
        DeleteMediaType(*pmt);
        *pmt = NULL;
        return E_UNEXPECTED;
    }

    DECLARE_PTR(WAVEFORMATEX, pAudioFormat, (*pmt)->pbFormat);
    HRESULT hr = setupPwfex(pAudioFormat, *pmt);
    if (FAILED(hr))
    {
        DeleteMediaType(*pmt);
        *pmt = NULL;
        return hr;
    }

    AUDIO_STREAM_CONFIG_CAPS* pASCC = (AUDIO_STREAM_CONFIG_CAPS*)pSCC;
    ZeroMemory(pSCC, sizeof(AUDIO_STREAM_CONFIG_CAPS));

    // Min == max everywhere: exactly one format is offered, so the
    // granularity values below have no practical effect.
    pASCC->guid = MEDIATYPE_Audio;
    pASCC->MaximumChannels = pAudioFormat->nChannels;
    pASCC->MinimumChannels = pAudioFormat->nChannels;
    pASCC->ChannelsGranularity = 1; // doesn't matter
    pASCC->MaximumSampleFrequency = pAudioFormat->nSamplesPerSec;
    pASCC->MinimumSampleFrequency = pAudioFormat->nSamplesPerSec;
    pASCC->SampleFrequencyGranularity = 11025; // doesn't matter
    pASCC->MaximumBitsPerSample = pAudioFormat->wBitsPerSample;
    pASCC->MinimumBitsPerSample = pAudioFormat->wBitsPerSample;
    pASCC->BitsPerSampleGranularity = 16; // doesn't matter

    return S_OK;
}

// IKsPropertySet::Set: no property on this pin can be written, so every
// request is answered with E_NOTIMPL and all arguments are ignored.
HRESULT CVAudioStream::Set(
    REFGUID guidPropSet,
    DWORD dwID,
    void *pInstanceData,
    DWORD cbInstanceData,
    void *pPropData,
    DWORD cbPropData
)
{
    return E_NOTIMPL;
}

// IKsPropertySet::Get: answers exactly one query — the pin category
// (AMPROPSETID_Pin / AMPROPERTY_PIN_CATEGORY) — with PIN_CATEGORY_CAPTURE.
//
// guidPropSet    - property set; must be AMPROPSETID_Pin.
// dwPropID       - property id; must be AMPROPERTY_PIN_CATEGORY.
// pInstanceData  - ignored.
// cbInstanceData - ignored.
// pPropData      - optional buffer that receives the category GUID.
// cbPropData     - size of pPropData in bytes.
// pcbReturned    - optional; receives sizeof(GUID).
//
// Returns E_PROP_SET_UNSUPPORTED / E_PROP_ID_UNSUPPORTED for other properties,
// E_POINTER when both output pointers are NULL, E_UNEXPECTED when the buffer
// is smaller than a GUID, and S_OK otherwise (including a size-only query).
HRESULT CVAudioStream::Get(
    REFGUID guidPropSet,
    DWORD dwPropID,
    void *pInstanceData,
    DWORD cbInstanceData,
    void *pPropData,
    DWORD cbPropData,
    DWORD *pcbReturned
)
{
    if (guidPropSet != AMPROPSETID_Pin)
    {
        return E_PROP_SET_UNSUPPORTED;
    }
    if (dwPropID != AMPROPERTY_PIN_CATEGORY)
    {
        return E_PROP_ID_UNSUPPORTED;
    }

    const bool wantsValue = (pPropData != NULL);
    const bool wantsSize = (pcbReturned != NULL);
    if (!wantsValue && !wantsSize)
    {
        return E_POINTER;
    }

    if (wantsSize)
    {
        *pcbReturned = sizeof(GUID);
    }
    if (!wantsValue)
    {
        // Size-only query.
        return S_OK;
    }
    if (cbPropData < sizeof(GUID))
    {
        return E_UNEXPECTED;
    }

    *static_cast<GUID *>(pPropData) = PIN_CATEGORY_CAPTURE;
    return S_OK;
}

// IKsPropertySet::QuerySupported: only the pin-category property is known,
// and it is reported as readable (KSPROPERTY_SUPPORT_GET).
//
// guidPropSet  - property set; must be AMPROPSETID_Pin.
// dwPropID     - property id; must be AMPROPERTY_PIN_CATEGORY.
// pTypeSupport - optional; receives the support flags.
HRESULT CVAudioStream::QuerySupported(REFGUID guidPropSet, DWORD dwPropID, DWORD *pTypeSupport)
{
    if (guidPropSet != AMPROPSETID_Pin)
    {
        return E_PROP_SET_UNSUPPORTED;
    }
    if (dwPropID != AMPROPERTY_PIN_CATEGORY)
    {
        return E_PROP_ID_UNSUPPORTED;
    }

    if (pTypeSupport != NULL)
    {
        *pTypeSupport = KSPROPERTY_SUPPORT_GET;
    }
    return S_OK;
}
Run Code Online (Sandbox Code Playgroud)

我的第一个问题出现在我把过滤器插入 GraphStudioNext 并打开其属性页面的时候:音频引脚显示以下(不正确的)信息:

majorType = GUID_NULL
subType = GUID_NULL
formattype = GUID_NULL
Run Code Online (Sandbox Code Playgroud)

当然,由于媒体类型无效,我无法将任何内容连接到该引脚。我原本期望看到类似 MEDIATYPE_Audio 的类型,因为我已经这样设置了:

// Class ID of the virtual camera filter; used by the factory template and by
// RegisterFilters.
DEFINE_GUID(CLSID_VirtualCam, 0x8e14549a, 0xdb61, 0x4309, 0xaf, 0xa1, 0x35, 0x78, 0xe9, 0x27, 0xe9, 0x33);

// Registration media type for the "Video" pin: major type video, subtype
// MEDIASUBTYPE_NULL.
const AMOVIESETUP_MEDIATYPE AMSMediaTypesVideo = 
{
    &MEDIATYPE_Video,
    &MEDIASUBTYPE_NULL
};

// Registration media type for the "Audio" pin: major type audio, subtype
// MEDIASUBTYPE_NULL.
// NOTE(review): this table is only referenced from the registration data
// below; the pin's own type comes from CVAudioStream::GetMediaType.
const AMOVIESETUP_MEDIATYPE AMSMediaTypesAudio =
{
    &MEDIATYPE_Audio,
    &MEDIASUBTYPE_NULL
};

// Registration descriptors for the filter's two output pins, in the order
// "Video" then "Audio". Used by AMSFilterVCam and RegisterFilters.
const AMOVIESETUP_PIN AMSPinVCam[] =
{
    {
        L"Video",             // Pin string name
        FALSE,                 // Is it rendered
        TRUE,                  // Is it an output
        FALSE,                 // Can we have none
        FALSE,                 // Can we have many
        &CLSID_NULL,           // Connects to filter
        NULL,                  // Connects to pin
        1,                     // Number of types
        &AMSMediaTypesVideo      // Pin media types (video)
    },
    {
        L"Audio",             // Pin string name
        FALSE,                 // Is it rendered
        TRUE,                  // Is it an output
        FALSE,                 // Can we have none
        FALSE,                 // Can we have many
        &CLSID_NULL,           // Connects to filter
        NULL,                  // Connects to pin
        1,                     // Number of types
        &AMSMediaTypesAudio      // Pin media types (audio)
    }
};

// Registration descriptor for the filter itself; referenced from g_Templates.
const AMOVIESETUP_FILTER AMSFilterVCam =
{
    &CLSID_VirtualCam,  // Filter CLSID
    FILTER_NAME,     // String name
    MERIT_DO_NOT_USE,      // Filter merit
    2,                     // Number of pins (entries in AMSPinVCam)
    AMSPinVCam             // Pin details
};

// Class-factory table: one entry that maps CLSID_VirtualCam to
// CVCam::CreateInstance and attaches the AMSFilterVCam registration data.
CFactoryTemplate g_Templates[] = 
{
    {
        FILTER_NAME,
        &CLSID_VirtualCam,
        CVCam::CreateInstance,
        NULL,
        &AMSFilterVCam
    },
};

// Number of entries in g_Templates, computed from the array size.
int g_cTemplates = sizeof(g_Templates) / sizeof(g_Templates[0]);

// Registers or unregisters the virtual camera.
//
// bRegister - TRUE:  register the COM server for this module, then register
//                    the filter in CLSID_VideoInputDeviceCategory.
//             FALSE: remove the filter from that category, then unregister
//                    the COM server.
//
// Returns the HRESULT of the first step that fails, or of the last step
// performed when everything succeeds.
STDAPI RegisterFilters( BOOL bRegister )
{
    WCHAR achFileName[MAX_PATH];
    char achTemp[MAX_PATH];
    ASSERT(g_hInst != 0);

    // Path of this module, converted to wide characters for registration.
    if( 0 == GetModuleFileNameA(g_hInst, achTemp, sizeof(achTemp))) return AmHresultFromWin32(GetLastError());
    MultiByteToWideChar(CP_ACP, 0L, achTemp, lstrlenA(achTemp) + 1, achFileName, NUMELMS(achFileName));

    // Remember the CoInitialize result separately: it decides whether the
    // matching CoUninitialize at the end is owed, even after hr is reused.
    const HRESULT hrCom = CoInitialize(0);
    HRESULT hr = hrCom;
    if(bRegister)
    {
        hr = AMovieSetupRegisterServer(CLSID_VirtualCam, FILTER_NAME, achFileName, L"Both", L"InprocServer32");
    }

    if( SUCCEEDED(hr) )
    {
        IFilterMapper2 *fm = 0;
        hr = CreateComObject( CLSID_FilterMapper2, IID_IFilterMapper2, fm );
        if( SUCCEEDED(hr) )
        {
            if(bRegister)
            {
                IMoniker *pMoniker = 0;
                REGFILTER2 rf2;
                rf2.dwVersion = 1;
                rf2.dwMerit = MERIT_DO_NOT_USE;
                rf2.cPins = 2;
                rf2.rgPins = AMSPinVCam;
                hr = fm->RegisterFilter(CLSID_VirtualCam, FILTER_NAME, &pMoniker, &CLSID_VideoInputDeviceCategory, NULL, &rf2);

                // A moniker handed back through the in/out parameter is owned
                // by the caller; it is not needed here, so release it.
                if(pMoniker) pMoniker->Release();
            }
            else
            {
                hr = fm->UnregisterFilter(&CLSID_VideoInputDeviceCategory, 0, CLSID_VirtualCam);
            }
        }

      if(fm) fm->Release();
    }

    if( SUCCEEDED(hr) && !bRegister ) hr = AMovieSetupUnregisterServer( CLSID_VirtualCam );

    CoFreeUnusedLibraries();
    // Only balance a CoInitialize call that actually succeeded.
    if( SUCCEEDED(hrCom) ) CoUninitialize();
    return hr;
}
Run Code Online (Sandbox Code Playgroud)

第二个问题:还有一个“延迟”选项卡,但是当我点击它时,GraphStudioNext永远挂起,VS 调试器(附加到该进程)什么也没说。什么代码控制这个选项卡?

更新

解决了第一个问题:

// Supplies the pin's only media type: PCM audio described by a WAVEFORMATEX
// that setupPwfex fills in.
//
// iPosition - zero-based index of the requested type; only 0 is valid.
// pmt       - media type to fill; E_POINTER is returned if it is NULL.
//
// Returns E_INVALIDARG for a negative index, VFW_S_NO_MORE_ITEMS for an index
// above 0, E_OUTOFMEMORY if the format block cannot be allocated, the
// setupPwfex failure code if that helper fails, and S_OK otherwise.
HRESULT CVAudioStream::GetMediaType(int iPosition, CMediaType *pmt)
{
    if (iPosition < 0) return E_INVALIDARG;
    if (iPosition > 0) return VFW_S_NO_MORE_ITEMS;
    CheckPointer(pmt, E_POINTER);

    // AllocFormatBuffer can fail; never hand a null block to setupPwfex.
    WAVEFORMATEX *pwfex = (WAVEFORMATEX *)pmt->AllocFormatBuffer(sizeof(WAVEFORMATEX));
    if (pwfex == NULL) return E_OUTOFMEMORY;

    HRESULT hr = setupPwfex(pwfex, pmt);
    if (FAILED(hr)) return hr;

    // State the type identifiers explicitly so the pin never advertises
    // GUID_NULL for major type, subtype or format type.
    pmt->SetType(&MEDIATYPE_Audio);
    pmt->SetFormatType(&FORMAT_WaveFormatEx);
    pmt->SetTemporalCompression(FALSE);

    pmt->SetSubtype(&MEDIASUBTYPE_PCM);
    pmt->SetSampleSize(pwfex->nBlockAlign);

    return S_OK;
}
Run Code Online (Sandbox Code Playgroud)

Rom*_* R. 5

简短版本:Microsoft 并没有真正提供用于实现虚拟音频设备的 API,使其能够像真实的音频捕获设备一样被应用程序顺利接受。

虚拟视频捕获过滤器由于历史原因往往可以正常工作,但音频的情况并非如此。要添加应用程序能够识别的音频设备,正确的做法是实现音频设备的内核级驱动程序。


显示延迟选项卡是因为您假装正在实现IAMBufferNegotiation接口:

if (riid == _uuidof(IAMBufferNegotiation)) *ppv = (IAMBufferNegotiation*)this;
Run Code Online (Sandbox Code Playgroud)

实现可能是不正确的,这会导致某些意外行为(冻结、崩溃等)。

在同一个过滤器上添加音频引脚是可行的,但如果您希望该音频流被应用程序当作一个人工(虚拟)源来选用,这可能不是最好的主意。这种做法在一般意义上说得通,但真实设备几乎从不以这种方式公开音频流。

长话短说,唯一能够以这种方式利用音频流的应用程序就是您自己开发的应用程序:没有哪个知名的应用程序会尝试在视频源过滤器上查找音频引脚。因此,在这种引脚上实现 IAMStreamConfig,尤其是 IKsPropertySet,是没有用处的。

您将无法在 Audio Capture Sources 类别下注册过滤器,因为您注册了过滤器,并且此过滤器首先公开视频输出引脚,然后才会有一些辅助音频。如果您的目标是通过 DirectShow 使用音频的应用程序(由于超出本问题范围的原因,这种情况已经非常罕见),您应该开发一个单独的源过滤器。您当然可以让这两个过滤器在幕后相互通信以协作提供某些提要,但就 DirectShow 而言,过滤器通常显示为独立的。

...真实的网络摄像头也暴露了两个不同的过滤器,这就是为什么在像 Skype 这样的应用程序中,我们必须在视频和音频设备下进行选择。
创建两个完全不同的项目和过滤器是否更好:一个用于视频,一个用于音频?

真实和典型的相机:

在此处输入图片说明

由于“真正的”物理摄像头通常配有内核级驱动程序,它们在 DirectShow 中是通过 WDM 视频捕获过滤器呈现的:该过滤器充当代理,并在“视频捕获源”类别下枚举摄像头驱动程序的“DirectShow 包装器”——您注册虚拟摄像头时使用的也是同一个类别。

也就是说,这种设计使您能够在可用设备列表中混合真实和虚拟摄像机,当涉及到视频捕获时,基于 DirectShow 的应用程序使用这些设备。这种方法有其局限性,我之前在此问题中进行了描述,并参考了 Virtual DirectShow Sources 的适用性

由于 DirectShow 的继任者 Media Foundation 总体上没有很好的接受度,而且 Media Foundation 既没有良好的向后兼容性,也没有提供良好的视频捕获扩展性,因此包括微软自己在内的众多应用程序仍在通过 DirectShow 消费视频捕获。反之亦然,那些研究 Windows 视频捕获 API 的人也经常对 DirectShow 感兴趣,而不是“当前”API,因为示例和相关信息的可用性、API 可扩展性、应用程序集成选项。

然而,音频并非如此。在 DirectShow 停止开发时,DirectShow 的音频捕获就还算不上一流。Windows Vista 为音频引入了新的 API——WASAPI,而 DirectShow 并没有获得与新 API 对接的相应组件,无论是音频捕获还是播放。音频本身更简单,而且 WASAPI 功能强大、对开发人员友好,因此开发人员开始转向新的 API 来完成音频相关的任务。使用 DirectShow 进行音频捕获的应用程序要少得多,您实现的虚拟音频源很可能派不上用场:对于通过 WASAPI 进行音频捕获的应用程序,您的设备将始终“不可见”。即使某个应用程序为 Windows XP 保留了通过 DirectShow 进行音频捕获的回退代码路径,在较新的操作系统上这对您也几乎没有帮助。

继续阅读 StackOverflow 上的音频:

此外,您不必为视频和音频过滤器设置单独的项目。您可以将它们混合在同一个项目中,它们可以是单独注册的独立过滤器。