几个关键点:
1、声卡采播,是采集声卡环回声音,也就是本机电脑播放音乐的声音,同时要能播放到广播;
来源: 使用Core Audio实现对声卡输出的捕捉 涉及的接口有: IMMDeviceEnumerator IMMDevice IAudioClient IAudioCaptureClient 主要过程: 创建多媒体设备枚举器(IMMDeviceEnumerator) 通过多媒体设备枚举器获取声卡接口(IMMDevice) 通过声卡接口获取声卡客户端接口(IAudioClient) 通过声卡客户端接口(IAudioClient)可获取声卡输出的音频参数、初始化声卡、获取声卡输出缓冲区的大小、开启/停止对声卡输出的采集 通过声卡采集客户端接口(IAudioCaptureClient)可获取采集的声卡输出数据,并对内部缓冲区进行控制
deepseek给的代码:
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <functiondiscoverykeys_devpkey.h>
#include <iostream>
#include <fstream>
#include <vector>

// Build note: the COM calls used below (CoInitializeEx, CoCreateInstance,
// CoTaskMemFree) require linking against ole32.lib.

// Releases a COM interface pointer and resets it to NULL. Safe to call with a
// NULL slot or a slot that already holds NULL.
template <class T>
void SafeRelease(T **ppT)
{
    if (ppT && *ppT) {
        (*ppT)->Release();
        *ppT = NULL;
    }
}

// Captures what the default render endpoint (speakers) is playing, using
// WASAPI loopback, and can dump the captured stream to a WAV file.
//
// Usage order: Initialize() -> StartCapture() -> CaptureDataToFile() ->
// StopCapture(). The object initializes COM in Initialize() and uninitializes
// it in its destructor, after every interface has been released, so callers
// must not call CoUninitialize() themselves. Construct and destroy the object
// on the same thread.
class AudioCapture {
private:
    IMMDeviceEnumerator *pEnumerator;
    IMMDevice *pDevice;
    IAudioClient *pAudioClient;
    IAudioCaptureClient *pCaptureClient;
    WAVEFORMATEX *pwfx;      // mix format, allocated by GetMixFormat (CoTaskMemFree)
    bool isCapturing;
    bool comInitialized;     // true when this object owes a CoUninitialize()

    // The object owns raw COM pointers; copying would double-release them.
    AudioCapture(const AudioCapture&) = delete;
    AudioCapture& operator=(const AudioCapture&) = delete;

public:
    AudioCapture()
        : pEnumerator(NULL), pDevice(NULL), pAudioClient(NULL),
          pCaptureClient(NULL), pwfx(NULL),
          isCapturing(false), comInitialized(false)
    {
    }

    ~AudioCapture()
    {
        StopCapture();
        SafeRelease(&pCaptureClient);
        SafeRelease(&pAudioClient);
        SafeRelease(&pDevice);
        SafeRelease(&pEnumerator);
        if (pwfx) {
            CoTaskMemFree(pwfx);
            pwfx = NULL;
        }
        // COM must stay initialized until every interface above is released.
        if (comInitialized) {
            CoUninitialize();
            comInitialized = false;
        }
    }

    // Initializes COM, opens the default render endpoint and queries its mix
    // format. Returns S_FALSE if the object is already initialized, otherwise
    // the HRESULT of the first failing call or S_OK.
    HRESULT Initialize()
    {
        if (pAudioClient) return S_FALSE;

        HRESULT hr = S_OK;

        if (!comInitialized) {
            hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
            if (FAILED(hr)) return hr;
            comInitialized = true;  // S_OK and S_FALSE both need a matching CoUninitialize
        }

        // Device enumerator.
        hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
                              __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
        if (FAILED(hr)) return hr;

        // Default render device (speakers); loopback capture records its output.
        hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
        if (FAILED(hr)) return hr;

        hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL,
                               (void**)&pAudioClient);
        if (FAILED(hr)) return hr;

        hr = pAudioClient->GetMixFormat(&pwfx);
        if (FAILED(hr)) return hr;

        PrintFormatInfo();
        return hr;
    }

    // Puts the audio client into shared-mode loopback capture and starts the
    // stream. Returns E_UNEXPECTED when Initialize() has not succeeded.
    HRESULT StartCapture()
    {
        if (!pAudioClient || !pwfx) return E_UNEXPECTED;

        // Ask for a one-second endpoint buffer (units of 100 ns) so that a late
        // wake-up of the polling loop does not overflow it.
        const REFERENCE_TIME requestedDuration = 10000000;

        HRESULT hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                              AUDCLNT_STREAMFLAGS_LOOPBACK,
                                              requestedDuration, 0, pwfx, NULL);
        if (FAILED(hr)) return hr;

        hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient),
                                      (void**)&pCaptureClient);
        if (FAILED(hr)) return hr;

        hr = pAudioClient->Start();
        if (FAILED(hr)) return hr;

        isCapturing = true;
        std::cout << "开始采集声卡输出..." << std::endl;
        return hr;
    }

    // Stops the stream if it is running; a no-op otherwise.
    void StopCapture()
    {
        if (pAudioClient && isCapturing) {
            pAudioClient->Stop();
            isCapturing = false;
            std::cout << "停止采集声卡输出" << std::endl;
        }
    }

    // Records roughly captureDurationMs milliseconds of loopback audio into a
    // WAV file, stored in the device mix format (no sample conversion).
    //
    // filename          - path of the WAV file to create/overwrite.
    // captureDurationMs - wall-clock recording time in milliseconds.
    //
    // Returns S_OK on success, E_UNEXPECTED if capture was not started, E_FAIL
    // on a file error, or the failing HRESULT from the capture client.
    HRESULT CaptureDataToFile(const char* filename, DWORD captureDurationMs = 5000)
    {
        if (!pCaptureClient || !pwfx) return E_UNEXPECTED;

        std::ofstream file(filename, std::ios::binary);
        if (!file.is_open()) {
            std::cerr << "无法创建文件: " << filename << std::endl;
            return E_FAIL;
        }

        // Placeholder header; the real sizes are only known after capturing.
        WriteWavHeader(file, 0);

        std::cout << "正在录制音频到文件: " << filename << std::endl;

        HRESULT hr = S_OK;
        DWORD totalBytes = 0;
        std::vector<char> silence;  // reused zero buffer for SILENT packets
        const ULONGLONG deadline = GetTickCount64() + captureDurationMs;

        while (isCapturing && GetTickCount64() < deadline) {
            // Poll interval; the loop below drains everything queued meanwhile.
            Sleep(10);

            UINT32 packetFrames = 0;
            hr = pCaptureClient->GetNextPacketSize(&packetFrames);
            while (SUCCEEDED(hr) && packetFrames > 0) {
                BYTE *pData = NULL;
                UINT32 framesAvailable = 0;
                DWORD flags = 0;

                hr = pCaptureClient->GetBuffer(&pData, &framesAvailable, &flags, NULL, NULL);
                if (FAILED(hr)) break;

                const UINT32 byteCount = framesAvailable * pwfx->nBlockAlign;
                if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
                    // The buffer content is undefined for silent packets; emit zeros.
                    silence.assign(byteCount, 0);
                    file.write(silence.data(), byteCount);
                } else {
                    file.write(reinterpret_cast<const char*>(pData), byteCount);
                }
                totalBytes += byteCount;

                // Every successful GetBuffer must be paired with ReleaseBuffer.
                hr = pCaptureClient->ReleaseBuffer(framesAvailable);
                if (FAILED(hr)) break;

                hr = pCaptureClient->GetNextPacketSize(&packetFrames);
            }
            if (FAILED(hr)) break;
        }

        // RIFF chunks are word aligned: pad an odd-sized data chunk.
        if (totalBytes & 1) {
            file.put(0);
        }

        // Rewrite the header with the real data size (same length as the placeholder).
        file.seekp(0, std::ios::beg);
        WriteWavHeader(file, totalBytes);
        file.close();

        std::cout << "采集到 " << totalBytes << " 字节数据" << std::endl;

        if (FAILED(hr)) return hr;
        if (!file) return E_FAIL;

        std::cout << "音频录制完成" << std::endl;
        return S_OK;
    }

private:
    // Prints the fields of the mix format to stdout.
    void PrintFormatInfo()
    {
        if (!pwfx) return;

        std::cout << "音频格式信息:" << '\n';
        std::cout << " 格式标签: 0x" << std::hex << pwfx->wFormatTag << std::dec << '\n';
        std::cout << " 声道数: " << pwfx->nChannels << '\n';
        std::cout << " 采样率: " << pwfx->nSamplesPerSec << " Hz" << '\n';
        std::cout << " 每秒字节数: " << pwfx->nAvgBytesPerSec << '\n';
        std::cout << " 块对齐: " << pwfx->nBlockAlign << '\n';
        std::cout << " 位深度: " << pwfx->wBitsPerSample << std::endl;
    }

    // Writes a 32-bit value in host byte order (little-endian on Windows).
    static void WriteU32(std::ofstream &file, DWORD value)
    {
        file.write(reinterpret_cast<const char*>(&value), 4);
    }

    // Size of the "fmt " chunk payload. Plain PCM uses the classic 16-byte
    // layout; every other tag (IEEE float, WAVE_FORMAT_EXTENSIBLE, ...) must
    // also carry cbSize and its extension bytes, otherwise players cannot
    // interpret the samples.
    DWORD FormatChunkSize() const
    {
        if (pwfx->wFormatTag == WAVE_FORMAT_PCM) return 16;
        return static_cast<DWORD>(sizeof(WAVEFORMATEX) + pwfx->cbSize);
    }

    // Writes the RIFF/WAVE header for dataSize bytes of sample data at the
    // current position of file. The header length depends only on the mix
    // format, so it can be rewritten in place once the data size is known.
    void WriteWavHeader(std::ofstream &file, DWORD dataSize)
    {
        if (!pwfx) return;

        const DWORD fmtChunkSize = FormatChunkSize();
        const DWORD paddedDataSize = dataSize + (dataSize & 1);
        // "WAVE" + ("fmt " header + payload) + ("data" header + payload)
        const DWORD riffSize = 4 + (8 + fmtChunkSize) + (8 + paddedDataSize);

        file.write("RIFF", 4);
        WriteU32(file, riffSize);
        file.write("WAVE", 4);

        // fmt chunk: WAVEFORMATEX is byte-packed, so its leading bytes are
        // exactly the on-disk layout (including the extension when present).
        file.write("fmt ", 4);
        WriteU32(file, fmtChunkSize);
        file.write(reinterpret_cast<const char*>(pwfx), fmtChunkSize);

        file.write("data", 4);
        WriteU32(file, dataSize);
    }
};

// Usage example: record five seconds of speaker output into output.wav.
int main()
{
    std::cout << "声卡输出捕捉示例" << std::endl;

    AudioCapture capture;

    HRESULT hr = capture.Initialize();
    if (FAILED(hr)) {
        std::cerr << "初始化失败: 0x" << std::hex << static_cast<unsigned long>(hr)
                  << std::dec << std::endl;
        return -1;
    }

    hr = capture.StartCapture();
    if (FAILED(hr)) {
        std::cerr << "启动采集失败: 0x" << std::hex << static_cast<unsigned long>(hr)
                  << std::dec << std::endl;
        return -1;
    }

    hr = capture.CaptureDataToFile("output.wav", 5000);

    capture.StopCapture();

    // COM is uninitialized by ~AudioCapture after the interfaces are released;
    // calling CoUninitialize() here would tear COM down while they are alive.
    std::cout << "程序执行完成" << std::endl;
    return FAILED(hr) ? -1 : 0;
}
有几个问题:录制的 wav 不能播放;修改之后,录制的 wav 还是不能播放。主要原因是声卡输出的混音格式不是普通的 16 位整型 PCM,而通常是 32 位浮点格式(WAVE_FORMAT_IEEE_FLOAT,或 WAVE_FORMAT_EXTENSIBLE 中的浮点子格式),因此需要增加格式检测和转换函数:
// 将32位浮点数转换为16位PCM HRESULT ConvertFloatToPCM16(const BYTE* input, BYTE* output, UINT32 frameCount) { if (!input || !output) return E_POINTER; const float* floatData = (const float*)input; int16_t* pcmData = (int16_t*)output; for (UINT32 i = 0; i < frameCount * pwfx->nChannels; i++) { // 将[-1.0, 1.0]的浮点数转换为[-32768, 32767]的16位整数 float sample = floatData[i]; // 限制在合法范围内 if (sample > 1.0f) sample = 1.0f; if (sample < -1.0f) sample = -1.0f; // 转换并四舍五入 pcmData[i] = (int16_t)(sample * 32767.0f + 0.5f); } return S_OK; } // 通用的格式转换函数 HRESULT ConvertAudioFormat(const BYTE* input, UINT32 inputSize, std::vector<BYTE>& output) { if (!input) return E_POINTER; UINT32 frameCount = inputSize / pwfx->nBlockAlign; UINT32 outputSize = frameCount * targetFormat.nBlockAlign; output.resize(outputSize); // 根据原始格式进行不同的转换 if (pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT || (pwfex && IsEqualGUID(pwfex->SubFormat, KSDATAFORMAT_SUBTYPE_IEEE_FLOAT))) { // 32位浮点数转16位PCM return ConvertFloatToPCM16(input, output.data(), frameCount); } else if (pwfx->wFormatTag == WAVE_FORMAT_PCM) { // 如果是其他位深度的PCM,这里可以添加相应的转换逻辑 // 目前只处理32位浮点数 std::cerr << "不支持的PCM位深度: " << pwfx->wBitsPerSample << std::endl; return E_NOTIMPL; } return E_NOTIMPL; }
将采集的声卡数据重采样为广播板支持播放的采样率(samplerate)和声道数(channel)。
重采样的初始化:注意 channel_layout(声道布局,位掩码)与 channel(声道个数)的区别,声道个数需要先通过 av_get_default_channel_layout 转换为声道布局再传入。
swr_alloc_set_opts(swrCtx, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, SAMPLE_RATE, av_get_default_channel_layout(pwfx->nChannels), convert_win_format_to_av_format(pwfx->wFormatTag, pwfx->wBitsPerSample), pwfx->nSamplesPerSec, 0, NULL);
播放卡顿优化:
不压缩PCM数据;
修复给声卡喂数据不及时的问题;
2、声卡录播,则是实时采集麦克风声音;
本地验证声卡采播功能正常,换了个电脑,跑起来效果就不一样了,分析主要还是电脑配置差异,需要提高声卡采集线程的优先级,减少延时时间:
HANDLE hThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)ThreadCapturePostToUser, NULL, 1, 0); //创建子线程 if (hThread) { SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); // 设置线程优先级为最高 ResumeThread(hThread); //启动子线程 }
参考:
https://blog.csdn.net/qq_41915225/article/details/86004805
https://github.com/quanwstone/Wave_Audio
-------------------广告线---------------
项目、合作,欢迎勾搭,邮箱:promall@qq.com
本文为呱牛笔记原创文章,转载无需和我联系,但请注明来自呱牛笔记 ,it3q.com