从麦克风中获取音频,对其进行下采样,对其进行编码(使用 AAC)并通过套接字传输

Mad*_*mer 5 core-audio avfoundation ios swift avaudioengine

在过去的几周里,我一直试图从许多不同的例子中拼凑出一个解决方案,这将使我能够:

  1. 从麦克风捕获音频
  2. 将其下采样到 8khz
  3. 使用 AAC 对每个缓冲区进行编码
  4. 通过套接字发送结果

我见过的几乎所有示例都涉及将音频编码到文件中,我这样做是为了测试,但不是我需要的。我需要取一个数据包,编码并传输它

MicrophoneService

这基本上是创建一个 AVAudioEngine,并把一个 AVAudioMixerNode 附加到它上面,由该混音节点对音频进行下采样。

然后将结果放入阻塞队列 ( AudioEncoderQueue) 以便编码服务可以对缓冲区进行编码

import Foundation
import AVFoundation

// Base on /sf/ask/2771681111/
// https://github.com/onmyway133/notes/issues/367

/// Captures microphone audio with an `AVAudioEngine` and hands 8 kHz mono
/// buffers to `AudioEncoderQueue`.
///
/// The engine's input node feeds an `AVAudioMixerNode` whose output format is
/// 8 kHz / 1 channel; a tap on that mixer forwards every buffer to the queue.
class MicrophoneService {

    let audioEngine = AVAudioEngine()

    /// Requests the preferred hardware sample rate and wires up the audio graph.
    /// The engine is not started here; call `start()` for that.
    init() {
        requestPreferredSampleRate()
        buildDownsamplingGraph()
    }

    /// Stops the engine if needed, then prepares and starts it.
    /// - Throws: Whatever `AVAudioEngine.start()` throws.
    func start() throws {
        stop()
        audioEngine.prepare()
        try audioEngine.start()
    }

    /// Stops the audio engine.
    func stop() {
        audioEngine.stop()
    }

    /// Asks the shared audio session for a 16 kHz sample rate.
    /// A failure is only printed; setup continues regardless.
    private func requestPreferredSampleRate() {
        do {
            try AVAudioSession.sharedInstance().setPreferredSampleRate(16000)
        } catch {
            print(error)
        }
    }

    /// Builds input -> mixer -> output and installs the tap on the mixer.
    private func buildDownsamplingGraph() {
        let tapBus = 0
        let microphone = audioEngine.inputNode
        let microphoneFormat = microphone.outputFormat(forBus: tapBus)

        // An earlier variant tapped the input node directly (in its native
        // format) and queued those buffers; the mixer below replaced it.

        let downsampler = AVAudioMixerNode()
        audioEngine.attach(downsampler)

        // Target format of the mixer output and of the tapped buffers.
        let downsampledFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)

        audioEngine.connect(microphone, to: downsampler, format: microphoneFormat)
        audioEngine.connect(downsampler, to: audioEngine.outputNode, format: downsampledFormat)

        downsampler.installTap(onBus: tapBus, bufferSize: 1024 * 4, format: downsampledFormat) { buffer, _ in
            AudioEncoderQueue.shared.put(buffer)
        }
    }

}
Run Code Online (Sandbox Code Playgroud)

AudioEncoderService

这基本上是从编码队列中取出下一个音频数据包,对其进行编码,然后将结果放入传输队列(TransportQueue)。

import Foundation
import AVFoundation
import Cadmus

/// Takes PCM buffers from `AudioEncoderQueue`, encodes each one to AAC via
/// `AudioUtilities.convertToAAC(from:)` and puts the encoded bytes on
/// `TransportQueue`.
class AudioEncoderService {

    fileprivate var stopped: Bool = false
    // This is for debugging
    fileprivate let audioFile: AVAudioFile

    /// Creates the service and opens `Test.aac` in the documents directory.
    /// - Throws: Whatever `AVAudioFile(forWriting:settings:)` throws.
    init() throws {
        // The audio file is used to ensure that I'm getting audio from the microphone
        // As well as test the hardware can receive the data through a separate C program
        var url = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!
        url.appendPathComponent("Test.aac", isDirectory: false)

        // NOTE(review): AVEncoderBitRatePerChannelKey is a bit rate, and 16 looks
        // like a bit depth rather than a rate — confirm the intended value.
        let settings: [String: Any] = [
            AVFormatIDKey: NSNumber(value: kAudioFormatMPEG4AAC),
            AVSampleRateKey: NSNumber(value: 8000),
            AVNumberOfChannelsKey: NSNumber(value: 1),
            AVEncoderBitRatePerChannelKey: NSNumber(value: 16),
            AVEncoderAudioQualityKey: NSNumber(value: AVAudioQuality.high.rawValue)
        ]
        audioFile = try AVAudioFile(forWriting: url, settings: settings)
    }

    /// Starts the encode loop on a global background queue.
    func start() {
        // Clear a previous stop() so the loop does not exit after one pass
        // (mirrors what TransportService.start() does).
        stopped = false
        DispatchQueue.global(qos: .userInitiated).async {
            self.encodeAudio()
        }
    }

    /// Asks the encode loop to finish after its current iteration.
    func stop() {
        stopped = true
    }

    /// Encode loop: take a buffer, encode it, queue the encoded bytes.
    /// Runs until `stop()` has been called; encoding errors are printed and
    /// the loop carries on with the next buffer.
    func encodeAudio() {
        repeat {
            do {
                // Presumably take() returns an optional AVAudioPCMBuffer — confirm
                // against AudioEncoderQueue, which is not shown here.
                guard let buffer = AudioEncoderQueue.shared.take(),
                      let encodedBuffer = try encode(buffer) else { continue }

                // I normally write the buffer to the AVAudioFile here, but I've removed for brevity

                // Never queue an empty payload: the transport loop returns
                // as soon as it takes a zero-length packet.
                guard encodedBuffer.byteLength > 0 else { continue }

                let data = Data(bytes: encodedBuffer.data, count: Int(encodedBuffer.byteLength))
                TransportQueue.shared.put(data)
            } catch {
                print(error)
            }
        } while !stopped
    }

    /// Encodes one PCM buffer to AAC.
    /// - Parameter buffer: The PCM buffer to encode.
    /// - Returns: The compressed buffer produced by `AudioUtilities.convertToAAC(from:)`.
    /// - Throws: Whatever `AudioUtilities.convertToAAC(from:)` throws.
    func encode(_ buffer: AVAudioPCMBuffer) throws -> AVAudioCompressedBuffer? {
        return try AudioUtilities.convertToAAC(from: buffer)
    }

    /// Copies the raw bytes of the buffer's first `AudioBuffer` into a `Data`.
    /// - Parameter buffer: The PCM buffer to copy from.
    /// - Returns: The copied bytes, or an empty `Data` when the buffer has no
    ///   backing memory.
    func toData(buffer: AVAudioPCMBuffer) -> Data {
        let audioBuffer = buffer.audioBufferList.pointee.mBuffers
        guard let bytes = audioBuffer.mData else { return Data() }
        return Data(bytes: bytes, count: Int(audioBuffer.mDataByteSize))
    }

// Previous downsampling attempt
//  func resample(_ buffer: AVAudioPCMBuffer) throws -> AVAudioPCMBuffer? {
//      guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: resampleFormat, frameCapacity: AVAudioFrameCount(resampleFormat.sampleRate * 2.0)) else { return nil }
//
//      let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
//          outStatus.pointee = AVAudioConverterInputStatus.haveData
//          return buffer
//      }
//
//      var error: NSError? = nil
//      resampleConverter.convert(to: pcmBuffer, error: &error, withInputFrom: inputBlock)
//
//      guard let resampleError = error else { return pcmBuffer }
//
//      throw resampleError
//  }

}
Run Code Online (Sandbox Code Playgroud)

AudioUtilities

这里把音频缓冲区用 AAC 编码并转换为 Data。它基于许多 SO 帖子,并做了一些调整。

import Foundation
import AVFoundation

// /sf/ask/3595208921/
// /sf/ask/3595208921/

/// Helpers for converting LPCM audio buffers to AAC with `AVAudioConverter`.
class AudioUtilities {

    /// Builds the target format: MPEG-4 AAC, 8 kHz, 1 channel.
    /// All other description fields are left at 0.
    /// - Returns: The format, or `nil` if `AVAudioFormat` rejects the description.
    static func AACFormat() -> AVAudioFormat? {

        var outDesc = AudioStreamBasicDescription(
            mSampleRate: 8000,
            mFormatID: kAudioFormatMPEG4AAC,
            mFormatFlags: 0,
            mBytesPerPacket: 0,
            mFramesPerPacket: 0,
            mBytesPerFrame: 0,
            mChannelsPerFrame: 1,
            mBitsPerChannel: 0,
            mReserved: 0)

        return AVAudioFormat(streamDescription: &outDesc)
    }

    /// Converter shared by all calls; created lazily by `convertToAAC(from:)`
    /// using the format of the first buffer it receives.
    static var lpcmToAACConverter: AVAudioConverter! = nil

    /// Encodes `buffer` to AAC.
    /// - Parameter buffer: The source buffer; its format is used to create the
    ///   shared converter on first use.
    /// - Returns: A compressed buffer holding whatever the converter produced.
    /// - Throws: An `NSError` if the AAC format or the converter cannot be
    ///   created, or if the conversion itself fails.
    static func convertToAAC(from buffer: AVAudioBuffer) throws -> AVAudioCompressedBuffer? {

        guard let outputFormat = AACFormat() else {
            throw makeError("Unable to create the AAC output format")
        }

        // init converter once
        let converter: AVAudioConverter
        if let existing = lpcmToAACConverter {
            converter = existing
        } else {
            // The initializer is failable; fail with an error instead of
            // crashing on the implicitly unwrapped static.
            guard let created = AVAudioConverter(from: buffer.format, to: outputFormat) else {
                throw makeError("Unable to create an LPCM to AAC converter for \(buffer.format)")
            }
            created.bitRate = 8000
            lpcmToAACConverter = created
            converter = created
        }

        let outBuffer = AVAudioCompressedBuffer(format: outputFormat,
                                                packetCapacity: 8,
                                                maximumPacketSize: converter.maximumOutputPacketSize)

        try convert(withConverter: converter, from: buffer, to: outBuffer)

        return outBuffer
    }

    /// Runs one conversion pass, offering `sourceBuffer` to the converter exactly once.
    ///
    /// After the single buffer has been handed over, the input block answers
    /// `.noDataNow`, so the converter reports `.inputRanDry` whenever it asks
    /// for more input than that buffer supplied. Anything it did produce is
    /// still in `destinationBuffer`.
    /// - Throws: The converter's `NSError`, or a generic error when the status
    ///   is `.error` but no `NSError` was provided.
    private static func convert(withConverter: AVAudioConverter, from sourceBuffer: AVAudioBuffer, to destinationBuffer: AVAudioBuffer) throws {
        // input each buffer only once
        var newBufferAvailable = true

        let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
            guard newBufferAvailable else {
                outStatus.pointee = .noDataNow
                return nil
            }
            outStatus.pointee = .haveData
            newBufferAvailable = false
            return sourceBuffer
        }

        var outError: NSError? = nil
        let status = withConverter.convert(to: destinationBuffer, error: &outError, withInputFrom: inputBlock)

        switch status {
        case .haveData: break
        case .inputRanDry: print("Input run dry")
        case .endOfStream: print("End of stream")
        case .error: print("!! Error")
        @unknown default: break
        }

        if let error = outError {
            throw error
        }
        // Do not report success when the converter failed without error details.
        if status == .error {
            throw makeError("AVAudioConverter reported an error without details")
        }
    }

    /// Wraps `message` in an `NSError` under this class's own error domain.
    private static func makeError(_ message: String) -> NSError {
        return NSError(domain: "AudioUtilities",
                       code: -1,
                       userInfo: [NSLocalizedDescriptionKey: message])
    }
}
Run Code Online (Sandbox Code Playgroud)

TransportService

这基本上是从传输队列中取出数据,并通过套接字发送出去。每个数据包前面都会加上一个自定义标头;由于标头的定义与问题无关,这里没有包括它。

import Foundation
import SwiftSocket

/// Sends the packets found on `TransportQueue` over a TCP socket, each one
/// prefixed with a `SPECIAL_HEADER`.
class TransportService {

    fileprivate var stopped: Bool = false

    var socket: TCPClient?
    let config: DeviceConfiguration

    /// - Parameter config: Supplies the IP address and port to connect to.
    init(config: DeviceConfiguration) {
        self.config = config
    }

    /// Closes any existing connection, connects to the configured endpoint
    /// (30 second timeout) and starts the send loop on a global background queue.
    /// - Throws: The connection error; `socket` is reset to `nil` in that case.
    func start() throws {
        stop()
        stopped = false

        let client = TCPClient(address: config.ipAddress, port: Int32(config.port))
        socket = client
        print("Connect to \(config.ipAddress) : \(config.port)")

        if case .failure(let error) = client.connect(timeout: 30) {
            socket = nil
            throw error
        }

        DispatchQueue.global(qos: .userInitiated).async {
            self.transportData()
        }
    }

    /// Flags the send loop to stop, then closes and drops the socket.
    func stop() {
        stopped = true
        socket?.close()
        socket = nil
    }

    /// Send loop: take a packet, prepend the header, send it, pause 0.1 s.
    ///
    /// Returns immediately if there is no socket, if the queue yields nothing
    /// or an empty packet, or if a send fails (the failure is logged). It ends
    /// normally, printing "!! Stopped", once `stopped` is set.
    func transportData() {
        guard let client = socket else { return }

        let headerSize = MemoryLayout<SPECIAL_HEADER>.size
        var header = SPECIAL_HEADER()
        // Populate the header properties
        // ...
        header.header_length = Int32(headerSize)
        header.sample_rate = 8000

        // Sequence number written into each header and printed for tracing.
        var sequenceNumber = 0

        while true {
            guard let payload = TransportQueue.shared.take(), !payload.isEmpty else { return }

            header.packet_sn = Int32(sequenceNumber)
            header.data_length = Int32(payload.count)

            // The packet is the raw header bytes followed by the payload.
            var packet = Data(bytes: &header, count: headerSize)
            packet.append(payload)
            print("\(sequenceNumber); \(header.data_length); \(packet.count)")

            if case .failure(let error) = client.send(data: packet) {
                log(error: "\(error)")
                return
            }
            sequenceNumber += 1

            Thread.sleep(forTimeInterval: 0.1)

            if stopped { break }
        }
        print("!! Stopped")
    }

}
Run Code Online (Sandbox Code Playgroud)

注意:我已经使用测试文件独立测试了传输服务并从中提取了每个服务,所以我知道这部分工作正常。这只是为了完成基本的整体画面。

问题...

  1. 无论我在 MicrophoneService 中怎样调整,我的编码器都会不断输出 Input run dry
  2. 硬件端没有音频(同样,我已经用静态文件测试过传输流程,它确实可行)(不,我无法控制硬件,但我有测试源代码,可以使用 TransportService 逐帧发送 AAC 文件,而且它能正常工作)

要求...

音频必须是 8khz、单声道并使用 AAC 编码。

问题...

这是正确的方法还是我应该采取另一种方法来捕获音频,对其进行下采样并对其进行编码?

有什么办法可以减少 Input ran dry 出现的次数吗?还是说我根本不需要关心它?

归档时间:

查看次数:

683 次

最近记录:

6 年,9 月 前