使用Aubio实时采样音频,无需停止录制音频和视频iPhone/iPad

Bri*_*den 9 audio objective-c ios aubio swift

Swift 2.2
Xcode 7.3
Aubio 0.4.3(aubio-0.4.3~const.iosuniversal_framework)
iOS 9.3目标
测试设备 - iPad Air
bufferSize:2048
numSamplesInBuffer:1024
采样率:44100

注意事项:

我故意在我即将发布的代码示例中留下AVCaptureVideo代码,以便任何人更简单地阅读我的问题都不会忘记我尝试使用相同的录制AvCaptureSession捕获音频和视频并实时采样音频
我已经完全测试了Aubio - > Onset,特别是使用了sample.caf(核心音频格式)声音文件以及使用AvAudioRecorder保存到文件(也是.caf)的录音,它可以在真实设备上正常工作(iPad Air) ).Aubio在测试中工作的一个非常重要的考虑因素是我使用new_aubio_source创建了一个基于URI或文件的样本.在我的"真实"版本中,我试图在不将音频数据保存到文件的情况下对声音缓冲区进行采样.
使用Aubio的可能替代方法.如果我可以开始将AudioBuffers存储为有效的核心音频格式(.caf)文件,Aubio会工作,不确定使用基于文件的解决方案是否足够快,但经过数天的研究后我还没想出如何存储func captureOutput(captureOutput: AVCaptureOutput, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection)CmSampleBufferRefs提交.这包括使用NSData,它永远不会将有效的.caf存储到文件中.
与之前的警告相关,我还没有找到一种方法来使用AvFoundation超级有用的对象,如AVAudioRecorder(它将存储一个很好的.caf文件),因为它取决于你停止录制/捕获会话.
如果您删除所有视频捕获代码,您可以在模拟器上运行,请在下面评论,如果您希望您没有Apple设备,我将准备一个模拟器版本的代码.必须在实时设备上测试相机功能.

以下代码成功启动了音频和视频AVCaptureSession,AVCaptureSession委托func captureOutput(captureOutput: AVCaptureOutput, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection)正在为音频和视频调用.当一个.提供音频CMSampleBufferRef示例我试图将该示例转换为AudioBuffer并传递给Aubio方法aubio_onset_do.我正在使用单例aubio_onset COpaquePointer.

在这段代码中,我尝试用两种不同的方式用音频缓冲区数据调用aubio_onset_do.

方法1 - 下面代码的当前方式是let useTimerAndNSMutableData = false.这意味着在我的prepareAudioBuffer函数中我将audioBuffer.mData传递给sampleAudioForOnsets.这种方法永远不会失败,但也没有检测到任何启动,我怀疑因为样本量不够大.

方法2如果let useTimerAndNSMutableData = true我sampleAudioForOnsets每隔1秒调用一次最终调用,允许有时间使用AudioBuffer.mDatas构建NSMutableData.使用这种方法,我试图给aubio_onset_do一个足够大的样本来检测onsets,使用一个计时器和NSMutableData这个方法导致aubio_onset_do很快崩溃:

(EXC_BAD_ACCESS(code = 1))

import UIKit
import AVFoundation

class AvRecorderViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate, AVCaptureAudioDataOutputSampleBufferDelegate, AVAudioRecorderDelegate, AVAudioPlayerDelegate {


    var captureSession: AVCaptureSession!
    var imageView:UIImageView!
    var customLayer:CALayer!
    var prevLayer:AVCaptureVideoPreviewLayer!

    let samplingFrequency = Int32(30)
    var aubioOnset:COpaquePointer? = nil
    let pathToSoundSample = FileUtility.getPathToAudioSampleFile()
    var onsetCount = 0
    let testThres:smpl_t = 0.03
    let nsMutableData: NSMutableData = NSMutableData()
    var sampleRate:UInt32!
    var bufferSize:UInt32!
    let useTimerAndNSMutableData = false

    override func viewDidLoad() {
        super.viewDidLoad()

        if FileUtility.fileExistsAtPath(pathToSoundSample) {
            print("sample file exists")
            FileUtility.deleteFileByNsurl(NSURL(fileURLWithPath: pathToSoundSample))
        }
        setupCapture()

        if useTimerAndNSMutableData {
            //create timer for sampling audio
            NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: #selector(timerFiredPrepareForAubioOnsetSample), userInfo: nil, repeats: true)
        }
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

    override func viewWillTransitionToSize(size: CGSize, withTransitionCoordinator coordinator: UIViewControllerTransitionCoordinator) {
        super.viewWillTransitionToSize(size, withTransitionCoordinator: coordinator)

        coordinator.animateAlongsideTransition({ (context) -> Void in

            }, completion: { (context) -> Void in

        })
    }

    override func viewWillLayoutSubviews() {
        prevLayer.frame = self.view.bounds

        if prevLayer.connection.supportsVideoOrientation {
            prevLayer.connection.videoOrientation = MediaUtility.interfaceOrientationToVideoOrientation(UIApplication.sharedApplication().statusBarOrientation)
        }
    }

    func timerFiredPrepareForAubioOnsetSample() {
        if nsMutableData.length <= 0 {
            return
        }

        let data = UnsafeMutablePointer<smpl_t>(nsMutableData.bytes)
        sampleAudioForOnsets(data, length: UInt32(nsMutableData.length))
    }

    func setupCapture() {
        let captureDeviceVideo: AVCaptureDevice = AVCaptureDevice.defaultDeviceWithMediaType(AVMediaTypeVideo)
        let captureDeviceAudio: AVCaptureDevice = AVCaptureDevice.defaultDeviceWithMediaType(AVMediaTypeAudio)
        var captureVideoInput: AVCaptureDeviceInput
        var captureAudioInput: AVCaptureDeviceInput

        //video setup
        if captureDeviceVideo.isTorchModeSupported(.On) {
            try! captureDeviceVideo.lockForConfiguration()

            /*if captureDeviceVideo.position == AVCaptureDevicePosition.Front {
                captureDeviceVideo.position == AVCaptureDevicePosition.Back
            }*/

            //configure frame rate
            /*We specify a minimum duration for each frame (play with this settings to avoid having too many frames waiting
             in the queue because it can cause memory issues). It is similar to the inverse of the maximum framerate.
             In this example we set a min frame duration of 1/10 seconds so a maximum framerate of 10fps. We say that
             we are not able to process more than 10 frames per second.*/
            captureDeviceVideo.activeVideoMaxFrameDuration = CMTimeMake(1, samplingFrequency)
            captureDeviceVideo.activeVideoMinFrameDuration = CMTimeMake(1, samplingFrequency)
            captureDeviceVideo.unlockForConfiguration()
        }

        //try and create audio and video inputs
        do {
            try captureVideoInput = AVCaptureDeviceInput(device: captureDeviceVideo)
            try captureAudioInput = AVCaptureDeviceInput(device: captureDeviceAudio)

        } catch {
            print("cannot record")
            return
        }

        /*setup the output*/
        let captureVideoDataOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
        let captureAudioDataOutput: AVCaptureAudioDataOutput = AVCaptureAudioDataOutput()

        /*While a frame is processes in -captureVideoDataOutput:didOutputSampleBuffer:fromConnection: delegate methods no other frames are added in the queue.
         If you don't want this behaviour set the property to false */
        captureVideoDataOutput.alwaysDiscardsLateVideoFrames = true

        // Set the video output to store frame in BGRA (It is supposed to be faster)
        let videoSettings: [NSObject : AnyObject] = [kCVPixelBufferPixelFormatTypeKey:Int(kCVPixelFormatType_32BGRA)]

        captureVideoDataOutput.videoSettings = videoSettings

        /*And we create a capture session*/
        captureSession = AVCaptureSession()

        //and configure session
        captureSession.sessionPreset = AVCaptureSessionPresetHigh

        /*We add audio/video input and output to session*/
        captureSession.addInput(captureVideoInput)
        captureSession.addInput(captureAudioInput)
        captureSession.addOutput(captureVideoDataOutput)
        captureSession.addOutput(captureAudioDataOutput)

        //not sure if I need this or not, found on internet
        captureSession.commitConfiguration()


        /*We create a serial queue to handle the processing of our frames*/
        var queue: dispatch_queue_t
        queue = dispatch_queue_create("queue", DISPATCH_QUEUE_SERIAL)

        //setup delegate
        captureVideoDataOutput.setSampleBufferDelegate(self, queue: queue)
        captureAudioDataOutput.setSampleBufferDelegate(self, queue: queue)


        /*We add the Custom Layer (We need to change the orientation of the layer so that the video is displayed correctly)*/
        customLayer = CALayer()
        customLayer.frame = self.view.bounds
        customLayer.transform = CATransform3DRotate(CATransform3DIdentity, CGFloat(M_PI) / 2.0, 0, 0, 1)
        customLayer.contentsGravity = kCAGravityResizeAspectFill
        view.layer.addSublayer(self.customLayer)

        /*We add the imageView*/
        imageView = UIImageView()
        imageView.frame = CGRectMake(0, 0, 100, 100)
        view!.addSubview(self.imageView)

        /*We add the preview layer*/
        prevLayer = AVCaptureVideoPreviewLayer()
        prevLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)
        prevLayer.frame = CGRectMake(100, 0, 100, 100)
        prevLayer.videoGravity = AVLayerVideoGravityResizeAspectFill
        view.layer.addSublayer(self.prevLayer)

        /*We start the capture*/
        captureSession.startRunning()

    }

    // MARK: AVCaptureSession delegates

    func captureOutput(captureOutput: AVCaptureOutput, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection) {

        if (captureOutput is AVCaptureAudioDataOutput) {
            prepareAudioBuffer(sampleBuffer)
        }

        //not relevant to my Stack Overflow question
        /*if (captureOutput is AVCaptureVideoDataOutput) {
            displayVideo(sampleBuffer)
        }*/

    }

    func captureOutput(captureOutput: AVCaptureOutput!, didDropSampleBuffer sampleBuffer: CMSampleBuffer!, fromConnection connection: AVCaptureConnection!) {
        print("frame dropped")
    }

    private func sampleAudioForOnsets(data: UnsafeMutablePointer<smpl_t>, length: UInt32) {
        print("\(#function)")

        //let samples = new_fvec(512)
        var total_frames : uint_t = 0
        let out_onset = new_fvec (1)
        var read : uint_t = 0

        //singleton of aubio_onset
        if aubioOnset == nil {
            let method = ("default" as NSString).UTF8String
            aubioOnset = new_aubio_onset(UnsafeMutablePointer<Int8>(method), bufferSize, 512, UInt32(sampleRate))
            aubio_onset_set_threshold(aubioOnset!, testThres)
        }

        var sample: fvec_t = fvec_t(length: length, data: data)

        //do not need the while loop but I have left it in here because it will be quite familiar to people that have used Aubio before and may help jog their
        //memory, such as reminding people familiar with Aubio that the aubio_source_do is normally used to "seek" through a sample
        while true {
            //aubio_source_do(COpaquePointer(source), samples, &read)

            //new_aubio_onset hop_size is 512, will aubio_onset_do move through a fvec_t sample at a 512 hop without an aubio_source_do call?
            aubio_onset_do(aubioOnset!, &sample, out_onset)

            if (fvec_get_sample(out_onset, 0) != 0) {
                print(String(format: ">>> %.2f", aubio_onset_get_last_s(aubioOnset!)))
                onsetCount += 1
            }

            total_frames += read

            //always will break the first iteration, only reason for while loop is to demonstate the normal use of aubio using aubio_source_do to read
            if (read < 512) {
                break
            }
        }

        print("done, total onsetCount: \(onsetCount)")

        if onsetCount > 1 {
            print("we are getting onsets")
        }
    }

    // MARK: - Private Helpers

    private func prepareAudioBuffer(sampleBuffer: CMSampleBufferRef) {

        let numSamplesInBuffer = CMSampleBufferGetNumSamples(sampleBuffer)
        bufferSize = UInt32(CMSampleBufferGetTotalSampleSize(sampleBuffer))
        var blockBuffer:CMBlockBufferRef? = nil
        var audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer(mNumberChannels: 0, mDataByteSize: 0, mData: nil))
        var status:OSStatus
        let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer)!
        let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)
        sampleRate = UInt32(asbd.memory.mSampleRate)

        print("bufferSize: \(bufferSize)")
        print("numSamplesInBuffer: \(numSamplesInBuffer)")
        print("Sample Rate: \(sampleRate)")
        print("assetWriter.status: ")

        status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
            sampleBuffer,
            nil,
            &audioBufferList,
            sizeof(audioBufferList.dynamicType), // instead of UInt(sizeof(audioBufferList.dynamicType))
            nil,
            nil,
            UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
            &blockBuffer
        )


        let audioBuffers = UnsafeBufferPointer<AudioBuffer>(start: &audioBufferList.mBuffers, count: Int(audioBufferList.mNumberBuffers))

        for audioBuffer in audioBuffers {

            if useTimerAndNSMutableData {
                //NSDATA APPEND, NSMutableData is building and will be analyzed at timer interbal
                let frame = UnsafePointer<Float32>(audioBuffer.mData)
                nsMutableData.appendBytes(frame, length: Int(audioBuffer.mDataByteSize))
            }else{
                //this never fails but there are never any onsets either, cannot tell if the audio sampling is just not long enough
                //or if the data really isn't valid data
                //smpl_t is a Float
                let data = UnsafeMutablePointer<smpl_t>(audioBuffer.mData)
                sampleAudioForOnsets(data, length: audioBuffer.mDataByteSize)
            }

        }

    }

}

Run Code Online (Sandbox Code Playgroud)

归档时间：	9 年，4 月前
查看次数：	712 次
最近记录：	9 年，4 月前