从音频文件中提取仪表级别

Question

从音频文件中提取仪表级别

Pet*_*rbo 14 audio avaudioplayer audiotoolbox ios swift

我需要从文件中提取音频表级别,以便在播放音频之前渲染级别.我知道AVAudioPlayer可以在播放音频文件时获取此信息

func averagePower(forChannel channelNumber: Int) -> Float.

Run Code Online (Sandbox Code Playgroud)

但在我的情况下,我想事先获得一个 [Float] 类型的仪表级别数组.

Answer 1

iel*_*ani 13

Swift 4

在一部iPhone上，处理耗时如下：

0.538s：处理一个8MB的mp3文件，时长4min47s，采样率44,100
0.170s：处理一个712KB的mp3文件，时长22s，采样率44,100
0.089s：处理一个caf文件，该文件是在终端中使用命令 afconvert -f caff -d LEI16 audio.mp3 audio.caf 转换上面的文件而创建的。

让我们开始：

A）声明将要包含有关音频资产的必要信息的此类：

/// Holds audio information used for building waveforms.
///
/// Instances can only be created through `load(fromAudioURL:completionHandler:)`.
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in loaded asset (sample rate multiplied by the duration in seconds)
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    /// Loaded assetTrack (the first audio track of `asset`)
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` for it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file to load.
    ///   - completionHandler: Called exactly once, with the loaded context, or with `nil`
    ///     when the asset's values cannot be loaded, the asset has no audio track, or its
    ///     format description / sample count cannot be determined.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // "tracks" is loaded together with "duration" so that neither property is read
        // before its value is available.
        let requiredKeys = ["duration", "tracks"]

        asset.loadValuesAsynchronously(forKeys: requiredKeys) {
            for key in requiredKeys {
                var error: NSError?
                guard asset.statusOfValue(forKey: key, error: &error) == .loaded else {
                    print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
                    completionHandler(nil)
                    return
                }
            }

            // A missing audio track is reported through the completion handler instead of crashing.
            guard
                let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first,
                let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                let audioFormatDesc = formatDescriptions.first,
                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                else {
                    completionHandler(nil)
                    return
            }

            // sampleRate * seconds; `Int(exactly:)` rejects NaN/infinite results (for example a
            // zero timescale) that would make a plain `Int(...)` conversion trap.
            let duration = asset.duration
            let sampleCount = asbd.pointee.mSampleRate * Float64(duration.value) / Float64(duration.timescale)
            guard let totalSamples = Int(exactly: sampleCount.rounded(.towardZero)), totalSamples >= 0 else {
                completionHandler(nil)
                return
            }

            completionHandler(AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack))
        }
    }
}

Run Code Online (Sandbox Code Playgroud)

我们将使用其异步函数load，并将其结果处理给完成处理程序。

B）在您的视图控制器中导入AVFoundation和Accelerate：

import AVFoundation
import Accelerate

Run Code Online (Sandbox Code Playgroud)

C）在视图控制器中声明本底噪声（noise floor，以dB为单位）：

/// Lower bound, in dB, used when clipping computed levels.
let noiseFloor: Float = -80.0

Run Code Online (Sandbox Code Playgroud)

例如，任何低于-80dB的信号都将被视为静音。

D）以下函数接收音频上下文并生成所需的dB功率值。targetSamples默认设置为100，您可以更改它以满足您的UI需求：

/// Reads the audio of `audioContext` as 16-bit PCM and reduces it to a short list of levels.
///
/// The samples are handed to `processSamples(fromData:...)` in chunks; samples left over at
/// the end that do not fill a whole chunk are averaged into one final value.
///
/// - Parameters:
///   - audioContext: Context produced by `AudioContext.load`. Passing `nil` is a fatal error.
///   - targetSamples: Number of output values used to size each averaged chunk.
///     A non-positive value yields an empty array.
/// - Returns: The values produced by `processSamples`, in reading order. If reading stops
///   early because of an error, the values produced so far are returned.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }

    // `targetSamples` is used as a divisor below.
    guard targetSamples > 0 else { return [] }

    // NOTE(review): only a third of `totalSamples` is used to size the range, and the range is
    // expressed in the asset duration's timescale rather than the sample rate. The reason is
    // not evident from this code — confirm before changing.
    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3

    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    let timescale = audioContext.asset.duration.timescale
    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: timescale))

    // Ask the reader for interleaved, little-endian, 16-bit integer PCM.
    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    // Channel count of the last format description; stays 1 when none is available.
    var channelCount = 1
    let formatDescriptions = (audioContext.assetTrack.formatDescriptions as? [CMAudioFormatDescription]) ?? []
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            fatalError("Couldn't get the format description")
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    // Number of 16-bit samples averaged into one output value, and the matching averaging filter.
    let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
            let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
                break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer, 0, &readBufferLength, nil, &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            // Without a valid pointer there is nothing safe to append; stop with what we have.
            print("Couldn't access the audio sample data")
            CMSampleBufferInvalidate(readSampleBuffer)
            break
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        // Only whole chunks are processed now; the remainder stays in `sampleBuffer`.
        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
    if samplesToProcess > 0 {
        let downSampledLength = 1
        let samplesPerPixel = samplesToProcess
        let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // The previous check (`reader.status == .completed || true`) could never fail. Report a
    // reader failure explicitly while still returning whatever was produced.
    if reader.status == .failed {
        print("Couldn't read the audio file: \(reader.error?.localizedDescription ?? "Unknown error")")
    }

    return outputSamples
}

Run Code Online (Sandbox Code Playgroud)

E）render 使用以下函数对音频文件中的数据进行降采样，然后转换为分贝：

/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped dB values,
/// averages them down to `downSampledLength` values and appends those to `outputSamples`.
///
/// The processed bytes are removed from the front of `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw bytes interpreted as consecutive `Int16` samples.
///   - outputSamples: Receives the down-sampled values.
///   - samplesToProcess: Number of `Int16` samples to consume; must fit inside `sampleBuffer`.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Stride between output values and number of filter taps used.
///   - filter: Averaging coefficients; needs at least `samplesPerPixel` entries.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    // Nothing to consume and nothing to append.
    guard samplesToProcess > 0 else { return }

    let bytesToProcess = samplesToProcess * MemoryLayout<Int16>.size

    // vDSP reads exactly the lengths it is given, so inconsistent arguments would read out of bounds.
    precondition(bytesToProcess <= sampleBuffer.count, "samplesToProcess exceeds the buffered samples")
    precondition(downSampledLength * samplesPerPixel <= samplesToProcess, "downsampling would read past the processed samples")
    precondition(samplesPerPixel <= filter.count, "filter is shorter than samplesPerPixel")

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    // Only the conversion needs the raw bytes; the buffer is not mutated while the pointer is live.
    sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
        //Convert 16bit int samples to floats
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(bytesToProcess)

    outputSamples += downSampledData
}

Run Code Online (Sandbox Code Playgroud)

F）它又会调用以下函数来获取相应的dB值，并将结果裁剪到[noiseFloor, 0]范围内：

/// Replaces each value of `normalizedSamples` with its level in dB relative to 32768,
/// then clips every value to the range `noiseFloor...0`.
///
/// - Parameter normalizedSamples: Values to convert; modified in place.
func getdB(from normalizedSamples: inout [Float]) {
    let elementCount = vDSP_Length(normalizedSamples.count)

    // 32768 is the reference value that maps to 0 dB; the trailing flag `1` is passed unchanged.
    var reference: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &reference, &normalizedSamples, 1, elementCount, 1)

    // Clamp into [noiseFloor, 0]
    var lowerBound = noiseFloor
    var upperBound: Float = 0.0
    vDSP_vclip(normalizedSamples, 1, &lowerBound, &upperBound, &normalizedSamples, 1, elementCount)
}

Run Code Online (Sandbox Code Playgroud)

G）最后，您可以像下面这样获得音频的波形：

// Locate the bundled "audio.mp3"; a missing resource stops the program.
guard let path = Bundle.main.path(forResource: "audio", ofType: "mp3") else {
    fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)

// Filled in later, once the asynchronous load below has called back.
var outputArray = [Float]()

AudioContext.load(fromAudioURL: url) { loadedContext in
    guard let context = loadedContext else {
        fatalError("Couldn't create the audioContext")
    }
    // 300 output values instead of the default 100.
    outputArray = self.render(audioContext: context, targetSamples: 300)
}

Run Code Online (Sandbox Code Playgroud)

不要忘记AudioContext.load(fromAudioURL:)是异步的。

此解决方案是由William Entriken从此回购中合成的。所有的荣誉归他所有。

Swift 5

这是更新为Swift 5语法的相同代码：

import AVFoundation
import Accelerate

/// Holds audio information used for building waveforms.
///
/// Instances can only be created through `load(fromAudioURL:completionHandler:)`.
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in loaded asset (sample rate multiplied by the duration in seconds)
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    /// Loaded assetTrack (the first audio track of `asset`)
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` for it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file to load.
    ///   - completionHandler: Called exactly once, with the loaded context, or with `nil`
    ///     when the asset's values cannot be loaded, the asset has no audio track, or its
    ///     format description / sample count cannot be determined.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // "tracks" is loaded together with "duration" so that neither property is read
        // before its value is available.
        let requiredKeys = ["duration", "tracks"]

        asset.loadValuesAsynchronously(forKeys: requiredKeys) {
            for key in requiredKeys {
                var error: NSError?
                guard asset.statusOfValue(forKey: key, error: &error) == .loaded else {
                    print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
                    completionHandler(nil)
                    return
                }
            }

            // A missing audio track is reported through the completion handler instead of crashing.
            guard
                let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first,
                let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                let audioFormatDesc = formatDescriptions.first,
                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
            else {
                completionHandler(nil)
                return
            }

            // sampleRate * seconds; `Int(exactly:)` rejects NaN/infinite results (for example a
            // zero timescale) that would make a plain `Int(...)` conversion trap.
            let duration = asset.duration
            let sampleCount = asbd.pointee.mSampleRate * Float64(duration.value) / Float64(duration.timescale)
            guard let totalSamples = Int(exactly: sampleCount.rounded(.towardZero)), totalSamples >= 0 else {
                completionHandler(nil)
                return
            }

            completionHandler(AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack))
        }
    }
}

/// Lower bound, in dB, used when clipping computed levels.
let noiseFloor: Float = -80

/// Reads the audio of `audioContext` as 16-bit PCM and reduces it to a short list of levels.
///
/// The samples are handed to `processSamples(fromData:...)` in chunks; samples left over at
/// the end that do not fill a whole chunk are averaged into one final value.
///
/// - Parameters:
///   - audioContext: Context produced by `AudioContext.load`. Passing `nil` is a fatal error.
///   - targetSamples: Number of output values used to size each averaged chunk.
///     A non-positive value yields an empty array.
/// - Returns: The values produced by `processSamples`, in reading order. If reading stops
///   early because of an error, the values produced so far are returned.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }

    // `targetSamples` is used as a divisor below.
    guard targetSamples > 0 else { return [] }

    // NOTE(review): only a third of `totalSamples` is used to size the range, and the range is
    // expressed in the asset duration's timescale rather than the sample rate. The reason is
    // not evident from this code — confirm before changing.
    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3

    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    let timescale = audioContext.asset.duration.timescale
    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: timescale))

    // Ask the reader for interleaved, little-endian, 16-bit integer PCM.
    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    // Channel count of the last format description; stays 1 when none is available.
    var channelCount = 1
    let formatDescriptions = (audioContext.assetTrack.formatDescriptions as? [CMAudioFormatDescription]) ?? []
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            fatalError("Couldn't get the format description")
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    // Number of 16-bit samples averaged into one output value, and the matching averaging filter.
    let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
              let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
            break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer,
                                                        atOffset: 0,
                                                        lengthAtOffsetOut: &readBufferLength,
                                                        totalLengthOut: nil,
                                                        dataPointerOut: &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            // Without a valid pointer there is nothing safe to append; stop with what we have.
            print("Couldn't access the audio sample data")
            CMSampleBufferInvalidate(readSampleBuffer)
            break
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        // Only whole chunks are processed now; the remainder stays in `sampleBuffer`.
        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
    if samplesToProcess > 0 {
        let downSampledLength = 1
        let samplesPerPixel = samplesToProcess
        let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // The previous check (`reader.status == .completed || true`) could never fail. Report a
    // reader failure explicitly while still returning whatever was produced.
    if reader.status == .failed {
        print("Couldn't read the audio file: \(reader.error?.localizedDescription ?? "Unknown error")")
    }

    return outputSamples
}

/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped dB values,
/// averages them down to `downSampledLength` values and appends those to `outputSamples`.
///
/// The processed bytes are removed from the front of `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw bytes interpreted as consecutive `Int16` samples.
///   - outputSamples: Receives the down-sampled values.
///   - samplesToProcess: Number of `Int16` samples to consume; must fit inside `sampleBuffer`.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Stride between output values and number of filter taps used.
///   - filter: Averaging coefficients; needs at least `samplesPerPixel` entries.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    // Nothing to consume and nothing to append.
    guard samplesToProcess > 0 else { return }

    let bytesToProcess = samplesToProcess * MemoryLayout<Int16>.size

    // vDSP reads exactly the lengths it is given, so inconsistent arguments would read out of bounds.
    precondition(bytesToProcess <= sampleBuffer.count, "samplesToProcess exceeds the buffered samples")
    precondition(downSampledLength * samplesPerPixel <= samplesToProcess, "downsampling would read past the processed samples")
    precondition(samplesPerPixel <= filter.count, "filter is shorter than samplesPerPixel")

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    // Only the conversion needs the raw bytes; the buffer is not mutated while the pointer is live.
    sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
        //Create an UnsafePointer<Int16> from samples
        guard let int16Samples = samples.bindMemory(to: Int16.self).baseAddress else { return }

        //Convert 16bit int samples to floats
        vDSP_vflt16(int16Samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(bytesToProcess)

    outputSamples += downSampledData
}

/// Replaces each value of `normalizedSamples` with its level in dB relative to 32768,
/// then clips every value to the range `noiseFloor...0`.
///
/// - Parameter normalizedSamples: Values to convert; modified in place.
func getdB(from normalizedSamples: inout [Float]) {
    // Convert samples to a log scale
    var zero: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)

    //Clip to [noiseFloor, 0]
    var ceil: Float = 0.0
    var noiseFloorMutable = noiseFloor
    vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
}
Run Code Online (Sandbox Code Playgroud)

旧解决方案

您可以使用以下功能来预渲染音频文件的电平，而无需播放它：

func averagePowers(audioFileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping(_ success: [Float]) -> ()) { let audioFile = try! AVAudioFile(forReading: audioFileURL) let audioFilePFormat = audioFile.processingFormat let audioFileLength = audioFile.length //Set the size of frames to read from the audio file, you can adjust this to your liking let frameSizeToRead = Int(audioFilePFormat.sampleRate/20) //This is to how many frames/portions we're going to divide the audio file let numberOfFrames = Int(audioFileLength)/frameSizeToRead //Create a pcm buffer the size of a frame guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFilePFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else { fatalError("Couldn't create the audio buffer") } //Do the calculations in a background thread, if you don't want to block the main thread for larger audio files DispatchQueue.global(qos: .userInitiated).async { //Th

Answer 2

Jak*_*kub 10

首先,这是一项繁重的操作,因此需要占用一些系统时间和资源才能完成.在下面的示例中,我将使用标准的帧速率和采样率,但如果您只是想显示条形图作为示意,则应该使用少得多的采样点.

好的,你不需要播放声音就可以分析它.因此在这里我完全不会使用AVAudioPlayer.我假设音轨是以URL的形式给出的:

    // The resource name already contains its extension, so no separate type is passed.
    // The force-unwrap stops the program if the file is not in the main bundle.
    let path: String = Bundle.main.path(forResource: "example3.mp3", ofType: nil)!
    let url: URL = URL(fileURLWithPath: path)

Run Code Online (Sandbox Code Playgroud)

然后我将使用AVAudioFile把音轨数据读入AVAudioPCMBuffer.一旦数据进入缓冲区,你就拥有了关于该音轨的所有信息:

/// Reads the whole audio file at `url` into a PCM buffer and passes it to `analyze(buffer:)`.
///
/// Errors thrown while opening or reading the file are printed; nothing is analyzed in that case.
///
/// - Parameter url: Location of the audio file to read.
func buffer(url: URL) {
    do {
        let track = try AVAudioFile(forReading: url)

        // Use the file's own processing format for the buffer instead of building a format
        // by hand, which can fail and was previously force-unwrapped.
        let format = track.processingFormat

        // `track.length` is a 64-bit frame count; convert it without trapping on overflow.
        guard let frameCount = AVAudioFrameCount(exactly: track.length),
            let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
                print("Couldn't create the audio buffer")
                return
        }

        try track.read(into: buffer, frameCount: frameCount)
        self.analyze(buffer: buffer)
    } catch {
        print(error)
    }
}

Run Code Online (Sandbox Code Playgroud)

你可能注意到这里调用了analyze方法.你需要用到缓冲区中的floatChannelData变量.它是原始数据,因此需要自行解析.我先贴出一个方法,然后在下面解释:

/// Converts every sample of `buffer` to a level in decibels, `20 * log10(|sample|)`.
///
/// The previous formula took `sqrt(sample / frameLength)`, which is NaN for every negative
/// sample, and the computed table was discarded. The table is now returned; existing callers
/// that ignore the result keep compiling.
///
/// - Parameters:
///   - buffer: PCM buffer to analyze. Its `floatChannelData` must be available.
///   - floorDecibels: Lowest value reported; silent samples (which would be `-infinity`) and
///     NaN inputs are clamped to it.
/// - Returns: One array per channel holding one dB value per frame, or an empty array when
///   the buffer exposes no float channel data.
@discardableResult
func analyze(buffer: AVAudioPCMBuffer, floorDecibels: Float = -80) -> [[Float]] {
    guard let channelData = buffer.floatChannelData else { return [] }

    let channelCount = Int(buffer.format.channelCount)
    let frameLength = Int(buffer.frameLength)
    let stride = buffer.stride

    var result = [[Float]]()
    result.reserveCapacity(channelCount)

    for channel in 0..<channelCount {
        let samples = channelData[channel]
        var levels = [Float](repeating: floorDecibels, count: frameLength)
        for sampleIndex in 0..<frameLength {
            let amplitude = abs(samples[sampleIndex * stride])
            // `max` keeps `floorDecibels` when the computed level is lower or NaN.
            levels[sampleIndex] = max(floorDecibels, 20 * log10(amplitude))
        }
        result.append(levels)
    }
    return result
}

Run Code Online (Sandbox Code Playgroud)

其中涉及一些计算(重一点).当我在几个月前研究类似的解决方案时,我遇到了这个教程:https://www.raywenderlich.com/5154-avaudioengine-tutorial-for-ios-getting-started那里有这种计算的优秀解释我粘贴在上面并在我的项目中使用的部分代码,所以我想在这里归功于作者:Scott McAlister

归档时间：	7 年，6 月前
查看次数：	1787 次
最近记录：	6 年，5 月前