使用 AVMutableComposition 和 AVAssetExportSession 创建的 MP4 视频可以在 Quicktime 中使用,但在所有其他视频工具中显示为损坏

Jam*_*ter 3 mp4 ffmpeg avfoundation avmutablecomposition swift

我正在尝试在 macOS 上使用 Swift、AVFoundation 和 AVKit 将多个电影的片段一起编辑到一个剪辑中。下面的 Swift 代码是我正在尝试做的事情的一个很好的例子:

\n\n
import AVFoundation
import AVKit

// Two publicly hosted sample movies that the clip is cut from.
let source1 = AVAsset(url: URL(string: "https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4")!)
let source2 = AVAsset(url: URL(string: "http://techslides.com/demos/sample-videos/small.mp4")!)

let comp = AVMutableComposition()

// A single video track and a single audio track; every segment from either
// source is inserted into this one pair of tracks.
comp.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid)
comp.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid)

/// Converts a number of seconds into a `CMTime` with a timescale of 600.
func cmtime(_ i: Double) -> CMTime {
    return CMTime(seconds: i, preferredTimescale: 600)
}

/// Copies one second of video and audio out of `from`, beginning at `start`,
/// into the first video and first audio track of `into` at time `at`.
///
/// - Throws: whatever `insertTimeRange(_:of:at:)` throws.
func insertSecond(into: AVMutableComposition, from: AVAsset, start: CMTime, at: CMTime) throws {
    let oneSecond = CMTimeRange(start: start, duration: cmtime(1.0))

    let videoDestination = into.tracks(withMediaType: .video).first!
    let audioDestination = into.tracks(withMediaType: .audio).first!

    try videoDestination.insertTimeRange(oneSecond, of: from.tracks(withMediaType: .video).first!, at: at)
    try audioDestination.insertTimeRange(oneSecond, of: from.tracks(withMediaType: .audio).first!, at: at)
}

// (source asset, start offset in the source, position in the composition), all in seconds.
let cuts: [(source: AVAsset, start: Double, at: Double)] = [
    (source1, 3.0, 0.0),
    (source2, 2.0, 1.0),
    (source1, 100.0, 2.0),
    (source2, 3.0, 3.0),
    (source1, 350.0, 4.0),
]
for cut in cuts {
    try insertSecond(into: comp, from: cut.source, start: cmtime(cut.start), at: cmtime(cut.at))
}

if let sess = AVAssetExportSession(asset: comp, presetName: "AVAssetExportPresetHighestQuality") {
    sess.outputURL = URL(fileURLWithPath: "/tmp/output.mp4")
    sess.outputFileType = .mp4
    // The completion handler runs once the export has finished or failed.
    sess.exportAsynchronously {
        print("done")
        print(sess.error ?? "success")
    }
}
Run Code Online (Sandbox Code Playgroud)\n\n

运行此代码确实会成功生成一个 output.mp4 文件,并且该文件可以在 Quicktime 中毫无问题地播放。您应该能够将上述代码粘贴到 Playground 中以重现该视频(源视频都是托管在网络上的公开可用的示例视频)。我还在这里将其上传到 S3,因此您可以下载并分析它,而无需自己运行代码。

\n\n

但是,尝试使用任何其他视频软件打开或处理它会导致错误。

\n\n

VLC 将尝试播放该文件,但播放起来非常困难。视频经常卡顿,与音频不同步,包含 Quicktime 根本不显示的帧,并且完全跳过某些部分。

\n\n

Firefox 也会尝试播放文件,但显然无法正确解码并且视频输出有问题。Chrome 在播放第一秒后就冻结了。

\n\n

我尝试使用 ffprobe 和 ffmpeg 进一步诊断

\n\n

运行 ffprobe -show_frames output.mp4 1>/dev/null 显示:

\n\n
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x7fe841801800] DTS -24000 < 24000 out of order\n[h264 @ 0x7fe843022800] reference count overflow\n[h264 @ 0x7fe843022800] decode_slice_header error\n[h264 @ 0x7fe843022800] no frame!\n[h264 @ 0x7fe843022800] deblocking_filter_idc 6 out of range\n[h264 @ 0x7fe843022800] decode_slice_header error\n[h264 @ 0x7fe843022800] no frame!\n[h264 @ 0x7fe843022800] deblocking_filter_idc 6 out of range\n[h264 @ 0x7fe843022800] decode_slice_header error\n[h264 @ 0x7fe843022800] no frame!\n[h264 @ 0x7fe843022800] top block unavailable for requested intra mode -1\n[h264 @ 0x7fe843022800] error while decoding MB 5 0, bytestream 947\n[h264 @ 0x7fe843022800] concealing 3600 DC, 3600 AC, 3600 MV errors in P frame\n[h264 @ 0x7fe843022800] mmco: unref short failure\n[h264 @ 0x7fe843022800] cabac_init_idc 4 overflow\n[h264 @ 0x7fe843022800] decode_slice_header error\n[h264 @ 0x7fe843022800] no frame!\n[h264 @ 0x7fe843022800] deblocking filter parameters -43 0 out of range\n[h264 @ 0x7fe843022800] decode_slice_header error\n[h264 @ 0x7fe843022800] no frame!\n
Run Code Online (Sandbox Code Playgroud)\n\n

尝试使用 ffmpeg 转码为另一种格式(ffmpeg -i output.mp4 output.avi)会出现很多警告和错误:

\n\n
[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 73, current: 71; changing to 74. This may result in incorrect timestamps in the output file.\n[mov,mp4,m4a,3gp,3g2,mj2 @ 0x7fe1f4802800] DTS -24000 < 24000 out of order\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 74, current: 72; changing to 75. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 75, current: 73; changing to 76. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 76, current: 74; changing to 77. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 77, current: 75; changing to 78. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f4849600] reference count overflow\n[h264 @ 0x7fe1f4849600] decode_slice_header error\n[h264 @ 0x7fe1f4849600] no frame!\nError while decoding stream #0:1: Invalid data found when processing input\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 145, current: 143; changing to 146. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 146, current: 144; changing to 147. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 147, current: 145; changing to 148. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f483d800] deblocking_filter_idc 6 out of range\n[h264 @ 0x7fe1f483d800] decode_slice_header error\n[h264 @ 0x7fe1f483d800] no frame!\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 148, current: 146; changing to 149. 
This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 149, current: 147; changing to 150. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f4849600] deblocking_filter_idc 6 out of range\n[h264 @ 0x7fe1f4849600] decode_slice_header error\n[h264 @ 0x7fe1f4849600] no frame!\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 150, current: 148; changing to 151. This may result in incorrect timestamps in the output file.\nError while decoding stream #0:1: Invalid data found when processing input\n    Last message repeated 1 times\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 151, current: 149; changing to 152. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 152, current: 150; changing to 153. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 153, current: 151; changing to 154. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 154, current: 152; changing to 155. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 155, current: 153; changing to 156. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 156, current: 154; changing to 157. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 157, current: 155; changing to 158. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 158, current: 156; changing to 159. 
This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 159, current: 157; changing to 160. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 160, current: 158; changing to 161. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 161, current: 159; changing to 162. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 162, current: 160; changing to 163. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 163, current: 161; changing to 164. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 164, current: 162; changing to 165. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 165, current: 163; changing to 166. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 166, current: 164; changing to 167. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 167, current: 165; changing to 168. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 168, current: 166; changing to 169. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 169, current: 167; changing to 170. 
This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 170, current: 168; changing to 171. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 171, current: 169; changing to 172. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 172, current: 170; changing to 173. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 173, current: 171; changing to 174. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 174, current: 172; changing to 175. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 175, current: 173; changing to 176. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 176, current: 174; changing to 177. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 177, current: 175; changing to 178. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 178, current: 176; changing to 179. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 179, current: 177; changing to 180. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 180, current: 178; changing to 181. 
This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 181, current: 179; changing to 182. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 182, current: 180; changing to 183. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 183, current: 181; changing to 184. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 184, current: 182; changing to 185. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f483d800] top block unavailable for requested intra mode -1\n[h264 @ 0x7fe1f483d800] error while decoding MB 5 0, bytestream 947\n[h264 @ 0x7fe1f483d800] concealing 3600 DC, 3600 AC, 3600 MV errors in P frame\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 185, current: 183; changing to 186. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 186, current: 184; changing to 187. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f4849600] mmco: unref short failure\n[h264 @ 0x7fe1f4849600] cabac_init_idc 4 overflow\n[h264 @ 0x7fe1f4849600] decode_slice_header error\n[h264 @ 0x7fe1f4849600] no frame!\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 187, current: 185; changing to 188. This may result in incorrect timestamps in the output file.\n[h264 @ 0x7fe1f485fa00] deblocking filter parameters -43 0 out of range\n[h264 @ 0x7fe1f485fa00] decode_slice_header error\n[h264 @ 0x7fe1f485fa00] no frame!\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 188, current: 186; changing to 189. 
This may result in incorrect timestamps in the output file.\nError while decoding stream #0:1: Invalid data found when processing input\n    Last message repeated 1 times\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 189, current: 187; changing to 190. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 190, current: 188; changing to 191. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 191, current: 189; changing to 192. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 192, current: 190; changing to 193. This may result in incorrect timestamps in the output file.\n[avi @ 0x7fe1f5804e00] Non-monotonous DTS in output stream 0:1; previous: 193, current: 191; changing to 194. This may result in incorrect timestamps in the output file.\n
Run Code Online (Sandbox Code Playgroud)\n\n

上面的代码只是一个示例,我在该代码的许多变体中都看到了不同严重程度的类似问题。我尝试过很多事情,包括:

  • 使用本地文件 URL 而不是 HTTPS URL
  • 使用 .mov 而不是 .mp4 文件
  • 使用 AVMovie 和 AVMutableMovie(以及各种设置调整,例如将 AVURLAssetPreferPreciseDurationAndTimingKey 设置为 true)而不是 AVMutableComposition
  • 调整 AVAssetExportSession 的设置(预设等)
  • 以不同的方式构造 CMTime 对象

但无济于事——我似乎无法让 AVFoundation 生成其他工具可以处理的视频文件。

\n\n

任何帮助都值得赞赏,即使只是关于输出文件编码的异常情况的任何想法,如果您不能或不想运行上述 Swift 代码来自己重现它,您可以在此处下载该文件。

\n

NoH*_*its 5

我同意其他播放器中的播放问题源于每个轨道有多个格式描述。但实际上,在合成之前不需要对轨道进行昂贵的转码,AVFoundation 可以为您做到这一点......如果您愿意多费一些周折。

关键在于,AVMutableComposition 中同一种媒体类型可以有多个轨道,而 AVAssetExportSession 可以将这些轨道“混合”成每种媒体类型各一个轨道。AVFoundation 为此提供了 mutableTrackCompatibleWithTrack: 方法。因此,当您想要从给定的源轨道插入片段时,可以向 AVMutableComposition 请求一个合适的目标轨道;如果没有返回任何轨道,则添加一个新轨道。

如前所述,有以下几点需要牢记:

  • 您无法在给定目标轨道当前末尾之外的“空白时间”中的某个位置插入片段。要解决此问题,请记下目标轨道的当前结束时间,在该时间处追加片段,然后在原先的轨道结束处插入持续时间合适的空片段。下面的示例展示了这一点,并做了一个简化假设:您总是在末尾追加。如果您想在现有轨道中的任意位置插入,则需要更复杂的逻辑。

  • 要让 AVAssetExportSession 真正把所有内容混合成每种媒体类型只有一个轨道,您必须在导出会话上设置 AVAudioMix 和 AVVideoComposition。

下面的示例代码基于您的原始示例,生成一个 output.mp4,它可以在 VLC、Chrome 和 Firefox 中正确播放,并且在使用 ffmpeg 检查时不会抛出任何错误。

import AVFoundation
import Foundation

// Remote sample movies the segments are taken from.
let bigBuckBunnyURL = URL(string: "https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4")!
let smallSampleURL = URL(string: "http://techslides.com/demos/sample-videos/small.mp4")!

let source0 = AVAsset(url: bigBuckBunnyURL)
let source1 = AVAsset(url: smallSampleURL)

// Starts out empty; tracks are added on demand as segments are inserted.
let comp = AVMutableComposition()

/// Converts a number of seconds into a `CMTime`.
///
/// - Parameter i: The time in seconds.
/// - Returns: The time expressed with a preferred timescale of 600.
func cmtime(_ i: Double) -> CMTime {
    let timescale: CMTimeScale = 600
    return CMTime(seconds: i, preferredTimescale: timescale)
}

/// Failures that `insertTrackSecond` reports instead of crashing.
enum InsertSegmentError: Error {
    /// The source asset has no track of the requested media type.
    case missingSourceTrack(AVMediaType)
    /// The composition returned no track when asked to add one for the media type.
    case cannotAddCompositionTrack(AVMediaType)
}

/// Appends one second of a single media type from `srcAsset` to a compatible
/// track of `dstComp`, padding with empty time so the segment starts at `at`.
///
/// The destination is a composition track compatible with the source track;
/// when the composition has none, a new track is added.
///
/// The segment is always appended at the current end of the destination track.
/// If that end lies beyond `at`, the segment stays at the end of the track and
/// no padding is inserted.
///
/// - Parameters:
///   - srcAsset: Asset whose first track of `mediaType` supplies the segment.
///   - dstComp: Composition that receives the segment.
///   - mediaType: Media type of the track to copy from.
///   - start: Start of the one-second range within the source track.
///   - at: Time in the composition at which the segment should begin.
/// - Throws: `InsertSegmentError` when the source has no track of `mediaType`
///   or no destination track can be obtained; otherwise whatever
///   `insertTimeRange(_:of:at:)` throws.
func insertTrackSecond(srcAsset: AVAsset, dstComp: AVMutableComposition, mediaType: AVMediaType, start: CMTime, at: CMTime) throws {
    guard let srcTrack = srcAsset.tracks(withMediaType: mediaType).first else {
        throw InsertSegmentError.missingSourceTrack(mediaType)
    }

    // get a compatible destination track or, if not available, create a new one
    guard let dstTrack = dstComp.mutableTrack(compatibleWith: srcTrack)
        ?? dstComp.addMutableTrack(withMediaType: mediaType, preferredTrackID: kCMPersistentTrackID_Invalid) else {
        throw InsertSegmentError.cannotAddCompositionTrack(mediaType)
    }

    // can't insert into "void" time beyond the current end of track. Instead, note
    // current end time, append there, and *after* appending, insert empty range
    var dstTrackEnd = dstTrack.timeRange.end
    if !dstTrackEnd.isValid {
        // Fall back to time zero when the track reports no valid end.
        dstTrackEnd = CMTime(value: 0, timescale: 1)
    }

    // Same value as cmtime(1.0): one second at a timescale of 600.
    let segmentDuration = CMTime(seconds: 1.0, preferredTimescale: 600)
    try dstTrack.insertTimeRange(CMTimeRange(start: start, duration: segmentDuration), of: srcTrack, at: dstTrackEnd)

    // now add empty range, if necessary; this pushes the new segment out to `at`
    if dstTrackEnd < at {
        dstTrack.insertEmptyTimeRange(CMTimeRange(start: dstTrackEnd, end: at))
    }
}

/// Inserts one second of `srcAsset`, beginning at `start`, into `dstComp` at
/// time `at`: the video track first, then the audio track.
///
/// - Throws: the first error raised by `insertTrackSecond`; when the video
///   insertion throws, the audio insertion is not attempted.
func insertSecond(srcAsset: AVAsset, dstComp: AVMutableComposition, start: CMTime, at: CMTime) throws {
    for mediaType in [AVMediaType.video, AVMediaType.audio] {
        try insertTrackSecond(srcAsset: srcAsset, dstComp: dstComp, mediaType: mediaType, start: start, at: at)
    }
}

// Edit list: (source asset, start offset in the source, position in the
// composition), all in seconds. The entries alternate between the two sources.
let segments: [(asset: AVAsset, start: Double, at: Double)] = [
    (source0, 3.0, 0.0),
    (source1, 2.0, 1.0),
    (source0, 100.0, 2.0),
    (source1, 3.0, 3.0),
    (source0, 350.0, 4.0),
]
for segment in segments {
    try insertSecond(srcAsset: segment.asset, dstComp: comp, start: cmtime(segment.start), at: cmtime(segment.at))
}


// Export the composition to a single MP4 file.
let outputURL = URL(fileURLWithPath: "/tmp/output.mp4")

// Remove the result of a previous run: an export to a path that already
// exists fails instead of overwriting the file.
if FileManager.default.fileExists(atPath: outputURL.path) {
    try FileManager.default.removeItem(at: outputURL)
}

if let sess = AVAssetExportSession(asset: comp, presetName: AVAssetExportPresetHighestQuality) {
    sess.outputURL = outputURL
    sess.outputFileType = .mp4

    // this leaves smaller videotracks at the origin, in their "natural" size. Manipulate the "preferredTransform" property of the mutable composition tracks for nicer results
    sess.videoComposition = AVVideoComposition(propertiesOf: comp)

    // not assigning an audio mix results in an output with multiple audio tracks
    let inputParameters: [AVAudioMixInputParameters] = comp.tracks(withMediaType: .audio).map {
        AVMutableAudioMixInputParameters(track: $0)
    }
    let audioMix = AVMutableAudioMix()
    audioMix.inputParameters = inputParameters
    sess.audioMix = audioMix

    // The export is asynchronous; block until its completion handler has run so
    // execution does not continue past this block before the export finishes.
    let semaphore = DispatchSemaphore(value: 0)

    sess.exportAsynchronously {
        print("done")
        print(sess.error ?? "success")
        semaphore.signal()
    }

    semaphore.wait()
} else {
    // Previously this case was silent; report it so a missing output file is explained.
    print("could not create an export session for the composition")
}
Run Code Online (Sandbox Code Playgroud)

  • 令人沮丧的是,不,我不知道。在过去的十年里,我主要通过尝试和错误来学习这些东西,因为我必须这样做。当时的 QuickTime 文档中提到了“在空时间中不插入”,iirc。恕我直言,对于 AVFoundation,情况会更糟:过度设计的 API、不完整的文档、缺失的功能以及似乎从未引起任何关注的错误报告。不过,与任何其他框架相比,我更倾向于查看 AVFoundation 的 ObjC 头文件;那里的评论通常比 API 文档更准确(有时甚至矛盾)...... (2认同)