videotoolbox 硬解 H264 annexb

Annex-b

H264 码流有 AVCC 和 Annex-B 两种格式,而苹果的 VideoToolbox 只能对 AVCC 格式的 NALU 做硬解码,并且解码输出的像素格式必须是 NV12(也叫 YUV420SP)。
一般的 mp4 文件都是 AVCC 格式的 H264 编码,直接硬解没有问题;而 Annex-B 格式的视频流就必须手动解析处理之后才能硬解。

FFMpeg

ffmpeg 中提供了一个 mp4toannexb 的 filter,但是反方向(Annex-B 转 AVCC)的 filter 却没有。其实 ffmpeg 内部已经针对 Annex-B 格式走 VideoToolbox 硬解做了额外处理,
总体分为两块:一块是解析 extradata 中的 VPS、SPS、PPS 等 NAL,封装为 AVCDecoderConfigurationRecord 或 HEVCDecoderConfigurationRecord 格式的数据;
另一块是转换 packet 中 NALU 的数据格式,然后通过 VideoToolbox 的 API 进行解码。详细实现可以直接看 libavcodec/videotoolbox.c 的源码。

videotoolbox

这里主要是通过 ffmpeg 完成解封装拿到 packet,再手动解析 packet 和 extradata 数据,完成 VideoToolbox 的硬解码。
跟 ffmpeg 不一样的地方是,这里用到了较新的 API:CMVideoFormatDescriptionCreateFromH264ParameterSets 和 CMVideoFormatDescriptionCreateFromHEVCParameterSets。

还有一点需要注意:Annex-B 格式的 packet 中,每个 packet 里都带有 SPS 和 PPS 的 NALU,解码的时候可以把这部分数据过滤掉。

NALU

code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/// Holder for the parameter-set NAL units parsed out of codec extradata,
/// together with the byte length of each one.
///
/// Every buffer starts out as a 1-byte placeholder allocation and every size
/// starts at 0. This class has no `deinit`, so it never releases the buffers
/// itself.
class DecoderInfo {
    // Parameter-set payloads: VPS, SPS, and two PPS slots (`f_pps`, `r_pps`).
    var vps = UnsafeMutablePointer<UInt8>.allocate(capacity: 1)
    var sps = UnsafeMutablePointer<UInt8>.allocate(capacity: 1)
    var f_pps = UnsafeMutablePointer<UInt8>.allocate(capacity: 1)
    var r_pps = UnsafeMutablePointer<UInt8>.allocate(capacity: 1)

    // Byte counts for the buffers above; 0 until a parameter set is stored.
    var vps_size: Int32 = 0
    var sps_size: Int32 = 0
    var f_pps_size: Int32 = 0
    var r_pps_size: Int32 = 0
}

/// Extracts the parameter-set NAL units from Annex-B formatted codec extradata
/// and stores copies of them in `decoderInfo`.
///
/// The extradata is scanned for `00 00 01` / `00 00 00 01` start codes. NAL
/// units are assigned by position:
/// - H.264: the first unit is taken as the SPS and the unit after the *last*
///   start code as the PPS.
/// - Any other codec type is handled as HEVC: VPS, SPS, first PPS and an
///   optional second PPS, in that order.
///
/// A buffer and its `*_size` field are only written when the NAL header type
/// matches the expected parameter set; otherwise both keep their previous
/// values. If the layout is incomplete (missing start code or empty unit)
/// nothing is written.
///
/// NOTE(review): the buffer previously held by `decoderInfo` is replaced
/// without being deallocated, exactly as before — confirm who owns it.
///
/// - Parameters:
///   - codecType: `.h264` selects H.264 parsing; everything else uses the HEVC path.
///   - extraData: Pointer to the Annex-B extradata bytes.
///   - extraDataSize: Number of readable bytes at `extraData`.
///   - decoderInfo: Receives freshly allocated copies of the parameter sets.
func getNALUInfo(codecType: CMFormatDescription.MediaSubType, extraData: UnsafeMutablePointer<UInt8>, extraDataSize: Int32, decoderInfo: DecoderInfo) {
    let data = extraData
    let size = Int(extraDataSize)
    guard size > 0 else { return }

    // Each index is the offset of the trailing 0x01 byte of a start code, so the
    // NAL payload begins at index + 1. A start code cannot end before offset 2,
    // which makes 0 a safe "not found" marker. The length (3 or 4) of a start
    // code is needed to know where the preceding unit ends.
    var vpsIndex = 0
    var spsIndex = 0
    var spsCodeLength = 0
    var firstPPSIndex = 0
    var firstPPSCodeLength = 0
    var secondPPSIndex = 0
    var secondPPSCodeLength = 0

    for i in stride(from: 2, to: size, by: 1) {
        guard data[i] == 0x01, data[i - 1] == 0x00, data[i - 2] == 0x00 else { continue }
        let codeLength = (i >= 3 && data[i - 3] == 0x00) ? 4 : 3

        if codecType == .h264 {
            if spsIndex == 0 {
                spsIndex = i
                spsCodeLength = codeLength
            } else {
                // Every later start code overwrites this slot, so it ends up
                // describing the last NAL unit in the extradata.
                firstPPSIndex = i
                firstPPSCodeLength = codeLength
            }
        } else if codecType == .hevc {
            if vpsIndex == 0 {
                vpsIndex = i
            } else if spsIndex == 0 {
                spsIndex = i
                spsCodeLength = codeLength
            } else if firstPPSIndex == 0 {
                firstPPSIndex = i
                firstPPSCodeLength = codeLength
            } else if secondPPSIndex == 0 {
                secondPPSIndex = i
                secondPPSCodeLength = codeLength
            }
        }
    }

    // Copies `count` bytes starting at `offset` into a newly allocated buffer.
    func copyNALU(at offset: Int, count: Int) -> UnsafeMutablePointer<UInt8> {
        let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: count)
        buffer.initialize(from: data + offset, count: count)
        return buffer
    }

    if codecType == .h264 {
        guard spsIndex != 0, firstPPSIndex != 0 else { return }
        let spsSize = firstPPSIndex - firstPPSCodeLength - spsIndex
        let ppsSize = size - (firstPPSIndex + 1)
        guard spsSize > 0, ppsSize > 0 else { return }

        // H.264 nal_unit_type is the low 5 bits of the first payload byte.
        if data[spsIndex + 1] & 0x1F == 0x07 {
            decoderInfo.sps = copyNALU(at: spsIndex + 1, count: spsSize)
            decoderInfo.sps_size = Int32(spsSize)
        }
        if data[firstPPSIndex + 1] & 0x1F == 0x08 {
            // Copy exactly the PPS length; the previous code copied the SPS
            // length here and could overrun the PPS buffer.
            decoderInfo.f_pps = copyNALU(at: firstPPSIndex + 1, count: ppsSize)
            decoderInfo.f_pps_size = Int32(ppsSize)
        }
    } else {
        guard vpsIndex != 0, spsIndex != 0, firstPPSIndex != 0 else { return }
        let vpsSize = spsIndex - spsCodeLength - vpsIndex
        let spsSize = firstPPSIndex - firstPPSCodeLength - spsIndex
        let firstPPSSize = secondPPSIndex != 0
            ? secondPPSIndex - secondPPSCodeLength - firstPPSIndex
            : size - (firstPPSIndex + 1)
        guard vpsSize > 0, spsSize > 0, firstPPSSize > 0 else { return }

        // The header byte is compared against 0x40 / 0x42 / 0x44 (VPS / SPS / PPS)
        // after masking with 0x4F, as in the original implementation.
        // NOTE(review): this mask ignores bits 4-5 of the type field; the usual
        // extraction is (byte >> 1) & 0x3F — confirm before tightening it.
        if data[vpsIndex + 1] & 0x4F == 0x40 {
            decoderInfo.vps = copyNALU(at: vpsIndex + 1, count: vpsSize)
            decoderInfo.vps_size = Int32(vpsSize)
        }
        if data[spsIndex + 1] & 0x4F == 0x42 {
            decoderInfo.sps = copyNALU(at: spsIndex + 1, count: spsSize)
            decoderInfo.sps_size = Int32(spsSize)
        }
        if data[firstPPSIndex + 1] & 0x4F == 0x44 {
            decoderInfo.f_pps = copyNALU(at: firstPPSIndex + 1, count: firstPPSSize)
            decoderInfo.f_pps_size = Int32(firstPPSSize)
        }

        // The second PPS is optional.
        guard secondPPSIndex != 0 else { return }
        let secondPPSSize = size - (secondPPSIndex + 1)
        guard secondPPSSize > 0 else { return }
        if data[secondPPSIndex + 1] & 0x4F == 0x44 {
            decoderInfo.r_pps = copyNALU(at: secondPPSIndex + 1, count: secondPPSSize)
            decoderInfo.r_pps_size = Int32(secondPPSSize)
        }
    }
}

// Holder that getNALUInfo fills with the parameter sets found in the extradata.
private var decoderInfo = DecoderInfo()

// Parse the Annex-B extradata once, before the format description is created.
getNALUInfo(codecType: codecType,
            extraData: extradata,
            extraDataSize: extradataSize,
            decoderInfo: decoderInfo)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Receives the format description built from the parsed parameter sets.
var formatDescriptionOut: CMFormatDescription?

// NOTE(review): `status` is never inspected in this excerpt, so a failed
// creation only shows up as `formatDescriptionOut` staying nil. `r_pps` is
// also never passed to the HEVC call — confirm both are intended.
if codecType == .h264 {
    // H.264 takes two parameter sets, in order: SPS, PPS.
    let h264Sets = [(decoderInfo.sps, decoderInfo.sps_size),
                    (decoderInfo.f_pps, decoderInfo.f_pps_size)]
    let pointers = h264Sets.map { UnsafePointer<UInt8>($0.0) }
    let sizes = h264Sets.map { Int($0.1) }
    let status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
        allocator: kCFAllocatorDefault,
        parameterSetCount: pointers.count,
        parameterSetPointers: pointers,
        parameterSetSizes: sizes,
        nalUnitHeaderLength: 4,
        formatDescriptionOut: &formatDescriptionOut)
} else {
    if #available(iOS 11.0, *) {
        // HEVC takes three parameter sets, in order: VPS, SPS, PPS.
        let hevcSets = [(decoderInfo.vps, decoderInfo.vps_size),
                        (decoderInfo.sps, decoderInfo.sps_size),
                        (decoderInfo.f_pps, decoderInfo.f_pps_size)]
        let pointers = hevcSets.map { UnsafePointer<UInt8>($0.0) }
        let sizes = hevcSets.map { Int($0.1) }
        let status = CMVideoFormatDescriptionCreateFromHEVCParameterSets(
            allocator: kCFAllocatorDefault,
            parameterSetCount: pointers.count,
            parameterSetPointers: pointers,
            parameterSetSizes: sizes,
            nalUnitHeaderLength: 4,
            extensions: nil,
            formatDescriptionOut: &formatDescriptionOut)
    } else {
        // HEVC format descriptions are unavailable before iOS 11.
        let status = -1
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Convert an Annex-B packet (start-code delimited NAL units) into AVCC layout
// (each NAL unit prefixed by its 4-byte big-endian length), collecting the
// output in an FFmpeg dynamic buffer.
var ioContext: UnsafeMutablePointer<AVIOContext>?
let status = avio_open_dyn_buf(&ioContext)
if status == 0 {

    // Offset of the first payload byte of the NAL unit currently being scanned;
    // nil until the first start code has been seen.
    var nalBegin: Int?
    var i = 0

    // Writes data[begin..<end] as one length-prefixed unit. SPS (type 7) and
    // PPS (type 8) units are dropped, as are empty units.
    func emitNAL(from begin: Int, to end: Int) {
        let length = end - begin
        guard length > 0 else { return }
        let nalType = data[begin] & 0x1f
        guard nalType != 7 && nalType != 8 else { return }
        avio_wb32(ioContext, UInt32(length))
        avio_write(ioContext, data + begin, Int32(length))
    }

    while i < size {
        // Length of the start code that begins at offset i, or 0 if none does.
        var startCodeLength = 0
        if i + 2 < size, data[i] == 0x00, data[i + 1] == 0x00, data[i + 2] == 0x01 {
            startCodeLength = 3
        } else if i + 3 < size, data[i] == 0x00, data[i + 1] == 0x00, data[i + 2] == 0x00, data[i + 3] == 0x01 {
            startCodeLength = 4
        }

        guard startCodeLength > 0 else {
            i += 1
            continue
        }

        // Every start code terminates the unit in progress, including start
        // codes that introduce an SPS/PPS. Previously those were skipped without
        // closing the current unit, so the filtered parameter set (and its start
        // code) ended up inside the neighbouring unit's payload.
        if let begin = nalBegin {
            emitNAL(from: begin, to: i)
        }
        nalBegin = i + startCodeLength
        i += startCodeLength
    }

    if let begin = nalBegin {
        // Flush the last unit of the packet.
        emitNAL(from: begin, to: size)
    } else {
        // No start code found: forward the whole buffer as a single unit,
        // matching the previous behaviour for this case.
        avio_wb32(ioContext, UInt32(size))
        avio_write(ioContext, data, Int32(size))
    }

    // NOTE(review): the buffer returned by avio_close_dyn_buf is handed on
    // without being released in this excerpt — confirm who frees it.
    var demuxBuffer: UnsafeMutablePointer<UInt8>?
    let demuxSze = avio_close_dyn_buf(ioContext, &demuxBuffer)
    return try createSampleBuffer(data: demuxBuffer, size: Int(demuxSze))
}

/// Wraps an already converted packet in a `CMSampleBuffer` that uses `self`
/// as its format description.
///
/// The memory is referenced through a `CMBlockBuffer` created with
/// `kCFAllocatorNull` as the block allocator.
/// NOTE(review): presumably this means `data` is neither copied nor freed by
/// the block buffer, so it must outlive the sample buffer — confirm ownership.
///
/// - Parameters:
///   - data: Start of the packet bytes.
///   - size: Number of bytes at `data`.
/// - Returns: A sample buffer holding one sample, with no timing or size entries.
/// - Throws: An `NSError` with `.codecVideoReceiveFrame` carrying the last
///   CoreMedia status when either creation call does not yield a buffer.
private func createSampleBuffer(data: UnsafeMutablePointer<UInt8>?, size: Int) throws -> CMSampleBuffer {
    // swiftlint:disable line_length
    var block: CMBlockBuffer?
    var status = CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault, memoryBlock: data, blockLength: size, blockAllocator: kCFAllocatorNull, customBlockSource: nil, offsetToData: 0, dataLength: size, flags: 0, blockBufferOut: &block)
    guard status == noErr else {
        throw NSError(errorCode: .codecVideoReceiveFrame, avErrorCode: status)
    }

    var sampleOut: CMSampleBuffer?
    status = CMSampleBufferCreate(allocator: kCFAllocatorDefault, dataBuffer: block, dataReady: true, makeDataReadyCallback: nil, refcon: nil, formatDescription: self, sampleCount: 1, sampleTimingEntryCount: 0, sampleTimingArray: nil, sampleSizeEntryCount: 0, sampleSizeArray: nil, sampleBufferOut: &sampleOut)
    // swiftlint:enable line_length
    if let created = sampleOut {
        return created
    }
    throw NSError(errorCode: .codecVideoReceiveFrame, avErrorCode: status)
}

reference

extradata
Video Decoder
Swift-pointer