yading@10
|
1 =============================================
|
yading@10
|
2 Snow Video Codec Specification Draft 20080110
|
yading@10
|
3 =============================================
|
yading@10
|
4
|
yading@10
|
5 Introduction:
|
yading@10
|
6 =============
|
yading@10
|
7 This specification describes the Snow bitstream syntax and semantics as
|
yading@10
|
8 well as the formal Snow decoding process.
|
yading@10
|
9
|
yading@10
|
10 The decoding process is described precisely and any compliant decoder
|
yading@10
|
11 MUST produce the exact same output for a spec-conformant Snow stream.
|
yading@10
|
12 For encoding, though, any process which generates a stream compliant to
|
yading@10
|
13 the syntactical and semantic requirements and which is decodable by
|
yading@10
|
14 the process described in this spec shall be considered a conformant
|
yading@10
|
15 Snow encoder.
|
yading@10
|
16
|
yading@10
|
17 Definitions:
|
yading@10
|
18 ============
|
yading@10
|
19
|
yading@10
|
20 MUST the specific part must be done to conform to this standard
|
yading@10
|
21 SHOULD it is recommended to be done that way, but not strictly required
|
yading@10
|
22
|
yading@10
|
23 ilog2(x) is the rounded down logarithm of x with base 2
|
yading@10
|
24 ilog2(0) = 0
|
yading@10
|
25
|
yading@10
|
26 Type definitions:
|
yading@10
|
27 =================
|
yading@10
|
28
|
yading@10
|
29 b 1-bit range coded
|
yading@10
|
30 u unsigned scalar value range coded
|
yading@10
|
31 s signed scalar value range coded
|
yading@10
|
32
|
yading@10
|
33
|
yading@10
|
34 Bitstream syntax:
|
yading@10
|
35 =================
|
yading@10
|
36
|
yading@10
|
37 frame:
|
yading@10
|
38 header
|
yading@10
|
39 prediction
|
yading@10
|
40 residual
|
yading@10
|
41
|
yading@10
|
42 header:
|
yading@10
|
43 keyframe b MID_STATE
|
yading@10
|
44 if(keyframe || always_reset)
|
yading@10
|
45 reset_contexts
|
yading@10
|
46 if(keyframe){
|
yading@10
|
47 version u header_state
|
yading@10
|
48 always_reset b header_state
|
yading@10
|
49 temporal_decomposition_type u header_state
|
yading@10
|
50 temporal_decomposition_count u header_state
|
yading@10
|
51 spatial_decomposition_count u header_state
|
yading@10
|
52 colorspace_type u header_state
|
yading@10
|
53 chroma_h_shift u header_state
|
yading@10
|
54 chroma_v_shift u header_state
|
yading@10
|
55 spatial_scalability b header_state
|
yading@10
|
56 max_ref_frames-1 u header_state
|
yading@10
|
57 qlogs
|
yading@10
|
58 }
|
yading@10
|
59 if(!keyframe){
|
yading@10
|
60 update_mc b header_state
|
yading@10
|
61 if(update_mc){
|
yading@10
|
62 for(plane=0; plane<2; plane++){
|
yading@10
|
63 diag_mc b header_state
|
yading@10
|
64 htaps/2-1 u header_state
|
yading@10
|
65 for(i= p->htaps/2; i; i--)
|
yading@10
|
66 |hcoeff[i]| u header_state
|
yading@10
|
67 }
|
yading@10
|
68 }
|
yading@10
|
69 update_qlogs b header_state
|
yading@10
|
70 if(update_qlogs){
|
yading@10
|
71 spatial_decomposition_count u header_state
|
yading@10
|
72 qlogs
|
yading@10
|
73 }
|
yading@10
|
74 }
|
yading@10
|
75
|
yading@10
|
76 spatial_decomposition_type s header_state
|
yading@10
|
77 qlog s header_state
|
yading@10
|
78 mv_scale s header_state
|
yading@10
|
79 qbias s header_state
|
yading@10
|
80 block_max_depth s header_state
|
yading@10
|
81
|
yading@10
|
82 qlogs:
|
yading@10
|
83 for(plane=0; plane<2; plane++){
|
yading@10
|
84 quant_table[plane][0][0] s header_state
|
yading@10
|
85 for(level=0; level < spatial_decomposition_count; level++){
|
yading@10
|
86 quant_table[plane][level][1]s header_state
|
yading@10
|
87 quant_table[plane][level][3]s header_state
|
yading@10
|
88 }
|
yading@10
|
89 }
|
yading@10
|
90
|
yading@10
|
91 reset_contexts
|
yading@10
|
92 *_state[*]= MID_STATE
|
yading@10
|
93
|
yading@10
|
94 prediction:
|
yading@10
|
95 for(y=0; y<block_count_vertical; y++)
|
yading@10
|
96 for(x=0; x<block_count_horizontal; x++)
|
yading@10
|
97 block(0)
|
yading@10
|
98
|
yading@10
|
99 block(level):
|
yading@10
|
100 mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0
|
yading@10
|
101 if(keyframe){
|
yading@10
|
102 intra=1
|
yading@10
|
103 }else{
|
yading@10
|
104 if(level!=block_max_depth){
|
yading@10
|
105 s_context= 2*left->level + 2*top->level + topleft->level + topright->level
|
yading@10
|
106 leaf b block_state[4 + s_context]
|
yading@10
|
107 }
|
yading@10
|
108 if(level==block_max_depth || leaf){
|
yading@10
|
109 intra b block_state[1 + left->intra + top->intra]
|
yading@10
|
110 if(intra){
|
yading@10
|
111 y_diff s block_state[32]
|
yading@10
|
112 cb_diff s block_state[64]
|
yading@10
|
113 cr_diff s block_state[96]
|
yading@10
|
114 }else{
|
yading@10
|
115 ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
|
yading@10
|
116 if(ref_frames > 1)
|
yading@10
|
117 ref u block_state[128 + 1024 + 32*ref_context]
|
yading@10
|
118 mx_context= ilog2(2*abs(left->mx - top->mx))
|
yading@10
|
119 my_context= ilog2(2*abs(left->my - top->my))
|
yading@10
|
120 mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)]
|
yading@10
|
121 mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)]
|
yading@10
|
122 }
|
yading@10
|
123 }else{
|
yading@10
|
124 block(level+1)
|
yading@10
|
125 block(level+1)
|
yading@10
|
126 block(level+1)
|
yading@10
|
127 block(level+1)
|
yading@10
|
128 }
|
yading@10
|
129 }
|
yading@10
|
130
|
yading@10
|
131
|
yading@10
|
132 residual:
|
yading@10
|
133 residual2(luma)
|
yading@10
|
134 residual2(chroma_cr)
|
yading@10
|
135 residual2(chroma_cb)
|
yading@10
|
136
|
yading@10
|
137 residual2:
|
yading@10
|
138 for(level=0; level<spatial_decomposition_count; level++){
|
yading@10
|
139 if(level==0)
|
yading@10
|
140 subband(LL, 0)
|
yading@10
|
141 subband(HL, level)
|
yading@10
|
142 subband(LH, level)
|
yading@10
|
143 subband(HH, level)
|
yading@10
|
144 }
|
yading@10
|
145
|
yading@10
|
146 subband:
|
yading@10
|
147 FIXME
|
yading@10
|
148
|
yading@10
|
149
|
yading@10
|
150
|
yading@10
|
151 Tag description:
|
yading@10
|
152 ----------------
|
yading@10
|
153
|
yading@10
|
154 version
|
yading@10
|
155 0
|
yading@10
|
156 this MUST NOT change within a bitstream
|
yading@10
|
157
|
yading@10
|
158 always_reset
|
yading@10
|
159 if 1 then the range coder contexts will be reset after each frame
|
yading@10
|
160
|
yading@10
|
161 temporal_decomposition_type
|
yading@10
|
162 0
|
yading@10
|
163
|
yading@10
|
164 temporal_decomposition_count
|
yading@10
|
165 0
|
yading@10
|
166
|
yading@10
|
167 spatial_decomposition_count
|
yading@10
|
168 FIXME
|
yading@10
|
169
|
yading@10
|
170 colorspace_type
|
yading@10
|
171 0
|
yading@10
|
172 this MUST NOT change within a bitstream
|
yading@10
|
173
|
yading@10
|
174 chroma_h_shift
|
yading@10
|
175 log2(luma.width / chroma.width)
|
yading@10
|
176 this MUST NOT change within a bitstream
|
yading@10
|
177
|
yading@10
|
178 chroma_v_shift
|
yading@10
|
179 log2(luma.height / chroma.height)
|
yading@10
|
180 this MUST NOT change within a bitstream
|
yading@10
|
181
|
yading@10
|
182 spatial_scalability
|
yading@10
|
183 0
|
yading@10
|
184
|
yading@10
|
185 max_ref_frames
|
yading@10
|
186 maximum number of reference frames
|
yading@10
|
187 this MUST NOT change within a bitstream
|
yading@10
|
188
|
yading@10
|
189 update_mc
|
yading@10
|
190 indicates that motion compensation filter parameters are stored in the
|
yading@10
|
191 header
|
yading@10
|
192
|
yading@10
|
193 diag_mc
|
yading@10
|
194 flag to enable faster diagonal interpolation
|
yading@10
|
195 this SHOULD be 1 unless it turns out to be covered by a valid patent
|
yading@10
|
196
|
yading@10
|
197 htaps
|
yading@10
|
198 number of half pel interpolation filter taps, MUST be even, >0 and <10
|
yading@10
|
199
|
yading@10
|
200 hcoeff
|
yading@10
|
201 half pel interpolation filter coefficients, hcoeff[0] are the 2 middle
|
yading@10
|
202 coefficients, hcoeff[1] are the next outer ones and so on, resulting in a filter
|
yading@10
|
203 like: ..., hcoeff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2], ...
|
yading@10
|
204 the sign of the coefficients is not explicitly stored but alternates
|
yading@10
|
205 after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,...
|
yading@10
|
206 hcoeff[0] is not explicitly stored but found by subtracting the sum
|
yading@10
|
207 of all stored coefficients with signs from 32
|
yading@10
|
208 hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ...
|
yading@10
|
209 a good choice for hcoeff and htaps is
|
yading@10
|
210 htaps= 6
|
yading@10
|
211 hcoeff={40,-10,2}
|
yading@10
|
212 an alternative which requires more computations at both encoder and
|
yading@10
|
213 decoder side and may or may not be better is
|
yading@10
|
214 htaps= 8
|
yading@10
|
215 hcoeff={42,-14,6,-2}
|
yading@10
|
216
|
yading@10
|
217
|
yading@10
|
218 ref_frames
|
yading@10
|
219 minimum of the number of available reference frames and max_ref_frames
|
yading@10
|
220 for example the first frame after a key frame always has ref_frames=1
|
yading@10
|
221
|
yading@10
|
222 spatial_decomposition_type
|
yading@10
|
223 wavelet type
|
yading@10
|
224 0 is a 9/7 symmetric compact integer wavelet
|
yading@10
|
225 1 is a 5/3 symmetric compact integer wavelet
|
yading@10
|
226 others are reserved
|
yading@10
|
227 stored as delta from last, last is reset to 0 if always_reset || keyframe
|
yading@10
|
228
|
yading@10
|
229 qlog
|
yading@10
|
230 quality (logarithmic quantizer scale)
|
yading@10
|
231 stored as delta from last, last is reset to 0 if always_reset || keyframe
|
yading@10
|
232
|
yading@10
|
233 mv_scale
|
yading@10
|
234 stored as delta from last, last is reset to 0 if always_reset || keyframe
|
yading@10
|
235 FIXME check that everything works fine if this changes between frames
|
yading@10
|
236
|
yading@10
|
237 qbias
|
yading@10
|
238 dequantization bias
|
yading@10
|
239 stored as delta from last, last is reset to 0 if always_reset || keyframe
|
yading@10
|
240
|
yading@10
|
241 block_max_depth
|
yading@10
|
242 maximum depth of the block tree
|
yading@10
|
243 stored as delta from last, last is reset to 0 if always_reset || keyframe
|
yading@10
|
244
|
yading@10
|
245 quant_table
|
yading@10
|
246 quantization table
|
yading@10
|
247
|
yading@10
|
248
|
yading@10
|
249 Highlevel bitstream structure:
|
yading@10
|
250 =============================
|
yading@10
|
251 --------------------------------------------
|
yading@10
|
252 | Header |
|
yading@10
|
253 --------------------------------------------
|
yading@10
|
254 | ------------------------------------ |
|
yading@10
|
255 | | Block0 | |
|
yading@10
|
256 | | split? | |
|
yading@10
|
257 | | yes no | |
|
yading@10
|
258 | | ......... intra? | |
|
yading@10
|
259 | | : Block01 : yes no | |
|
yading@10
|
260 | | : Block02 : ....... .......... | |
|
yading@10
|
261 | | : Block03 : : y DC : : ref index: | |
|
yading@10
|
262 | | : Block04 : : cb DC : : motion x : | |
|
yading@10
|
263 | | ......... : cr DC : : motion y : | |
|
yading@10
|
264 | | ....... .......... | |
|
yading@10
|
265 | ------------------------------------ |
|
yading@10
|
266 | ------------------------------------ |
|
yading@10
|
267 | | Block1 | |
|
yading@10
|
268 | ... |
|
yading@10
|
269 --------------------------------------------
|
yading@10
|
270 | ------------ ------------ ------------ |
|
yading@10
|
271 || Y subbands | | Cb subbands| | Cr subbands||
|
yading@10
|
272 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
273 || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| ||
|
yading@10
|
274 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
275 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
276 || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| ||
|
yading@10
|
277 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
278 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
279 || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| ||
|
yading@10
|
280 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
281 || --- --- | | --- --- | | --- --- ||
|
yading@10
|
282 || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| ||
|
yading@10
|
283 || ... | | ... | | ... ||
|
yading@10
|
284 | ------------ ------------ ------------ |
|
yading@10
|
285 --------------------------------------------
|
yading@10
|
286
|
yading@10
|
287 Decoding process:
|
yading@10
|
288 =================
|
yading@10
|
289
|
yading@10
|
290 ------------
|
yading@10
|
291 | |
|
yading@10
|
292 | Subbands |
|
yading@10
|
293 ------------ | |
|
yading@10
|
294 | | ------------
|
yading@10
|
295 | Intra DC | |
|
yading@10
|
296 | | LL0 subband prediction
|
yading@10
|
297 ------------ |
|
yading@10
|
298 \ Dequantization
|
yading@10
|
299 ------------------- \ |
|
yading@10
|
300 | Reference frames | \ IDWT
|
yading@10
|
301 | ------- ------- | Motion \ |
|
yading@10
|
302 ||Frame 0| |Frame 1|| Compensation . OBMC v -------
|
yading@10
|
303 | ------- ------- | --------------. \------> + --->|Frame n|-->output
|
yading@10
|
304 | ------- ------- | -------
|
yading@10
|
305 ||Frame 2| |Frame 3||<----------------------------------/
|
yading@10
|
306 | ... |
|
yading@10
|
307 -------------------
|
yading@10
|
308
|
yading@10
|
309
|
yading@10
|
310 Range Coder:
|
yading@10
|
311 ============
|
yading@10
|
312
|
yading@10
|
313 Binary Range Coder:
|
yading@10
|
314 -------------------
|
yading@10
|
315 The implemented range coder is an adapted version based upon "Range encoding:
|
yading@10
|
316 an algorithm for removing redundancy from a digitised message." by G. N. N.
|
yading@10
|
317 Martin.
|
yading@10
|
318 The symbols encoded by the Snow range coder are bits (0|1). The
|
yading@10
|
319 associated probabilities are not fixed but change depending on the symbol mix
|
yading@10
|
320 seen so far.
|
yading@10
|
321
|
yading@10
|
322
|
yading@10
|
323 bit seen | new state
|
yading@10
|
324 ---------+-----------------------------------------------
|
yading@10
|
325 0 | 256 - state_transition_table[256 - old_state];
|
yading@10
|
326 1 | state_transition_table[ old_state];
|
yading@10
|
327
|
yading@10
|
328 state_transition_table = {
|
yading@10
|
329 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27,
|
yading@10
|
330 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42,
|
yading@10
|
331 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57,
|
yading@10
|
332 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
|
yading@10
|
333 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
|
yading@10
|
334 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
|
yading@10
|
335 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118,
|
yading@10
|
336 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133,
|
yading@10
|
337 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
yading@10
|
338 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
|
yading@10
|
339 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
yading@10
|
340 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194,
|
yading@10
|
341 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209,
|
yading@10
|
342 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225,
|
yading@10
|
343 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240,
|
yading@10
|
344 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};
|
yading@10
|
345
|
yading@10
|
346 FIXME
|
yading@10
|
347
|
yading@10
|
348
|
yading@10
|
349 Range Coding of integers:
|
yading@10
|
350 -------------------------
|
yading@10
|
351 FIXME
|
yading@10
|
352
|
yading@10
|
353
|
yading@10
|
354 Neighboring Blocks:
|
yading@10
|
355 ===================
|
yading@10
|
356 left and top are set to the respective blocks unless they are outside of
|
yading@10
|
357 the image in which case they are set to the Null block
|
yading@10
|
358
|
yading@10
|
359 top-left is set to the top left block unless it is outside of the image in
|
yading@10
|
360 which case it is set to the left block
|
yading@10
|
361
|
yading@10
|
362 if this block has no larger parent block or it is at the left side of its
|
yading@10
|
363 parent block and the top right block is not outside of the image then the
|
yading@10
|
364 top right block is used for top-right else the top-left block is used
|
yading@10
|
365
|
yading@10
|
366 Null block
|
yading@10
|
367 y,cb,cr are 128
|
yading@10
|
368 level, ref, mx and my are 0
|
yading@10
|
369
|
yading@10
|
370
|
yading@10
|
371 Motion Vector Prediction:
|
yading@10
|
372 =========================
|
yading@10
|
373 1. the motion vectors of all the neighboring blocks are scaled to
|
yading@10
|
374 compensate for the difference of reference frames
|
yading@10
|
375
|
yading@10
|
376 scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
|
yading@10
|
377
|
yading@10
|
378 2. the median of the scaled left, top and top-right vectors is used as
|
yading@10
|
379 motion vector prediction
|
yading@10
|
380
|
yading@10
|
381 3. the used motion vector is the sum of the predictor and
|
yading@10
|
382 (mvx_diff, mvy_diff)*mv_scale
|
yading@10
|
383
|
yading@10
|
384
|
yading@10
|
385 Intra DC Prediction:
|
yading@10
|
386 ======================
|
yading@10
|
387 the luma and chroma values of the left block are used as predictors
|
yading@10
|
388
|
yading@10
|
389 the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
|
yading@10
|
390 to reverse this in the decoder apply the following:
|
yading@10
|
391 block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff;
|
yading@10
|
392 block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff;
|
yading@10
|
393 block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff;
|
yading@10
|
394 block[*][-1].dc[*]= 128;
|
yading@10
|
395
|
yading@10
|
396
|
yading@10
|
397 Motion Compensation:
|
yading@10
|
398 ====================
|
yading@10
|
399
|
yading@10
|
400 Halfpel interpolation:
|
yading@10
|
401 ----------------------
|
yading@10
|
402 halfpel interpolation is done by convolution with the halfpel filter stored
|
yading@10
|
403 in the header:
|
yading@10
|
404
|
yading@10
|
405 horizontal halfpel samples are found by
|
yading@10
|
406 H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1])
|
yading@10
|
407 + hcoeff[1]*(F[y][x-1] + F[y][x+2])
|
yading@10
|
408 + hcoeff[2]*(F[y][x-2] + F[y][x+3])
|
yading@10
|
409 + ...
|
yading@10
|
410 h1[y][x] = (H1[y][x] + 32)>>6;
|
yading@10
|
411
|
yading@10
|
412 vertical halfpel samples are found by
|
yading@10
|
413 H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x])
|
yading@10
|
414 + hcoeff[1]*(F[y-1][x] + F[y+2][x])
|
yading@10
|
415 + ...
|
yading@10
|
416 h2[y][x] = (H2[y][x] + 32)>>6;
|
yading@10
|
417
|
yading@10
|
418 vertical+horizontal halfpel samples are found by
|
yading@10
|
419 H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1])
|
yading@10
|
420 + hcoeff[1]*(H2[y][x-1] + H2[y][x+2])
|
yading@10
|
421 + ...
|
yading@10
|
422 H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x])
|
yading@10
|
423 + hcoeff[1]*(H1[y-1][x] + H1[y+2][x])
|
yading@10
|
424 + ...
|
yading@10
|
425 h3[y][x] = (H3[y][x] + 2048)>>12;
|
yading@10
|
426
|
yading@10
|
427
|
yading@10
|
428 F H1 F
|
yading@10
|
429 | | |
|
yading@10
|
430 | | |
|
yading@10
|
431 | | |
|
yading@10
|
432 F H1 F
|
yading@10
|
433 | | |
|
yading@10
|
434 | | |
|
yading@10
|
435 | | |
|
yading@10
|
436 F-------F-------F-> H1<-F-------F-------F
|
yading@10
|
437 v v v
|
yading@10
|
438 H2 H3 H2
|
yading@10
|
439 ^ ^ ^
|
yading@10
|
440 F-------F-------F-> H1<-F-------F-------F
|
yading@10
|
441 | | |
|
yading@10
|
442 | | |
|
yading@10
|
443 | | |
|
yading@10
|
444 F H1 F
|
yading@10
|
445 | | |
|
yading@10
|
446 | | |
|
yading@10
|
447 | | |
|
yading@10
|
448 F H1 F
|
yading@10
|
449
|
yading@10
|
450
|
yading@10
|
451 unavailable fullpel samples (outside the picture for example) shall be equal
|
yading@10
|
452 to the closest available fullpel sample
|
yading@10
|
453
|
yading@10
|
454
|
yading@10
|
455 Smaller pel interpolation:
|
yading@10
|
456 --------------------------
|
yading@10
|
457 if diag_mc is set then points which lie on a line between 2 vertically,
|
yading@10
|
458 horizontally or diagonally adjacent halfpel points shall be interpolated
|
yading@10
|
459 linearly with rounding to nearest and halfway values rounded up.
|
yading@10
|
460 points which lie on 2 diagonals at the same time should only use the one
|
yading@10
|
461 diagonal not containing the fullpel point
|
yading@10
|
462
|
yading@10
|
463
|
yading@10
|
464
|
yading@10
|
465 F-->O---q---O<--h1->O---q---O<--F
|
yading@10
|
466 v \ / v \ / v
|
yading@10
|
467 O O O O O O O
|
yading@10
|
468 | / | \ |
|
yading@10
|
469 q q q q q
|
yading@10
|
470 | / | \ |
|
yading@10
|
471 O O O O O O O
|
yading@10
|
472 ^ / \ ^ / \ ^
|
yading@10
|
473 h2-->O---q---O<--h3->O---q---O<--h2
|
yading@10
|
474 v \ / v \ / v
|
yading@10
|
475 O O O O O O O
|
yading@10
|
476 | \ | / |
|
yading@10
|
477 q q q q q
|
yading@10
|
478 | \ | / |
|
yading@10
|
479 O O O O O O O
|
yading@10
|
480 ^ / \ ^ / \ ^
|
yading@10
|
481 F-->O---q---O<--h1->O---q---O<--F
|
yading@10
|
482
|
yading@10
|
483
|
yading@10
|
484
|
yading@10
|
485 the remaining points shall be bilinearly interpolated from the
|
yading@10
|
486 up to 4 surrounding halfpel and fullpel points, again rounding should be to
|
yading@10
|
487 nearest and halfway values rounded up
|
yading@10
|
488
|
yading@10
|
489 compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma
|
yading@10
|
490 interpolation at least
|
yading@10
|
491
|
yading@10
|
492
|
yading@10
|
493 Overlapped block motion compensation:
|
yading@10
|
494 -------------------------------------
|
yading@10
|
495 FIXME
|
yading@10
|
496
|
yading@10
|
497 LL band prediction:
|
yading@10
|
498 ===================
|
yading@10
|
499 Each sample in the LL0 subband is predicted by the median of the left, top and
|
yading@10
|
500 left+top-topleft samples, samples outside the subband shall be considered to
|
yading@10
|
501 be 0. To reverse this prediction in the decoder apply the following.
|
yading@10
|
502 for(y=0; y<height; y++){
|
yading@10
|
503 for(x=0; x<width; x++){
|
yading@10
|
504 sample[y][x] += median(sample[y-1][x],
|
yading@10
|
505 sample[y][x-1],
|
yading@10
|
506 sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);
|
yading@10
|
507 }
|
yading@10
|
508 }
|
yading@10
|
509 sample[-1][*]=sample[*][-1]= 0;
|
yading@10
|
510 width,height here are the width and height of the LL0 subband not of the final
|
yading@10
|
511 video
|
yading@10
|
512
|
yading@10
|
513
|
yading@10
|
514 Dequantization:
|
yading@10
|
515 ==============
|
yading@10
|
516 FIXME
|
yading@10
|
517
|
yading@10
|
518 Wavelet Transform:
|
yading@10
|
519 ==================
|
yading@10
|
520
|
yading@10
|
521 Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer
|
yading@10
|
522 transform and an integer approximation of the symmetric biorthogonal 9/7
|
yading@10
|
523 Daubechies wavelet.
|
yading@10
|
524
|
yading@10
|
525 2D IDWT (inverse discrete wavelet transform)
|
yading@10
|
526 --------------------------------------------
|
yading@10
|
527 The 2D IDWT applies a 2D filter recursively, each time combining the
|
yading@10
|
528 4 lowest frequency subbands into a single subband until only 1 subband
|
yading@10
|
529 remains.
|
yading@10
|
530 The 2D filter is done by first applying a 1D filter in the vertical direction
|
yading@10
|
531 and then applying it in the horizontal one.
|
yading@10
|
532 --------------- --------------- --------------- ---------------
|
yading@10
|
533 |LL0|HL0| | | | | | | | | | | |
|
yading@10
|
534 |---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | |
|
yading@10
|
535 |LH0|HH0| | | | | | | | | | | |
|
yading@10
|
536 |-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->...
|
yading@10
|
537 | | | | | | | | | | | |
|
yading@10
|
538 | LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | |
|
yading@10
|
539 | | | | | | | | | | | |
|
yading@10
|
540 --------------- --------------- --------------- ---------------
|
yading@10
|
541
|
yading@10
|
542
|
yading@10
|
543 1D Filter:
|
yading@10
|
544 ----------
|
yading@10
|
545 1. interleave the samples of the low and high frequency subbands like
|
yading@10
|
546 s={L0, H0, L1, H1, L2, H2, L3, H3, ... }
|
yading@10
|
547 note, this can end with a L or a H, the number of elements shall be w
|
yading@10
|
548 s[-1] shall be considered equivalent to s[1 ]
|
yading@10
|
549 s[w ] shall be considered equivalent to s[w-2]
|
yading@10
|
550
|
yading@10
|
551 2. perform the lifting steps in order as described below
|
yading@10
|
552
|
yading@10
|
553 5/3 Integer filter:
|
yading@10
|
554 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w
|
yading@10
|
555 2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w
|
yading@10
|
556
|
yading@10
|
557 \ | /|\ | /|\ | /|\ | /|\
|
yading@10
|
558 \|/ | \|/ | \|/ | \|/ |
|
yading@10
|
559 + | + | + | + | -1/4
|
yading@10
|
560 /|\ | /|\ | /|\ | /|\ |
|
yading@10
|
561 / | \|/ | \|/ | \|/ | \|/
|
yading@10
|
562 | + | + | + | + +1/2
|
yading@10
|
563
|
yading@10
|
564
|
yading@10
|
565 Snow's 9/7 Integer filter:
|
yading@10
|
566 1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w
|
yading@10
|
567 2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w
|
yading@10
|
568 3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w
|
yading@10
|
569 4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w
|
yading@10
|
570
|
yading@10
|
571 \ | /|\ | /|\ | /|\ | /|\
|
yading@10
|
572 \|/ | \|/ | \|/ | \|/ |
|
yading@10
|
573 + | + | + | + | -3/8
|
yading@10
|
574 /|\ | /|\ | /|\ | /|\ |
|
yading@10
|
575 / | \|/ | \|/ | \|/ | \|/
|
yading@10
|
576 (| + (| + (| + (| + -1
|
yading@10
|
577 \ + /|\ + /|\ + /|\ + /|\ +1/4
|
yading@10
|
578 \|/ | \|/ | \|/ | \|/ |
|
yading@10
|
579 + | + | + | + | +1/16
|
yading@10
|
580 /|\ | /|\ | /|\ | /|\ |
|
yading@10
|
581 / | \|/ | \|/ | \|/ | \|/
|
yading@10
|
582 | + | + | + | + +3/2
|
yading@10
|
583
|
yading@10
|
584 optimization tips:
|
yading@10
|
585 following are exactly identical
|
yading@10
|
586 (3a)>>1 == a + (a>>1)
|
yading@10
|
587 (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2
|
yading@10
|
588
|
yading@10
|
589 16bit implementation note:
|
yading@10
|
590 The IDWT can be implemented with 16bits, but this requires some care to
|
yading@10
|
591 prevent overflows; the following list gives the minimum number of bits needed
|
yading@10
|
592 for some terms
|
yading@10
|
593 1. lifting step
|
yading@10
|
594 A= s[i-1] + s[i+1] 16bit
|
yading@10
|
595 3*A + 4 18bit
|
yading@10
|
596 A + (A>>1) + 2 17bit
|
yading@10
|
597
|
yading@10
|
598 3. lifting step
|
yading@10
|
599 s[i-1] + s[i+1] 17bit
|
yading@10
|
600
|
yading@10
|
601 4. lifting step
|
yading@10
|
602 3*(s[i-1] + s[i+1]) 17bit
|
yading@10
|
603
|
yading@10
|
604
|
yading@10
|
605 TODO:
|
yading@10
|
606 =====
|
yading@10
|
607 Important:
|
yading@10
|
608 finetune initial contexts
|
yading@10
|
609 flip wavelet?
|
yading@10
|
610 try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
|
yading@10
|
611 try the MV length as context for coding the residual coefficients
|
yading@10
|
612 use extradata for stuff which is in the keyframes now?
|
yading@10
|
613 the MV median predictor is patented IIRC
|
yading@10
|
614 implement per picture halfpel interpolation
|
yading@10
|
615 try different range coder state transition tables for different contexts
|
yading@10
|
616
|
yading@10
|
617 Not Important:
|
yading@10
|
618 compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality)
|
yading@10
|
619 spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
|
yading@10
|
620
|
yading@10
|
621
|
yading@10
|
622 Credits:
|
yading@10
|
623 ========
|
yading@10
|
624 Michael Niedermayer
|
yading@10
|
625 Loren Merritt
|
yading@10
|
626
|
yading@10
|
627
|
yading@10
|
628 Copyright:
|
yading@10
|
629 ==========
|
yading@10
|
630 GPL + GFDL + whatever is needed to make this a RFC
|