annotate ffmpeg/doc/snow.txt @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 =============================================
yading@10 2 Snow Video Codec Specification Draft 20080110
yading@10 3 =============================================
yading@10 4
yading@10 5 Introduction:
yading@10 6 =============
yading@10 7 This specification describes the Snow bitstream syntax and semantics as
yading@10 8 well as the formal Snow decoding process.
yading@10 9
yading@10 10 The decoding process is described precisely and any compliant decoder
yading@10 11 MUST produce the exact same output for a spec-conformant Snow stream.
yading@10 12 For encoding, though, any process which generates a stream compliant to
yading@10 13 the syntactical and semantic requirements and which is decodable by
yading@10 14 the process described in this spec shall be considered a conformant
yading@10 15 Snow encoder.
yading@10 16
yading@10 17 Definitions:
yading@10 18 ============
yading@10 19
yading@10 20 MUST the specific part must be done to conform to this standard
yading@10 21 SHOULD it is recommended to be done that way, but not strictly required
yading@10 22
yading@10 23 ilog2(x) is the rounded down logarithm of x to base 2
yading@10 24 ilog2(0) = 0
yading@10 25
yading@10 26 Type definitions:
yading@10 27 =================
yading@10 28
yading@10 29 b 1-bit range coded
yading@10 30 u unsigned scalar value range coded
yading@10 31 s signed scalar value range coded
yading@10 32
yading@10 33
yading@10 34 Bitstream syntax:
yading@10 35 =================
yading@10 36
yading@10 37 frame:
yading@10 38 header
yading@10 39 prediction
yading@10 40 residual
yading@10 41
yading@10 42 header:
yading@10 43 keyframe b MID_STATE
yading@10 44 if(keyframe || always_reset)
yading@10 45 reset_contexts
yading@10 46 if(keyframe){
yading@10 47 version u header_state
yading@10 48 always_reset b header_state
yading@10 49 temporal_decomposition_type u header_state
yading@10 50 temporal_decomposition_count u header_state
yading@10 51 spatial_decomposition_count u header_state
yading@10 52 colorspace_type u header_state
yading@10 53 chroma_h_shift u header_state
yading@10 54 chroma_v_shift u header_state
yading@10 55 spatial_scalability b header_state
yading@10 56 max_ref_frames-1 u header_state
yading@10 57 qlogs
yading@10 58 }
yading@10 59 if(!keyframe){
yading@10 60 update_mc b header_state
yading@10 61 if(update_mc){
yading@10 62 for(plane=0; plane<2; plane++){
yading@10 63 diag_mc b header_state
yading@10 64 htaps/2-1 u header_state
yading@10 65 for(i= p->htaps/2; i; i--)
yading@10 66 |hcoeff[i]| u header_state
yading@10 67 }
yading@10 68 }
yading@10 69 update_qlogs b header_state
yading@10 70 if(update_qlogs){
yading@10 71 spatial_decomposition_count u header_state
yading@10 72 qlogs
yading@10 73 }
yading@10 74 }
yading@10 75
yading@10 76 spatial_decomposition_type s header_state
yading@10 77 qlog s header_state
yading@10 78 mv_scale s header_state
yading@10 79 qbias s header_state
yading@10 80 block_max_depth s header_state
yading@10 81
yading@10 82 qlogs:
yading@10 83 for(plane=0; plane<2; plane++){
yading@10 84 quant_table[plane][0][0] s header_state
yading@10 85 for(level=0; level < spatial_decomposition_count; level++){
yading@10 86 quant_table[plane][level][1]s header_state
yading@10 87 quant_table[plane][level][3]s header_state
yading@10 88 }
yading@10 89 }
yading@10 90
yading@10 91 reset_contexts
yading@10 92 *_state[*]= MID_STATE
yading@10 93
yading@10 94 prediction:
yading@10 95 for(y=0; y<block_count_vertical; y++)
yading@10 96 for(x=0; x<block_count_horizontal; x++)
yading@10 97 block(0)
yading@10 98
yading@10 99 block(level):
yading@10 100 mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0
yading@10 101 if(keyframe){
yading@10 102 intra=1
yading@10 103 }else{
yading@10 104 if(level!=max_block_depth){
yading@10 105 s_context= 2*left->level + 2*top->level + topleft->level + topright->level
yading@10 106 leaf b block_state[4 + s_context]
yading@10 107 }
yading@10 108 if(level==max_block_depth || leaf){
yading@10 109 intra b block_state[1 + left->intra + top->intra]
yading@10 110 if(intra){
yading@10 111 y_diff s block_state[32]
yading@10 112 cb_diff s block_state[64]
yading@10 113 cr_diff s block_state[96]
yading@10 114 }else{
yading@10 115 ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
yading@10 116 if(ref_frames > 1)
yading@10 117 ref u block_state[128 + 1024 + 32*ref_context]
yading@10 118 mx_context= ilog2(2*abs(left->mx - top->mx))
yading@10 119 my_context= ilog2(2*abs(left->my - top->my))
yading@10 120 mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)]
yading@10 121 mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)]
yading@10 122 }
yading@10 123 }else{
yading@10 124 block(level+1)
yading@10 125 block(level+1)
yading@10 126 block(level+1)
yading@10 127 block(level+1)
yading@10 128 }
yading@10 129 }
yading@10 130
yading@10 131
yading@10 132 residual:
yading@10 133 residual2(luma)
yading@10 134 residual2(chroma_cr)
yading@10 135 residual2(chroma_cb)
yading@10 136
yading@10 137 residual2:
yading@10 138 for(level=0; level<spatial_decomposition_count; level++){
yading@10 139 if(level==0)
yading@10 140 subband(LL, 0)
yading@10 141 subband(HL, level)
yading@10 142 subband(LH, level)
yading@10 143 subband(HH, level)
yading@10 144 }
yading@10 145
yading@10 146 subband:
yading@10 147 FIXME
yading@10 148
yading@10 149
yading@10 150
yading@10 151 Tag description:
yading@10 152 ----------------
yading@10 153
yading@10 154 version
yading@10 155 0
yading@10 156 this MUST NOT change within a bitstream
yading@10 157
yading@10 158 always_reset
yading@10 159 if 1 then the range coder contexts will be reset after each frame
yading@10 160
yading@10 161 temporal_decomposition_type
yading@10 162 0
yading@10 163
yading@10 164 temporal_decomposition_count
yading@10 165 0
yading@10 166
yading@10 167 spatial_decomposition_count
yading@10 168 FIXME
yading@10 169
yading@10 170 colorspace_type
yading@10 171 0
yading@10 172 this MUST NOT change within a bitstream
yading@10 173
yading@10 174 chroma_h_shift
yading@10 175 log2(luma.width / chroma.width)
yading@10 176 this MUST NOT change within a bitstream
yading@10 177
yading@10 178 chroma_v_shift
yading@10 179 log2(luma.height / chroma.height)
yading@10 180 this MUST NOT change within a bitstream
yading@10 181
yading@10 182 spatial_scalability
yading@10 183 0
yading@10 184
yading@10 185 max_ref_frames
yading@10 186 maximum number of reference frames
yading@10 187 this MUST NOT change within a bitstream
yading@10 188
yading@10 189 update_mc
yading@10 190 indicates that motion compensation filter parameters are stored in the
yading@10 191 header
yading@10 192
yading@10 193 diag_mc
yading@10 194 flag to enable faster diagonal interpolation
yading@10 195 this SHOULD be 1 unless it turns out to be covered by a valid patent
yading@10 196
yading@10 197 htaps
yading@10 198 number of half pel interpolation filter taps, MUST be even, >0 and <10
yading@10 199
yading@10 200 hcoeff
yading@10 201 half pel interpolation filter coefficients, hcoeff[0] are the 2 middle
yading@10 202 coefficients [1] are the next outer ones and so on, resulting in a filter
yading@10 203 like: ... hcoeff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ...
yading@10 204 the sign of the coefficients is not explicitly stored but alternates
yading@10 205 after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,...
yading@10 206 hcoeff[0] is not explicitly stored but found by subtracting the sum
yading@10 207 of all stored coefficients with signs from 32
yading@10 208 hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ...
yading@10 209 a good choice for hcoeff and htaps is
yading@10 210 htaps= 6
yading@10 211 hcoeff={40,-10,2}
yading@10 212 an alternative which requires more computations at both encoder and
yading@10 213 decoder side and may or may not be better is
yading@10 214 htaps= 8
yading@10 215 hcoeff={42,-14,6,-2}
yading@10 216
yading@10 217
yading@10 218 ref_frames
yading@10 219 minimum of the number of available reference frames and max_ref_frames
yading@10 220 for example the first frame after a key frame always has ref_frames=1
yading@10 221
yading@10 222 spatial_decomposition_type
yading@10 223 wavelet type
yading@10 224 0 is a 9/7 symmetric compact integer wavelet
yading@10 225 1 is a 5/3 symmetric compact integer wavelet
yading@10 226 others are reserved
yading@10 227 stored as delta from last, last is reset to 0 if always_reset || keyframe
yading@10 228
yading@10 229 qlog
yading@10 230 quality (logarithmic quantizer scale)
yading@10 231 stored as delta from last, last is reset to 0 if always_reset || keyframe
yading@10 232
yading@10 233 mv_scale
yading@10 234 stored as delta from last, last is reset to 0 if always_reset || keyframe
yading@10 235 FIXME check that everything works fine if this changes between frames
yading@10 236
yading@10 237 qbias
yading@10 238 dequantization bias
yading@10 239 stored as delta from last, last is reset to 0 if always_reset || keyframe
yading@10 240
yading@10 241 block_max_depth
yading@10 242 maximum depth of the block tree
yading@10 243 stored as delta from last, last is reset to 0 if always_reset || keyframe
yading@10 244
yading@10 245 quant_table
yading@10 246 quantization table
yading@10 247
yading@10 248
yading@10 249 Highlevel bitstream structure:
yading@10 250 =============================
yading@10 251 --------------------------------------------
yading@10 252 | Header |
yading@10 253 --------------------------------------------
yading@10 254 | ------------------------------------ |
yading@10 255 | | Block0 | |
yading@10 256 | | split? | |
yading@10 257 | | yes no | |
yading@10 258 | | ......... intra? | |
yading@10 259 | | : Block01 : yes no | |
yading@10 260 | | : Block02 : ....... .......... | |
yading@10 261 | | : Block03 : : y DC : : ref index: | |
yading@10 262 | | : Block04 : : cb DC : : motion x : | |
yading@10 263 | | ......... : cr DC : : motion y : | |
yading@10 264 | | ....... .......... | |
yading@10 265 | ------------------------------------ |
yading@10 266 | ------------------------------------ |
yading@10 267 | | Block1 | |
yading@10 268 | ... |
yading@10 269 --------------------------------------------
yading@10 270 | ------------ ------------ ------------ |
yading@10 271 || Y subbands | | Cb subbands| | Cr subbands||
yading@10 272 || --- --- | | --- --- | | --- --- ||
yading@10 273 || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| ||
yading@10 274 || --- --- | | --- --- | | --- --- ||
yading@10 275 || --- --- | | --- --- | | --- --- ||
yading@10 276 || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| ||
yading@10 277 || --- --- | | --- --- | | --- --- ||
yading@10 278 || --- --- | | --- --- | | --- --- ||
yading@10 279 || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| ||
yading@10 280 || --- --- | | --- --- | | --- --- ||
yading@10 281 || --- --- | | --- --- | | --- --- ||
yading@10 282 || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| ||
yading@10 283 || ... | | ... | | ... ||
yading@10 284 | ------------ ------------ ------------ |
yading@10 285 --------------------------------------------
yading@10 286
yading@10 287 Decoding process:
yading@10 288 =================
yading@10 289
yading@10 290 ------------
yading@10 291 | |
yading@10 292 | Subbands |
yading@10 293 ------------ | |
yading@10 294 | | ------------
yading@10 295 | Intra DC | |
yading@10 296 | | LL0 subband prediction
yading@10 297 ------------ |
yading@10 298 \ Dequantization
yading@10 299 ------------------- \ |
yading@10 300 | Reference frames | \ IDWT
yading@10 301 | ------- ------- | Motion \ |
yading@10 302 ||Frame 0| |Frame 1|| Compensation . OBMC v -------
yading@10 303 | ------- ------- | --------------. \------> + --->|Frame n|-->output
yading@10 304 | ------- ------- | -------
yading@10 305 ||Frame 2| |Frame 3||<----------------------------------/
yading@10 306 | ... |
yading@10 307 -------------------
yading@10 308
yading@10 309
yading@10 310 Range Coder:
yading@10 311 ============
yading@10 312
yading@10 313 Binary Range Coder:
yading@10 314 -------------------
yading@10 315 The implemented range coder is an adapted version based upon "Range encoding:
yading@10 316 an algorithm for removing redundancy from a digitised message." by G. N. N.
yading@10 317 Martin.
yading@10 318 The symbols encoded by the Snow range coder are bits (0|1). The
yading@10 319 associated probabilities are not fixed but change depending on the symbol mix
yading@10 320 seen so far.
yading@10 321
yading@10 322
yading@10 323 bit seen | new state
yading@10 324 ---------+-----------------------------------------------
yading@10 325 0 | 256 - state_transition_table[256 - old_state];
yading@10 326 1 | state_transition_table[ old_state];
yading@10 327
yading@10 328 state_transition_table = {
yading@10 329 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27,
yading@10 330 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42,
yading@10 331 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57,
yading@10 332 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
yading@10 333 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
yading@10 334 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
yading@10 335 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118,
yading@10 336 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133,
yading@10 337 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
yading@10 338 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
yading@10 339 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179,
yading@10 340 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194,
yading@10 341 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209,
yading@10 342 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225,
yading@10 343 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240,
yading@10 344 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};
yading@10 345
yading@10 346 FIXME
yading@10 347
yading@10 348
yading@10 349 Range Coding of integers:
yading@10 350 -------------------------
yading@10 351 FIXME
yading@10 352
yading@10 353
yading@10 354 Neighboring Blocks:
yading@10 355 ===================
yading@10 356 left and top are set to the respective blocks unless they are outside of
yading@10 357 the image in which case they are set to the Null block
yading@10 358
yading@10 359 top-left is set to the top left block unless it is outside of the image in
yading@10 360 which case it is set to the left block
yading@10 361
yading@10 362 if this block has no larger parent block or it is at the left side of its
yading@10 363 parent block and the top right block is not outside of the image then the
yading@10 364 top right block is used for top-right else the top-left block is used
yading@10 365
yading@10 366 Null block
yading@10 367 y,cb,cr are 128
yading@10 368 level, ref, mx and my are 0
yading@10 369
yading@10 370
yading@10 371 Motion Vector Prediction:
yading@10 372 =========================
yading@10 373 1. the motion vectors of all the neighboring blocks are scaled to
yading@10 374 compensate for the difference of reference frames
yading@10 375
yading@10 376 scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
yading@10 377
yading@10 378 2. the median of the scaled left, top and top-right vectors is used as
yading@10 379 motion vector prediction
yading@10 380
yading@10 381 3. the used motion vector is the sum of the predictor and
yading@10 382 (mvx_diff, mvy_diff)*mv_scale
yading@10 383
yading@10 384
yading@10 385 Intra DC Prediction:
yading@10 386 ======================
yading@10 387 the luma and chroma values of the left block are used as predictors
yading@10 388
yading@10 389 the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
yading@10 390 to reverse this in the decoder apply the following:
yading@10 391 block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff;
yading@10 392 block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff;
yading@10 393 block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff;
yading@10 394 block[*][-1].dc[*]= 128;
yading@10 395
yading@10 396
yading@10 397 Motion Compensation:
yading@10 398 ====================
yading@10 399
yading@10 400 Halfpel interpolation:
yading@10 401 ----------------------
yading@10 402 halfpel interpolation is done by convolution with the halfpel filter stored
yading@10 403 in the header:
yading@10 404
yading@10 405 horizontal halfpel samples are found by
yading@10 406 H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1])
yading@10 407 + hcoeff[1]*(F[y][x-1] + F[y][x+2])
yading@10 408 + hcoeff[2]*(F[y][x-2] + F[y][x+3])
yading@10 409 + ...
yading@10 410 h1[y][x] = (H1[y][x] + 32)>>6;
yading@10 411
yading@10 412 vertical halfpel samples are found by
yading@10 413 H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x])
yading@10 414 + hcoeff[1]*(F[y-1][x] + F[y+2][x])
yading@10 415 + ...
yading@10 416 h2[y][x] = (H2[y][x] + 32)>>6;
yading@10 417
yading@10 418 vertical+horizontal halfpel samples are found by
yading@10 419 H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1])
yading@10 420 + hcoeff[1]*(H2[y][x-1] + H2[y][x+2])
yading@10 421 + ...
yading@10 422 H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x])
yading@10 423 + hcoeff[1]*(H1[y-1][x] + H1[y+2][x])
yading@10 424 + ...
yading@10 425 h3[y][x] = (H3[y][x] + 2048)>>12;
yading@10 426
yading@10 427
yading@10 428 F H1 F
yading@10 429 | | |
yading@10 430 | | |
yading@10 431 | | |
yading@10 432 F H1 F
yading@10 433 | | |
yading@10 434 | | |
yading@10 435 | | |
yading@10 436 F-------F-------F-> H1<-F-------F-------F
yading@10 437 v v v
yading@10 438 H2 H3 H2
yading@10 439 ^ ^ ^
yading@10 440 F-------F-------F-> H1<-F-------F-------F
yading@10 441 | | |
yading@10 442 | | |
yading@10 443 | | |
yading@10 444 F H1 F
yading@10 445 | | |
yading@10 446 | | |
yading@10 447 | | |
yading@10 448 F H1 F
yading@10 449
yading@10 450
yading@10 451 unavailable fullpel samples (outside the picture for example) shall be equal
yading@10 452 to the closest available fullpel sample
yading@10 453
yading@10 454
yading@10 455 Smaller pel interpolation:
yading@10 456 --------------------------
yading@10 457 if diag_mc is set then points which lie on a line between 2 vertically,
yading@10 458 horizontally or diagonally adjacent halfpel points shall be interpolated
yading@10 459 linearly with rounding to nearest and halfway values rounded up.
yading@10 460 points which lie on 2 diagonals at the same time should only use the one
yading@10 461 diagonal not containing the fullpel point
yading@10 462
yading@10 463
yading@10 464
yading@10 465 F-->O---q---O<--h1->O---q---O<--F
yading@10 466 v \ / v \ / v
yading@10 467 O O O O O O O
yading@10 468 | / | \ |
yading@10 469 q q q q q
yading@10 470 | / | \ |
yading@10 471 O O O O O O O
yading@10 472 ^ / \ ^ / \ ^
yading@10 473 h2-->O---q---O<--h3->O---q---O<--h2
yading@10 474 v \ / v \ / v
yading@10 475 O O O O O O O
yading@10 476 | \ | / |
yading@10 477 q q q q q
yading@10 478 | \ | / |
yading@10 479 O O O O O O O
yading@10 480 ^ / \ ^ / \ ^
yading@10 481 F-->O---q---O<--h1->O---q---O<--F
yading@10 482
yading@10 483
yading@10 484
yading@10 485 the remaining points shall be bilinearly interpolated from the
yading@10 486 up to 4 surrounding halfpel and fullpel points, again rounding should be to
yading@10 487 nearest and halfway values rounded up
yading@10 488
yading@10 489 compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma
yading@10 490 interpolation at least
yading@10 491
yading@10 492
yading@10 493 Overlapped block motion compensation:
yading@10 494 -------------------------------------
yading@10 495 FIXME
yading@10 496
yading@10 497 LL band prediction:
yading@10 498 ===================
yading@10 499 Each sample in the LL0 subband is predicted by the median of the left, top and
yading@10 500 left+top-topleft samples, samples outside the subband shall be considered to
yading@10 501 be 0. To reverse this prediction in the decoder apply the following.
yading@10 502 for(y=0; y<height; y++){
yading@10 503 for(x=0; x<width; x++){
yading@10 504 sample[y][x] += median(sample[y-1][x],
yading@10 505 sample[y][x-1],
yading@10 506 sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);
yading@10 507 }
yading@10 508 }
yading@10 509 sample[-1][*]=sample[*][-1]= 0;
yading@10 510 width,height here are the width and height of the LL0 subband not of the final
yading@10 511 video
yading@10 512
yading@10 513
yading@10 514 Dequantization:
yading@10 515 ===============
yading@10 516 FIXME
yading@10 517
yading@10 518 Wavelet Transform:
yading@10 519 ==================
yading@10 520
yading@10 521 Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer
yading@10 522 transform and an integer approximation of the symmetric biorthogonal 9/7
yading@10 523 Daubechies wavelet.
yading@10 524
yading@10 525 2D IDWT (inverse discrete wavelet transform)
yading@10 526 --------------------------------------------
yading@10 527 The 2D IDWT applies a 2D filter recursively, each time combining the
yading@10 528 4 lowest frequency subbands into a single subband until only 1 subband
yading@10 529 remains.
yading@10 530 The 2D filter is done by first applying a 1D filter in the vertical direction
yading@10 531 and then applying it in the horizontal one.
yading@10 532 --------------- --------------- --------------- ---------------
yading@10 533 |LL0|HL0| | | | | | | | | | | |
yading@10 534 |---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | |
yading@10 535 |LH0|HH0| | | | | | | | | | | |
yading@10 536 |-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->...
yading@10 537 | | | | | | | | | | | |
yading@10 538 | LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | |
yading@10 539 | | | | | | | | | | | |
yading@10 540 --------------- --------------- --------------- ---------------
yading@10 541
yading@10 542
yading@10 543 1D Filter:
yading@10 544 ----------
yading@10 545 1. interleave the samples of the low and high frequency subbands like
yading@10 546 s={L0, H0, L1, H1, L2, H2, L3, H3, ... }
yading@10 547 note, this can end with an L or an H, the number of elements shall be w
yading@10 548 s[-1] shall be considered equivalent to s[1 ]
yading@10 549 s[w ] shall be considered equivalent to s[w-2]
yading@10 550
yading@10 551 2. perform the lifting steps in order as described below
yading@10 552
yading@10 553 5/3 Integer filter:
yading@10 554 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w
yading@10 555 2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w
yading@10 556
yading@10 557 \ | /|\ | /|\ | /|\ | /|\
yading@10 558 \|/ | \|/ | \|/ | \|/ |
yading@10 559 + | + | + | + | -1/4
yading@10 560 /|\ | /|\ | /|\ | /|\ |
yading@10 561 / | \|/ | \|/ | \|/ | \|/
yading@10 562 | + | + | + | + +1/2
yading@10 563
yading@10 564
yading@10 565 Snow's 9/7 Integer filter:
yading@10 566 1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w
yading@10 567 2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w
yading@10 568 3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w
yading@10 569 4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w
yading@10 570
yading@10 571 \ | /|\ | /|\ | /|\ | /|\
yading@10 572 \|/ | \|/ | \|/ | \|/ |
yading@10 573 + | + | + | + | -3/8
yading@10 574 /|\ | /|\ | /|\ | /|\ |
yading@10 575 / | \|/ | \|/ | \|/ | \|/
yading@10 576 (| + (| + (| + (| + -1
yading@10 577 \ + /|\ + /|\ + /|\ + /|\ +1/4
yading@10 578 \|/ | \|/ | \|/ | \|/ |
yading@10 579 + | + | + | + | +1/16
yading@10 580 /|\ | /|\ | /|\ | /|\ |
yading@10 581 / | \|/ | \|/ | \|/ | \|/
yading@10 582 | + | + | + | + +3/2
yading@10 583
yading@10 584 optimization tips:
yading@10 585 following are exactly identical
yading@10 586 (3a)>>1 == a + (a>>1)
yading@10 587 (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2
yading@10 588
yading@10 589 16bit implementation note:
yading@10 590 The IDWT can be implemented with 16bits, but this requires some care to
yading@10 591 prevent overflows; the following list gives the minimum number of bits needed
yading@10 592 for some terms
yading@10 593 1. lifting step
yading@10 594 A= s[i-1] + s[i+1] 16bit
yading@10 595 3*A + 4 18bit
yading@10 596 A + (A>>1) + 2 17bit
yading@10 597
yading@10 598 3. lifting step
yading@10 599 s[i-1] + s[i+1] 17bit
yading@10 600
yading@10 601 4. lifting step
yading@10 602 3*(s[i-1] + s[i+1]) 17bit
yading@10 603
yading@10 604
yading@10 605 TODO:
yading@10 606 =====
yading@10 607 Important:
yading@10 608 finetune initial contexts
yading@10 609 flip wavelet?
yading@10 610 try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
yading@10 611 try the MV length as context for coding the residual coefficients
yading@10 612 use extradata for stuff which is in the keyframes now?
yading@10 613 the MV median predictor is patented IIRC
yading@10 614 implement per picture halfpel interpolation
yading@10 615 try different range coder state transition tables for different contexts
yading@10 616
yading@10 617 Not Important:
yading@10 618 compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality)
yading@10 619 spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
yading@10 620
yading@10 621
yading@10 622 Credits:
yading@10 623 ========
yading@10 624 Michael Niedermayer
yading@10 625 Loren Merritt
yading@10 626
yading@10 627
yading@10 628 Copyright:
yading@10 629 ==========
yading@10 630 GPL + GFDL + whatever is needed to make this a RFC