annotate ffmpeg/libavcodec/sonic.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Simple free lossless/lossy audio codec
yading@10 3 * Copyright (c) 2004 Alex Beregszaszi
yading@10 4 *
yading@10 5 * This file is part of FFmpeg.
yading@10 6 *
yading@10 7 * FFmpeg is free software; you can redistribute it and/or
yading@10 8 * modify it under the terms of the GNU Lesser General Public
yading@10 9 * License as published by the Free Software Foundation; either
yading@10 10 * version 2.1 of the License, or (at your option) any later version.
yading@10 11 *
yading@10 12 * FFmpeg is distributed in the hope that it will be useful,
yading@10 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 15 * Lesser General Public License for more details.
yading@10 16 *
yading@10 17 * You should have received a copy of the GNU Lesser General Public
yading@10 18 * License along with FFmpeg; if not, write to the Free Software
yading@10 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 20 */
yading@10 21 #include "avcodec.h"
yading@10 22 #include "get_bits.h"
yading@10 23 #include "golomb.h"
yading@10 24 #include "internal.h"
yading@10 25
yading@10 26 /**
yading@10 27 * @file
yading@10 28 * Simple free lossless/lossy audio codec
yading@10 29 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
yading@10 30 * Written and designed by Alex Beregszaszi
yading@10 31 *
yading@10 32 * TODO:
yading@10 33 * - CABAC put/get_symbol
yading@10 34 * - independent quantizer for channels
yading@10 35 * - >2 channels support
yading@10 36 * - more decorrelation types
yading@10 37 * - more tap_quant tests
yading@10 38 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
yading@10 39 */
yading@10 40
/** Upper bound on the number of channels; sizes the per-channel arrays below. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes handled by the encoder and decoder. */
#define MID_SIDE   0
#define LEFT_SIDE  1
#define RIGHT_SIDE 2

/**
 * Private codec state, used by both the encoder and the decoder.
 */
typedef struct SonicContext {
    int lossless;       /* non-zero for the lossless variant */
    int decorrelation;  /* MID_SIDE, LEFT_SIDE, RIGHT_SIDE; the encoder stores 3 for mono */

    int num_taps;       /* number of predictor taps */
    int downsampling;   /* samples folded into one coded value */
    double quantization;

    int channels;
    int samplerate;
    int block_align;    /* coded values per channel per frame */
    int frame_size;     /* channels * block_align * downsampling */

    int *tap_quant;                     /* per-tap quantisation factors */
    int *int_samples;                   /* frame_size working samples */
    int *coded_samples[MAX_CHANNELS];   /* block_align coded values per channel */

    /* for encoding */
    int *tail;          /* last tail_size samples of the previous frame */
    int tail_size;
    int *window;        /* analysis window of window_size samples */
    int window_size;

    /* for decoding */
    int *predictor_k;                    /* predictor coefficients, num_taps entries */
    int *predictor_state[MAX_CHANNELS];  /* per-channel predictor history */
} SonicContext;
yading@10 69
/* Fixed-point precision of the predictor coefficients. */
#define LATTICE_SHIFT  10
/* Extra precision bits given to samples in lossy mode. */
#define SAMPLE_SHIFT   4
#define LATTICE_FACTOR (1 << LATTICE_SHIFT)
#define SAMPLE_FACTOR  (1 << SAMPLE_SHIFT)

/* Rate-control constants used when the encoder picks the lossy quantiser. */
#define BASE_QUANT     0.6
#define RATE_VARIATION 3.0
yading@10 77
/**
 * Divide a by b, rounding to nearest with halves rounded away from zero.
 * The negative case is computed on the magnitude so the result is symmetric.
 */
static inline int divide(int a, int b)
{
    const int half = b / 2;

    return (a < 0) ? -((half - a) / b)
                   : (a + half) / b;
}
yading@10 85
/**
 * Arithmetic right shift of a by b bits with rounding (adds half of the
 * divisor before shifting).
 *
 * A shift count of zero or less returns a unchanged; the rounding term
 * 1 << (b - 1) would otherwise be an undefined negative shift.
 */
static inline int shift(int a, int b)
{
    if (b <= 0)
        return a;

    return (a + (1 << (b - 1))) >> b;
}
yading@10 90
/**
 * Right shift a by b bits, then add one when a is negative.
 */
static inline int shift_down(int a, int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted += 1;

    return shifted;
}
yading@10 95
yading@10 96 #if 1
yading@10 97 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
yading@10 98 {
yading@10 99 int i;
yading@10 100
yading@10 101 for (i = 0; i < entries; i++)
yading@10 102 set_se_golomb(pb, buf[i]);
yading@10 103
yading@10 104 return 1;
yading@10 105 }
yading@10 106
yading@10 107 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
yading@10 108 {
yading@10 109 int i;
yading@10 110
yading@10 111 for (i = 0; i < entries; i++)
yading@10 112 buf[i] = get_se_golomb(gb);
yading@10 113
yading@10 114 return 1;
yading@10 115 }
yading@10 116
yading@10 117 #else
yading@10 118
/* Divisor controlling how fast the run-length step adapts. */
#define ADAPT_LEVEL 8

/**
 * Number of bits needed to represent x (0 for x == 0).
 */
static int bits_to_store(uint64_t x)
{
    int count;

    for (count = 0; x != 0; x >>= 1)
        count++;

    return count;
}
yading@10 132
yading@10 133 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
yading@10 134 {
yading@10 135 int i, bits;
yading@10 136
yading@10 137 if (!max)
yading@10 138 return;
yading@10 139
yading@10 140 bits = bits_to_store(max);
yading@10 141
yading@10 142 for (i = 0; i < bits-1; i++)
yading@10 143 put_bits(pb, 1, value & (1 << i));
yading@10 144
yading@10 145 if ( (value | (1 << (bits-1))) <= max)
yading@10 146 put_bits(pb, 1, value & (1 << (bits-1)));
yading@10 147 }
yading@10 148
yading@10 149 static unsigned int read_uint_max(GetBitContext *gb, int max)
yading@10 150 {
yading@10 151 int i, bits, value = 0;
yading@10 152
yading@10 153 if (!max)
yading@10 154 return 0;
yading@10 155
yading@10 156 bits = bits_to_store(max);
yading@10 157
yading@10 158 for (i = 0; i < bits-1; i++)
yading@10 159 if (get_bits1(gb))
yading@10 160 value += 1 << i;
yading@10 161
yading@10 162 if ( (value | (1<<(bits-1))) <= max)
yading@10 163 if (get_bits1(gb))
yading@10 164 value += 1 << (bits-1);
yading@10 165
yading@10 166 return value;
yading@10 167 }
yading@10 168
yading@10 169 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
yading@10 170 {
yading@10 171 int i, j, x = 0, low_bits = 0, max = 0;
yading@10 172 int step = 256, pos = 0, dominant = 0, any = 0;
yading@10 173 int *copy, *bits;
yading@10 174
yading@10 175 copy = av_mallocz(4* entries);
yading@10 176 if (!copy)
yading@10 177 return -1;
yading@10 178
yading@10 179 if (base_2_part)
yading@10 180 {
yading@10 181 int energy = 0;
yading@10 182
yading@10 183 for (i = 0; i < entries; i++)
yading@10 184 energy += abs(buf[i]);
yading@10 185
yading@10 186 low_bits = bits_to_store(energy / (entries * 2));
yading@10 187 if (low_bits > 15)
yading@10 188 low_bits = 15;
yading@10 189
yading@10 190 put_bits(pb, 4, low_bits);
yading@10 191 }
yading@10 192
yading@10 193 for (i = 0; i < entries; i++)
yading@10 194 {
yading@10 195 put_bits(pb, low_bits, abs(buf[i]));
yading@10 196 copy[i] = abs(buf[i]) >> low_bits;
yading@10 197 if (copy[i] > max)
yading@10 198 max = abs(copy[i]);
yading@10 199 }
yading@10 200
yading@10 201 bits = av_mallocz(4* entries*max);
yading@10 202 if (!bits)
yading@10 203 {
yading@10 204 // av_free(copy);
yading@10 205 return -1;
yading@10 206 }
yading@10 207
yading@10 208 for (i = 0; i <= max; i++)
yading@10 209 {
yading@10 210 for (j = 0; j < entries; j++)
yading@10 211 if (copy[j] >= i)
yading@10 212 bits[x++] = copy[j] > i;
yading@10 213 }
yading@10 214
yading@10 215 // store bitstream
yading@10 216 while (pos < x)
yading@10 217 {
yading@10 218 int steplet = step >> 8;
yading@10 219
yading@10 220 if (pos + steplet > x)
yading@10 221 steplet = x - pos;
yading@10 222
yading@10 223 for (i = 0; i < steplet; i++)
yading@10 224 if (bits[i+pos] != dominant)
yading@10 225 any = 1;
yading@10 226
yading@10 227 put_bits(pb, 1, any);
yading@10 228
yading@10 229 if (!any)
yading@10 230 {
yading@10 231 pos += steplet;
yading@10 232 step += step / ADAPT_LEVEL;
yading@10 233 }
yading@10 234 else
yading@10 235 {
yading@10 236 int interloper = 0;
yading@10 237
yading@10 238 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
yading@10 239 interloper++;
yading@10 240
yading@10 241 // note change
yading@10 242 write_uint_max(pb, interloper, (step >> 8) - 1);
yading@10 243
yading@10 244 pos += interloper + 1;
yading@10 245 step -= step / ADAPT_LEVEL;
yading@10 246 }
yading@10 247
yading@10 248 if (step < 256)
yading@10 249 {
yading@10 250 step = 65536 / step;
yading@10 251 dominant = !dominant;
yading@10 252 }
yading@10 253 }
yading@10 254
yading@10 255 // store signs
yading@10 256 for (i = 0; i < entries; i++)
yading@10 257 if (buf[i])
yading@10 258 put_bits(pb, 1, buf[i] < 0);
yading@10 259
yading@10 260 // av_free(bits);
yading@10 261 // av_free(copy);
yading@10 262
yading@10 263 return 0;
yading@10 264 }
yading@10 265
yading@10 266 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
yading@10 267 {
yading@10 268 int i, low_bits = 0, x = 0;
yading@10 269 int n_zeros = 0, step = 256, dominant = 0;
yading@10 270 int pos = 0, level = 0;
yading@10 271 int *bits = av_mallocz(4* entries);
yading@10 272
yading@10 273 if (!bits)
yading@10 274 return -1;
yading@10 275
yading@10 276 if (base_2_part)
yading@10 277 {
yading@10 278 low_bits = get_bits(gb, 4);
yading@10 279
yading@10 280 if (low_bits)
yading@10 281 for (i = 0; i < entries; i++)
yading@10 282 buf[i] = get_bits(gb, low_bits);
yading@10 283 }
yading@10 284
yading@10 285 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
yading@10 286
yading@10 287 while (n_zeros < entries)
yading@10 288 {
yading@10 289 int steplet = step >> 8;
yading@10 290
yading@10 291 if (!get_bits1(gb))
yading@10 292 {
yading@10 293 for (i = 0; i < steplet; i++)
yading@10 294 bits[x++] = dominant;
yading@10 295
yading@10 296 if (!dominant)
yading@10 297 n_zeros += steplet;
yading@10 298
yading@10 299 step += step / ADAPT_LEVEL;
yading@10 300 }
yading@10 301 else
yading@10 302 {
yading@10 303 int actual_run = read_uint_max(gb, steplet-1);
yading@10 304
yading@10 305 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
yading@10 306
yading@10 307 for (i = 0; i < actual_run; i++)
yading@10 308 bits[x++] = dominant;
yading@10 309
yading@10 310 bits[x++] = !dominant;
yading@10 311
yading@10 312 if (!dominant)
yading@10 313 n_zeros += actual_run;
yading@10 314 else
yading@10 315 n_zeros++;
yading@10 316
yading@10 317 step -= step / ADAPT_LEVEL;
yading@10 318 }
yading@10 319
yading@10 320 if (step < 256)
yading@10 321 {
yading@10 322 step = 65536 / step;
yading@10 323 dominant = !dominant;
yading@10 324 }
yading@10 325 }
yading@10 326
yading@10 327 // reconstruct unsigned values
yading@10 328 n_zeros = 0;
yading@10 329 for (i = 0; n_zeros < entries; i++)
yading@10 330 {
yading@10 331 while(1)
yading@10 332 {
yading@10 333 if (pos >= entries)
yading@10 334 {
yading@10 335 pos = 0;
yading@10 336 level += 1 << low_bits;
yading@10 337 }
yading@10 338
yading@10 339 if (buf[pos] >= level)
yading@10 340 break;
yading@10 341
yading@10 342 pos++;
yading@10 343 }
yading@10 344
yading@10 345 if (bits[i])
yading@10 346 buf[pos] += 1 << low_bits;
yading@10 347 else
yading@10 348 n_zeros++;
yading@10 349
yading@10 350 pos++;
yading@10 351 }
yading@10 352 // av_free(bits);
yading@10 353
yading@10 354 // read signs
yading@10 355 for (i = 0; i < entries; i++)
yading@10 356 if (buf[i] && get_bits1(gb))
yading@10 357 buf[i] = -buf[i];
yading@10 358
yading@10 359 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
yading@10 360
yading@10 361 return 0;
yading@10 362 }
yading@10 363 #endif
yading@10 364
yading@10 365 static void predictor_init_state(int *k, int *state, int order)
yading@10 366 {
yading@10 367 int i;
yading@10 368
yading@10 369 for (i = order-2; i >= 0; i--)
yading@10 370 {
yading@10 371 int j, p, x = state[i];
yading@10 372
yading@10 373 for (j = 0, p = i+1; p < order; j++,p++)
yading@10 374 {
yading@10 375 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
yading@10 376 state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
yading@10 377 x = tmp;
yading@10 378 }
yading@10 379 }
yading@10 380 }
yading@10 381
yading@10 382 static int predictor_calc_error(int *k, int *state, int order, int error)
yading@10 383 {
yading@10 384 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
yading@10 385
yading@10 386 #if 1
yading@10 387 int *k_ptr = &(k[order-2]),
yading@10 388 *state_ptr = &(state[order-2]);
yading@10 389 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
yading@10 390 {
yading@10 391 int k_value = *k_ptr, state_value = *state_ptr;
yading@10 392 x -= shift_down(k_value * state_value, LATTICE_SHIFT);
yading@10 393 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
yading@10 394 }
yading@10 395 #else
yading@10 396 for (i = order-2; i >= 0; i--)
yading@10 397 {
yading@10 398 x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
yading@10 399 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
yading@10 400 }
yading@10 401 #endif
yading@10 402
yading@10 403 // don't drift too far, to avoid overflows
yading@10 404 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
yading@10 405 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
yading@10 406
yading@10 407 state[0] = x;
yading@10 408
yading@10 409 return x;
yading@10 410 }
yading@10 411
yading@10 412 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
yading@10 413 // Heavily modified Levinson-Durbin algorithm which
yading@10 414 // copes better with quantization, and calculates the
yading@10 415 // actual whitened result as it goes.
yading@10 416
yading@10 417 static void modified_levinson_durbin(int *window, int window_entries,
yading@10 418 int *out, int out_entries, int channels, int *tap_quant)
yading@10 419 {
yading@10 420 int i;
yading@10 421 int *state = av_mallocz(4* window_entries);
yading@10 422
yading@10 423 memcpy(state, window, 4* window_entries);
yading@10 424
yading@10 425 for (i = 0; i < out_entries; i++)
yading@10 426 {
yading@10 427 int step = (i+1)*channels, k, j;
yading@10 428 double xx = 0.0, xy = 0.0;
yading@10 429 #if 1
yading@10 430 int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
yading@10 431 j = window_entries - step;
yading@10 432 for (;j>=0;j--,x_ptr++,state_ptr++)
yading@10 433 {
yading@10 434 double x_value = *x_ptr, state_value = *state_ptr;
yading@10 435 xx += state_value*state_value;
yading@10 436 xy += x_value*state_value;
yading@10 437 }
yading@10 438 #else
yading@10 439 for (j = 0; j <= (window_entries - step); j++);
yading@10 440 {
yading@10 441 double stepval = window[step+j], stateval = window[j];
yading@10 442 // xx += (double)window[j]*(double)window[j];
yading@10 443 // xy += (double)window[step+j]*(double)window[j];
yading@10 444 xx += stateval*stateval;
yading@10 445 xy += stepval*stateval;
yading@10 446 }
yading@10 447 #endif
yading@10 448 if (xx == 0.0)
yading@10 449 k = 0;
yading@10 450 else
yading@10 451 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
yading@10 452
yading@10 453 if (k > (LATTICE_FACTOR/tap_quant[i]))
yading@10 454 k = LATTICE_FACTOR/tap_quant[i];
yading@10 455 if (-k > (LATTICE_FACTOR/tap_quant[i]))
yading@10 456 k = -(LATTICE_FACTOR/tap_quant[i]);
yading@10 457
yading@10 458 out[i] = k;
yading@10 459 k *= tap_quant[i];
yading@10 460
yading@10 461 #if 1
yading@10 462 x_ptr = &(window[step]);
yading@10 463 state_ptr = &(state[0]);
yading@10 464 j = window_entries - step;
yading@10 465 for (;j>=0;j--,x_ptr++,state_ptr++)
yading@10 466 {
yading@10 467 int x_value = *x_ptr, state_value = *state_ptr;
yading@10 468 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
yading@10 469 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
yading@10 470 }
yading@10 471 #else
yading@10 472 for (j=0; j <= (window_entries - step); j++)
yading@10 473 {
yading@10 474 int stepval = window[step+j], stateval=state[j];
yading@10 475 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
yading@10 476 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
yading@10 477 }
yading@10 478 #endif
yading@10 479 }
yading@10 480
yading@10 481 av_free(state);
yading@10 482 }
yading@10 483
/**
 * Map a sample rate in Hz to its header code.
 *
 * @return the code (0..8), or -1 for a rate that has no code
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int idx;

    for (idx = 0; idx < (int)(sizeof(rates) / sizeof(rates[0])); idx++) {
        if (rates[idx] == samplerate)
            return idx;
    }

    return -1;
}
yading@10 500
yading@10 501 static av_cold int sonic_encode_init(AVCodecContext *avctx)
yading@10 502 {
yading@10 503 SonicContext *s = avctx->priv_data;
yading@10 504 PutBitContext pb;
yading@10 505 int i, version = 0;
yading@10 506
yading@10 507 if (avctx->channels > MAX_CHANNELS)
yading@10 508 {
yading@10 509 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
yading@10 510 return -1; /* only stereo or mono for now */
yading@10 511 }
yading@10 512
yading@10 513 if (avctx->channels == 2)
yading@10 514 s->decorrelation = MID_SIDE;
yading@10 515 else
yading@10 516 s->decorrelation = 3;
yading@10 517
yading@10 518 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
yading@10 519 {
yading@10 520 s->lossless = 1;
yading@10 521 s->num_taps = 32;
yading@10 522 s->downsampling = 1;
yading@10 523 s->quantization = 0.0;
yading@10 524 }
yading@10 525 else
yading@10 526 {
yading@10 527 s->num_taps = 128;
yading@10 528 s->downsampling = 2;
yading@10 529 s->quantization = 1.0;
yading@10 530 }
yading@10 531
yading@10 532 // max tap 2048
yading@10 533 if ((s->num_taps < 32) || (s->num_taps > 1024) ||
yading@10 534 ((s->num_taps>>5)<<5 != s->num_taps))
yading@10 535 {
yading@10 536 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
yading@10 537 return -1;
yading@10 538 }
yading@10 539
yading@10 540 // generate taps
yading@10 541 s->tap_quant = av_mallocz(4* s->num_taps);
yading@10 542 for (i = 0; i < s->num_taps; i++)
yading@10 543 s->tap_quant[i] = (int)(sqrt(i+1));
yading@10 544
yading@10 545 s->channels = avctx->channels;
yading@10 546 s->samplerate = avctx->sample_rate;
yading@10 547
yading@10 548 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
yading@10 549 s->frame_size = s->channels*s->block_align*s->downsampling;
yading@10 550
yading@10 551 s->tail_size = s->num_taps*s->channels;
yading@10 552 s->tail = av_mallocz(4 * s->tail_size);
yading@10 553 if (!s->tail)
yading@10 554 return -1;
yading@10 555
yading@10 556 s->predictor_k = av_mallocz(4 * s->num_taps);
yading@10 557 if (!s->predictor_k)
yading@10 558 return -1;
yading@10 559
yading@10 560 for (i = 0; i < s->channels; i++)
yading@10 561 {
yading@10 562 s->coded_samples[i] = av_mallocz(4* s->block_align);
yading@10 563 if (!s->coded_samples[i])
yading@10 564 return -1;
yading@10 565 }
yading@10 566
yading@10 567 s->int_samples = av_mallocz(4* s->frame_size);
yading@10 568
yading@10 569 s->window_size = ((2*s->tail_size)+s->frame_size);
yading@10 570 s->window = av_mallocz(4* s->window_size);
yading@10 571 if (!s->window)
yading@10 572 return -1;
yading@10 573
yading@10 574 avctx->extradata = av_mallocz(16);
yading@10 575 if (!avctx->extradata)
yading@10 576 return -1;
yading@10 577 init_put_bits(&pb, avctx->extradata, 16*8);
yading@10 578
yading@10 579 put_bits(&pb, 2, version); // version
yading@10 580 if (version == 1)
yading@10 581 {
yading@10 582 put_bits(&pb, 2, s->channels);
yading@10 583 put_bits(&pb, 4, code_samplerate(s->samplerate));
yading@10 584 }
yading@10 585 put_bits(&pb, 1, s->lossless);
yading@10 586 if (!s->lossless)
yading@10 587 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
yading@10 588 put_bits(&pb, 2, s->decorrelation);
yading@10 589 put_bits(&pb, 2, s->downsampling);
yading@10 590 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
yading@10 591 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
yading@10 592
yading@10 593 flush_put_bits(&pb);
yading@10 594 avctx->extradata_size = put_bits_count(&pb)/8;
yading@10 595
yading@10 596 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
yading@10 597 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
yading@10 598
yading@10 599 avctx->coded_frame = avcodec_alloc_frame();
yading@10 600 if (!avctx->coded_frame)
yading@10 601 return AVERROR(ENOMEM);
yading@10 602 avctx->coded_frame->key_frame = 1;
yading@10 603 avctx->frame_size = s->block_align*s->downsampling;
yading@10 604
yading@10 605 return 0;
yading@10 606 }
yading@10 607
yading@10 608 static av_cold int sonic_encode_close(AVCodecContext *avctx)
yading@10 609 {
yading@10 610 SonicContext *s = avctx->priv_data;
yading@10 611 int i;
yading@10 612
yading@10 613 av_freep(&avctx->coded_frame);
yading@10 614
yading@10 615 for (i = 0; i < s->channels; i++)
yading@10 616 av_free(s->coded_samples[i]);
yading@10 617
yading@10 618 av_free(s->predictor_k);
yading@10 619 av_free(s->tail);
yading@10 620 av_free(s->tap_quant);
yading@10 621 av_free(s->window);
yading@10 622 av_free(s->int_samples);
yading@10 623
yading@10 624 return 0;
yading@10 625 }
yading@10 626
yading@10 627 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
yading@10 628 const AVFrame *frame, int *got_packet_ptr)
yading@10 629 {
yading@10 630 SonicContext *s = avctx->priv_data;
yading@10 631 PutBitContext pb;
yading@10 632 int i, j, ch, quant = 0, x = 0;
yading@10 633 int ret;
yading@10 634 const short *samples = (const int16_t*)frame->data[0];
yading@10 635
yading@10 636 if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
yading@10 637 return ret;
yading@10 638
yading@10 639 init_put_bits(&pb, avpkt->data, avpkt->size);
yading@10 640
yading@10 641 // short -> internal
yading@10 642 for (i = 0; i < s->frame_size; i++)
yading@10 643 s->int_samples[i] = samples[i];
yading@10 644
yading@10 645 if (!s->lossless)
yading@10 646 for (i = 0; i < s->frame_size; i++)
yading@10 647 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
yading@10 648
yading@10 649 switch(s->decorrelation)
yading@10 650 {
yading@10 651 case MID_SIDE:
yading@10 652 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 653 {
yading@10 654 s->int_samples[i] += s->int_samples[i+1];
yading@10 655 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
yading@10 656 }
yading@10 657 break;
yading@10 658 case LEFT_SIDE:
yading@10 659 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 660 s->int_samples[i+1] -= s->int_samples[i];
yading@10 661 break;
yading@10 662 case RIGHT_SIDE:
yading@10 663 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 664 s->int_samples[i] -= s->int_samples[i+1];
yading@10 665 break;
yading@10 666 }
yading@10 667
yading@10 668 memset(s->window, 0, 4* s->window_size);
yading@10 669
yading@10 670 for (i = 0; i < s->tail_size; i++)
yading@10 671 s->window[x++] = s->tail[i];
yading@10 672
yading@10 673 for (i = 0; i < s->frame_size; i++)
yading@10 674 s->window[x++] = s->int_samples[i];
yading@10 675
yading@10 676 for (i = 0; i < s->tail_size; i++)
yading@10 677 s->window[x++] = 0;
yading@10 678
yading@10 679 for (i = 0; i < s->tail_size; i++)
yading@10 680 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
yading@10 681
yading@10 682 // generate taps
yading@10 683 modified_levinson_durbin(s->window, s->window_size,
yading@10 684 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
yading@10 685 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
yading@10 686 return -1;
yading@10 687
yading@10 688 for (ch = 0; ch < s->channels; ch++)
yading@10 689 {
yading@10 690 x = s->tail_size+ch;
yading@10 691 for (i = 0; i < s->block_align; i++)
yading@10 692 {
yading@10 693 int sum = 0;
yading@10 694 for (j = 0; j < s->downsampling; j++, x += s->channels)
yading@10 695 sum += s->window[x];
yading@10 696 s->coded_samples[ch][i] = sum;
yading@10 697 }
yading@10 698 }
yading@10 699
yading@10 700 // simple rate control code
yading@10 701 if (!s->lossless)
yading@10 702 {
yading@10 703 double energy1 = 0.0, energy2 = 0.0;
yading@10 704 for (ch = 0; ch < s->channels; ch++)
yading@10 705 {
yading@10 706 for (i = 0; i < s->block_align; i++)
yading@10 707 {
yading@10 708 double sample = s->coded_samples[ch][i];
yading@10 709 energy2 += sample*sample;
yading@10 710 energy1 += fabs(sample);
yading@10 711 }
yading@10 712 }
yading@10 713
yading@10 714 energy2 = sqrt(energy2/(s->channels*s->block_align));
yading@10 715 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
yading@10 716
yading@10 717 // increase bitrate when samples are like a gaussian distribution
yading@10 718 // reduce bitrate when samples are like a two-tailed exponential distribution
yading@10 719
yading@10 720 if (energy2 > energy1)
yading@10 721 energy2 += (energy2-energy1)*RATE_VARIATION;
yading@10 722
yading@10 723 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
yading@10 724 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
yading@10 725
yading@10 726 if (quant < 1)
yading@10 727 quant = 1;
yading@10 728 if (quant > 65534)
yading@10 729 quant = 65534;
yading@10 730
yading@10 731 set_ue_golomb(&pb, quant);
yading@10 732
yading@10 733 quant *= SAMPLE_FACTOR;
yading@10 734 }
yading@10 735
yading@10 736 // write out coded samples
yading@10 737 for (ch = 0; ch < s->channels; ch++)
yading@10 738 {
yading@10 739 if (!s->lossless)
yading@10 740 for (i = 0; i < s->block_align; i++)
yading@10 741 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
yading@10 742
yading@10 743 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
yading@10 744 return -1;
yading@10 745 }
yading@10 746
yading@10 747 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
yading@10 748
yading@10 749 flush_put_bits(&pb);
yading@10 750 avpkt->size = (put_bits_count(&pb)+7)/8;
yading@10 751 *got_packet_ptr = 1;
yading@10 752 return 0;
yading@10 753 }
yading@10 754 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
yading@10 755
yading@10 756 #if CONFIG_SONIC_DECODER
yading@10 757 static const int samplerate_table[] =
yading@10 758 { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
yading@10 759
yading@10 760 static av_cold int sonic_decode_init(AVCodecContext *avctx)
yading@10 761 {
yading@10 762 SonicContext *s = avctx->priv_data;
yading@10 763 GetBitContext gb;
yading@10 764 int i, version;
yading@10 765
yading@10 766 s->channels = avctx->channels;
yading@10 767 s->samplerate = avctx->sample_rate;
yading@10 768
yading@10 769 if (!avctx->extradata)
yading@10 770 {
yading@10 771 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
yading@10 772 return -1;
yading@10 773 }
yading@10 774
yading@10 775 init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
yading@10 776
yading@10 777 version = get_bits(&gb, 2);
yading@10 778 if (version > 1)
yading@10 779 {
yading@10 780 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
yading@10 781 return -1;
yading@10 782 }
yading@10 783
yading@10 784 if (version == 1)
yading@10 785 {
yading@10 786 s->channels = get_bits(&gb, 2);
yading@10 787 s->samplerate = samplerate_table[get_bits(&gb, 4)];
yading@10 788 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
yading@10 789 s->channels, s->samplerate);
yading@10 790 }
yading@10 791
yading@10 792 if (s->channels > MAX_CHANNELS)
yading@10 793 {
yading@10 794 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
yading@10 795 return -1;
yading@10 796 }
yading@10 797
yading@10 798 s->lossless = get_bits1(&gb);
yading@10 799 if (!s->lossless)
yading@10 800 skip_bits(&gb, 3); // XXX FIXME
yading@10 801 s->decorrelation = get_bits(&gb, 2);
yading@10 802 if (s->decorrelation != 3 && s->channels != 2) {
yading@10 803 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
yading@10 804 return AVERROR_INVALIDDATA;
yading@10 805 }
yading@10 806
yading@10 807 s->downsampling = get_bits(&gb, 2);
yading@10 808 if (!s->downsampling) {
yading@10 809 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
yading@10 810 return AVERROR_INVALIDDATA;
yading@10 811 }
yading@10 812
yading@10 813 s->num_taps = (get_bits(&gb, 5)+1)<<5;
yading@10 814 if (get_bits1(&gb)) // XXX FIXME
yading@10 815 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
yading@10 816
yading@10 817 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
yading@10 818 s->frame_size = s->channels*s->block_align*s->downsampling;
yading@10 819 // avctx->frame_size = s->block_align;
yading@10 820
yading@10 821 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
yading@10 822 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
yading@10 823
yading@10 824 // generate taps
yading@10 825 s->tap_quant = av_mallocz(4* s->num_taps);
yading@10 826 for (i = 0; i < s->num_taps; i++)
yading@10 827 s->tap_quant[i] = (int)(sqrt(i+1));
yading@10 828
yading@10 829 s->predictor_k = av_mallocz(4* s->num_taps);
yading@10 830
yading@10 831 for (i = 0; i < s->channels; i++)
yading@10 832 {
yading@10 833 s->predictor_state[i] = av_mallocz(4* s->num_taps);
yading@10 834 if (!s->predictor_state[i])
yading@10 835 return -1;
yading@10 836 }
yading@10 837
yading@10 838 for (i = 0; i < s->channels; i++)
yading@10 839 {
yading@10 840 s->coded_samples[i] = av_mallocz(4* s->block_align);
yading@10 841 if (!s->coded_samples[i])
yading@10 842 return -1;
yading@10 843 }
yading@10 844 s->int_samples = av_mallocz(4* s->frame_size);
yading@10 845
yading@10 846 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
yading@10 847 return 0;
yading@10 848 }
yading@10 849
yading@10 850 static av_cold int sonic_decode_close(AVCodecContext *avctx)
yading@10 851 {
yading@10 852 SonicContext *s = avctx->priv_data;
yading@10 853 int i;
yading@10 854
yading@10 855 av_free(s->int_samples);
yading@10 856 av_free(s->tap_quant);
yading@10 857 av_free(s->predictor_k);
yading@10 858
yading@10 859 for (i = 0; i < s->channels; i++)
yading@10 860 {
yading@10 861 av_free(s->predictor_state[i]);
yading@10 862 av_free(s->coded_samples[i]);
yading@10 863 }
yading@10 864
yading@10 865 return 0;
yading@10 866 }
yading@10 867
yading@10 868 static int sonic_decode_frame(AVCodecContext *avctx,
yading@10 869 void *data, int *got_frame_ptr,
yading@10 870 AVPacket *avpkt)
yading@10 871 {
yading@10 872 const uint8_t *buf = avpkt->data;
yading@10 873 int buf_size = avpkt->size;
yading@10 874 SonicContext *s = avctx->priv_data;
yading@10 875 GetBitContext gb;
yading@10 876 int i, quant, ch, j, ret;
yading@10 877 int16_t *samples;
yading@10 878 AVFrame *frame = data;
yading@10 879
yading@10 880 if (buf_size == 0) return 0;
yading@10 881
yading@10 882 frame->nb_samples = s->frame_size / avctx->channels;
yading@10 883 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
yading@10 884 return ret;
yading@10 885 samples = (int16_t *)frame->data[0];
yading@10 886
yading@10 887 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
yading@10 888
yading@10 889 init_get_bits(&gb, buf, buf_size*8);
yading@10 890
yading@10 891 intlist_read(&gb, s->predictor_k, s->num_taps, 0);
yading@10 892
yading@10 893 // dequantize
yading@10 894 for (i = 0; i < s->num_taps; i++)
yading@10 895 s->predictor_k[i] *= s->tap_quant[i];
yading@10 896
yading@10 897 if (s->lossless)
yading@10 898 quant = 1;
yading@10 899 else
yading@10 900 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
yading@10 901
yading@10 902 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
yading@10 903
yading@10 904 for (ch = 0; ch < s->channels; ch++)
yading@10 905 {
yading@10 906 int x = ch;
yading@10 907
yading@10 908 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
yading@10 909
yading@10 910 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
yading@10 911
yading@10 912 for (i = 0; i < s->block_align; i++)
yading@10 913 {
yading@10 914 for (j = 0; j < s->downsampling - 1; j++)
yading@10 915 {
yading@10 916 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
yading@10 917 x += s->channels;
yading@10 918 }
yading@10 919
yading@10 920 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
yading@10 921 x += s->channels;
yading@10 922 }
yading@10 923
yading@10 924 for (i = 0; i < s->num_taps; i++)
yading@10 925 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
yading@10 926 }
yading@10 927
yading@10 928 switch(s->decorrelation)
yading@10 929 {
yading@10 930 case MID_SIDE:
yading@10 931 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 932 {
yading@10 933 s->int_samples[i+1] += shift(s->int_samples[i], 1);
yading@10 934 s->int_samples[i] -= s->int_samples[i+1];
yading@10 935 }
yading@10 936 break;
yading@10 937 case LEFT_SIDE:
yading@10 938 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 939 s->int_samples[i+1] += s->int_samples[i];
yading@10 940 break;
yading@10 941 case RIGHT_SIDE:
yading@10 942 for (i = 0; i < s->frame_size; i += s->channels)
yading@10 943 s->int_samples[i] += s->int_samples[i+1];
yading@10 944 break;
yading@10 945 }
yading@10 946
yading@10 947 if (!s->lossless)
yading@10 948 for (i = 0; i < s->frame_size; i++)
yading@10 949 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
yading@10 950
yading@10 951 // internal -> short
yading@10 952 for (i = 0; i < s->frame_size; i++)
yading@10 953 samples[i] = av_clip_int16(s->int_samples[i]);
yading@10 954
yading@10 955 align_get_bits(&gb);
yading@10 956
yading@10 957 *got_frame_ptr = 1;
yading@10 958
yading@10 959 return (get_bits_count(&gb)+7)/8;
yading@10 960 }
yading@10 961
yading@10 962 AVCodec ff_sonic_decoder = {
yading@10 963 .name = "sonic",
yading@10 964 .type = AVMEDIA_TYPE_AUDIO,
yading@10 965 .id = AV_CODEC_ID_SONIC,
yading@10 966 .priv_data_size = sizeof(SonicContext),
yading@10 967 .init = sonic_decode_init,
yading@10 968 .close = sonic_decode_close,
yading@10 969 .decode = sonic_decode_frame,
yading@10 970 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
yading@10 971 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
yading@10 972 };
yading@10 973 #endif /* CONFIG_SONIC_DECODER */
yading@10 974
yading@10 975 #if CONFIG_SONIC_ENCODER
yading@10 976 AVCodec ff_sonic_encoder = {
yading@10 977 .name = "sonic",
yading@10 978 .type = AVMEDIA_TYPE_AUDIO,
yading@10 979 .id = AV_CODEC_ID_SONIC,
yading@10 980 .priv_data_size = sizeof(SonicContext),
yading@10 981 .init = sonic_encode_init,
yading@10 982 .encode2 = sonic_encode_frame,
yading@10 983 .capabilities = CODEC_CAP_EXPERIMENTAL,
yading@10 984 .close = sonic_encode_close,
yading@10 985 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
yading@10 986 };
yading@10 987 #endif
yading@10 988
yading@10 989 #if CONFIG_SONIC_LS_ENCODER
yading@10 990 AVCodec ff_sonic_ls_encoder = {
yading@10 991 .name = "sonicls",
yading@10 992 .type = AVMEDIA_TYPE_AUDIO,
yading@10 993 .id = AV_CODEC_ID_SONIC_LS,
yading@10 994 .priv_data_size = sizeof(SonicContext),
yading@10 995 .init = sonic_encode_init,
yading@10 996 .encode2 = sonic_encode_frame,
yading@10 997 .capabilities = CODEC_CAP_EXPERIMENTAL,
yading@10 998 .close = sonic_encode_close,
yading@10 999 .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
yading@10 1000 };
yading@10 1001 #endif