sonic.c
Go to the documentation of this file.
1 /*
2  * Simple free lossless/lossy audio codec
3  * Copyright (c) 2004 Alex Beregszaszi
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 #include "avcodec.h"
22 #include "get_bits.h"
23 #include "golomb.h"
24 #include "internal.h"
25 
26 /**
27  * @file
28  * Simple free lossless/lossy audio codec
29  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
30  * Written and designed by Alex Beregszaszi
31  *
32  * TODO:
33  * - CABAC put/get_symbol
34  * - independent quantizer for channels
35  * - >2 channels support
36  * - more decorrelation types
37  * - more tap_quant tests
38  * - selectable intlist writers/readers (bonk-style, golomb, cabac)
39  */
40 
#define MAX_CHANNELS 2

#define MID_SIDE 0
#define LEFT_SIDE 1
#define RIGHT_SIDE 2

/**
 * Private codec state shared by the Sonic encoders and the decoder.
 *
 * Every member listed here is accessed through avctx->priv_data by the
 * init/encode/decode/close functions of this file.
 */
typedef struct SonicContext {
    int lossless, decorrelation;

    int num_taps, downsampling;
    double quantization;

    int channels, samplerate, block_align, frame_size;

    int *tap_quant;                     ///< per-tap quantizer, num_taps entries
    int *int_samples;                   ///< interleaved work buffer, frame_size entries
    int *coded_samples[MAX_CHANNELS];   ///< per-channel residuals, block_align entries each

    // for encoding
    int *tail;
    int tail_size;
    int *window;
    int window_size;

    // for decoding
    int *predictor_k;
    int *predictor_state[MAX_CHANNELS];
} SonicContext;
69 
70 #define LATTICE_SHIFT 10
71 #define SAMPLE_SHIFT 4
72 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
73 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
74 
75 #define BASE_QUANT 0.6
76 #define RATE_VARIATION 3.0
77 
/**
 * Divide a by b with rounding: b/2 is added to the magnitude of a before
 * the integer division and the sign of a is restored afterwards.
 */
static inline int divide(int a, int b)
{
    const int negative = a < 0;
    const int magnitude = negative ? -a : a;
    const int quotient = (magnitude + b / 2) / b;

    return negative ? -quotient : quotient;
}
85 
/**
 * Shift a right by b bits after adding half of the divisor (1 << (b-1)),
 * i.e. a rounding right shift.
 */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);
    const int biased = a + half;

    return biased >> b;
}
90 
/**
 * Shift a right by b bits and add one to the result when a is negative.
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted += 1;

    return shifted;
}
95 
96 #if 1
97 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
98 {
99  int i;
100 
101  for (i = 0; i < entries; i++)
102  set_se_golomb(pb, buf[i]);
103 
104  return 1;
105 }
106 
107 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
108 {
109  int i;
110 
111  for (i = 0; i < entries; i++)
112  buf[i] = get_se_golomb(gb);
113 
114  return 1;
115 }
116 
117 #else
118 
119 #define ADAPT_LEVEL 8
120 
/**
 * Count how many bits are needed to represent x (0 for x == 0).
 */
static int bits_to_store(uint64_t x)
{
    int count;

    for (count = 0; x != 0; x >>= 1)
        count++;

    return count;
}
132 
133 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
134 {
135  int i, bits;
136 
137  if (!max)
138  return;
139 
140  bits = bits_to_store(max);
141 
142  for (i = 0; i < bits-1; i++)
143  put_bits(pb, 1, value & (1 << i));
144 
145  if ( (value | (1 << (bits-1))) <= max)
146  put_bits(pb, 1, value & (1 << (bits-1)));
147 }
148 
149 static unsigned int read_uint_max(GetBitContext *gb, int max)
150 {
151  int i, bits, value = 0;
152 
153  if (!max)
154  return 0;
155 
156  bits = bits_to_store(max);
157 
158  for (i = 0; i < bits-1; i++)
159  if (get_bits1(gb))
160  value += 1 << i;
161 
162  if ( (value | (1<<(bits-1))) <= max)
163  if (get_bits1(gb))
164  value += 1 << (bits-1);
165 
166  return value;
167 }
168 
169 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
170 {
171  int i, j, x = 0, low_bits = 0, max = 0;
172  int step = 256, pos = 0, dominant = 0, any = 0;
173  int *copy, *bits;
174 
175  copy = av_mallocz(4* entries);
176  if (!copy)
177  return -1;
178 
179  if (base_2_part)
180  {
181  int energy = 0;
182 
183  for (i = 0; i < entries; i++)
184  energy += abs(buf[i]);
185 
186  low_bits = bits_to_store(energy / (entries * 2));
187  if (low_bits > 15)
188  low_bits = 15;
189 
190  put_bits(pb, 4, low_bits);
191  }
192 
193  for (i = 0; i < entries; i++)
194  {
195  put_bits(pb, low_bits, abs(buf[i]));
196  copy[i] = abs(buf[i]) >> low_bits;
197  if (copy[i] > max)
198  max = abs(copy[i]);
199  }
200 
201  bits = av_mallocz(4* entries*max);
202  if (!bits)
203  {
204 // av_free(copy);
205  return -1;
206  }
207 
208  for (i = 0; i <= max; i++)
209  {
210  for (j = 0; j < entries; j++)
211  if (copy[j] >= i)
212  bits[x++] = copy[j] > i;
213  }
214 
215  // store bitstream
216  while (pos < x)
217  {
218  int steplet = step >> 8;
219 
220  if (pos + steplet > x)
221  steplet = x - pos;
222 
223  for (i = 0; i < steplet; i++)
224  if (bits[i+pos] != dominant)
225  any = 1;
226 
227  put_bits(pb, 1, any);
228 
229  if (!any)
230  {
231  pos += steplet;
232  step += step / ADAPT_LEVEL;
233  }
234  else
235  {
236  int interloper = 0;
237 
238  while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
239  interloper++;
240 
241  // note change
242  write_uint_max(pb, interloper, (step >> 8) - 1);
243 
244  pos += interloper + 1;
245  step -= step / ADAPT_LEVEL;
246  }
247 
248  if (step < 256)
249  {
250  step = 65536 / step;
251  dominant = !dominant;
252  }
253  }
254 
255  // store signs
256  for (i = 0; i < entries; i++)
257  if (buf[i])
258  put_bits(pb, 1, buf[i] < 0);
259 
260 // av_free(bits);
261 // av_free(copy);
262 
263  return 0;
264 }
265 
266 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
267 {
268  int i, low_bits = 0, x = 0;
269  int n_zeros = 0, step = 256, dominant = 0;
270  int pos = 0, level = 0;
271  int *bits = av_mallocz(4* entries);
272 
273  if (!bits)
274  return -1;
275 
276  if (base_2_part)
277  {
278  low_bits = get_bits(gb, 4);
279 
280  if (low_bits)
281  for (i = 0; i < entries; i++)
282  buf[i] = get_bits(gb, low_bits);
283  }
284 
285 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
286 
287  while (n_zeros < entries)
288  {
289  int steplet = step >> 8;
290 
291  if (!get_bits1(gb))
292  {
293  for (i = 0; i < steplet; i++)
294  bits[x++] = dominant;
295 
296  if (!dominant)
297  n_zeros += steplet;
298 
299  step += step / ADAPT_LEVEL;
300  }
301  else
302  {
303  int actual_run = read_uint_max(gb, steplet-1);
304 
305 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
306 
307  for (i = 0; i < actual_run; i++)
308  bits[x++] = dominant;
309 
310  bits[x++] = !dominant;
311 
312  if (!dominant)
313  n_zeros += actual_run;
314  else
315  n_zeros++;
316 
317  step -= step / ADAPT_LEVEL;
318  }
319 
320  if (step < 256)
321  {
322  step = 65536 / step;
323  dominant = !dominant;
324  }
325  }
326 
327  // reconstruct unsigned values
328  n_zeros = 0;
329  for (i = 0; n_zeros < entries; i++)
330  {
331  while(1)
332  {
333  if (pos >= entries)
334  {
335  pos = 0;
336  level += 1 << low_bits;
337  }
338 
339  if (buf[pos] >= level)
340  break;
341 
342  pos++;
343  }
344 
345  if (bits[i])
346  buf[pos] += 1 << low_bits;
347  else
348  n_zeros++;
349 
350  pos++;
351  }
352 // av_free(bits);
353 
354  // read signs
355  for (i = 0; i < entries; i++)
356  if (buf[i] && get_bits1(gb))
357  buf[i] = -buf[i];
358 
359 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
360 
361  return 0;
362 }
363 #endif
364 
365 static void predictor_init_state(int *k, int *state, int order)
366 {
367  int i;
368 
369  for (i = order-2; i >= 0; i--)
370  {
371  int j, p, x = state[i];
372 
373  for (j = 0, p = i+1; p < order; j++,p++)
374  {
375  int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
376  state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
377  x = tmp;
378  }
379  }
380 }
381 
382 static int predictor_calc_error(int *k, int *state, int order, int error)
383 {
384  int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
385 
386 #if 1
387  int *k_ptr = &(k[order-2]),
388  *state_ptr = &(state[order-2]);
389  for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
390  {
391  int k_value = *k_ptr, state_value = *state_ptr;
392  x -= shift_down(k_value * state_value, LATTICE_SHIFT);
393  state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
394  }
395 #else
396  for (i = order-2; i >= 0; i--)
397  {
398  x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
399  state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
400  }
401 #endif
402 
403  // don't drift too far, to avoid overflows
404  if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
405  if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
406 
407  state[0] = x;
408 
409  return x;
410 }
411 
412 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
413 // Heavily modified Levinson-Durbin algorithm which
414 // copes better with quantization, and calculates the
415 // actual whitened result as it goes.
416 
417 static void modified_levinson_durbin(int *window, int window_entries,
418  int *out, int out_entries, int channels, int *tap_quant)
419 {
420  int i;
421  int *state = av_mallocz(4* window_entries);
422 
423  memcpy(state, window, 4* window_entries);
424 
425  for (i = 0; i < out_entries; i++)
426  {
427  int step = (i+1)*channels, k, j;
428  double xx = 0.0, xy = 0.0;
429 #if 1
430  int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
431  j = window_entries - step;
432  for (;j>=0;j--,x_ptr++,state_ptr++)
433  {
434  double x_value = *x_ptr, state_value = *state_ptr;
435  xx += state_value*state_value;
436  xy += x_value*state_value;
437  }
438 #else
439  for (j = 0; j <= (window_entries - step); j++);
440  {
441  double stepval = window[step+j], stateval = window[j];
442 // xx += (double)window[j]*(double)window[j];
443 // xy += (double)window[step+j]*(double)window[j];
444  xx += stateval*stateval;
445  xy += stepval*stateval;
446  }
447 #endif
448  if (xx == 0.0)
449  k = 0;
450  else
451  k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
452 
453  if (k > (LATTICE_FACTOR/tap_quant[i]))
454  k = LATTICE_FACTOR/tap_quant[i];
455  if (-k > (LATTICE_FACTOR/tap_quant[i]))
456  k = -(LATTICE_FACTOR/tap_quant[i]);
457 
458  out[i] = k;
459  k *= tap_quant[i];
460 
461 #if 1
462  x_ptr = &(window[step]);
463  state_ptr = &(state[0]);
464  j = window_entries - step;
465  for (;j>=0;j--,x_ptr++,state_ptr++)
466  {
467  int x_value = *x_ptr, state_value = *state_ptr;
468  *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
469  *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
470  }
471 #else
472  for (j=0; j <= (window_entries - step); j++)
473  {
474  int stepval = window[step+j], stateval=state[j];
475  window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
476  state[j] += shift_down(k * stepval, LATTICE_SHIFT);
477  }
478 #endif
479  }
480 
481  av_free(state);
482 }
483 
/**
 * Map a sample rate to its header code.
 *
 * @return the code (0..8) for a supported rate, -1 for any other rate
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = (int)(sizeof(known_rates) / sizeof(known_rates[0]));
    int code;

    for (code = 0; code < count; code++)
        if (known_rates[code] == samplerate)
            return code;

    return -1;
}
500 
501 static av_cold int sonic_encode_init(AVCodecContext *avctx)
502 {
503  SonicContext *s = avctx->priv_data;
504  PutBitContext pb;
505  int i, version = 0;
506 
507  if (avctx->channels > MAX_CHANNELS)
508  {
509  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
510  return -1; /* only stereo or mono for now */
511  }
512 
513  if (avctx->channels == 2)
514  s->decorrelation = MID_SIDE;
515  else
516  s->decorrelation = 3;
517 
518  if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
519  {
520  s->lossless = 1;
521  s->num_taps = 32;
522  s->downsampling = 1;
523  s->quantization = 0.0;
524  }
525  else
526  {
527  s->num_taps = 128;
528  s->downsampling = 2;
529  s->quantization = 1.0;
530  }
531 
532  // max tap 2048
533  if ((s->num_taps < 32) || (s->num_taps > 1024) ||
534  ((s->num_taps>>5)<<5 != s->num_taps))
535  {
536  av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
537  return -1;
538  }
539 
540  // generate taps
541  s->tap_quant = av_mallocz(4* s->num_taps);
542  for (i = 0; i < s->num_taps; i++)
543  s->tap_quant[i] = (int)(sqrt(i+1));
544 
545  s->channels = avctx->channels;
546  s->samplerate = avctx->sample_rate;
547 
548  s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
550 
551  s->tail_size = s->num_taps*s->channels;
552  s->tail = av_mallocz(4 * s->tail_size);
553  if (!s->tail)
554  return -1;
555 
556  s->predictor_k = av_mallocz(4 * s->num_taps);
557  if (!s->predictor_k)
558  return -1;
559 
560  for (i = 0; i < s->channels; i++)
561  {
562  s->coded_samples[i] = av_mallocz(4* s->block_align);
563  if (!s->coded_samples[i])
564  return -1;
565  }
566 
567  s->int_samples = av_mallocz(4* s->frame_size);
568 
569  s->window_size = ((2*s->tail_size)+s->frame_size);
570  s->window = av_mallocz(4* s->window_size);
571  if (!s->window)
572  return -1;
573 
574  avctx->extradata = av_mallocz(16);
575  if (!avctx->extradata)
576  return -1;
577  init_put_bits(&pb, avctx->extradata, 16*8);
578 
579  put_bits(&pb, 2, version); // version
580  if (version == 1)
581  {
582  put_bits(&pb, 2, s->channels);
583  put_bits(&pb, 4, code_samplerate(s->samplerate));
584  }
585  put_bits(&pb, 1, s->lossless);
586  if (!s->lossless)
587  put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
588  put_bits(&pb, 2, s->decorrelation);
589  put_bits(&pb, 2, s->downsampling);
590  put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
591  put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
592 
593  flush_put_bits(&pb);
594  avctx->extradata_size = put_bits_count(&pb)/8;
595 
596  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
597  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
598 
599  avctx->coded_frame = avcodec_alloc_frame();
600  if (!avctx->coded_frame)
601  return AVERROR(ENOMEM);
602  avctx->coded_frame->key_frame = 1;
603  avctx->frame_size = s->block_align*s->downsampling;
604 
605  return 0;
606 }
607 
608 static av_cold int sonic_encode_close(AVCodecContext *avctx)
609 {
610  SonicContext *s = avctx->priv_data;
611  int i;
612 
613  av_freep(&avctx->coded_frame);
614 
615  for (i = 0; i < s->channels; i++)
616  av_free(s->coded_samples[i]);
617 
618  av_free(s->predictor_k);
619  av_free(s->tail);
620  av_free(s->tap_quant);
621  av_free(s->window);
622  av_free(s->int_samples);
623 
624  return 0;
625 }
626 
627 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
628  const AVFrame *frame, int *got_packet_ptr)
629 {
630  SonicContext *s = avctx->priv_data;
631  PutBitContext pb;
632  int i, j, ch, quant = 0, x = 0;
633  int ret;
634  const short *samples = (const int16_t*)frame->data[0];
635 
636  if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
637  return ret;
638 
639  init_put_bits(&pb, avpkt->data, avpkt->size);
640 
641  // short -> internal
642  for (i = 0; i < s->frame_size; i++)
643  s->int_samples[i] = samples[i];
644 
645  if (!s->lossless)
646  for (i = 0; i < s->frame_size; i++)
647  s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
648 
649  switch(s->decorrelation)
650  {
651  case MID_SIDE:
652  for (i = 0; i < s->frame_size; i += s->channels)
653  {
654  s->int_samples[i] += s->int_samples[i+1];
655  s->int_samples[i+1] -= shift(s->int_samples[i], 1);
656  }
657  break;
658  case LEFT_SIDE:
659  for (i = 0; i < s->frame_size; i += s->channels)
660  s->int_samples[i+1] -= s->int_samples[i];
661  break;
662  case RIGHT_SIDE:
663  for (i = 0; i < s->frame_size; i += s->channels)
664  s->int_samples[i] -= s->int_samples[i+1];
665  break;
666  }
667 
668  memset(s->window, 0, 4* s->window_size);
669 
670  for (i = 0; i < s->tail_size; i++)
671  s->window[x++] = s->tail[i];
672 
673  for (i = 0; i < s->frame_size; i++)
674  s->window[x++] = s->int_samples[i];
675 
676  for (i = 0; i < s->tail_size; i++)
677  s->window[x++] = 0;
678 
679  for (i = 0; i < s->tail_size; i++)
680  s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
681 
682  // generate taps
683  modified_levinson_durbin(s->window, s->window_size,
684  s->predictor_k, s->num_taps, s->channels, s->tap_quant);
685  if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
686  return -1;
687 
688  for (ch = 0; ch < s->channels; ch++)
689  {
690  x = s->tail_size+ch;
691  for (i = 0; i < s->block_align; i++)
692  {
693  int sum = 0;
694  for (j = 0; j < s->downsampling; j++, x += s->channels)
695  sum += s->window[x];
696  s->coded_samples[ch][i] = sum;
697  }
698  }
699 
700  // simple rate control code
701  if (!s->lossless)
702  {
703  double energy1 = 0.0, energy2 = 0.0;
704  for (ch = 0; ch < s->channels; ch++)
705  {
706  for (i = 0; i < s->block_align; i++)
707  {
708  double sample = s->coded_samples[ch][i];
709  energy2 += sample*sample;
710  energy1 += fabs(sample);
711  }
712  }
713 
714  energy2 = sqrt(energy2/(s->channels*s->block_align));
715  energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
716 
717  // increase bitrate when samples are like a gaussian distribution
718  // reduce bitrate when samples are like a two-tailed exponential distribution
719 
720  if (energy2 > energy1)
721  energy2 += (energy2-energy1)*RATE_VARIATION;
722 
723  quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
724 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
725 
726  if (quant < 1)
727  quant = 1;
728  if (quant > 65534)
729  quant = 65534;
730 
731  set_ue_golomb(&pb, quant);
732 
733  quant *= SAMPLE_FACTOR;
734  }
735 
736  // write out coded samples
737  for (ch = 0; ch < s->channels; ch++)
738  {
739  if (!s->lossless)
740  for (i = 0; i < s->block_align; i++)
741  s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
742 
743  if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
744  return -1;
745  }
746 
747 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
748 
749  flush_put_bits(&pb);
750  avpkt->size = (put_bits_count(&pb)+7)/8;
751  *got_packet_ptr = 1;
752  return 0;
753 }
754 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
755 
756 #if CONFIG_SONIC_DECODER
757 static const int samplerate_table[] =
758  { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
759 
760 static av_cold int sonic_decode_init(AVCodecContext *avctx)
761 {
762  SonicContext *s = avctx->priv_data;
763  GetBitContext gb;
764  int i, version;
765 
766  s->channels = avctx->channels;
767  s->samplerate = avctx->sample_rate;
768 
769  if (!avctx->extradata)
770  {
771  av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
772  return -1;
773  }
774 
775  init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
776 
777  version = get_bits(&gb, 2);
778  if (version > 1)
779  {
780  av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
781  return -1;
782  }
783 
784  if (version == 1)
785  {
786  s->channels = get_bits(&gb, 2);
787  s->samplerate = samplerate_table[get_bits(&gb, 4)];
788  av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
789  s->channels, s->samplerate);
790  }
791 
792  if (s->channels > MAX_CHANNELS)
793  {
794  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
795  return -1;
796  }
797 
798  s->lossless = get_bits1(&gb);
799  if (!s->lossless)
800  skip_bits(&gb, 3); // XXX FIXME
801  s->decorrelation = get_bits(&gb, 2);
802  if (s->decorrelation != 3 && s->channels != 2) {
803  av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
804  return AVERROR_INVALIDDATA;
805  }
806 
807  s->downsampling = get_bits(&gb, 2);
808  if (!s->downsampling) {
809  av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
810  return AVERROR_INVALIDDATA;
811  }
812 
813  s->num_taps = (get_bits(&gb, 5)+1)<<5;
814  if (get_bits1(&gb)) // XXX FIXME
815  av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
816 
817  s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
819 // avctx->frame_size = s->block_align;
820 
821  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
822  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
823 
824  // generate taps
825  s->tap_quant = av_mallocz(4* s->num_taps);
826  for (i = 0; i < s->num_taps; i++)
827  s->tap_quant[i] = (int)(sqrt(i+1));
828 
829  s->predictor_k = av_mallocz(4* s->num_taps);
830 
831  for (i = 0; i < s->channels; i++)
832  {
833  s->predictor_state[i] = av_mallocz(4* s->num_taps);
834  if (!s->predictor_state[i])
835  return -1;
836  }
837 
838  for (i = 0; i < s->channels; i++)
839  {
840  s->coded_samples[i] = av_mallocz(4* s->block_align);
841  if (!s->coded_samples[i])
842  return -1;
843  }
844  s->int_samples = av_mallocz(4* s->frame_size);
845 
846  avctx->sample_fmt = AV_SAMPLE_FMT_S16;
847  return 0;
848 }
849 
850 static av_cold int sonic_decode_close(AVCodecContext *avctx)
851 {
852  SonicContext *s = avctx->priv_data;
853  int i;
854 
855  av_free(s->int_samples);
856  av_free(s->tap_quant);
857  av_free(s->predictor_k);
858 
859  for (i = 0; i < s->channels; i++)
860  {
861  av_free(s->predictor_state[i]);
862  av_free(s->coded_samples[i]);
863  }
864 
865  return 0;
866 }
867 
868 static int sonic_decode_frame(AVCodecContext *avctx,
869  void *data, int *got_frame_ptr,
870  AVPacket *avpkt)
871 {
872  const uint8_t *buf = avpkt->data;
873  int buf_size = avpkt->size;
874  SonicContext *s = avctx->priv_data;
875  GetBitContext gb;
876  int i, quant, ch, j, ret;
877  int16_t *samples;
878  AVFrame *frame = data;
879 
880  if (buf_size == 0) return 0;
881 
882  frame->nb_samples = s->frame_size / avctx->channels;
883  if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
884  return ret;
885  samples = (int16_t *)frame->data[0];
886 
887 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
888 
889  init_get_bits(&gb, buf, buf_size*8);
890 
891  intlist_read(&gb, s->predictor_k, s->num_taps, 0);
892 
893  // dequantize
894  for (i = 0; i < s->num_taps; i++)
895  s->predictor_k[i] *= s->tap_quant[i];
896 
897  if (s->lossless)
898  quant = 1;
899  else
900  quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
901 
902 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
903 
904  for (ch = 0; ch < s->channels; ch++)
905  {
906  int x = ch;
907 
909 
910  intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
911 
912  for (i = 0; i < s->block_align; i++)
913  {
914  for (j = 0; j < s->downsampling - 1; j++)
915  {
917  x += s->channels;
918  }
919 
920  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
921  x += s->channels;
922  }
923 
924  for (i = 0; i < s->num_taps; i++)
925  s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
926  }
927 
928  switch(s->decorrelation)
929  {
930  case MID_SIDE:
931  for (i = 0; i < s->frame_size; i += s->channels)
932  {
933  s->int_samples[i+1] += shift(s->int_samples[i], 1);
934  s->int_samples[i] -= s->int_samples[i+1];
935  }
936  break;
937  case LEFT_SIDE:
938  for (i = 0; i < s->frame_size; i += s->channels)
939  s->int_samples[i+1] += s->int_samples[i];
940  break;
941  case RIGHT_SIDE:
942  for (i = 0; i < s->frame_size; i += s->channels)
943  s->int_samples[i] += s->int_samples[i+1];
944  break;
945  }
946 
947  if (!s->lossless)
948  for (i = 0; i < s->frame_size; i++)
949  s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
950 
951  // internal -> short
952  for (i = 0; i < s->frame_size; i++)
953  samples[i] = av_clip_int16(s->int_samples[i]);
954 
955  align_get_bits(&gb);
956 
957  *got_frame_ptr = 1;
958 
959  return (get_bits_count(&gb)+7)/8;
960 }
961 
962 AVCodec ff_sonic_decoder = {
963  .name = "sonic",
964  .type = AVMEDIA_TYPE_AUDIO,
965  .id = AV_CODEC_ID_SONIC,
966  .priv_data_size = sizeof(SonicContext),
967  .init = sonic_decode_init,
968  .close = sonic_decode_close,
969  .decode = sonic_decode_frame,
970  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
971  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
972 };
973 #endif /* CONFIG_SONIC_DECODER */
974 
975 #if CONFIG_SONIC_ENCODER
976 AVCodec ff_sonic_encoder = {
977  .name = "sonic",
978  .type = AVMEDIA_TYPE_AUDIO,
979  .id = AV_CODEC_ID_SONIC,
980  .priv_data_size = sizeof(SonicContext),
981  .init = sonic_encode_init,
982  .encode2 = sonic_encode_frame,
983  .capabilities = CODEC_CAP_EXPERIMENTAL,
984  .close = sonic_encode_close,
985  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
986 };
987 #endif
988 
989 #if CONFIG_SONIC_LS_ENCODER
990 AVCodec ff_sonic_ls_encoder = {
991  .name = "sonicls",
992  .type = AVMEDIA_TYPE_AUDIO,
993  .id = AV_CODEC_ID_SONIC_LS,
994  .priv_data_size = sizeof(SonicContext),
995  .init = sonic_encode_init,
996  .encode2 = sonic_encode_frame,
997  .capabilities = CODEC_CAP_EXPERIMENTAL,
998  .close = sonic_encode_close,
999  .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1000 };
1001 #endif
static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
Definition: sonic.c:97
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:205
const struct AVCodec * codec
int * int_samples
Definition: sonic.c:56
int * tail
Definition: sonic.c:60
int samplerate
Definition: sonic.c:53
#define LATTICE_FACTOR
Definition: sonic.c:72
const char * s
Definition: avisynth_c.h:668
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
static int shift(int a, int b)
Definition: sonic.c:86
This structure describes decoded (raw) audio or video data.
Definition: frame.h:76
int lossless
Definition: sonic.c:48
static int get_se_golomb(GetBitContext *gb)
read signed exp golomb code.
Definition: golomb.h:175
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:240
int * predictor_state[MAX_CHANNELS]
Definition: sonic.c:67
struct SonicContext SonicContext
AVFrame * coded_frame
the picture in the bitstream
if max(w)>1 w=0.9 *w/max(w)
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
static void set_ue_golomb(PutBitContext *pb, int i)
write unsigned exp golomb code.
Definition: golomb.h:442
static int divide(int a, int b)
Definition: sonic.c:78
#define LATTICE_SHIFT
Definition: sonic.c:70
int version
Definition: avisynth_c.h:666
int * tap_quant
Definition: sonic.c:55
signed 16 bits
Definition: samplefmt.h:52
#define sample
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
initialize output if(nPeaks >3)%at least 3 peaks in spectrum for trying to find f0 nf0peaks
#define MID_SIDE
Definition: sonic.c:43
uint8_t bits
Definition: crc.c:216
enum AVSampleFormat sample_fmt
audio sample format
uint8_t
#define av_cold
Definition: attributes.h:78
#define MAX_CHANNELS
Definition: sonic.c:41
#define b
Definition: input.c:42
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
#define CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
uint8_t * data
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:193
bitstream reader API header.
integer sqrt
Definition: avutil.txt:2
static void copy(LZOContext *c, int cnt)
Copies bytes from input to output buffer with checking.
Definition: lzo.c:79
#define RIGHT_SIDE
Definition: sonic.c:45
frame
Definition: stft.m:14
Discrete Time axis x
enum AVCodecID id
int channels
Definition: sonic.c:53
void av_free(void *ptr)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc(). ...
Definition: mem.c:183
static int get_ue_golomb(GetBitContext *gb)
read unsigned exp golomb code.
Definition: golomb.h:53
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Spectrum Plot time data
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:246
const char * name
Name of the codec implementation.
static void put_bits(J2kEncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:160
external API header
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:73
AVFrame * avcodec_alloc_frame(void)
Allocate an AVFrame and set its fields to default values.
ret
Definition: avfilter.c:821
int block_align
Definition: sonic.c:53
#define RATE_VARIATION
Definition: sonic.c:76
FIXME Range Coding of cr are level
Definition: snow.txt:367
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
for k
int frame_size
Number of samples per channel in an audio frame.
static void set_se_golomb(PutBitContext *pb, int i)
write signed exp golomb code.
Definition: golomb.h:476
int sample_rate
samples per second
int * predictor_k
Definition: sonic.c:66
main external API structure.
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:375
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:148
void * buf
Definition: avisynth_c.h:594
int tail_size
Definition: sonic.c:61
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:273
double value
Definition: eval.c:82
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:265
synthesis window for stochastic i
#define LEFT_SIDE
Definition: sonic.c:44
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:379
static void predictor_init_state(int *k, int *state, int order)
Definition: sonic.c:365
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFilterBuffer structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Buffer references ownership and permissions
const uint8_t * quant
static uint32_t state
Definition: trasher.c:27
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:87
#define BASE_QUANT
Definition: sonic.c:75
#define SAMPLE_SHIFT
Definition: sonic.c:71
static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
Definition: sonic.c:107
int downsampling
Definition: sonic.c:50
int decorrelation
Definition: sonic.c:48
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:81
int window_size
Definition: sonic.c:63
#define CODEC_CAP_EXPERIMENTAL
Codec is experimental and is thus avoided in favor of non experimental encoders.
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:54
int channels
number of audio channels
double quantization
Definition: sonic.c:51
int * coded_samples[MAX_CHANNELS]
Definition: sonic.c:57
int key_frame
1 -> keyframe, 0-> not
Definition: frame.h:139
static const uint8_t * align_get_bits(GetBitContext *s)
Definition: get_bits.h:418
static int predictor_calc_error(int *k, int *state, int order, int error)
Definition: sonic.c:382
int frame_size
Definition: sonic.c:53
int num_taps
Definition: sonic.c:50
#define AV_LOG_INFO
Definition: log.h:156
#define SAMPLE_FACTOR
Definition: sonic.c:73
Filter the word “frame” indicates either a video frame or a group of audio samples
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out
static int shift_down(int a, int b)
Definition: sonic.c:91
static int decode(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
Definition: crystalhd.c:868
exp golomb vlc stuff
This structure stores compressed data.
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:127
for(j=16;j >0;--j)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
int * window
Definition: sonic.c:62