aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/libm.h"
28 
29 #include "avcodec.h"
30 #include "aactab.h"
31 #include "psymodel.h"
32 
33 /***********************************
34  * TODOs:
35  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
36  * control quality for quality-based output
37  **********************************/
38 
39 /**
40  * constants for 3GPP AAC psychoacoustic model
41  * @{
42  */
43 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark)
44 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark)
45 /* spreading factor for low-to-hi energy spreading, long block, > 22kbps/channel (20dB/Bark) */
46 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
47 /* spreading factor for low-to-hi energy spreading, long block, <= 22kbps/channel (15dB/Bark) */
48 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
49 /* spreading factor for low-to-hi energy spreading, short block (15 dB/Bark) */
50 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f
51 /* spreading factor for hi-to-low energy spreading, long block (30dB/Bark) */
52 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
53 /* spreading factor for hi-to-low energy spreading, short block (20dB/Bark) */
54 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f
55 
56 #define PSY_3GPP_RPEMIN 0.01f
57 #define PSY_3GPP_RPELEV 2.0f
58 
59 #define PSY_3GPP_C1 3.0f /* log2(8) */
60 #define PSY_3GPP_C2 1.3219281f /* log2(2.5) */
61 #define PSY_3GPP_C3 0.55935729f /* 1 - C2 / C1 */
62 
63 #define PSY_SNR_1DB 7.9432821e-1f /* -1dB */
64 #define PSY_SNR_25DB 3.1622776e-3f /* -25dB */
65 
66 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f
67 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f
68 #define PSY_3GPP_SAVE_ADD_L -0.84285712f
69 #define PSY_3GPP_SAVE_ADD_S -0.75f
70 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f
71 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f
72 #define PSY_3GPP_SPEND_ADD_L -0.35f
73 #define PSY_3GPP_SPEND_ADD_S -0.26111111f
74 #define PSY_3GPP_CLIP_LO_L 0.2f
75 #define PSY_3GPP_CLIP_LO_S 0.2f
76 #define PSY_3GPP_CLIP_HI_L 0.95f
77 #define PSY_3GPP_CLIP_HI_S 0.75f
78 
79 #define PSY_3GPP_AH_THR_LONG 0.5f
80 #define PSY_3GPP_AH_THR_SHORT 0.63f
81 
82 enum {
86 };
87 
88 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
89 
90 /* LAME psy model constants */
91 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
92 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size
93 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size
94 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
95 #define PSY_LAME_NUM_SUBBLOCKS 3 ///< Number of sub-blocks in each short block
96 
97 /**
98  * @}
99  */
100 
/**
 * Per-band working data of the 3GPP TS26.403-inspired psychoacoustic model.
 */
typedef struct AacPsyBand {
    float energy;       /**< band energy */
    float thr;          /**< energy threshold */
    float thr_quiet;    /**< threshold in quiet */
    float nz_lines;     /**< number of non-zero spectral lines */
    float active_lines; /**< number of active spectral lines */
    float pe;           /**< perceptual entropy */
    float pe_const;     /**< constant part of the PE calculation */
    float norm_fac;     /**< normalization factor for linearization */
    int   avoid_holes;  /**< hole avoidance flag */
} AacPsyBand;
115 
116 /**
117  * single/pair channel context for psychoacoustic model
118  */
119 typedef struct AacPsyChannel{
120  AacPsyBand band[128]; ///< bands information
121  AacPsyBand prev_band[128]; ///< bands information from the previous frame
122 
123  float win_energy; ///< sliding average of channel energy
124  float iir_state[2]; ///< hi-pass IIR filter state
125  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
126  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
127  /* LAME psy model specific members */
128  float attack_threshold; ///< attack threshold for this channel
129  float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
130  int prev_attack; ///< attack value for the last short block in the previous sequence
132 
/**
 * Per-band coefficients of the psychoacoustic model. The coefficients depend
 * on the frame type, so a separate table is kept for each one.
 *
 * In both spreading arrays index 0 holds the factor used for threshold
 * spreading and index 1 the factor used for energy spreading.
 */
typedef struct AacPsyCoeffs {
    float ath;           /**< absolute threshold of hearing for this band */
    float barks;         /**< Bark value of this band */
    float spread_low[2]; /**< spreading factors for high-to-low spreading */
    float spread_hi [2]; /**< spreading factors for low-to-high spreading */
    float min_snr;       /**< minimal SNR */
} AacPsyCoeffs;
143 
144 /**
145  * 3GPP TS26.403-inspired psychoacoustic model specific data
146  */
147 typedef struct AacPsyContext{
148  int chan_bitrate; ///< bitrate per channel
149  int frame_bits; ///< average bits per frame
150  int fill_level; ///< bit reservoir fill level
151  struct {
152  float min; ///< minimum allowed PE for bit factor calculation
153  float max; ///< maximum allowed PE for bit factor calculation
154  float previous; ///< allowed PE of the previous frame
155  float correction; ///< PE correction factor
156  } pe;
157  AacPsyCoeffs psy_coef[2][64];
160 
/**
 * One row of a LAME psy model preset table.
 */
typedef struct {
    /**
     * Quality to map the rest of the values to.
     * This is overloaded: it is the kbps per channel in ABR mode and the
     * requested quality in constant quality mode.
     */
    int   quality;
    float st_lrm; /**< short threshold for L, R, and M channels */
} PsyLamePreset;
171 
172 /**
173  * LAME psy model preset table for ABR
174  */
175 static const PsyLamePreset psy_abr_map[] = {
176 /* TODO: Tuning. These were taken from LAME. */
177 /* kbps/ch st_lrm */
178  { 8, 6.60},
179  { 16, 6.60},
180  { 24, 6.60},
181  { 32, 6.60},
182  { 40, 6.60},
183  { 48, 6.60},
184  { 56, 6.60},
185  { 64, 6.40},
186  { 80, 6.00},
187  { 96, 5.60},
188  {112, 5.20},
189  {128, 5.20},
190  {160, 5.20}
191 };
192 
193 /**
194 * LAME psy model preset table for constant quality
195 */
196 static const PsyLamePreset psy_vbr_map[] = {
197 /* vbr_q st_lrm */
198  { 0, 4.20},
199  { 1, 4.20},
200  { 2, 4.20},
201  { 3, 4.20},
202  { 4, 4.20},
203  { 5, 4.20},
204  { 6, 4.20},
205  { 7, 4.20},
206  { 8, 4.20},
207  { 9, 4.20},
208  {10, 4.20}
209 };
210 
/**
 * Coefficients of the FIR filter used by the LAME psy model.
 *
 * Ten values are stored, each already scaled by two.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2, /* [0] */
    -0.00851586  * 2, /* [1] */
    -6.74764e-18 * 2, /* [2] */
     0.0209036   * 2, /* [3] */
    -3.36639e-17 * 2, /* [4] */
    -0.0438162   * 2, /* [5] */
    -1.54175e-17 * 2, /* [6] */
     0.0931738   * 2, /* [7] */
    -5.52212e-17 * 2, /* [8] */
    -0.313819    * 2  /* [9] */
};
219 
220 #if ARCH_MIPS
221 # include "mips/aacpsy_mips.h"
222 #endif /* ARCH_MIPS */
223 
224 /**
225  * Calculate the ABR attack threshold from the above LAME psymodel table.
226  */
227 static float lame_calc_attack_threshold(int bitrate)
228 {
229  /* Assume max bitrate to start with */
230  int lower_range = 12, upper_range = 12;
231  int lower_range_kbps = psy_abr_map[12].quality;
232  int upper_range_kbps = psy_abr_map[12].quality;
233  int i;
234 
235  /* Determine which bitrates the value specified falls between.
236  * If the loop ends without breaking our above assumption of 320kbps was correct.
237  */
238  for (i = 1; i < 13; i++) {
239  if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {
240  upper_range = i;
241  upper_range_kbps = psy_abr_map[i ].quality;
242  lower_range = i - 1;
243  lower_range_kbps = psy_abr_map[i - 1].quality;
244  break; /* Upper range found */
245  }
246  }
247 
248  /* Determine which range the value specified is closer to */
249  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
250  return psy_abr_map[lower_range].st_lrm;
251  return psy_abr_map[upper_range].st_lrm;
252 }
253 
254 /**
255  * LAME psy model specific initialization
256  */
257 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) {
258  int i, j;
259 
260  for (i = 0; i < avctx->channels; i++) {
261  AacPsyChannel *pch = &ctx->ch[i];
262 
263  if (avctx->flags & CODEC_FLAG_QSCALE)
264  pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LAMBDA].st_lrm;
265  else
266  pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
267 
268  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
269  pch->prev_energy_subshort[j] = 10.0f;
270  }
271 }
272 
273 /**
274  * Calculate Bark value for given line.
275  */
276 static av_cold float calc_bark(float f)
277 {
278  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
279 }
280 
281 #define ATH_ADD 4
282 /**
283  * Calculate ATH value for given frequency.
284  * Borrowed from Lame.
285  */
286 static av_cold float ath(float f, float add)
287 {
288  f /= 1000.0f;
289  return 3.64 * pow(f, -0.8)
290  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
291  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
292  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
293 }
294 
296  AacPsyContext *pctx;
297  float bark;
298  int i, j, g, start;
299  float prev, minscale, minath, minsnr, pe_min;
300  const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;
301  const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx);
302  const float num_bark = calc_bark((float)bandwidth);
303 
304  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
305  pctx = (AacPsyContext*) ctx->model_priv_data;
306 
307  pctx->chan_bitrate = chan_bitrate;
308  pctx->frame_bits = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate;
309  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
310  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
311  ctx->bitres.size = 6144 - pctx->frame_bits;
312  ctx->bitres.size -= ctx->bitres.size % 8;
313  pctx->fill_level = ctx->bitres.size;
314  minath = ath(3410, ATH_ADD);
315  for (j = 0; j < 2; j++) {
316  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
317  const uint8_t *band_sizes = ctx->bands[j];
318  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
319  float avg_chan_bits = chan_bitrate / ctx->avctx->sample_rate * (j ? 128.0f : 1024.0f);
320  /* reference encoder uses 2.4% here instead of 60% like the spec says */
321  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
322  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
323  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
324  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
325 
326  i = 0;
327  prev = 0.0;
328  for (g = 0; g < ctx->num_bands[j]; g++) {
329  i += band_sizes[g];
330  bark = calc_bark((i-1) * line_to_frequency);
331  coeffs[g].barks = (bark + prev) / 2.0;
332  prev = bark;
333  }
334  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
335  AacPsyCoeffs *coeff = &coeffs[g];
336  float bark_width = coeffs[g+1].barks - coeffs->barks;
337  coeff->spread_low[0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_LOW);
338  coeff->spread_hi [0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_HI);
339  coeff->spread_low[1] = pow(10.0, -bark_width * en_spread_low);
340  coeff->spread_hi [1] = pow(10.0, -bark_width * en_spread_hi);
341  pe_min = bark_pe * bark_width;
342  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
343  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
344  }
345  start = 0;
346  for (g = 0; g < ctx->num_bands[j]; g++) {
347  minscale = ath(start * line_to_frequency, ATH_ADD);
348  for (i = 1; i < band_sizes[g]; i++)
349  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
350  coeffs[g].ath = minscale - minath;
351  start += band_sizes[g];
352  }
353  }
354 
355  pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
356 
357  lame_window_init(pctx, ctx->avctx);
358 
359  return 0;
360 }
361 
/**
 * Run one sample through the IIR filter used in the block switching decision.
 *
 * @param in    new input sample
 * @param state filter memory, updated in place: state[0] receives the input
 *              sample and state[1] the output that is returned
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float out = 0.7548f * (in - state[0]) + 0.5095f * state[1];

    state[0] = in;
    state[1] = out;

    return out;
}
374 
/**
 * Window grouping patterns, one byte each, stored as bits
 * (0 - new group, 1 - group continues).
 */
static const uint8_t window_grouping[9] = {
    0xB6, /* 1011 0110 */
    0x6C, /* 0110 1100 */
    0xD8, /* 1101 1000 */
    0xB2, /* 1011 0010 */
    0x66, /* 0110 0110 */
    0xC6, /* 1100 0110 */
    0x96, /* 1001 0110 */
    0x36, /* 0011 0110 */
    0x36  /* 0011 0110 */
};
381 
382 /**
383  * Tell encoder which window types to use.
384  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
385  */
387  const int16_t *audio,
388  const int16_t *la,
389  int channel, int prev_type)
390 {
391  int i, j;
392  int br = ctx->avctx->bit_rate / ctx->avctx->channels;
393  int attack_ratio = br <= 16000 ? 18 : 10;
395  AacPsyChannel *pch = &pctx->ch[channel];
396  uint8_t grouping = 0;
397  int next_type = pch->next_window_seq;
398  FFPsyWindowInfo wi = { { 0 } };
399 
400  if (la) {
401  float s[8], v;
402  int switch_to_eight = 0;
403  float sum = 0.0, sum2 = 0.0;
404  int attack_n = 0;
405  int stay_short = 0;
406  for (i = 0; i < 8; i++) {
407  for (j = 0; j < 128; j++) {
408  v = iir_filter(la[i*128+j], pch->iir_state);
409  sum += v*v;
410  }
411  s[i] = sum;
412  sum2 += sum;
413  }
414  for (i = 0; i < 8; i++) {
415  if (s[i] > pch->win_energy * attack_ratio) {
416  attack_n = i + 1;
417  switch_to_eight = 1;
418  break;
419  }
420  }
421  pch->win_energy = pch->win_energy*7/8 + sum2/64;
422 
423  wi.window_type[1] = prev_type;
424  switch (prev_type) {
425  case ONLY_LONG_SEQUENCE:
426  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
427  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
428  break;
429  case LONG_START_SEQUENCE:
430  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
431  grouping = pch->next_grouping;
432  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
433  break;
434  case LONG_STOP_SEQUENCE:
435  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
436  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
437  break;
439  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
440  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
441  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
442  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
443  break;
444  }
445 
446  pch->next_grouping = window_grouping[attack_n];
447  pch->next_window_seq = next_type;
448  } else {
449  for (i = 0; i < 3; i++)
450  wi.window_type[i] = prev_type;
451  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
452  }
453 
454  wi.window_shape = 1;
455  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
456  wi.num_windows = 1;
457  wi.grouping[0] = 1;
458  } else {
459  int lastgrp = 0;
460  wi.num_windows = 8;
461  for (i = 0; i < 8; i++) {
462  if (!((grouping >> i) & 1))
463  lastgrp = i;
464  wi.grouping[lastgrp]++;
465  }
466  }
467 
468  return wi;
469 }
470 
471 /* 5.6.1.2 "Calculation of Bit Demand" */
472 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
473  int short_window)
474 {
475  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
476  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
477  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
478  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
479  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
480  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
481  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level;
482 
483  ctx->fill_level += ctx->frame_bits - bits;
484  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
485  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
486  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
487  bit_save = (fill_level + bitsave_add) * bitsave_slope;
488  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
489  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
490  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
491  /* The bit factor graph in the spec is obviously incorrect.
492  * bit_spend + ((bit_spend - bit_spend))...
493  * The reference encoder subtracts everything from 1, but also seems incorrect.
494  * 1 - bit_save + ((bit_spend + bit_save))...
495  * Hopefully below is correct.
496  */
497  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
498  /* NOTE: The reference encoder attempts to center pe max/min around the current pe. */
499  ctx->pe.max = FFMAX(pe, ctx->pe.max);
500  ctx->pe.min = FFMIN(pe, ctx->pe.min);
501 
502  return FFMIN(ctx->frame_bits * bit_factor, ctx->frame_bits + size - bits);
503 }
504 
506 {
507  float pe, a;
508 
509  band->pe = 0.0f;
510  band->pe_const = 0.0f;
511  band->active_lines = 0.0f;
512  if (band->energy > band->thr) {
513  a = log2f(band->energy);
514  pe = a - log2f(band->thr);
515  band->active_lines = band->nz_lines;
516  if (pe < PSY_3GPP_C1) {
517  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
518  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
519  band->active_lines *= PSY_3GPP_C3;
520  }
521  band->pe = pe * band->nz_lines;
522  band->pe_const = a * band->nz_lines;
523  }
524 
525  return band->pe;
526 }
527 
/**
 * Estimate the threshold reduction value needed to move from the current
 * perceptual entropy to the desired one.
 *
 * @param a            sum of the constant PE parts of the bands considered
 * @param desired_pe   perceptual entropy to reach
 * @param pe           current perceptual entropy
 * @param active_lines number of active lines in the bands considered
 * @return non-negative reduction value; 0 when there are no active lines
 */
static float calc_reduction_3gpp(float a, float desired_pe, float pe,
                                 float active_lines)
{
    float current, delta;

    if (active_lines == 0.0)
        return 0;

    current = exp2f((a - pe) / (4.0f * active_lines));
    delta   = exp2f((a - desired_pe) / (4.0f * active_lines)) - current;

    /* never report a negative reduction */
    return FFMAX(delta, 0.0f);
}
541 
542 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
543  float reduction)
544 {
545  float thr = band->thr;
546 
547  if (band->energy > thr) {
548  thr = sqrtf(thr);
549  thr = sqrtf(thr) + reduction;
550  thr *= thr;
551  thr *= thr;
552 
553  /* This deviates from the 3GPP spec to match the reference encoder.
554  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
555  * that have hole avoidance on (active or inactive). It always reduces the
556  * threshold of bands with hole avoidance off.
557  */
558  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
559  thr = FFMAX(band->thr, band->energy * min_snr);
561  }
562  }
563 
564  return thr;
565 }
566 
567 #ifndef calc_thr_3gpp
568 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
569  const uint8_t *band_sizes, const float *coefs)
570 {
571  int i, w, g;
572  int start = 0;
573  for (w = 0; w < wi->num_windows*16; w += 16) {
574  for (g = 0; g < num_bands; g++) {
575  AacPsyBand *band = &pch->band[w+g];
576 
577  float form_factor = 0.0f;
578  float Temp;
579  band->energy = 0.0f;
580  for (i = 0; i < band_sizes[g]; i++) {
581  band->energy += coefs[start+i] * coefs[start+i];
582  form_factor += sqrtf(fabs(coefs[start+i]));
583  }
584  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
585  band->thr = band->energy * 0.001258925f;
586  band->nz_lines = form_factor * sqrtf(Temp);
587 
588  start += band_sizes[g];
589  }
590  }
591 }
592 #endif /* calc_thr_3gpp */
593 
594 #ifndef psy_hp_filter
595 static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
596 {
597  int i, j;
598  for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
599  float sum1, sum2;
600  sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
601  sum2 = 0.0;
602  for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
603  sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
604  sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
605  }
606  /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768. Tuning this for normalized floats would be difficult. */
607  hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
608  }
609 }
610 #endif /* psy_hp_filter */
611 
612 /**
613  * Calculate band thresholds as suggested in 3GPP TS26.403
614  */
615 static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
616  const float *coefs, const FFPsyWindowInfo *wi)
617 {
619  AacPsyChannel *pch = &pctx->ch[channel];
620  int i, w, g;
621  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
622  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
623  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
624  const int num_bands = ctx->num_bands[wi->num_windows == 8];
625  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
626  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
627  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
628 
629  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
630  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs);
631 
632  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
633  for (w = 0; w < wi->num_windows*16; w += 16) {
634  AacPsyBand *bands = &pch->band[w];
635 
636  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
637  spread_en[0] = bands[0].energy;
638  for (g = 1; g < num_bands; g++) {
639  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
640  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
641  }
642  for (g = num_bands - 2; g >= 0; g--) {
643  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
644  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
645  }
646  //5.4.2.4 "Threshold in quiet"
647  for (g = 0; g < num_bands; g++) {
648  AacPsyBand *band = &bands[g];
649 
650  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
651  //5.4.2.5 "Pre-echo control"
652  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (wi->window_type[1] == LONG_START_SEQUENCE && !w)))
653  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
654  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
655 
656  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
657  pe += calc_pe_3gpp(band);
658  a += band->pe_const;
659  active_lines += band->active_lines;
660 
661  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
662  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
664  else
666  }
667  }
668 
669  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
670  ctx->ch[channel].entropy = pe;
671  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
672  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
673  /* NOTE: PE correction is kept simple. During initial testing it had very
674  * little effect on the final bitrate. Probably a good idea to come
675  * back and do more testing later.
676  */
677  if (ctx->bitres.bits > 0)
678  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
679  0.85f, 1.15f);
680  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
681 
682  if (desired_pe < pe) {
683  /* 5.6.1.3.4 "First Estimation of the reduction value" */
684  for (w = 0; w < wi->num_windows*16; w += 16) {
685  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
686  pe = 0.0f;
687  a = 0.0f;
688  active_lines = 0.0f;
689  for (g = 0; g < num_bands; g++) {
690  AacPsyBand *band = &pch->band[w+g];
691 
692  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
693  /* recalculate PE */
694  pe += calc_pe_3gpp(band);
695  a += band->pe_const;
696  active_lines += band->active_lines;
697  }
698  }
699 
700  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
701  for (i = 0; i < 2; i++) {
702  float pe_no_ah = 0.0f, desired_pe_no_ah;
703  active_lines = a = 0.0f;
704  for (w = 0; w < wi->num_windows*16; w += 16) {
705  for (g = 0; g < num_bands; g++) {
706  AacPsyBand *band = &pch->band[w+g];
707 
708  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
709  pe_no_ah += band->pe;
710  a += band->pe_const;
711  active_lines += band->active_lines;
712  }
713  }
714  }
715  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
716  if (active_lines > 0.0f)
717  reduction += calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
718 
719  pe = 0.0f;
720  for (w = 0; w < wi->num_windows*16; w += 16) {
721  for (g = 0; g < num_bands; g++) {
722  AacPsyBand *band = &pch->band[w+g];
723 
724  if (active_lines > 0.0f)
725  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
726  pe += calc_pe_3gpp(band);
727  band->norm_fac = band->active_lines / band->thr;
728  norm_fac += band->norm_fac;
729  }
730  }
731  delta_pe = desired_pe - pe;
732  if (fabs(delta_pe) > 0.05f * desired_pe)
733  break;
734  }
735 
736  if (pe < 1.15f * desired_pe) {
737  /* 6.6.1.3.6 "Final threshold modification by linearization" */
738  norm_fac = 1.0f / norm_fac;
739  for (w = 0; w < wi->num_windows*16; w += 16) {
740  for (g = 0; g < num_bands; g++) {
741  AacPsyBand *band = &pch->band[w+g];
742 
743  if (band->active_lines > 0.5f) {
744  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
745  float thr = band->thr;
746 
747  thr *= exp2f(delta_sfb_pe / band->active_lines);
748  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
749  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
750  band->thr = thr;
751  }
752  }
753  }
754  } else {
755  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
756  g = num_bands;
757  while (pe > desired_pe && g--) {
758  for (w = 0; w < wi->num_windows*16; w+= 16) {
759  AacPsyBand *band = &pch->band[w+g];
760  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
761  coeffs[g].min_snr = PSY_SNR_1DB;
762  band->thr = band->energy * PSY_SNR_1DB;
763  pe += band->active_lines * 1.5f - band->pe;
764  }
765  }
766  }
767  /* TODO: allow more holes (unused without mid/side) */
768  }
769  }
770 
771  for (w = 0; w < wi->num_windows*16; w += 16) {
772  for (g = 0; g < num_bands; g++) {
773  AacPsyBand *band = &pch->band[w+g];
774  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
775 
776  psy_band->threshold = band->thr;
777  psy_band->energy = band->energy;
778  }
779  }
780 
781  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
782 }
783 
784 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
785  const float **coeffs, const FFPsyWindowInfo *wi)
786 {
787  int ch;
788  FFPsyChannelGroup *group = ff_psy_find_group(ctx, channel);
789 
790  for (ch = 0; ch < group->num_ch; ch++)
791  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
792 }
793 
795 {
797  av_freep(&pctx->ch);
798  av_freep(&apc->model_priv_data);
799 }
800 
801 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
802 {
803  int blocktype = ONLY_LONG_SEQUENCE;
804  if (uselongblock) {
806  blocktype = LONG_STOP_SEQUENCE;
807  } else {
808  blocktype = EIGHT_SHORT_SEQUENCE;
813  }
814 
815  wi->window_type[0] = ctx->next_window_seq;
816  ctx->next_window_seq = blocktype;
817 }
818 
819 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
820  const float *la, int channel, int prev_type)
821 {
823  AacPsyChannel *pch = &pctx->ch[channel];
824  int grouping = 0;
825  int uselongblock = 1;
826  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
827  int i;
828  FFPsyWindowInfo wi = { { 0 } };
829 
830  if (la) {
831  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
832  float const *pf = hpfsmpl;
833  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
834  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
835  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
836  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
837  int att_sum = 0;
838 
839  /* LAME comment: apply high pass filter of fs/4 */
840  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
841 
842  /* Calculate the energies of each sub-shortblock */
843  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
844  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
845  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
846  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
847  energy_short[0] += energy_subshort[i];
848  }
849 
850  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
851  float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
852  float p = 1.0f;
853  for (; pf < pfe; pf++)
854  p = FFMAX(p, fabsf(*pf));
855  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
856  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
857  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
858  * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
859  * (which is what we use here). What the 3 stands for is ambiguous, as it is both
860  * number of short blocks, and the number of sub-short blocks.
861  * It seems that LAME is comparing each sub-block to sub-block + 1 in the
862  * previous block.
863  */
864  if (p > energy_subshort[i + 1])
865  p = p / energy_subshort[i + 1];
866  else if (energy_subshort[i + 1] > p * 10.0f)
867  p = energy_subshort[i + 1] / (p * 10.0f);
868  else
869  p = 0.0;
870  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
871  }
872 
873  /* compare energy between sub-short blocks */
874  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
875  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
876  if (attack_intensity[i] > pch->attack_threshold)
877  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
878 
879  /* should have energy change between short blocks, in order to avoid periodic signals */
880  /* Good samples to show the effect are Trumpet test songs */
881  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
882  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
883  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
884  float const u = energy_short[i - 1];
885  float const v = energy_short[i];
886  float const m = FFMAX(u, v);
887  if (m < 40000) { /* (2) */
888  if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
889  if (i == 1 && attacks[0] < attacks[i])
890  attacks[0] = 0;
891  attacks[i] = 0;
892  }
893  }
894  att_sum += attacks[i];
895  }
896 
897  if (attacks[0] <= pch->prev_attack)
898  attacks[0] = 0;
899 
900  att_sum += attacks[0];
901  /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
902  if (pch->prev_attack == 3 || att_sum) {
903  uselongblock = 0;
904 
905  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
906  if (attacks[i] && attacks[i-1])
907  attacks[i] = 0;
908  }
909  } else {
910  /* We have no lookahead info, so just use same type as the previous sequence. */
911  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
912  }
913 
914  lame_apply_block_type(pch, &wi, uselongblock);
915 
916  wi.window_type[1] = prev_type;
917  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
918  wi.num_windows = 1;
919  wi.grouping[0] = 1;
920  if (wi.window_type[0] == LONG_START_SEQUENCE)
921  wi.window_shape = 0;
922  else
923  wi.window_shape = 1;
924  } else {
925  int lastgrp = 0;
926 
927  wi.num_windows = 8;
928  wi.window_shape = 0;
929  for (i = 0; i < 8; i++) {
930  if (!((pch->next_grouping >> i) & 1))
931  lastgrp = i;
932  wi.grouping[lastgrp]++;
933  }
934  }
935 
936  /* Determine grouping, based on the location of the first attack, and save for
937  * the next frame.
938  * FIXME: Move this to analysis.
939  * TODO: Tune groupings depending on attack location
940  * TODO: Handle more than one attack in a group
941  */
942  for (i = 0; i < 9; i++) {
943  if (attacks[i]) {
944  grouping = i;
945  break;
946  }
947  }
948  pch->next_grouping = window_grouping[grouping];
949 
950  pch->prev_attack = attacks[8];
951 
952  return wi;
953 }
954 
956 {
957  .name = "3GPP TS 26.403-inspired model",
958  .init = psy_3gpp_init,
959  .window = psy_lame_window,
960  .analyze = psy_3gpp_analyze,
961  .end = psy_3gpp_end,
962 };
Definition: start.py:1
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:165
float v
const char * s
Definition: avisynth_c.h:668
int size
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:378
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:69
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:93
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:472
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:84
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:80
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:196
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:56
Sinusoidal phase f
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:528
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:137
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:46
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:286
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:126
#define PSY_SNR_25DB
Definition: aacpsy.c:64
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:92
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:85
output residual component w
LAME psy model preset struct.
Definition: aacpsy.c:164
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
float thr
energy threshold
Definition: aacpsy.c:106
float correction
PE correction factor.
Definition: aacpsy.c:155
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:794
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:128
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:52
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:108
uint8_t bits
Definition: crc.c:216
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:136
#define av_cold
Definition: attributes.h:78
struct FFPsyContext::@83 bitres
window constants for m
int size
size of the bit reservoir in bits
Definition: psymodel.h:89
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:542
#define NAN
Definition: math.h:7
#define PSY_3GPP_C2
Definition: aacpsy.c:60
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:91
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:74
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:71
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:44
context used by psychoacoustic model
Definition: psymodel.h:76
#define atanf(x)
Definition: libm.h:38
#define AAC_CUTOFF(s)
Definition: psymodel.h:32
single band psychoacoustic information
Definition: psymodel.h:37
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:227
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:125
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:68
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs)
Definition: aacpsy.c:568
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:276
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:73
struct AacPsyBand AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
#define PSY_SNR_1DB
Definition: aacpsy.c:63
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:147
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:119
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:214
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:138
int flags
CODEC_FLAG_*.
#define CODEC_FLAG_QSCALE
Use fixed qscale.
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:111
int num_windows
number of windows in a frame
Definition: psymodel.h:68
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:819
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:70
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:43
float energy
Definition: psymodel.h:39
WindowSequence
Definition: aac.h:68
#define FFMAX(a, b)
Definition: common.h:56
external API header
codec-specific psychoacoustic model implementation
Definition: psymodel.h:99
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:57
static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:257
float thr_quiet
threshold in quiet
Definition: aacpsy.c:107
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:784
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:205
FFT buffer for g
Definition: stft_peak.m:17
int bit_rate
the average bitrate
struct AacPsyContext AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
#define FFMIN(a, b)
Definition: common.h:58
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:130
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:67
ret
Definition: avfilter.c:821
#define PSY_3GPP_C3
Definition: aacpsy.c:61
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:58
int frame_bits
average bits per frame
Definition: aacpsy.c:149
int fill_level
bit reservoir fill level
Definition: aacpsy.c:150
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:801
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:66
Reference: libavcodec/aacpsy.c.
float u
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:95
#define ATH_ADD
Definition: aacpsy.c:281
float energy
band energy
Definition: aacpsy.c:105
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:955
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:615
float a
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:169
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:54
1i.*Xphase exp()
#define exp2f(x)
Definition: libm.h:82
float prev_energy_subshort[8 *3]
Definition: aacpsy.c:129
int sample_rate
samples per second
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:66
main external API structure.
float win_energy
sliding average of channel energy
Definition: aacpsy.c:123
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:93
float active_lines
number of active spectral lines
Definition: aacpsy.c:109
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:365
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:113
Replacements for frequently missing libm functions.
AacPsyBand band[128]
bands information
Definition: aacpsy.c:120
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:77
synthesis window for stochastic i
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:56
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:175
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:67
static const double coeff[2][5]
Definition: vf_ow.c:64
float min_snr
minimal SNR
Definition: aacpsy.c:141
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:153
float previous
allowed PE of the previous frame
Definition: aacpsy.c:154
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:157
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:152
int global_quality
Global quality for codecs which cannot change it per frame.
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:295
static uint32_t state
Definition: trasher.c:27
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:595
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:140
const char * name
Definition: psymodel.h:100
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:386
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:505
#define exp2(x)
Definition: libm.h:77
windowing related information
Definition: psymodel.h:65
#define log2f(x)
Definition: libm.h:127
struct AacPsyContext::@30 pe
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:88
#define PSY_3GPP_C1
Definition: aacpsy.c:59
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:112
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:148
int cutoff
Audio cutoff bandwidth (0 means "automatic")
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:75
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:79
int channels
number of audio channels
float pe
perceptual entropy
Definition: aacpsy.c:110
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:50
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:169
AacPsyChannel * ch
Definition: aacpsy.c:158
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:69
struct AacPsyCoeffs AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
void INT64 start
Definition: avisynth_c.h:594
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:104
AVCodecContext * avctx
encoder context
Definition: psymodel.h:77
float threshold
Definition: psymodel.h:40
AAC data declarations.
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:139
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:76
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:66
struct AacPsyChannel AacPsyChannel
single/pair channel context for psychoacoustic model
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:94
#define av_unused
Definition: attributes.h:114
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:72