43 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark) 44 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark) 46 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f 48 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f 50 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f 52 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f 54 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f 56 #define PSY_3GPP_RPEMIN 0.01f 57 #define PSY_3GPP_RPELEV 2.0f 59 #define PSY_3GPP_C1 3.0f 60 #define PSY_3GPP_C2 1.3219281f 61 #define PSY_3GPP_C3 0.55935729f 63 #define PSY_SNR_1DB 7.9432821e-1f 64 #define PSY_SNR_25DB 3.1622776e-3f 66 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f 67 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f 68 #define PSY_3GPP_SAVE_ADD_L -0.84285712f 69 #define PSY_3GPP_SAVE_ADD_S -0.75f 70 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f 71 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f 72 #define PSY_3GPP_SPEND_ADD_L -0.35f 73 #define PSY_3GPP_SPEND_ADD_S -0.26111111f 74 #define PSY_3GPP_CLIP_LO_L 0.2f 75 #define PSY_3GPP_CLIP_LO_S 0.2f 76 #define PSY_3GPP_CLIP_HI_L 0.95f 77 #define PSY_3GPP_CLIP_HI_S 0.75f 79 #define PSY_3GPP_AH_THR_LONG 0.5f 80 #define PSY_3GPP_AH_THR_SHORT 0.63f 88 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f) 91 #define PSY_LAME_FIR_LEN 21 92 #define AAC_BLOCK_SIZE_LONG 1024
93 #define AAC_BLOCK_SIZE_SHORT 128
94 #define AAC_NUM_BLOCKS_SHORT 8
95 #define PSY_LAME_NUM_SUBBLOCKS 3
215 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
216 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
217 -5.52212e-17 * 2, -0.313819 * 2
230 int lower_range = 12, upper_range = 12;
231 int lower_range_kbps = psy_abr_map[12].
quality;
232 int upper_range_kbps = psy_abr_map[12].
quality;
238 for (i = 1; i < 13; i++) {
239 if (
FFMAX(bitrate, psy_abr_map[i].
quality) != bitrate) {
241 upper_range_kbps = psy_abr_map[
i ].
quality;
243 lower_range_kbps = psy_abr_map[i - 1].
quality;
249 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
250 return psy_abr_map[lower_range].
st_lrm;
251 return psy_abr_map[upper_range].
st_lrm;
260 for (i = 0; i < avctx->
channels; i++) {
278 return 13.3f *
atanf(0.00076f * f) + 3.5f *
atanf((f / 7500.0f) * (f / 7500.0f));
289 return 3.64 * pow(f, -0.8)
290 - 6.8 *
exp(-0.6 * (f - 3.4) * (f - 3.4))
291 + 6.0 *
exp(-0.15 * (f - 8.7) * (f - 8.7))
292 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
299 float prev, minscale, minath, minsnr, pe_min;
302 const float num_bark =
calc_bark((
float)bandwidth);
315 for (j = 0; j < 2; j++) {
319 float avg_chan_bits = chan_bitrate / ctx->
avctx->
sample_rate * (j ? 128.0f : 1024.0f);
328 for (g = 0; g < ctx->
num_bands[j]; g++) {
330 bark =
calc_bark((i-1) * line_to_frequency);
331 coeffs[
g].
barks = (bark + prev) / 2.0;
334 for (g = 0; g < ctx->
num_bands[j] - 1; g++) {
336 float bark_width = coeffs[g+1].
barks - coeffs->
barks;
339 coeff->
spread_low[1] = pow(10.0, -bark_width * en_spread_low);
340 coeff->
spread_hi [1] = pow(10.0, -bark_width * en_spread_hi);
341 pe_min = bark_pe * bark_width;
342 minsnr =
exp2(pe_min / band_sizes[g]) - 1.5f;
346 for (g = 0; g < ctx->
num_bands[j]; g++) {
347 minscale =
ath(start * line_to_frequency,
ATH_ADD);
348 for (i = 1; i < band_sizes[
g]; i++)
349 minscale =
FFMIN(minscale,
ath((start + i) * line_to_frequency,
ATH_ADD));
350 coeffs[
g].
ath = minscale - minath;
351 start += band_sizes[
g];
369 ret = 0.7548f * (in - state[0]) + 0.5095
f * state[1];
379 0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
387 const int16_t *audio,
389 int channel,
int prev_type)
393 int attack_ratio = br <= 16000 ? 18 : 10;
397 int next_type = pch->next_window_seq;
402 int switch_to_eight = 0;
403 float sum = 0.0, sum2 = 0.0;
406 for (
i = 0;
i < 8;
i++) {
407 for (j = 0; j < 128; j++) {
414 for (
i = 0;
i < 8;
i++) {
415 if (s[
i] > pch->win_energy * attack_ratio) {
421 pch->win_energy = pch->win_energy*7/8 + sum2/64;
423 wi.window_type[1] = prev_type;
431 grouping = pch->next_grouping;
447 pch->next_window_seq = next_type;
449 for (
i = 0;
i < 3;
i++)
450 wi.window_type[
i] = prev_type;
461 for (
i = 0;
i < 8;
i++) {
462 if (!((grouping >>
i) & 1))
464 wi.grouping[lastgrp]++;
481 float clipped_pe, bit_save, bit_spend, bit_factor, fill_level;
485 fill_level = av_clipf((
float)ctx->
fill_level / size, clip_low, clip_high);
486 clipped_pe = av_clipf(pe, ctx->
pe.
min, ctx->
pe.
max);
487 bit_save = (fill_level + bitsave_add) * bitsave_slope;
488 assert(bit_save <= 0.3f && bit_save >= -0.05000001
f);
489 bit_spend = (fill_level + bitspend_add) * bitspend_slope;
490 assert(bit_spend <= 0.5f && bit_spend >= -0.1
f);
497 bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->
pe.
max - ctx->
pe.
min)) * (clipped_pe - ctx->
pe.
min);
531 float thr_avg, reduction;
533 if(active_lines == 0.0)
536 thr_avg =
exp2f((a - pe) / (4.0
f * active_lines));
537 reduction =
exp2f((a - desired_pe) / (4.0
f * active_lines)) - thr_avg;
539 return FFMAX(reduction, 0.0
f);
549 thr = sqrtf(thr) + reduction;
567 #ifndef calc_thr_3gpp 569 const uint8_t *band_sizes,
const float *coefs)
574 for (g = 0; g < num_bands; g++) {
577 float form_factor = 0.0f;
580 for (i = 0; i < band_sizes[
g]; i++) {
581 band->
energy += coefs[start+
i] * coefs[start+
i];
582 form_factor += sqrtf(fabs(coefs[start+i]));
584 Temp = band->
energy > 0 ? sqrtf((
float)band_sizes[g] / band->
energy) : 0;
586 band->
nz_lines = form_factor * sqrtf(Temp);
588 start += band_sizes[
g];
594 #ifndef psy_hp_filter 603 sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i +
PSY_LAME_FIR_LEN - j]);
604 sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i +
PSY_LAME_FIR_LEN - j - 1]);
607 hpfsmpl[
i] = (sum1 + sum2) * 32768.0
f;
621 float desired_bits, desired_pe, delta_pe, reduction=
NAN, spread_en[128] = {0};
623 float pe = pctx->chan_bitrate > 32000 ? 0.0f :
FFMAX(50.0
f, 100.0
f - pctx->chan_bitrate * 100.0f / 32000.0f);
624 const int num_bands = ctx->num_bands[wi->num_windows == 8];
625 const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
626 AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
633 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
637 spread_en[0] = bands[0].
energy;
638 for (
g = 1;
g < num_bands;
g++) {
639 bands[
g].
thr =
FFMAX(bands[
g].
thr, bands[
g-1].thr * coeffs[
g].spread_hi[0]);
640 spread_en[
w+
g] =
FFMAX(bands[
g].
energy, spread_en[
w+
g-1] * coeffs[
g].spread_hi[1]);
642 for (
g = num_bands - 2;
g >= 0;
g--) {
643 bands[
g].
thr =
FFMAX(bands[
g].
thr, bands[
g+1].thr * coeffs[
g].spread_low[0]);
644 spread_en[
w+
g] =
FFMAX(spread_en[
w+
g], spread_en[
w+
g+1] * coeffs[
g].spread_low[1]);
647 for (
g = 0;
g < num_bands;
g++) {
662 if (spread_en[
w+
g] * avoid_hole_thr > band->
energy || coeffs[
g].
min_snr > 1.0f)
670 ctx->ch[channel].entropy =
pe;
671 desired_bits =
calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
677 if (ctx->bitres.bits > 0)
682 if (desired_pe < pe) {
684 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
689 for (
g = 0;
g < num_bands;
g++) {
701 for (
i = 0;
i < 2;
i++) {
702 float pe_no_ah = 0.0f, desired_pe_no_ah;
704 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
705 for (
g = 0;
g < num_bands;
g++) {
709 pe_no_ah += band->
pe;
715 desired_pe_no_ah =
FFMAX(desired_pe - (pe - pe_no_ah), 0.0
f);
720 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
721 for (
g = 0;
g < num_bands;
g++) {
731 delta_pe = desired_pe -
pe;
732 if (fabs(delta_pe) > 0.05
f * desired_pe)
736 if (pe < 1.15
f * desired_pe) {
739 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
740 for (
g = 0;
g < num_bands;
g++) {
757 while (pe > desired_pe &&
g--) {
758 for (
w = 0;
w < wi->num_windows*16;
w+= 16) {
771 for (
w = 0;
w < wi->num_windows*16;
w += 16) {
772 for (
g = 0;
g < num_bands;
g++) {
774 FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[
w+
g];
781 memcpy(pch->prev_band, pch->band,
sizeof(pch->band));
790 for (ch = 0; ch < group->
num_ch; ch++)
820 const float *la,
int channel,
int prev_type)
825 int uselongblock = 1;
832 float const *pf = hpfsmpl;
844 energy_subshort[
i] = pch->prev_energy_subshort[i + ((
AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
845 assert(pch->prev_energy_subshort[i + ((
AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
846 attack_intensity[
i] = energy_subshort[
i] / pch->prev_energy_subshort[i + ((
AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
847 energy_short[0] += energy_subshort[
i];
853 for (; pf < pfe; pf++)
854 p =
FFMAX(p, fabsf(*pf));
864 if (p > energy_subshort[i + 1])
865 p = p / energy_subshort[i + 1];
866 else if (energy_subshort[i + 1] > p * 10.0
f)
867 p = energy_subshort[i + 1] / (p * 10.0f);
875 if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
876 if (attack_intensity[i] > pch->attack_threshold)
884 float const u = energy_short[i - 1];
885 float const v = energy_short[
i];
886 float const m =
FFMAX(u, v);
888 if (u < 1.7
f * v && v < 1.7
f * u) {
889 if (i == 1 && attacks[0] < attacks[i])
894 att_sum += attacks[
i];
897 if (attacks[0] <= pch->prev_attack)
900 att_sum += attacks[0];
902 if (pch->prev_attack == 3 || att_sum) {
905 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
906 if (attacks[i] && attacks[i-1])
929 for (i = 0; i < 8; i++) {
930 if (!((pch->next_grouping >> i) & 1))
942 for (i = 0; i < 9; i++) {
950 pch->prev_attack = attacks[8];
957 .
name =
"3GPP TS 26.403-inspired model",
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
int quality
Quality to map the rest of the values to.
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
int grouping[8]
window grouping (for e.g. AAC)
#define AAC_BLOCK_SIZE_SHORT
short block size
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
uint8_t ** bands
scalefactor band sizes for possible frame sizes
#define PSY_3GPP_AH_THR_SHORT
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
struct FFPsyContext::@82 bitres
psychoacoustic information for an arbitrary group of channels
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
float ath
absolute threshold of hearing per bands
#define PSY_3GPP_EN_SPREAD_HI_L1
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
enum WindowSequence next_window_seq
window sequence to be used in the next frame
#define AAC_BLOCK_SIZE_LONG
long block size
int * num_bands
number of scalefactor bands for possible frame sizes
output residual component w
LAME psy model preset struct.
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
float thr
energy threshold
float correction
PE correction factor.
static av_cold void psy_3gpp_end(FFPsyContext *apc)
float attack_threshold
attack threshold for this channel
#define PSY_3GPP_EN_SPREAD_LOW_L
float nz_lines
number of non-zero spectral lines
psychoacoustic model frame type-dependent coefficients
int size
size of the bit reservoir in bits
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
#define PSY_3GPP_CLIP_LO_L
#define PSY_3GPP_SPEND_SLOPE_S
#define PSY_3GPP_THR_SPREAD_LOW
context used by psychoacoustic model
single band psychoacoustic information
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
#define PSY_3GPP_SAVE_ADD_L
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs)
static av_cold float calc_bark(float f)
Calculate Bark value for given frequency.
#define PSY_3GPP_SPEND_ADD_S
struct AacPsyBand AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
3GPP TS26.403-inspired psychoacoustic model specific data
single/pair channel context for psychoacoustic model
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
float barks
Bark value for each spectral band in long frame.
#define CODEC_FLAG_QSCALE
Use fixed qscale.
float pe_const
constant part of the PE calculation
int num_windows
number of windows in a frame
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
#define PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
codec-specific psychoacoustic model implementation
static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
float thr_quiet
threshold in quiet
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
int bit_rate
the average bitrate
struct AacPsyContext AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
int prev_attack
attack value for the last short block in the previous sequence
#define PSY_3GPP_SAVE_SLOPE_S
uint8_t num_ch
number of channels in this group
int frame_bits
average bits per frame
int fill_level
bit reservoir fill level
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
#define PSY_3GPP_SAVE_SLOPE_L
Reference: libavcodec/aacpsy.c.
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
const FFPsyModel ff_aac_psy_model
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
float st_lrm
short threshold for L, R, and M channels
#define PSY_3GPP_EN_SPREAD_LOW_S
int sample_rate
samples per second
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
main external API structure.
float win_energy
sliding average of channel energy
void * model_priv_data
psychoacoustic model implementation private data
float active_lines
number of active spectral lines
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
int avoid_holes
hole avoidance flag
Replacements for frequently missing libm functions.
AacPsyBand band[128]
bands information
#define PSY_3GPP_CLIP_HI_S
synthesis window for stochastic i
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
int window_shape
window shape (sine/KBD/whatever)
static const double coeff[2][5]
float max
maximum allowed PE for bit factor calculation
float previous
allowed PE of the previous frame
AacPsyCoeffs psy_coef[2][64]
float min
minimum allowed PE for bit factor calculation
int global_quality
Global quality for codecs which cannot change it per frame.
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
static float calc_pe_3gpp(AacPsyBand *band)
windowing related information
struct AacPsyContext::@30 pe
#define PSY_3GPP_BITS_TO_PE(bits)
float norm_fac
normalization factor for linearization
int chan_bitrate
bitrate per channel
int cutoff
Audio cutoff bandwidth (0 means "automatic")
#define PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_AH_THR_LONG
int channels
number of audio channels
float pe
perceptual entropy
#define PSY_3GPP_EN_SPREAD_HI_S
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
#define PSY_3GPP_SAVE_ADD_S
struct AacPsyCoeffs AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
AVCodecContext * avctx
encoder context
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
#define PSY_3GPP_CLIP_HI_L
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
struct AacPsyChannel AacPsyChannel
single/pair channel context for psychoacoustic model
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
#define PSY_3GPP_SPEND_ADD_L