yading@10: /* yading@10: * AAC definitions and structures yading@10: * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) yading@10: * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) yading@10: * yading@10: * This file is part of FFmpeg. yading@10: * yading@10: * FFmpeg is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * FFmpeg is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with FFmpeg; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: yading@10: /** yading@10: * @file yading@10: * AAC definitions and structures yading@10: * @author Oded Shimon ( ods15 ods15 dyndns org ) yading@10: * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) yading@10: */ yading@10: yading@10: #ifndef AVCODEC_AAC_H yading@10: #define AVCODEC_AAC_H yading@10: yading@10: #include "libavutil/float_dsp.h" yading@10: #include "avcodec.h" yading@10: #include "fft.h" yading@10: #include "mpeg4audio.h" yading@10: #include "sbr.h" yading@10: #include "fmtconvert.h" yading@10: yading@10: #include yading@10: yading@10: #define MAX_CHANNELS 64 yading@10: #define MAX_ELEM_ID 16 yading@10: yading@10: #define TNS_MAX_ORDER 20 yading@10: #define MAX_LTP_LONG_SFB 40 yading@10: yading@10: enum RawDataBlockType { yading@10: TYPE_SCE, yading@10: TYPE_CPE, yading@10: TYPE_CCE, yading@10: TYPE_LFE, yading@10: TYPE_DSE, yading@10: TYPE_PCE, yading@10: TYPE_FIL, yading@10: TYPE_END, yading@10: }; yading@10: yading@10: enum ExtensionPayloadID { yading@10: EXT_FILL, yading@10: EXT_FILL_DATA, yading@10: EXT_DATA_ELEMENT, yading@10: EXT_DYNAMIC_RANGE = 0xb, yading@10: EXT_SBR_DATA = 0xd, yading@10: EXT_SBR_DATA_CRC = 0xe, yading@10: }; yading@10: yading@10: enum WindowSequence { yading@10: ONLY_LONG_SEQUENCE, yading@10: LONG_START_SEQUENCE, yading@10: EIGHT_SHORT_SEQUENCE, yading@10: LONG_STOP_SEQUENCE, yading@10: }; yading@10: yading@10: enum BandType { yading@10: ZERO_BT = 0, ///< Scalefactors and spectral data are all zero. yading@10: FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word. yading@10: ESC_BT = 11, ///< Spectral data are coded with an escape sequence. yading@10: NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream. yading@10: INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions. yading@10: INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions. yading@10: }; yading@10: yading@10: #define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10) yading@10: yading@10: enum ChannelPosition { yading@10: AAC_CHANNEL_OFF = 0, yading@10: AAC_CHANNEL_FRONT = 1, yading@10: AAC_CHANNEL_SIDE = 2, yading@10: AAC_CHANNEL_BACK = 3, yading@10: AAC_CHANNEL_LFE = 4, yading@10: AAC_CHANNEL_CC = 5, yading@10: }; yading@10: yading@10: /** yading@10: * The point during decoding at which channel coupling is applied. yading@10: */ yading@10: enum CouplingPoint { yading@10: BEFORE_TNS, yading@10: BETWEEN_TNS_AND_IMDCT, yading@10: AFTER_IMDCT = 3, yading@10: }; yading@10: yading@10: /** yading@10: * Output configuration status yading@10: */ yading@10: enum OCStatus { yading@10: OC_NONE, ///< Output unconfigured yading@10: OC_TRIAL_PCE, ///< Output configuration under trial specified by an inband PCE yading@10: OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header yading@10: OC_GLOBAL_HDR, ///< Output configuration set in a global header but not yet locked yading@10: OC_LOCKED, ///< Output configuration locked in place yading@10: }; yading@10: yading@10: typedef struct OutputConfiguration { yading@10: MPEG4AudioConfig m4ac; yading@10: uint8_t layout_map[MAX_ELEM_ID*4][3]; yading@10: int layout_map_tags; yading@10: int channels; yading@10: uint64_t channel_layout; yading@10: enum OCStatus status; yading@10: } OutputConfiguration; yading@10: yading@10: /** yading@10: * Predictor State yading@10: */ yading@10: typedef struct PredictorState { yading@10: float cor0; yading@10: float cor1; yading@10: float var0; yading@10: float var1; yading@10: float r0; yading@10: float r1; yading@10: } PredictorState; yading@10: yading@10: #define MAX_PREDICTORS 672 yading@10: yading@10: #define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to scale difference in 512 times yading@10: #define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0 yading@10: #define SCALE_MAX_POS 255 ///< scalefactor index maximum value yading@10: #define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard yading@10: #define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference yading@10: yading@10: /** yading@10: * Long Term Prediction yading@10: */ yading@10: typedef struct LongTermPrediction { yading@10: int8_t present; yading@10: int16_t lag; yading@10: float coef; yading@10: int8_t used[MAX_LTP_LONG_SFB]; yading@10: } LongTermPrediction; yading@10: yading@10: /** yading@10: * Individual Channel Stream yading@10: */ yading@10: typedef struct IndividualChannelStream { yading@10: uint8_t max_sfb; ///< number of scalefactor bands per group yading@10: enum WindowSequence window_sequence[2]; yading@10: uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window. yading@10: int num_window_groups; yading@10: uint8_t group_len[8]; yading@10: LongTermPrediction ltp; yading@10: const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window yading@10: const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window yading@10: int num_swb; ///< number of scalefactor window bands yading@10: int num_windows; yading@10: int tns_max_bands; yading@10: int predictor_present; yading@10: int predictor_initialized; yading@10: int predictor_reset_group; yading@10: uint8_t prediction_used[41]; yading@10: } IndividualChannelStream; yading@10: yading@10: /** yading@10: * Temporal Noise Shaping yading@10: */ yading@10: typedef struct TemporalNoiseShaping { yading@10: int present; yading@10: int n_filt[8]; yading@10: int length[8][4]; yading@10: int direction[8][4]; yading@10: int order[8][4]; yading@10: float coef[8][4][TNS_MAX_ORDER]; yading@10: } TemporalNoiseShaping; yading@10: yading@10: /** yading@10: * Dynamic Range Control - decoded from the bitstream but not processed further. yading@10: */ yading@10: typedef struct DynamicRangeControl { yading@10: int pce_instance_tag; ///< Indicates with which program the DRC info is associated. yading@10: int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative yading@10: int dyn_rng_ctl[17]; ///< DRC magnitude information yading@10: int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing. yading@10: int band_incr; ///< Number of DRC bands greater than 1 having DRC info. yading@10: int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain. yading@10: int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines. yading@10: int prog_ref_level; /**< A reference level for the long-term program audio level for all yading@10: * channels combined. yading@10: */ yading@10: } DynamicRangeControl; yading@10: yading@10: typedef struct Pulse { yading@10: int num_pulse; yading@10: int start; yading@10: int pos[4]; yading@10: int amp[4]; yading@10: } Pulse; yading@10: yading@10: /** yading@10: * coupling parameters yading@10: */ yading@10: typedef struct ChannelCoupling { yading@10: enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. yading@10: int num_coupled; ///< number of target elements yading@10: enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. yading@10: int id_select[8]; ///< element id yading@10: int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; yading@10: * [2] list of gains for left channel; [3] lists of gains for both channels yading@10: */ yading@10: float gain[16][120]; yading@10: } ChannelCoupling; yading@10: yading@10: /** yading@10: * Single Channel Element - used for both SCE and LFE elements. yading@10: */ yading@10: typedef struct SingleChannelElement { yading@10: IndividualChannelStream ics; yading@10: TemporalNoiseShaping tns; yading@10: Pulse pulse; yading@10: enum BandType band_type[128]; ///< band types yading@10: int band_type_run_end[120]; ///< band type run end points yading@10: float sf[120]; ///< scalefactors yading@10: int sf_idx[128]; ///< scalefactor indices (used by encoder) yading@10: uint8_t zeroes[128]; ///< band is not coded (used by encoder) yading@10: DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT yading@10: DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap yading@10: DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer yading@10: DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP yading@10: PredictorState predictor_state[MAX_PREDICTORS]; yading@10: float *ret; ///< PCM output yading@10: } SingleChannelElement; yading@10: yading@10: /** yading@10: * channel element - generic struct for SCE/CPE/CCE/LFE yading@10: */ yading@10: typedef struct ChannelElement { yading@10: // CPE specific yading@10: int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. yading@10: int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder) yading@10: uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band yading@10: // shared yading@10: SingleChannelElement ch[2]; yading@10: // CCE specific yading@10: ChannelCoupling coup; yading@10: SpectralBandReplication sbr; yading@10: } ChannelElement; yading@10: yading@10: /** yading@10: * main AAC context yading@10: */ yading@10: struct AACContext { yading@10: AVClass *class; yading@10: AVCodecContext *avctx; yading@10: AVFrame *frame; yading@10: yading@10: int is_saved; ///< Set if elements have stored overlap from previous frame. yading@10: DynamicRangeControl che_drc; yading@10: yading@10: /** yading@10: * @name Channel element related data yading@10: * @{ yading@10: */ yading@10: ChannelElement *che[4][MAX_ELEM_ID]; yading@10: ChannelElement *tag_che_map[4][MAX_ELEM_ID]; yading@10: int tags_mapped; yading@10: /** @} */ yading@10: yading@10: /** yading@10: * @name temporary aligned temporary buffers yading@10: * (We do not want to have these on the stack.) yading@10: * @{ yading@10: */ yading@10: DECLARE_ALIGNED(32, float, buf_mdct)[1024]; yading@10: /** @} */ yading@10: yading@10: /** yading@10: * @name Computed / set up during initialization yading@10: * @{ yading@10: */ yading@10: FFTContext mdct; yading@10: FFTContext mdct_small; yading@10: FFTContext mdct_ltp; yading@10: FmtConvertContext fmt_conv; yading@10: AVFloatDSPContext fdsp; yading@10: int random_state; yading@10: /** @} */ yading@10: yading@10: /** yading@10: * @name Members used for output yading@10: * @{ yading@10: */ yading@10: SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement yading@10: /** @} */ yading@10: yading@10: yading@10: /** yading@10: * @name Japanese DTV specific extension yading@10: * @{ yading@10: */ yading@10: int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel yading@10: int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel yading@10: /** @} */ yading@10: yading@10: DECLARE_ALIGNED(32, float, temp)[128]; yading@10: yading@10: OutputConfiguration oc[2]; yading@10: int warned_num_aac_frames; yading@10: yading@10: /* aacdec functions pointers */ yading@10: void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce); yading@10: void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce); yading@10: void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns, yading@10: IndividualChannelStream *ics, int decode); yading@10: void (*windowing_and_mdct_ltp)(AACContext *ac, float *out, yading@10: float *in, IndividualChannelStream *ics); yading@10: void (*update_ltp)(AACContext *ac, SingleChannelElement *sce); yading@10: yading@10: }; yading@10: yading@10: void ff_aacdec_init_mips(AACContext *c); yading@10: yading@10: #endif /* AVCODEC_AAC_H */