aac.h
Go to the documentation of this file.
1 /*
2  * AAC definitions and structures
3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * AAC definitions and structures
26  * @author Oded Shimon ( ods15 ods15 dyndns org )
27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
28  */
29 
30 #ifndef AVCODEC_AAC_H
31 #define AVCODEC_AAC_H
32 
33 #include "libavutil/float_dsp.h"
34 #include "avcodec.h"
35 #include "fft.h"
36 #include "mpeg4audio.h"
37 #include "sbr.h"
38 #include "fmtconvert.h"
39 
40 #include <stdint.h>
41 
42 #define MAX_CHANNELS 64 ///< array bound of DynamicRangeControl::exclude_mask and AACContext::output_element
43 #define MAX_ELEM_ID 16 ///< second dimension of AACContext::tag_che_map; multiplied by 4 it gives the row count of layout_map
44 
45 #define TNS_MAX_ORDER 20 ///< innermost dimension of TemporalNoiseShaping::coef
46 #define MAX_LTP_LONG_SFB 40 ///< NOTE(review): presumably the scalefactor band limit for LTP in long windows; not referenced in this listing - confirm
47 
57 };
58 
64  EXT_SBR_DATA = 0xd,
66 };
67 
73 };
74 
/**
 * Band types: how the scalefactor and spectral data of a band are represented.
 *
 * Only the values listed below are named; the numeric gaps between them have
 * no identifier of their own.
 * NOTE(review): INTENSITY_BT2 and INTENSITY_BT carry the same description;
 * presumably they differ in stereo phase - confirm against the AAC specification.
 */
75 enum BandType {
76  ZERO_BT = 0, ///< Scalefactors and spectral data are all zero.
77  FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word.
78  ESC_BT = 11, ///< Spectral data are coded with an escape sequence.
79  NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream.
80  INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions.
81  INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions.
82 };
83 
/**
 * Test whether codebook index x selects an unsigned codebook.
 *
 * Evaluates to a nonzero value (not normalized to 1) when bit 1 or bit 3 of
 * (x - 1) is set; for x in 1..11 that is x = 3, 4, 7, 8, 9, 10, 11.
 * The argument is parenthesized so that expressions built from
 * lower-precedence operators (e.g. a | b) are evaluated before the
 * subtraction. x is evaluated exactly once.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
85 
93 };
94 
95 /**
96  * The point during decoding at which channel coupling is applied.
97  */
102 };
103 
104 /**
105  * Output configuration status. Enumerators are declared from unconfigured (OC_NONE) through the trial and global-header states to OC_LOCKED; no explicit values are given, so they number upward from OC_NONE = 0 in that order.
106  */
107 enum OCStatus {
108  OC_NONE, ///< Output unconfigured
109  OC_TRIAL_PCE, ///< Output configuration under trial specified by an inband PCE
110  OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
111  OC_GLOBAL_HDR, ///< Output configuration set in a global header but not yet locked
112  OC_LOCKED, ///< Output configuration locked in place
113 };
114 
115 typedef struct OutputConfiguration {
119  int channels;
120  uint64_t channel_layout;
123 
124 /**
125  * Predictor State
126  */
127 typedef struct PredictorState {
128  float cor0;
129  float cor1;
130  float var0;
131  float var1;
132  float r0;
133  float r1;
135 
136 #define MAX_PREDICTORS 672 ///< number of PredictorState entries in SingleChannelElement::predictor_state
137 
138 #define SCALE_DIV_512 36 ///< scalefactor index difference that corresponds to a 512-fold difference in scale
139 #define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0
140 #define SCALE_MAX_POS 255 ///< scalefactor index maximum value
141 #define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard
142 #define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference
143 
144 /**
145  * Long Term Prediction
146  */
147 typedef struct LongTermPrediction {
148  int8_t present;
149  int16_t lag;
150  float coef;
153 
154 /**
155  * Individual Channel Stream
156  */
157 typedef struct IndividualChannelStream {
158  uint8_t max_sfb; ///< number of scalefactor bands per group
159  enum WindowSequence window_sequence[2];
160  uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
162  uint8_t group_len[8];
164  const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
165  const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
166  int num_swb; ///< number of scalefactor window bands
172  uint8_t prediction_used[41];
174 
175 /**
176  * Temporal Noise Shaping
177  */
178 typedef struct TemporalNoiseShaping {
179  int present;
180  int n_filt[8];
181  int length[8][4];
182  int direction[8][4];
183  int order[8][4];
184  float coef[8][4][TNS_MAX_ORDER];
186 
187 /**
188  * Dynamic Range Control - decoded from the bitstream but not processed further.
189  */
190 typedef struct DynamicRangeControl {
191  int pce_instance_tag; ///< Indicates with which program the DRC info is associated.
192  int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative
193  int dyn_rng_ctl[17]; ///< DRC magnitude information
194  int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.
195  int band_incr; ///< Number of DRC bands greater than 1 having DRC info.
196  int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.
197  int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
198  int prog_ref_level; /**< A reference level for the long-term program audio level for all
199  * channels combined.
200  */
202 
203 typedef struct Pulse {
205  int start;
206  int pos[4];
207  int amp[4];
208 } Pulse;
209 
210 /**
211  * coupling parameters
212  */
213 typedef struct ChannelCoupling {
214  enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied.
215  int num_coupled; ///< number of target elements
216  enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE.
217  int id_select[8]; ///< element id
218  int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel;
219  * [2] list of gains for left channel; [3] lists of gains for both channels
220  */
221  float gain[16][120];
223 
224 /**
225  * Single Channel Element - used for both SCE and LFE elements.
226  */
227 typedef struct SingleChannelElement {
231  enum BandType band_type[128]; ///< band types
232  int band_type_run_end[120]; ///< band type run end points
233  float sf[120]; ///< scalefactors
234  int sf_idx[128]; ///< scalefactor indices (used by encoder)
235  uint8_t zeroes[128]; ///< band is not coded (used by encoder)
236  DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT
237  DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap
238  DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer
239  DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP
240  PredictorState predictor_state[MAX_PREDICTORS];
241  float *ret; ///< PCM output
243 
244 /**
245  * channel element - generic struct for SCE/CPE/CCE/LFE
246  */
247 typedef struct ChannelElement {
248  // CPE specific
249  int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
250  int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder)
251  uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
252  // shared
254  // CCE specific
258 
259 /**
260  * main AAC context
261  */
262 struct AACContext {
263  AVClass *class;
266 
267  int is_saved; ///< Set if elements have stored overlap from previous frame.
269 
270  /**
271  * @name Channel element related data
272  * @{
273  */
275  ChannelElement *tag_che_map[4][MAX_ELEM_ID];
277  /** @} */
278 
279  /**
280  * @name Aligned temporary buffers
281  * (We do not want to have these on the stack.)
282  * @{
283  */
284  DECLARE_ALIGNED(32, float, buf_mdct)[1024];
285  /** @} */
286 
287  /**
288  * @name Computed / set up during initialization
289  * @{
290  */
297  /** @} */
298 
299  /**
300  * @name Members used for output
301  * @{
302  */
303  SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement
304  /** @} */
305 
306 
307  /**
308  * @name Japanese DTV specific extension
309  * @{
310  */
311  int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel
312  int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel
313  /** @} */
314 
315  DECLARE_ALIGNED(32, float, temp)[128];
316 
319 
320  /* aacdec functions pointers */
323  void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns,
324  IndividualChannelStream *ics, int decode);
326  float *in, IndividualChannelStream *ics);
328 
329 };
330 
332 
333 #endif /* AVCODEC_AAC_H */
int predictor_initialized
Definition: aac.h:170
struct Pulse Pulse
Definition: aac.h:53
static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
Conduct IMDCT and windowing.
This structure describes decoded (raw) audio or video data.
Definition: frame.h:76
AVCodecContext * avctx
Definition: aac.h:264
Definition: aac.h:203
struct LongTermPrediction LongTermPrediction
Long Term Prediction.
else temp
Definition: vf_mcdeint.c:148
Definition: aac.h:56
Definition: aac.h:49
Definition: aac.h:50
float cor1
Definition: aac.h:129
About Git write you should know how to use GIT properly Luckily Git comes with excellent documentation git help man git shows you the available git< command > help man git< command > shows information about the subcommand< command > The most comprehensive manual is the website Git Reference visit they are quite exhaustive You do not need a special username or password All you need is to provide a ssh public key to the Git server admin What follows now is a basic introduction to Git and some FFmpeg specific guidelines Read it at least if you are granted commit privileges to the FFmpeg project you are expected to be familiar with these rules I if not You can get git from etc no matter how small Every one of them has been saved from looking like a fool by this many times It s very easy for stray debug output or cosmetic modifications to slip in
Definition: git-howto.txt:5
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:59
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:249
uint64_t channel_layout
Definition: aac.h:120
#define MAX_LTP_LONG_SFB
Definition: aac.h:46
Dynamic Range Control - decoded from the bitstream but not processed further.
Definition: aac.h:190
ChannelPosition
Definition: aac.h:86
struct PredictorState PredictorState
Predictor State.
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:79
Definition: aac.h:51
int band_incr
Number of DRC bands greater than 1 having DRC info.
Definition: aac.h:195
int dmono_mode
0->not dmono, 1->use first channel, 2->use second channel
Definition: aac.h:312
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:164
Definition: aac.h:60
BandType
Definition: aac.h:75
uint8_t
uint8_t layout_map[MAX_ELEM_ID *4][3]
Definition: aac.h:117
Output configuration under trial specified by an inband PCE.
Definition: aac.h:109
Definition: aac.h:52
TemporalNoiseShaping tns
Definition: aac.h:229
CouplingPoint
The point during decoding at which channel coupling is applied.
Definition: aac.h:98
int num_coupled
number of target elements
Definition: aac.h:215
FFTContext mdct_ltp
Definition: aac.h:293
Scalefactor data are intensity stereo positions.
Definition: aac.h:81
end end ac
Output configuration set in a global header but not yet locked.
Definition: aac.h:111
int random_state
Definition: aac.h:296
float var1
Definition: aac.h:131
MPEG4AudioConfig m4ac
Definition: aac.h:116
SpectralBandReplication sbr
Definition: aac.h:256
FFTContext mdct_small
Definition: aac.h:292
FmtConvertContext fmt_conv
Definition: aac.h:294
ExtensionPayloadID
Definition: aac.h:59
Spectral Band Replication definitions and structures.
FFmpeg Automated Testing Environment ************************************Table of Contents *****************FFmpeg Automated Testing Environment Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass target exec to configure or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script tests fate sh from the FFmpeg sources This script needs to be invoked with 
a configuration file as its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at doc fate_config sh template Create a configuration that suits your based on the configuration template The slot configuration variable can be any string that is not yet used
Definition: fate.txt:34
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:158
Definition: aac.h:55
WindowSequence
Definition: aac.h:68
int num_swb
number of scalefactor window bands
Definition: aac.h:166
external API header
struct DynamicRangeControl DynamicRangeControl
Dynamic Range Control - decoded from the bitstream but not processed further.
int prog_ref_level
A reference level for the long-term program audio level for all channels combined.
Definition: aac.h:198
Output configuration locked in place.
Definition: aac.h:112
Predictor State.
Definition: aac.h:127
AVFloatDSPContext fdsp
Definition: aac.h:295
Definition: fft.h:62
int predictor_reset_group
Definition: aac.h:171
float var0
Definition: aac.h:130
void ff_aacdec_init_mips(AACContext *c)
Definition: aacdec_mips.c:822
int pce_instance_tag
Indicates with which program the DRC info is associated.
Definition: aac.h:191
int interpolation_scheme
Indicates the interpolation scheme used in the SBR QMF domain.
Definition: aac.h:196
coupling parameters
Definition: aac.h:213
int tags_mapped
Definition: aac.h:276
float coef
Definition: aac.h:150
int force_dmono_mode
0->not dmono, 1->use first channel, 2->use second channel
Definition: aac.h:311
int is_saved
Set if elements have stored overlap from previous frame.
Definition: aac.h:267
int warned_num_aac_frames
Definition: aac.h:318
typedef void(RENAME(mix_any_func_type))
Temporal Noise Shaping.
Definition: aac.h:178
static void update_ltp(AACContext *ac, SingleChannelElement *sce)
Update the LTP buffer for next frame.
struct TemporalNoiseShaping TemporalNoiseShaping
Temporal Noise Shaping.
Long Term Prediction.
Definition: aac.h:147
main external API structure.
IndividualChannelStream ics
Definition: aac.h:228
#define MAX_PREDICTORS
Definition: aac.h:136
#define MAX_ELEM_ID
Definition: aac.h:43
Describe the class of an AVClass context structure.
Definition: log.h:50
Spectral data are coded with an escape sequence.
Definition: aac.h:78
struct OutputConfiguration OutputConfiguration
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:165
static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
Apply the long term prediction.
OCStatus
Output configuration status.
Definition: aac.h:107
#define MAX_CHANNELS
Definition: aac.h:42
float * ret
PCM output.
Definition: aac.h:241
#define TNS_MAX_ORDER
Definition: aac.h:45
main AAC context
Definition: aac.h:262
LongTermPrediction ltp
Definition: aac.h:163
#define type
ChannelCoupling coup
Definition: aac.h:255
int ms_mode
Signals mid/side stereo flags coding mode (used by encoder)
Definition: aac.h:250
Output configuration under trial specified by a frame header.
Definition: aac.h:110
enum OCStatus status
Definition: aac.h:121
Scalefactor data are intensity stereo positions.
Definition: aac.h:80
int16_t lag
Definition: aac.h:149
DynamicRangeControl che_drc
Definition: aac.h:268
AVFrame * frame
Definition: aac.h:265
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:227
struct SingleChannelElement SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
static double c[64]
Definition: aac.h:54
Individual Channel Stream.
Definition: aac.h:157
static void windowing_and_mdct_ltp(AACContext *ac, float *out, float *in, IndividualChannelStream *ics)
Apply windowing and MDCT to obtain the spectral coefficient from the predicted sample by LTP...
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:247
int start
Definition: aac.h:205
struct ChannelCoupling ChannelCoupling
coupling parameters
float r1
Definition: aac.h:133
Scalefactors and spectral data are all zero.
Definition: aac.h:76
int num_pulse
Definition: aac.h:204
float cor0
Definition: aac.h:128
FFTContext mdct
Definition: aac.h:291
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=av_sample_fmt_is_planar(in_fmt);out_planar=av_sample_fmt_is_planar(out_fmt);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_dlog(ac->avr,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> out
static int decode(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
Definition: crystalhd.c:868
static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, IndividualChannelStream *ics, int decode)
Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4...
float r0
Definition: aac.h:132
const char int length
Definition: avisynth_c.h:668
int8_t present
Definition: aac.h:148
struct ChannelElement ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:99
Spectral Band Replication.
Definition: sbr.h:137
int layout_map_tags
Definition: aac.h:118
struct IndividualChannelStream IndividualChannelStream
Individual Channel Stream.
Output unconfigured.
Definition: aac.h:108
This and later band types encode two values (rather than four) with one code word.
Definition: aac.h:77
RawDataBlockType
Definition: aac.h:48