yading@10: /* yading@10: * audio encoder psychoacoustic model yading@10: * Copyright (C) 2008 Konstantin Shishkov yading@10: * yading@10: * This file is part of FFmpeg. yading@10: * yading@10: * FFmpeg is free software; you can redistribute it and/or yading@10: * modify it under the terms of the GNU Lesser General Public yading@10: * License as published by the Free Software Foundation; either yading@10: * version 2.1 of the License, or (at your option) any later version. yading@10: * yading@10: * FFmpeg is distributed in the hope that it will be useful, yading@10: * but WITHOUT ANY WARRANTY; without even the implied warranty of yading@10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU yading@10: * Lesser General Public License for more details. yading@10: * yading@10: * You should have received a copy of the GNU Lesser General Public yading@10: * License along with FFmpeg; if not, write to the Free Software yading@10: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA yading@10: */ yading@10: yading@10: #ifndef AVCODEC_PSYMODEL_H yading@10: #define AVCODEC_PSYMODEL_H yading@10: yading@10: #include "avcodec.h" yading@10: yading@10: /** maximum possible number of bands */ yading@10: #define PSY_MAX_BANDS 128 yading@10: /** maximum number of channels */ yading@10: #define PSY_MAX_CHANS 20 yading@10: yading@10: #define AAC_CUTOFF(s) (s->bit_rate ? FFMIN3(4000 + s->bit_rate/8, 12000 + s->bit_rate/32, s->sample_rate / 2) : (s->sample_rate / 2)) yading@10: yading@10: /** yading@10: * single band psychoacoustic information yading@10: */ yading@10: typedef struct FFPsyBand { yading@10: int bits; yading@10: float energy; yading@10: float threshold; yading@10: float distortion; yading@10: float perceptual_weight; yading@10: } FFPsyBand; yading@10: yading@10: /** yading@10: * single channel psychoacoustic information yading@10: */ yading@10: typedef struct FFPsyChannel { yading@10: FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information yading@10: float entropy; ///< total PE for this channel yading@10: } FFPsyChannel; yading@10: yading@10: /** yading@10: * psychoacoustic information for an arbitrary group of channels yading@10: */ yading@10: typedef struct FFPsyChannelGroup { yading@10: FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group yading@10: uint8_t num_ch; ///< number of channels in this group yading@10: uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group yading@10: } FFPsyChannelGroup; yading@10: yading@10: /** yading@10: * windowing related information yading@10: */ yading@10: typedef struct FFPsyWindowInfo { yading@10: int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next yading@10: int window_shape; ///< window shape (sine/KBD/whatever) yading@10: int num_windows; ///< number of windows in a frame yading@10: int grouping[8]; ///< window grouping (for e.g. AAC) yading@10: int *window_sizes; ///< sequence of window sizes inside one frame (for eg. WMA) yading@10: } FFPsyWindowInfo; yading@10: yading@10: /** yading@10: * context used by psychoacoustic model yading@10: */ yading@10: typedef struct FFPsyContext { yading@10: AVCodecContext *avctx; ///< encoder context yading@10: const struct FFPsyModel *model; ///< encoder-specific model functions yading@10: yading@10: FFPsyChannel *ch; ///< single channel information yading@10: FFPsyChannelGroup *group; ///< channel group information yading@10: int num_groups; ///< number of channel groups yading@10: yading@10: uint8_t **bands; ///< scalefactor band sizes for possible frame sizes yading@10: int *num_bands; ///< number of scalefactor bands for possible frame sizes yading@10: int num_lens; ///< number of scalefactor band sets yading@10: yading@10: struct { yading@10: int size; ///< size of the bitresevoir in bits yading@10: int bits; ///< number of bits used in the bitresevoir yading@10: } bitres; yading@10: yading@10: void* model_priv_data; ///< psychoacoustic model implementation private data yading@10: } FFPsyContext; yading@10: yading@10: /** yading@10: * codec-specific psychoacoustic model implementation yading@10: */ yading@10: typedef struct FFPsyModel { yading@10: const char *name; yading@10: int (*init) (FFPsyContext *apc); yading@10: yading@10: /** yading@10: * Suggest window sequence for channel. yading@10: * yading@10: * @param ctx model context yading@10: * @param audio samples for the current frame yading@10: * @param la lookahead samples (NULL when unavailable) yading@10: * @param channel number of channel element to analyze yading@10: * @param prev_type previous window type yading@10: * yading@10: * @return suggested window information in a structure yading@10: */ yading@10: FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); yading@10: yading@10: /** yading@10: * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. yading@10: * yading@10: * @param ctx model context yading@10: * @param channel channel number of the first channel in the group to perform analysis on yading@10: * @param coeffs array of pointers to the transformed coefficients yading@10: * @param wi window information for the channels in the group yading@10: */ yading@10: void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); yading@10: yading@10: void (*end) (FFPsyContext *apc); yading@10: } FFPsyModel; yading@10: yading@10: /** yading@10: * Initialize psychoacoustic model. yading@10: * yading@10: * @param ctx model context yading@10: * @param avctx codec context yading@10: * @param num_lens number of possible frame lengths yading@10: * @param bands scalefactor band lengths for all frame lengths yading@10: * @param num_bands number of scalefactor bands for all frame lengths yading@10: * @param num_groups number of channel groups yading@10: * @param group_map array with # of channels in group - 1, for each group yading@10: * yading@10: * @return zero if successful, a negative value if not yading@10: */ yading@10: int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, yading@10: const uint8_t **bands, const int *num_bands, yading@10: int num_groups, const uint8_t *group_map); yading@10: yading@10: /** yading@10: * Determine what group a channel belongs to. yading@10: * yading@10: * @param ctx psymodel context yading@10: * @param channel channel to locate the group for yading@10: * yading@10: * @return pointer to the FFPsyChannelGroup this channel belongs to yading@10: */ yading@10: FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); yading@10: yading@10: /** yading@10: * Cleanup model context at the end. yading@10: * yading@10: * @param ctx model context yading@10: */ yading@10: void ff_psy_end(FFPsyContext *ctx); yading@10: yading@10: yading@10: /************************************************************************** yading@10: * Audio preprocessing stuff. * yading@10: * This should be moved into some audio filter eventually. * yading@10: **************************************************************************/ yading@10: struct FFPsyPreprocessContext; yading@10: yading@10: /** yading@10: * psychoacoustic model audio preprocessing initialization yading@10: */ yading@10: struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); yading@10: yading@10: /** yading@10: * Preprocess several channel in audio frame in order to compress it better. yading@10: * yading@10: * @param ctx preprocessing context yading@10: * @param audio samples to be filtered (in place) yading@10: * @param channels number of channel to preprocess yading@10: */ yading@10: void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels); yading@10: yading@10: /** yading@10: * Cleanup audio preprocessing module. yading@10: */ yading@10: void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); yading@10: yading@10: #endif /* AVCODEC_PSYMODEL_H */