wmavoice.c File Reference

Windows Media Audio Voice compatible decoder. More...

#include <math.h>
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mem.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "dct.h"
#include "rdft.h"
#include "sinewin.h"
Include dependency graph for wmavoice.c:

Go to the source code of this file.

Data Structures

struct  frame_type_desc
 Description of frame types. More...
 
struct  WMAVoiceContext
 WMA Voice decoding context. More...
 

Macros

#define MAX_BLOCKS   8
 maximum number of blocks per frame More...
 
#define MAX_LSPS   16
 maximum filter order More...
 
#define MAX_LSPS_ALIGN16   16
 same as MAX_LSPS; needs to be multiple More...
 
#define MAX_FRAMES   3
 maximum number of frames per superframe More...
 
#define MAX_FRAMESIZE   160
 maximum number of samples per frame More...
 
#define MAX_SIGNAL_HISTORY   416
 maximum excitation signal history More...
 
#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)
 maximum number of samples per superframe More...
 
#define SFRAME_CACHE_MAXSIZE   256
 maximum cache size for frame data that More...
 
#define VLC_NBITS   6
 number of bits to read per VLC iteration More...
 
#define log_range(var, assign)
 

Enumerations

enum  { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }
 Adaptive codebook types. More...
 
enum  { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }
 Fixed codebook types. More...
 

Functions

static av_cold int decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])
 Set up the variable bit mode (VBM) tree from container extradata. More...
 
static av_cold int wmavoice_decode_init (AVCodecContext *ctx)
 Set up decoder with parameters from demuxer (extradata etc.). More...
 
static void dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
 Dequantize LSPs. More...
 
static int pRNG (int frame_cntr, int block_num, int block_size)
 Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries). More...
 
static void synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
 Parse hardcoded signal for a single block. More...
 
static void synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
 Parse FCB/ACB signal for a single block. More...
 
static void synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
 Parse data in a single block. More...
 
static int synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
 Synthesize output samples for a single frame. More...
 
static void stabilize_lsps (double *lsps, int num)
 Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering. More...
 
static int check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s)
 Test if there's enough bits to read 1 superframe. More...
 
static int synth_superframe (AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
 Synthesize output samples for a single superframe. More...
 
static int parse_packet_header (WMAVoiceContext *s)
 Parse the packet header at the start of each packet (input data to this decoder). More...
 
static void copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
 Copy (unaligned) bits from gb/data/size to pb. More...
 
static int wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
 Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output). More...
 
static av_cold int wmavoice_decode_end (AVCodecContext *ctx)
 
static av_cold void wmavoice_flush (AVCodecContext *ctx)
 
Postfilter functions

Postfilter functions (gain control, wiener denoise filter, DC filter, kalman smoothening, plus surrounding code to wrap it)

static void adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
 Adaptive gain control (as used in postfilter). More...
 
static int kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
 Kalman smoothing function. More...
 
static float tilt_factor (const float *lpcs, int n_lpcs)
 Get the tilt factor of a formant filter from its transfer function. More...
 
static void calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
 Derive denoise filter coefficients (in real domain) from the LPCs. More...
 
static void wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
 This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it. More...
 
static void postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
 Averaging projection filter, the postfilter used in WMAVoice. More...
 
LSP dequantization routines

LSP dequantization routines, for 10/16LSPs and independent/residual coding.

Note
we assume enough bits are available, caller should check. lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits; lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
static void dequant_lsp10i (GetBitContext *gb, double *lsps)
 Parse 10 independently-coded LSPs. More...
 
static void dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...
 
static void dequant_lsp16i (GetBitContext *gb, double *lsps)
 Parse 16 independently-coded LSPs. More...
 
static void dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...
 
Pitch-adaptive window coding functions

The next few functions are for pitch-adaptive window coding.

static void aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
 Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame. More...
 
static void aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply second set of pitch-adaptive window pulses. More...
 
static void aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply first set of pitch-adaptive window pulses. More...
 

Variables

static VLC frame_type_vlc
 Frame type VLC coding. More...
 
static const struct frame_type_desc frame_descs [17]
 
AVCodec ff_wmavoice_decoder
 

Detailed Description

Windows Media Audio Voice compatible decoder.

Author
Ronald S. Bultje rsbul.nosp@m.tje@.nosp@m.gmail.nosp@m..com

Definition in file wmavoice.c.

Macro Definition Documentation

#define log_range (   var,
  assign 
)
Value:
do { \
float tmp = log10f(assign); var = tmp; \
max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
} while (0)
if max(w)>1 w=0.9 *w/max(w)
#define FFMAX(a, b)
Definition: common.h:56
#define FFMIN(a, b)
Definition: common.h:58
#define log10f(x)
Definition: libm.h:132
float min

Referenced by calc_input_response().

#define MAX_BLOCKS   8

maximum number of blocks per frame

Definition at line 46 of file wmavoice.c.

Referenced by synth_frame().

#define MAX_FRAMES   3

maximum number of frames per superframe

Definition at line 50 of file wmavoice.c.

Referenced by check_bits_for_superframe(), and synth_superframe().

#define MAX_FRAMESIZE   160

maximum number of samples per frame

Definition at line 51 of file wmavoice.c.

Referenced by aw_parse_coords(), aw_pulse_set1(), aw_pulse_set2(), postfilter(), synth_block_fcb_acb(), synth_block_hardcoded(), synth_frame(), and synth_superframe().

#define MAX_LSPS   16

maximum filter order

Definition at line 47 of file wmavoice.c.

Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().

#define MAX_LSPS_ALIGN16   16

same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment

Definition at line 48 of file wmavoice.c.

Referenced by postfilter(), and wmavoice_flush().

#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

Definition at line 53 of file wmavoice.c.

Referenced by synth_superframe().

#define MAX_SIGNAL_HISTORY   416

maximum excitation signal history

Definition at line 52 of file wmavoice.c.

Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().

#define SFRAME_CACHE_MAXSIZE   256

maximum cache size for frame data that was split over two packets

Definition at line 55 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

#define VLC_NBITS   6

number of bits to read per VLC iteration

Definition at line 57 of file wmavoice.c.

Referenced by decode_vbmtree().

Enumeration Type Documentation

anonymous enum

Adaptive codebook types.

Enumerator
ACB_TYPE_NONE 

no adaptive codebook (only hardcoded fixed)

ACB_TYPE_ASYMMETRIC 

adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Signal is generated using an asymmetric sinc window function

Note
see wmavoice_ipol1_coeffs
ACB_TYPE_HAMMING 

Per-block pitch with signal generation using a Hamming sinc window function.

Note
see wmavoice_ipol2_coeffs

Definition at line 67 of file wmavoice.c.

anonymous enum

Fixed codebook types.

Enumerator
FCB_TYPE_SILENCE 

comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values

FCB_TYPE_HARDCODED 

hardcoded (fixed) codebook with per-block gain values

FCB_TYPE_AW_PULSES 

Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.

FCB_TYPE_EXC_PULSES 

Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition at line 82 of file wmavoice.c.

Function Documentation

static void adaptive_gain_control ( float *  out,
const float *  in,
const float *  speech_synth,
int  size,
float  alpha,
float *  gain_mem 
)
static

Adaptive gain control (as used in postfilter).

Identical to ff_adaptive_gain_control() in acelp_vectors.c, except that the energy here is calculated using sum(abs(...)), whereas the other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).

Parameters
outoutput buffer for filtered samples
ininput buffer containing the samples as they are after the postfilter steps so far
speech_synthinput buffer containing speech synth before postfilter
sizeinput buffer size
alphaexponential filter factor
gain_mempointer to filter memory (single float)

Definition at line 469 of file wmavoice.c.

Referenced by postfilter().

static void aw_parse_coords ( WMAVoiceContext s,
GetBitContext gb,
const int *  pitch 
)
static

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.

Parameters
sWMA Voice decoding context private data
gbbit I/O context
pitchpitch for each block in this frame

Definition at line 998 of file wmavoice.c.

Referenced by synth_frame().

static void aw_pulse_set1 ( WMAVoiceContext s,
GetBitContext gb,
int  block_idx,
AMRFixed fcb 
)
static

Apply first set of pitch-adaptive window pulses.

Parameters
sWMA Voice decoding context private data
gbbit I/O context
block_idxblock index in frame [0, 1]
fcbstorage location for fixed codebook pulse info

Definition at line 1138 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void aw_pulse_set2 ( WMAVoiceContext s,
GetBitContext gb,
int  block_idx,
AMRFixed fcb 
)
static

Apply second set of pitch-adaptive window pulses.

Parameters
sWMA Voice decoding context private data
gbbit I/O context
block_idxblock index in frame [0, 1]
fcbstructure containing fixed codebook vector info

Definition at line 1049 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void calc_input_response ( WMAVoiceContext s,
float *  lpcs,
int  fcb_type,
float *  coeffs,
int  remainder 
)
static

Derive denoise filter coefficients (in real domain) from the LPCs.

Definition at line 568 of file wmavoice.c.

Referenced by wiener_denoise().

static int check_bits_for_superframe ( GetBitContext orig_gb,
WMAVoiceContext s 
)
static

Test if there's enough bits to read 1 superframe.

Parameters
orig_gbbit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position
sWMA Voice decoding context private data
Returns
-1 if unsupported, 1 on not enough bits or 0 if OK.

Definition at line 1647 of file wmavoice.c.

Referenced by synth_superframe().

static void copy_bits ( PutBitContext pb,
const uint8_t data,
int  size,
GetBitContext gb,
int  nbits 
)
static

Copy (unaligned) bits from gb/data/size to pb.

Parameters
pbtarget buffer to copy bits into
datasource buffer to copy bits from
sizesize of the source data, in bytes
gbbit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling avpriv_copy_bits() on aligned source data
nbitsthe amount of bits to copy from source to target
Note
after calling this function, the current position in the input bit I/O context is undefined.

Definition at line 1898 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static av_cold int decode_vbmtree ( GetBitContext gb,
int8_t  vbm_tree[25] 
)
static

Set up the variable bit mode (VBM) tree from container extradata.

Parameters
gbbit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree).
vbm_treepointer to array to which the decoded VBM tree will be written.
Returns
0 on success, <0 on error.

Definition at line 304 of file wmavoice.c.

Referenced by wmavoice_decode_init().

static void dequant_lsp10i ( GetBitContext gb,
double *  lsps 
)
static

Parse 10 independently-coded LSPs.

Definition at line 853 of file wmavoice.c.

Referenced by dequant_lsp10r(), and synth_superframe().

static void dequant_lsp10r ( GetBitContext gb,
double *  i_lsps,
const double *  old,
double *  a1,
double *  a2,
int  q_mode 
)
static

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 879 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsp16i ( GetBitContext gb,
double *  lsps 
)
static

Parse 16 independently-coded LSPs.

Definition at line 915 of file wmavoice.c.

Referenced by dequant_lsp16r(), and synth_superframe().

static void dequant_lsp16r ( GetBitContext gb,
double *  i_lsps,
const double *  old,
double *  a1,
double *  a2,
int  q_mode 
)
static

Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 948 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsps ( double *  lsps,
int  num,
const uint16_t *  values,
const uint16_t *  sizes,
int  n_stages,
const uint8_t table,
const double *  mul_q,
const double *  base_q 
)
static

Dequantize LSPs.

Parameters
lspsoutput pointer to the array that will hold the LSPs
numnumber of LSPs to be dequantized
valuesquantized values, contains n_stages values
sizesrange (i.e. max value) of each quantized value
n_stagesnumber of dequantization runs
tabledequantization table to be used
mul_qLSF multiplier
base_qbase (lowest) LSF values

Definition at line 821 of file wmavoice.c.

Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().

static int kalman_smoothen ( WMAVoiceContext s,
int  pitch,
const float *  in,
float *  out,
int  size 
)
static

Kalman smoothing function.

This function looks pitch +/- 3 samples back into history to find the best fitting curve (the one giving the optimal gain of the two signals, i.e. the highest dot product between the two), and then uses that signal history to smoothen the output of the speech synthesis filter.

Parameters
sWMA Voice decoding context
pitchpitch of the speech signal
ininput speech signal
outoutput pointer for smoothened signal
sizeinput/output buffer size
Returns
-1 if no smoothening took place, e.g. because no optimal fit could be found, or 0 on success.

Definition at line 509 of file wmavoice.c.

Referenced by postfilter().

static int parse_packet_header ( WMAVoiceContext s)
static

Parse the packet header at the start of each packet (input data to this decoder).

Parameters
sWMA Voice decoding context private data
Returns
1 if not enough bits were available, or 0 on success.

Definition at line 1863 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static void postfilter ( WMAVoiceContext s,
const float *  synth,
float *  samples,
int  size,
const float *  lpcs,
float *  zero_exc_pf,
int  fcb_type,
int  pitch 
)
static

Averaging projection filter, the postfilter used in WMAVoice.

This uses the following steps:

  • A zero-synthesis filter (generate excitation from synth signal)
  • Kalman smoothing on excitation, based on pitch
  • Re-synthesized smoothened output
  • Iterative Wiener denoise filter
  • Adaptive gain filter
  • DC filter
Parameters
sWMAVoice decoding context
synthSpeech synthesis output (before postfilter)
samplesOutput buffer for filtered samples
sizeBuffer size of synth & samples
lpcsGenerated LPCs used for speech synthesis
zero_exc_pfdestination for zero synthesis filter (16-byte aligned)
fcb_typeFrame type (silence, hardcoded, AW-pulses or FCB-pulses)
pitchPitch of the input signal

Definition at line 767 of file wmavoice.c.

Referenced by synth_frame().

static int pRNG ( int  frame_cntr,
int  block_num,
int  block_size 
)
static

Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

Parameters
frame_cntrcurrent frame number
block_numcurrent block index
block_sizeamount of entries we want to read from a table that has 1000 entries
Returns
a (non-)random number in the [0, 1000 - block_size] range.

Definition at line 1199 of file wmavoice.c.

Referenced by synth_block_hardcoded().

static void stabilize_lsps ( double *  lsps,
int  num 
)
static

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

Parameters
lspsarray of LSPs
numsize of LSP array
Note
basically a double version of ff_acelp_reorder_lsf(), might be useful to put in a generic location later on. Parts are also present in ff_set_min_dist_lsf() + ff_sort_nearly_sorted_floats(), which is in float.

Definition at line 1609 of file wmavoice.c.

Referenced by synth_superframe().

static void synth_block ( WMAVoiceContext s,
GetBitContext gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const double *  lsps,
const double *  prev_lsps,
const struct frame_type_desc frame_desc,
float *  excitation,
float *  synth 
)
static

Parse data in a single block.

Note
we assume enough bits are available, caller should check.
Parameters
sWMA Voice decoding context private data
gbbit I/O context
block_idxindex of the to-be-read block
sizeamount of samples to be read in this block
block_pitch_sh2pitch for this block << 2
lspsLSPs for (the end of) this frame
prev_lspsLSPs for the last frame
frame_descframe type descriptor
excitationtarget memory for the ACB+FCB interpolated signal
synthtarget memory for the speech synthesis filter output
Returns
0 on success, <0 on error.

Definition at line 1390 of file wmavoice.c.

Referenced by synth_frame().

static void synth_block_fcb_acb ( WMAVoiceContext s,
GetBitContext gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const struct frame_type_desc frame_desc,
float *  excitation 
)
static

Parse FCB/ACB signal for a single block.

Note
see synth_block().

Definition at line 1266 of file wmavoice.c.

Referenced by synth_block().

static void synth_block_hardcoded ( WMAVoiceContext s,
GetBitContext gb,
int  block_idx,
int  size,
const struct frame_type_desc frame_desc,
float *  excitation 
)
static

Parse hardcoded signal for a single block.

Note
see synth_block().

Definition at line 1235 of file wmavoice.c.

Referenced by synth_block().

static int synth_frame ( AVCodecContext ctx,
GetBitContext gb,
int  frame_idx,
float *  samples,
const double *  lsps,
const double *  prev_lsps,
float *  excitation,
float *  synth 
)
static

Synthesize output samples for a single frame.

Note
we assume enough bits are available, caller should check.
Parameters
ctxWMA Voice decoder context
gbbit I/O context (s->gb or one for cross-packet superframes)
frame_idxFrame number within superframe [0-2]
samplespointer to output sample buffer, has space for at least 160 samples
lspsLSP array
prev_lspsarray of previous frame's LSPs
excitationtarget buffer for excitation signal
synthtarget buffer for synthesized speech data
Returns
0 on success, <0 on error.

Definition at line 1433 of file wmavoice.c.

Referenced by synth_superframe().

static int synth_superframe ( AVCodecContext ctx,
AVFrame frame,
int *  got_frame_ptr 
)
static

Synthesize output samples for a single superframe.

If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.

WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames (residually); they can also be specified individually per-frame. See the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.

Parameters
ctxWMA Voice decoder context
Returns
0 on success, <0 on error or 1 if there was not enough data to fully parse the superframe

Definition at line 1732 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static float tilt_factor ( const float *  lpcs,
int  n_lpcs 
)
static

Get the tilt factor of a formant filter from its transfer function.

See also
tilt_factor() in amrnbdec.c, which does essentially the same, but somehow (??) it does a speech synthesis filter in the middle, which is missing here
Parameters
lpcsLPC coefficients
n_lpcsSize of LPC buffer
Returns
the tilt factor

Definition at line 555 of file wmavoice.c.

Referenced by calc_input_response(), and wiener_denoise().

static void wiener_denoise ( WMAVoiceContext s,
int  fcb_type,
float *  synth_pf,
int  size,
const float *  lpcs 
)
static

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

  • take RDFT of LPCs to get the power spectrum of the noise + speech;
  • using this power spectrum, calculate (for each frequency) the Wiener filter gain, which depends on the frequency power and desired level of noise subtraction (when set too high, this leads to artifacts). We can do this symmetrically over the X-axis (so 0-4kHz is the inverse of 4-8kHz);
  • by doing a phase shift, calculate the Hilbert transform of this array of per-frequency filter-gains to get the filtering coefficients;
  • smoothen/normalize/de-tilt these filter coefficients as desired;
  • take RDFT of noisy sound, apply the coefficients and take its IRDFT to get the denoised speech signal;
  • the leftover (i.e. output of the IRDFT on denoised speech data beyond the frame boundary) are saved and applied to subsequent frames by an overlap-add method (otherwise you get clicking-artifacts).
Parameters
sWMA Voice decoding context
fcb_typeFrame (codebook) type
synth_pfinput: the noisy speech signal, output: denoised speech data; should be 16-byte aligned (for ASM purposes)
sizesize of the speech data
lpcsLPCs used to synthesize this frame's speech data

Definition at line 685 of file wmavoice.c.

Referenced by postfilter().

static av_cold int wmavoice_decode_end ( AVCodecContext ctx)
static

Definition at line 2002 of file wmavoice.c.

static av_cold int wmavoice_decode_init ( AVCodecContext ctx)
static

Set up decoder with parameters from demuxer (extradata etc.).

Extradata layout:

  • byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
  • byte 19-22: flags field (annoyingly in LE; see below for known values),
  • byte 23-46: variable bitmode tree (really just 17 * 3 bits, rest is 0).

Definition at line 338 of file wmavoice.c.

static int wmavoice_decode_packet ( AVCodecContext ctx,
void data,
int *  got_frame_ptr,
AVPacket avpkt 
)
static

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.

For more information about frames, see synth_superframe().

Definition at line 1927 of file wmavoice.c.

static av_cold void wmavoice_flush ( AVCodecContext ctx)
static

Definition at line 2016 of file wmavoice.c.

Variable Documentation

AVCodec ff_wmavoice_decoder
Initial value:
= {
.name = "wmavoice",
.priv_data_size = sizeof(WMAVoiceContext),
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
}
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
#define CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:2016
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
WMA Voice decoding context.
Definition: wmavoice.c:132
static void flush(AVCodecContext *avctx)
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:338
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1927
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:375
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2002
#define CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket Normally demuxers return one frame at a time...
static int decode(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
Definition: crystalhd.c:868

Definition at line 2044 of file wmavoice.c.

const struct frame_type_desc frame_descs[17]
static
Initial value:
= {
{ 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
{ 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
}
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:83
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:86
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:88
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:68
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:74
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs...
Definition: wmavoice.c:90
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:69

Referenced by check_bits_for_superframe(), and synth_frame().

VLC frame_type_vlc
static

Frame type VLC coding.

Definition at line 62 of file wmavoice.c.