Windows Media Audio Voice compatible decoder. More...

#include <math.h>
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mem.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "dct.h"
#include "rdft.h"
#include "sinewin.h"

Include dependency graph for wmavoice.c:

Go to the source code of this file.

Data Structures
struct	frame_type_desc
	Description of frame types. More...

struct	WMAVoiceContext
	WMA Voice decoding context. More...

Macros
#define	MAX_BLOCKS 8
	maximum number of blocks per frame More...

#define	MAX_LSPS 16
	maximum filter order More...

#define	MAX_LSPS_ALIGN16 16
	same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment More...

#define	MAX_FRAMES 3
	maximum number of frames per superframe More...

#define	MAX_FRAMESIZE 160
	maximum number of samples per frame More...

#define	MAX_SIGNAL_HISTORY 416
	maximum excitation signal history More...

#define	MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
	maximum number of samples per superframe More...

#define	SFRAME_CACHE_MAXSIZE 256
	maximum cache size for frame data that was split over two packets More...

#define	VLC_NBITS 6
	number of bits to read per VLC iteration More...

#define	log_range(var, assign)

Enumerations
enum	{ ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }
	Adaptive codebook types. More...

enum	{ FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }
	Fixed codebook types. More...

Functions
static av_cold int	decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])
	Set up the variable bit mode (VBM) tree from container extradata. More...

static av_cold int	wmavoice_decode_init (AVCodecContext *ctx)
	Set up decoder with parameters from demuxer (extradata etc.). More...

static void	dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
	Dequantize LSPs. More...

static int	pRNG (int frame_cntr, int block_num, int block_size)
	Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries). More...

static void	synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
	Parse hardcoded signal for a single block. More...

static void	synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
	Parse FCB/ACB signal for a single block. More...

static void	synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
	Parse data in a single block. More...

static int	synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
	Synthesize output samples for a single frame. More...

static void	stabilize_lsps (double *lsps, int num)
	Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering. More...

static int	check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s)
	Test if there's enough bits to read 1 superframe. More...

static int	synth_superframe (AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
	Synthesize output samples for a single superframe. More...

static int	parse_packet_header (WMAVoiceContext *s)
	Parse the packet header at the start of each packet (input data to this decoder). More...

static void	copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
	Copy (unaligned) bits from gb/data/size to pb. More...

static int	wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
	Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output). More...

static av_cold int	wmavoice_decode_end (AVCodecContext *ctx)

static av_cold void	wmavoice_flush (AVCodecContext *ctx)

Postfilter functions
Postfilter functions (gain control, wiener denoise filter, DC filter, kalman smoothening, plus surrounding code to wrap it)
static void	adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
	Adaptive gain control (as used in postfilter). More...

static int	kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
	Kalman smoothing function. More...

static float	tilt_factor (const float *lpcs, int n_lpcs)
	Get the tilt factor of a formant filter from its transfer function. More...

static void	calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
	Derive denoise filter coefficients (in real domain) from the LPCs. More...

static void	wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
	This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it. More...

static void	postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
	Averaging projection filter, the postfilter used in WMAVoice. More...

LSP dequantization routines
LSP dequantization routines, for 10/16LSPs and independent/residual coding. Note we assume enough bits are available, caller should check. lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits; lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
static void	dequant_lsp10i (GetBitContext *gb, double *lsps)
	Parse 10 independently-coded LSPs. More...

static void	dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
	Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...

static void	dequant_lsp16i (GetBitContext *gb, double *lsps)
	Parse 16 independently-coded LSPs. More...

static void	dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
	Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...

Pitch-adaptive window coding functions
The next few functions are for pitch-adaptive window coding.
static void	aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
	Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame. More...

static void	aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
	Apply second set of pitch-adaptive window pulses. More...

static void	aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
	Apply first set of pitch-adaptive window pulses. More...

Variables
static VLC	frame_type_vlc
	Frame type VLC coding. More...

static const struct frame_type_desc	frame_descs [17]

AVCodec	ff_wmavoice_decoder

Detailed Description

Windows Media Audio Voice compatible decoder.

Author: Ronald S. Bultje rsbul.nosp@m.tje@.nosp@m.gmail.nosp@m..com

Definition in file wmavoice.c.

Macro Definition Documentation

#define log_range	(	var,
		assign
	)

Value:

do { \
        float tmp = log10f(assign);  var = tmp; \
        max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
    } while (0)

Referenced by calc_input_response().

#define MAX_BLOCKS 8

maximum number of blocks per frame

Definition at line 46 of file wmavoice.c.

Referenced by synth_frame().

#define MAX_FRAMES 3

maximum number of frames per superframe

Definition at line 50 of file wmavoice.c.

Referenced by check_bits_for_superframe(), and synth_superframe().

#define MAX_FRAMESIZE 160

maximum number of samples per frame

Definition at line 51 of file wmavoice.c.

Referenced by aw_parse_coords(), aw_pulse_set1(), aw_pulse_set2(), postfilter(), synth_block_fcb_acb(), synth_block_hardcoded(), synth_frame(), and synth_superframe().

#define MAX_LSPS 16

maximum filter order

Definition at line 47 of file wmavoice.c.

Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().

#define MAX_LSPS_ALIGN16 16

same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment

Definition at line 48 of file wmavoice.c.

Referenced by postfilter(), and wmavoice_flush().

#define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

Definition at line 53 of file wmavoice.c.

Referenced by synth_superframe().

#define MAX_SIGNAL_HISTORY 416

maximum excitation signal history

Definition at line 52 of file wmavoice.c.

Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().

#define SFRAME_CACHE_MAXSIZE 256

maximum cache size for frame data that was split over two packets

Definition at line 55 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

#define VLC_NBITS 6

number of bits to read per VLC iteration

Definition at line 57 of file wmavoice.c.

Referenced by decode_vbmtree().

Enumeration Type Documentation

anonymous enum

Adaptive codebook types.

Enumerator

ACB_TYPE_NONE

no adaptive codebook (only hardcoded fixed)

ACB_TYPE_ASYMMETRIC

adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Signal is generated using an asymmetric sinc window function

Note: see wmavoice_ipol1_coeffs

ACB_TYPE_HAMMING

Per-block pitch with signal generation using a Hamming sinc window function.

Note: see wmavoice_ipol2_coeffs

Definition at line 67 of file wmavoice.c.

anonymous enum

Fixed codebook types.

Enumerator
FCB_TYPE_SILENCE	comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values
FCB_TYPE_HARDCODED	hardcoded (fixed) codebook with per-block gain values
FCB_TYPE_AW_PULSES	Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
FCB_TYPE_EXC_PULSES	Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition at line 82 of file wmavoice.c.

Function Documentation

static void adaptive_gain_control	(	float *	out,
		const float *	in,
		const float *	speech_synth,
		int	size,
		float	alpha,
		float *	gain_mem
	)

static

Adaptive gain control (as used in postfilter).

Identical to ff_adaptive_gain_control() in acelp_vectors.c, except that the energy here is calculated using sum(abs(...)), whereas the other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).

Parameters

out	output buffer for filtered samples
in	input buffer containing the samples as they are after the postfilter steps so far
speech_synth	input buffer containing speech synth before postfilter
size	input buffer size
alpha	exponential filter factor
gain_mem	pointer to filter memory (single float)

Definition at line 469 of file wmavoice.c.

Referenced by postfilter().

static void aw_parse_coords	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		const int *	pitch
	)

static

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.

Parameters

s	WMA Voice decoding context private data
gb	bit I/O context
pitch	pitch for each block in this frame

Definition at line 998 of file wmavoice.c.

Referenced by synth_frame().

static void aw_pulse_set1	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		AMRFixed *	fcb
	)

static

Apply first set of pitch-adaptive window pulses.

Parameters

s	WMA Voice decoding context private data
gb	bit I/O context
block_idx	block index in frame [0, 1]
fcb	storage location for fixed codebook pulse info

Definition at line 1138 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void aw_pulse_set2	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		AMRFixed *	fcb
	)

static

Apply second set of pitch-adaptive window pulses.

Parameters

s	WMA Voice decoding context private data
gb	bit I/O context
block_idx	block index in frame [0, 1]
fcb	structure containing fixed codebook vector info

Definition at line 1049 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void calc_input_response	(	WMAVoiceContext *	s,
		float *	lpcs,
		int	fcb_type,
		float *	coeffs,
		int	remainder
	)

static

Derive denoise filter coefficients (in real domain) from the LPCs.

Definition at line 568 of file wmavoice.c.

Referenced by wiener_denoise().

static int check_bits_for_superframe	(	GetBitContext *	orig_gb,
		WMAVoiceContext *	s
	)

static

Test if there's enough bits to read 1 superframe.

Parameters

orig_gb	bit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position
s	WMA Voice decoding context private data

Returns: -1 if unsupported, 1 on not enough bits or 0 if OK.

Definition at line 1647 of file wmavoice.c.

Referenced by synth_superframe().

static void copy_bits	(	PutBitContext *	pb,
		const uint8_t *	data,
		int	size,
		GetBitContext *	gb,
		int	nbits
	)

static

Copy (unaligned) bits from gb/data/size to pb.

Parameters

pb	target buffer to copy bits into
data	source buffer to copy bits from
size	size of the source data, in bytes
gb	bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling avpriv_copy_bits() on aligned source data
nbits	the amount of bits to copy from source to target

Note: after calling this function, the current position in the input bit I/O context is undefined.

Definition at line 1898 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static av_cold int decode_vbmtree	(	GetBitContext *	gb,
		int8_t	vbm_tree[25]
	)

static

Set up the variable bit mode (VBM) tree from container extradata.

Parameters

gb	bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree).
vbm_tree	pointer to array to which the decoded VBM tree will be written.

Returns: 0 on success, <0 on error.

Definition at line 304 of file wmavoice.c.

Referenced by wmavoice_decode_init().

static void dequant_lsp10i	(	GetBitContext *	gb,
		double *	lsps
	)

static

Parse 10 independently-coded LSPs.

Definition at line 853 of file wmavoice.c.

Referenced by dequant_lsp10r(), and synth_superframe().

static void dequant_lsp10r	(	GetBitContext *	gb,
		double *	i_lsps,
		const double *	old,
		double *	a1,
		double *	a2,
		int	q_mode
	)

static

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 879 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsp16i	(	GetBitContext *	gb,
		double *	lsps
	)

static

Parse 16 independently-coded LSPs.

Definition at line 915 of file wmavoice.c.

Referenced by dequant_lsp16r(), and synth_superframe().

static void dequant_lsp16r	(	GetBitContext *	gb,
		double *	i_lsps,
		const double *	old,
		double *	a1,
		double *	a2,
		int	q_mode
	)

static

Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 948 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsps	(	double *	lsps,
		int	num,
		const uint16_t *	values,
		const uint16_t *	sizes,
		int	n_stages,
		const uint8_t *	table,
		const double *	mul_q,
		const double *	base_q
	)

static

Dequantize LSPs.

Parameters

lsps	output pointer to the array that will hold the LSPs
num	number of LSPs to be dequantized
values	quantized values, contains n_stages values
sizes	range (i.e. max value) of each quantized value
n_stages	number of dequantization runs
table	dequantization table to be used
mul_q	LSF multiplier
base_q	base (lowest) LSF values

Definition at line 821 of file wmavoice.c.

Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().

static int kalman_smoothen	(	WMAVoiceContext *	s,
		int	pitch,
		const float *	in,
		float *	out,
		int	size
	)

static

Kalman smoothing function.

This function looks pitch +/- 3 samples back into history to find the best fitting curve (the one giving the optimal gain of the two signals, i.e. the highest dot product between the two), and then uses that signal history to smoothen the output of the speech synthesis filter.

Parameters

s	WMA Voice decoding context
pitch	pitch of the speech signal
in	input speech signal
out	output pointer for smoothened signal
size	input/output buffer size

Returns: -1 if no smoothening took place, e.g. because no optimal fit could be found, or 0 on success.

Definition at line 509 of file wmavoice.c.

Referenced by postfilter().

static int parse_packet_header ( WMAVoiceContext * s )

static

Parse the packet header at the start of each packet (input data to this decoder).

Parameters

s	WMA Voice decoding context private data

Returns: 1 if not enough bits were available, or 0 on success.

Definition at line 1863 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static void postfilter	(	WMAVoiceContext *	s,
		const float *	synth,
		float *	samples,
		int	size,
		const float *	lpcs,
		float *	zero_exc_pf,
		int	fcb_type,
		int	pitch
	)

static

Averaging projection filter, the postfilter used in WMAVoice.

This uses the following steps:

A zero-synthesis filter (generate excitation from synth signal)
Kalman smoothing on excitation, based on pitch
Re-synthesized smoothened output
Iterative Wiener denoise filter
Adaptive gain filter
DC filter

Parameters

s	WMAVoice decoding context
synth	Speech synthesis output (before postfilter)
samples	Output buffer for filtered samples
size	Buffer size of synth & samples
lpcs	Generated LPCs used for speech synthesis
zero_exc_pf	destination for zero synthesis filter (16-byte aligned)
fcb_type	Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
pitch	Pitch of the input signal

Definition at line 767 of file wmavoice.c.

Referenced by synth_frame().

static int pRNG	(	int	frame_cntr,
		int	block_num,
		int	block_size
	)

static

Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

Parameters

frame_cntr	current frame number
block_num	current block index
block_size	amount of entries we want to read from a table that has 1000 entries

Returns: a (non-)random number in the [0, 1000 - block_size] range.

Definition at line 1199 of file wmavoice.c.

Referenced by synth_block_hardcoded().

static void stabilize_lsps	(	double *	lsps,
		int	num
	)

static

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

Parameters

lsps	array of LSPs
num	size of LSP array

Note: basically a double version of ff_acelp_reorder_lsf(), might be useful to put in a generic location later on. Parts are also present in ff_set_min_dist_lsf() + ff_sort_nearly_sorted_floats(), which is in float.

Definition at line 1609 of file wmavoice.c.

Referenced by synth_superframe().

static void synth_block	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		int	block_pitch_sh2,
		const double *	lsps,
		const double *	prev_lsps,
		const struct frame_type_desc *	frame_desc,
		float *	excitation,
		float *	synth
	)

static

Parse data in a single block.

Note: we assume enough bits are available, caller should check.

Parameters

s	WMA Voice decoding context private data
gb	bit I/O context
block_idx	index of the to-be-read block
size	amount of samples to be read in this block
block_pitch_sh2	pitch for this block << 2
lsps	LSPs for (the end of) this frame
prev_lsps	LSPs for the last frame
frame_desc	frame type descriptor
excitation	target memory for the ACB+FCB interpolated signal
synth	target memory for the speech synthesis filter output

Returns: 0 on success, <0 on error.

Definition at line 1390 of file wmavoice.c.

Referenced by synth_frame().

static void synth_block_fcb_acb	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		int	block_pitch_sh2,
		const struct frame_type_desc *	frame_desc,
		float *	excitation
	)

static

Parse FCB/ACB signal for a single block.

Note: see synth_block().

Definition at line 1266 of file wmavoice.c.

Referenced by synth_block().

static void synth_block_hardcoded	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		const struct frame_type_desc *	frame_desc,
		float *	excitation
	)

static

Parse hardcoded signal for a single block.

Note: see synth_block().

Definition at line 1235 of file wmavoice.c.

Referenced by synth_block().

static int synth_frame	(	AVCodecContext *	ctx,
		GetBitContext *	gb,
		int	frame_idx,
		float *	samples,
		const double *	lsps,
		const double *	prev_lsps,
		float *	excitation,
		float *	synth
	)

static

Synthesize output samples for a single frame.

Note: we assume enough bits are available, caller should check.

Parameters

ctx	WMA Voice decoder context
gb	bit I/O context (s->gb or one for cross-packet superframes)
frame_idx	Frame number within superframe [0-2]
samples	pointer to output sample buffer, has space for at least 160 samples
lsps	LSP array
prev_lsps	array of previous frame's LSPs
excitation	target buffer for excitation signal
synth	target buffer for synthesized speech data

Returns: 0 on success, <0 on error.

Definition at line 1433 of file wmavoice.c.

Referenced by synth_superframe().

static int synth_superframe	(	AVCodecContext *	ctx,
		AVFrame *	frame,
		int *	got_frame_ptr
	)

static

Synthesize output samples for a single superframe.

If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.

WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames, i.e. residually; they can also be specified individually per-frame, see the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.

Parameters

ctx	WMA Voice decoder context

Returns: 0 on success, <0 on error or 1 if there was not enough data to fully parse the superframe

Definition at line 1732 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static float tilt_factor	(	const float *	lpcs,
		int	n_lpcs
	)

static

Get the tilt factor of a formant filter from its transfer function.

See also: tilt_factor() in amrnbdec.c, which does essentially the same, but somehow (??) it does a speech synthesis filter in the middle, which is missing here

Parameters

lpcs	LPC coefficients
n_lpcs	Size of LPC buffer

Returns: the tilt factor

Definition at line 555 of file wmavoice.c.

Referenced by calc_input_response(), and wiener_denoise().

static void wiener_denoise	(	WMAVoiceContext *	s,
		int	fcb_type,
		float *	synth_pf,
		int	size,
		const float *	lpcs
	)

static

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

take RDFT of LPCs to get the power spectrum of the noise + speech;
using this power spectrum, calculate (for each frequency) the Wiener filter gain, which depends on the frequency power and desired level of noise subtraction (when set too high, this leads to artifacts). We can do this symmetrically over the X-axis (so 0-4kHz is the inverse of 4-8kHz);
by doing a phase shift, calculate the Hilbert transform of this array of per-frequency filter-gains to get the filtering coefficients;
smoothen/normalize/de-tilt these filter coefficients as desired;
take RDFT of noisy sound, apply the coefficients and take its IRDFT to get the denoised speech signal;
the leftover (i.e. output of the IRDFT on denoised speech data beyond the frame boundary) are saved and applied to subsequent frames by an overlap-add method (otherwise you get clicking-artifacts).

Parameters

s	WMA Voice decoding context
fcb_type	Frame (codebook) type
synth_pf	input: the noisy speech signal, output: denoised speech data; should be 16-byte aligned (for ASM purposes)
size	size of the speech data
lpcs	LPCs used to synthesize this frame's speech data

Definition at line 685 of file wmavoice.c.

Referenced by postfilter().

static av_cold int wmavoice_decode_end ( AVCodecContext * ctx )

static

Definition at line 2002 of file wmavoice.c.

static av_cold int wmavoice_decode_init ( AVCodecContext * ctx )

static

Set up decoder with parameters from demuxer (extradata etc.).

Extradata layout:

byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
byte 19-22: flags field (annoyingly in LE; see below for known values),
byte 23-46: variable bitmode tree (really just 17 * 3 bits, rest is 0).

Definition at line 338 of file wmavoice.c.

static int wmavoice_decode_packet	(	AVCodecContext *	ctx,
		void *	data,
		int *	got_frame_ptr,
		AVPacket *	avpkt
	)

static

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.

For more information about frames, see synth_superframe().

Definition at line 1927 of file wmavoice.c.

static av_cold void wmavoice_flush ( AVCodecContext * ctx )

static

Definition at line 2016 of file wmavoice.c.

Variable Documentation

AVCodec ff_wmavoice_decoder

Initial value:

= {
    .name           = "wmavoice",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAVOICE,
    .priv_data_size = sizeof(WMAVoiceContext),
    .init           = wmavoice_decode_init,
    .close          = wmavoice_decode_end,
    .decode         = wmavoice_decode_packet,
    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
    .flush          = wmavoice_flush,
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
}

Definition at line 2044 of file wmavoice.c.

const struct frame_type_desc frame_descs[17]

static

Initial value:

= {
    { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
    { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
    { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
    { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
}

Referenced by check_bits_for_superframe(), and synth_frame().

VLC frame_type_vlc

static

Frame type VLC coding.

Definition at line 62 of file wmavoice.c.

Paris Hackday Code

Data Structures

Macros

Enumerations

Functions

Variables

Detailed Description

Macro Definition Documentation

Enumeration Type Documentation

Function Documentation

Variable Documentation