view extractMelody.m @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
line wrap: on
line source
function [y] = extractMelody(x,w,N,melodyFile)
%EXTRACTMELODY Resynthesize a sound keeping only bins around the melody harmonics.
%   y = extractMelody(x,w,N,melodyFile) analyses x frame by frame (hop of
%   256 samples), zeroes every FFT bin that is not within +/-6 bins of one
%   of the first 40 multiples of the frame's median melody pitch, and
%   overlap-adds the inverse transforms. Frames without any pitch above
%   150 Hz are silenced.
%
%   Inputs:
%     x          - input sound (vector; a row vector is transposed)
%     w          - analysis window of odd length M, with M <= N
%     N          - analysis FFT size (must be at least 1024, the synthesis size)
%     melodyFile - file name handed to loadMelodyFile, which must return a
%                  2-row matrix whose second row holds the pitch in Hz
%
%   Output:
%     y          - column vector of length(x)+512 samples, rescaled so that
%                  max(y) equals max(x) (left as zeros if nothing was kept)
%
%e.g. y = extractMelody(x,hamming(2025),4096,'baby.txt');


% Force column orientation so that x(...).*w is an element-wise product
if size(x,1) == 1
    x = x(:);
end
if size(w,1) == 1
    w = w(:);
end

% Initialize
M = length(w);                           % analysis window size
if mod(M,2) == 0
    % the zero-phase buffer layout below needs a centre sample
    error('extractMelody:evenWindow', ...
          'Analysis window length must be odd (got %d).', M);
end
Ns = 1024;                               % FFT size for synthesis
H = 256;                                 % hop size for analysis and synthesis
soundlength = length(x);                 % length of input sound array
hNs = Ns/2;                              % half synthesis window size
hM = (M-1)/2;                            % half analysis window size
pin = max(hNs+1,1+hM);                   % sound pointer at middle of first analysis window
pend = soundlength-max(hM,hNs);          % last sample to start a frame
fftbuffer = zeros(N,1);                  % buffer for FFT
y = zeros(soundlength+Ns/2,1);           % output (melody) component
w = w/sum(w);                            % normalize analysis window
sw = zeros(Ns,1);
ow = triang(2*H-1);                      % overlapping window
ovidx = Ns/2+1-H+1:Ns/2+H;               % overlap indexes
sw(ovidx) = ow(1:2*H-1);
bh = blackmanharris(Ns);                 % synthesis window
bh = bh ./ sum(bh);                      % normalize synthesis window
sw(ovidx) = sw(ovidx) ./ bh(ovidx);      % undo blackman-harris, apply triangular

% NOTE(review): both constants below are hard-coded in the original code.
% melodyHop looks like the spacing (in samples) of the melody frames and
% hzPerBin like the frequency resolution of the analysis FFT (for
% fs = 44100 and N = 4096 that would be about 10.77) -- confirm both
% against the melody extractor that produced melodyFile.
melodyHop = 128;
hzPerBin = 10.6568;
nHarmonics = 40;                         % number of pitch multiples kept
binOffsets = -6:6;                       % bins kept on each side of a harmonic

% Load melody file
melody = loadMelodyFile(melodyFile);
% Zero-pad the pitch track. At least 1000 columns as before, and more when
% the sound is long enough that a frame would otherwise index past the end.
neededCols = round(soundlength/melodyHop) + 1;
padCols = max(1000, neededCols - size(melody,2));
melody = [melody zeros(2,padCols)];

% For each segment
while pin<pend

    % Find predominant pitch for segment
    firstCol = ceil((pin-hM)/melodyHop);
    lastCol = round((pin+hM)/melodyHop);
    segPitch = melody(2,firstCol:lastCol);
    voiced = segPitch(segPitch>150);       % pitches larger than 150Hz

    % Compute FFT for segment
    xw = x(pin-hM:pin+hM).*w;              % window the input sound
    fftbuffer(1:(M+1)/2) = xw((M+1)/2:M);  % zero-phase window in fftbuffer
    fftbuffer(N-(M-1)/2+1:N) = xw(1:(M-1)/2);
    X = fft(fftbuffer);                    % compute the FFT

    % Keep only the melody for each segment
    if isempty(voiced)   % no melody in segment: silence it
        X = zeros(N,1);
    else                 % melody present: mask everything else
        medpitch = median(voiced);         % median pitch in segment

        % Harmonic centres in the positive half and their mirror images
        harmonicBins = (medpitch/hzPerBin) * (1:nHarmonics);
        centres = round([harmonicBins, N + 2 - harmonicBins]);

        % Widen every centre by binOffsets and drop bins outside 1..N
        keepBins = bsxfun(@plus, centres(:), binOffsets);
        keepBins = keepBins(keepBins >= 1 & keepBins <= N);

        discard = true(N,1);
        discard(keepBins) = false;
        X(discard) = 0;
    end

    % Overlap-add the resynthesized frame. real() guards against a complex
    % result when rounding makes the kept bin set slightly asymmetric.
    ri = pin-hNs;                          % output pointer for this frame
    yw = real(ifft(X));
    y(ri:ri+Ns-1) = y(ri:ri+Ns-1)+yw(1:Ns).*sw;
    pin = pin+H;

end

% Scale y to original amplitude; skip when y is silent to avoid NaN/Inf
peakY = max(y);
if peakY ~= 0
    y = (max(x)/peakY)*y;
end

%wavwrite(y,44100,'test.wav');