FFmpeg: extractMelody.m Source File - Paris Hackday Code

FFmpeg
 function [y] = extractMelody(x,w,N,melodyFile)
 %e.g. y = extractMelody(x,hamming(2025),4096,'baby.txt');
 
 
 % Initialize
 M = length(w);   % analysis window size
 Ns = 1024;                               % FFT size for synthesis
 H = 256;                                 % hop size for analysis and synthesis
 soundlength = length(x);                 % length of input sound array
 hNs = Ns/2;                              % half synthesis window size
 hM = (M-1)/2;                            % half analysis window size
 pin = max(hNs+1,1+hM);   % initialize sound pointer to middle of analysis window
 pend = soundlength-max(hM,hNs);          % last sample to start a frame
 fftbuffer = zeros(N,1);                  % initialize buffer for FFT
 y = zeros(soundlength+Ns/2,1);           % output sine component
 w = w/sum(w);                            % normalize analysis window
 sw = zeros(Ns,1);
 ow = triang(2*H-1);                      % overlapping window
 ovidx = Ns/2+1-H+1:Ns/2+H;               % overlap indexes
 sw(ovidx) = ow(1:2*H-1);
 bh = blackmanharris(Ns);                 % synthesis window
 bh = bh ./ sum(bh);                      % normalize synthesis window
 sw(ovidx) = sw(ovidx) ./ bh(ovidx);
 
 
 % Load melody file
 melody = loadMelodyFile(melodyFile);
 melody = [melody zeros(2,1000)];
 i=0;
 
 % For each segment
 while pin<pend
     
     i=i+1;
     
     % Find predominant pitch for segment
     melody_seg = melody(:,ceil((pin-hM)/128):round((pin+hM)/128));
     ind = find(melody_seg(2,:)>150);       % Find pitches larger than 150Hz
     medpitch = median(melody_seg(2,ind));  % Median pitch in segment
     
 
     % Compute FFT for segment
     xw = x(pin-hM:pin+hM).*w(1:M);         % window the input sound
     fftbuffer(1:(M+1)/2) = xw((M+1)/2:M);  % zero-phase window in fftbuffer
     fftbuffer(N-(M-1)/2+1:N) = xw(1:(M-1)/2);
     X = fft(fftbuffer);                    % compute the FFT
     
     % Keep only the melody for each segment
     if(isempty(ind)) % if there is no melody in segment, then silence segment
         X = zeros(N,1);
     else             % if there is melody, then mask everything else
 
         melodyBinsStart = (medpitch/10.6568) * [1:40];
         melodyBinsEnd =  N + 2 - ((medpitch/10.6568) * [1:40]);
         melodyBins = round([melodyBinsStart melodyBinsEnd]);
         melodyBins = [melodyBins (melodyBins-1) (melodyBins+1) (melodyBins-2) (melodyBins+2) (melodyBins-3) (melodyBins+3) (melodyBins-4) (melodyBins+4) (melodyBins-5) (melodyBins+5) (melodyBins-6) (melodyBins+6)];
         
         C = setdiff(1:N,melodyBins);
         
         X(C) = 0;
 
     end;
     
     
     ri= pin-hNs;                           % input sound pointer for residual analysis
     yw = ifft(X);    
     y(ri:ri+Ns-1) = y(ri:ri+Ns-1)+yw(1:Ns).*sw;
     pin = pin+H;     
     
 end
 
 y = (max(x)/max(y))*y; % scale y to original amplitude
 
 %wavwrite(y,44100,'test.wav');