view nonExposed/generateScene.m @ 44:b7b1672b3c3b

Reading and writing of files is now done by soundfile, since there seems to be a bug when writing .wav files with librosa (mplayer plays them back as rubbish). Added soundfile as a requirement.
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Mon, 09 Oct 2017 11:55:03 +0100
parents 39399de892ef
children
line wrap: on
line source
function [sceneSchedule] = generateScene (sceneSchedule,sceneObjects,score,inputPath,outputPath,outputFileName,figuresOption,timeMode,endCut,norm,sr,channelOption)
% function [sceneSchedule] = generateScene (sceneSchedule,sceneObjects,score,inputPath,outputPath,outputFileName,figuresOption,timeMode,endCut,norm,sr,channelOption)
% This function does the actual job of loading, scaling, positioning and
% mixing the various samples to generate a scene, based on the
% specifications given by its parameters.
%
% Inputs (as used by the code below):
%   sceneSchedule  - struct array, one entry per scheduled sample. Fields
%                    read: classId, isBackground, ebr, and for events
%                    instance, position, duration. duration is overwritten
%                    with the length actually placed in the scene.
%   sceneObjects   - struct array indexed by classId. Fields read:
%                    classLabel, names, trackLength, isBackground.
%   score          - struct; sceneDuration (multiplied by sr to get the
%                    track length) and backgrounds (used for the 'noise'
%                    background class) are read.
%   inputPath      - prefix of the 'background/' and 'event/' sample folders.
%   outputPath     - prefix of the 'annotation/' and 'sound/' output folders.
%   outputFileName - base name (no extension) of every generated file.
%   figuresOption  - if true-valued, figures are generated; the value is
%                    also forwarded to the visualization functions.
%   timeMode       - 'replicate', 'abstract', or anything else; selects how
%                    the length of each event is determined.
%   endCut         - if true-valued, an event is kept when its start lies
%                    inside its class track; otherwise its end must too.
%   norm           - if non-zero, the mix is scaled so its peak equals norm.
%   sr             - sampling rate used for positioning and writing.
%   channelOption  - 1: also write one multichannel file with one channel
%                    per track; 2: also write one file per track.
%
% Output:
%   sceneSchedule  - input schedule with the events that could not be
%                    placed removed and durations updated.

% This program was written by Mathias Rossignol & Grégoire Lafay
% is Copyright (C) 2015 IRCAM <http://www.ircam.fr>
%
% This program is free software: you can redistribute it and/or modify it
% under the terms of the GNU General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
% or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
% for more details.
%
% You should have received a copy of the GNU General Public License along
% with this program.  If not, see <http://www.gnu.org/licenses/>.


% All instances of one sample will appear on a given track, before
% everything is mixed together, making it possible to apply track-specific
% effects or generate nice colorful representations


% We also generate a "bg" track with all background samples, against which
% the snr of fg sounds will be evaluated

%% Init tab

% Rounded so that a fractional sr*sceneDuration still gives a valid size.
trackLength = round(sr*score.sceneDuration);

tracks = zeros(length(sceneObjects), trackLength);
bg = zeros(trackLength, 1);

dominantTrack = zeros(1,trackLength) + 1;
dominantObject = zeros(1,trackLength) + 1;
dominantEnergy = zeros(1,trackLength);
simulatedEBR = zeros(length(sceneSchedule),2);
objNum = 1;

%% Create sceneSchedule
id2remove=[];
for i=1:length(sceneSchedule)
    id = sceneSchedule(i).classId;
    if (sceneSchedule(i).isBackground)
        % sceneObjects is indexed by class id (as for names and tracks
        % below), not by the position of the entry in the schedule.
        if strcmp(sceneObjects(id).classLabel,'noise')
            waves=score.backgrounds{1}{2}';
        else
            waves = audioread([inputPath 'background/' sceneObjects(id).names{1}]);
            % Two-channel input is averaged down to a single channel.
            [MinSizeWaves,indMinSizeWaves]=min(size(waves));
            if MinSizeWaves==2
                waves=mean(waves,indMinSizeWaves);
            end
        end
        % The first scheduled item is used as is; later backgrounds are
        % scaled against the background accumulated so far.
        scale = 1;
        if (i>1)
            scale = adjustScaleForEBR(waves, bg, 1, sceneSchedule(i).ebr, 1, 0.01);
        end
        if (length(waves) < trackLength)
            % need to loop the sample to fill the bg. Linear crossfade
            fadeLen = sr;
            fadeIn = transpose(0:1/fadeLen:1);
            fadeOut = transpose(1:-1/fadeLen:0);
            % "loop" is faded at both ends, "startLoop" only at its end so
            % that the scene does not start with a fade-in.
            loop = waves;
            loop(1:fadeLen) = loop(1:fadeLen) .* fadeIn(1:fadeLen);
            loop(end-fadeLen+1:end) = loop(end-fadeLen+1:end) .* fadeOut(1:fadeLen);
            loop = scale * loop';
            startLoop=waves;
            startLoop(end-fadeLen+1:end) = startLoop(end-fadeLen+1:end) .* fadeOut(1:fadeLen);
            startLoop = scale * startLoop';
            % Successive copies overlap by fadeLen samples.
            hop = length(loop)-fadeLen;
            for l=0:floor(trackLength/hop)
                t1 = 1+l*hop;
                t2 = min(length(bg), t1+length(loop)-1);
                if l
                    tracks(id,t1:t2) = tracks(id,t1:t2) + loop(1:t2-t1+1);
                else
                    tracks(id,t1:t2) = tracks(id,t1:t2) + startLoop(1:t2-t1+1);
                end
            end
        else
            dim = min(trackLength, length(waves));
            tracks(id,1:dim) = tracks(id,1:dim) + scale*waves(1:dim)';
        end
        if (i==1)
            bg = tracks(id,:);
        else
            bg = bg+tracks(id,:);
        end
    else
        objNum = objNum+1;
        inst = sceneSchedule(i).instance;
        waves = audioread([inputPath 'event/' sceneObjects(id).names{inst}]);
        % Two-channel input is averaged down to a single channel.
        [MinSizeWaves,indMinSizeWaves]=min(size(waves));
        if MinSizeWaves==2
            waves=mean(waves,indMinSizeWaves);
        end
        pos = max(1,floor(sr*sceneSchedule(i).position));

        % Number of samples of the event to use, depending on timeMode.
        switch timeMode
            case {'replicate'}
                % Use the scheduled duration only when the sample is more
                % than 0.5 s longer than it.
                if(length(waves)/sr - sceneSchedule(i).duration > 0.5)
                    endTime = round(sceneSchedule(i).duration*sr);
                else
                    endTime = length(waves);
                end
            case {'abstract'}
                endTime = round(sceneSchedule(i).duration*sr);
            otherwise
                endTime = length(waves);
        end

        % With endCut the event only has to start inside the class track
        % (it is truncated below); otherwise it has to end inside it.
        if endCut
            pos2Test=pos;
        else
            pos2Test=pos+endTime-1;
        end

        classTrackEnd = round(sceneObjects(id).trackLength*sr);
        if(pos2Test<classTrackEnd)
            t2 = min(pos+endTime-1, classTrackEnd);
            wav2use=waves(1:t2-pos+1);
            sceneSchedule(i).duration=length(wav2use)/sr;
            scale = adjustScaleForEBR(wav2use, bg, pos, sceneSchedule(i).ebr, 1, 0.01);

            [~,bgEnergy] = powspec(bg(pos:t2));
            [~,evEnergy] = powspec(scale*wav2use.');
            louder = find(bgEnergy<evEnergy);
            labelStart = min(louder);
            labelEnd = max(louder);
            for t=labelStart:labelEnd
                % Position in the scene associated with energy index t.
                % The same index is used for the comparison and the update
                % (presumably t is a frame index of powspec - TODO confirm).
                tScene = pos+round(t*(t2-pos)/length(bgEnergy));
                if evEnergy(t)>dominantEnergy(tScene)
                    dominantObject(tScene) = objNum;
                    dominantEnergy(tScene) = evEnergy(t);
                end
            end
            % Fill the gaps between the positions marked above.
            marked = find(dominantObject==objNum);
            dominantObject(min(marked):max(marked)) = objNum;
            dominantTrack(dominantObject==objNum) = id;

            tracks(id, pos:t2) = tracks(id, pos:t2) + scale*wav2use.';
            %% Store EBR and event locations
            % NOTE(review): the reference is track 1, not the accumulated
            % bg - confirm that this is intended when several backgrounds
            % are scheduled.
            simulatedEBR(i,1) = ebr(tracks(id, pos:t2),tracks(1, pos:t2));
            simulatedEBR(i,2)=pos;
        else
            id2remove=[id2remove i];
        end
    end
end

sceneSchedule(id2remove)=[];
% checkClassPresence(sceneSchedule,sceneObjects);


save ([outputPath 'annotation/' outputFileName '.mat'],'score', 'sceneObjects', 'sceneSchedule','dominantTrack', 'dominantObject','simulatedEBR');
saveAnnotationTxt(sceneSchedule,outputPath,outputFileName);

w = sum(tracks,1);
% Peak of the unnormalized mix; also used to scale the individual tracks.
mixPeak = max(abs(w));
if norm %Normalize to [-norm,norm]
    if mixPeak > 0 % a silent mix is left untouched instead of becoming NaN
        w = w*norm/mixPeak;
    end
end

audiowrite([outputPath 'sound/' outputFileName '.wav'],w,sr);

switch channelOption
    case 1
        % One multichannel file, one channel per track.
        if norm %Normalize to [-norm,norm]
            if mixPeak > 0
                tracks = tracks*norm/mixPeak;
            end
        end
        audiowrite([outputPath 'sound/' outputFileName '_channel_split' '.wav'],tracks',sr);
    case 2
        % One file per track, with an additional 1.05 scaling margin.
        if norm %Normalize to [-norm,norm]
            if mixPeak > 0
                tracks = tracks*norm/(1.05*mixPeak);
            end
        end
        for jj=1:size(tracks,1)
            audiowrite([outputPath 'sound/' outputFileName '_channel_' num2str(jj) '_class_' sceneObjects(jj).classLabel '.wav'],tracks(jj,:),sr);
        end
end


if figuresOption
    settingFigure.cmap = pleasantnessColormap(sceneObjects);
    if any(~sum(settingFigure.cmap, 2))
        fprintf(2, 'Unable to get some of the pleasantness tags, revert to randomized colors');
        settingFigure.cmap = randomColormap(sceneObjects);
    end
    settingFigure.width=29.7; % cm
    settingFigure.height=21; % cm
    settingFigure.FontSize=16;
    settingFigure.sr=sr;
    settingFigure.sceneDuration=score.sceneDuration;

    % Coarse envelope of every background track, decimated by the sampling
    % rate of the scene.
    nbBg = 0;
    bgo = [];
    for k=1:length(sceneObjects)
        if sceneObjects(k).isBackground
            nbBg = nbBg+1;
            bgo(nbBg, :) = resample(abs(tracks(k, :)), 1, sr);
        end
    end

    % Backgrounds are ordered by mean/median of their envelope, events by
    % their number of zero samples. The event ordering takes rows
    % nbBg+1:end of tracks, i.e. it relies on backgrounds coming first.
    if nbBg > 0
        [~, bgOrder] = sort(mean(bgo, 2)./median(bgo, 2));
    else
        bgOrder = zeros(0, 1);
    end
    [~, eventOrder] = sort(sum(tracks(nbBg+1:end,:)==0, 2));
    trackOrder = [bgOrder; eventOrder+nbBg];
    tracks = tracks(trackOrder, :);
    sceneObjects = sceneObjects(trackOrder);
    settingFigure.cmap =  settingFigure.cmap(trackOrder, :);

    coloredSpectrumVisualization(tracks,1,settingFigure,figuresOption,[outputPath 'annotation/' outputFileName,'-spectrum.png']);
    timeDomainVisualization(tracks,2,settingFigure,figuresOption,[outputPath 'annotation/' outputFileName,'-timeDomain.png']);
    pianoRollVisualization(sceneObjects,sceneSchedule,score,3,settingFigure,figuresOption,[outputPath 'annotation/' outputFileName,'-pianoRoll.png'])
end