Mercurial > hg > batch-feature-extraction-tool
view Source/AudioReader.cpp @ 1:e86e9c111b29
Updates stuff that potentially fixes the memory leak and also makes it work on Windows and Linux (Need to test). Still have to fix fftw include for linux in Jucer.
author | David Ronan <d.m.ronan@qmul.ac.uk> |
---|---|
date | Thu, 09 Jul 2015 15:01:32 +0100 |
parents | 25bf17994ef1 |
children | c649e493c30a |
line wrap: on
line source
/*
  ==============================================================================

    AudioReader.cpp
    Created: 27 Aug 2014 3:17:10pm
    Author:  david.ronan

  ==============================================================================
*/

#include "AudioReader.h"
#include "WriteCSV.h"

#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>

// Step, in seconds, between the start positions of successive candidate
// windows when searching for the highest-energy section of a file.
#define ENERGYSEARCHTIME 3

// Sample rate (Hz) the feature extractor is initialised with; files with a
// different rate trigger an error message but are still processed.
#define SAMPLERATE 22050.0f

namespace
{
    // Converts one integer sample (or the average of two, when sumChannels is
    // true) to a float scaled by 0x7fffffff.
    // The average is formed in 64-bit arithmetic: adding two int samples
    // directly overflows (undefined behaviour) once their sum exceeds INT_MAX.
    float NormalisedMonoSample(int left, int right, bool sumChannels)
    {
        if (sumChannels)
        {
            const long long mixed = (static_cast<long long>(left) + static_cast<long long>(right)) / 2;
            return static_cast<float>(mixed) / (0x7fffffff);
        }

        return static_cast<float>(left) / (0x7fffffff);
    }
}

AudioReader::AudioReader()
{
}

AudioReader::~AudioReader()
{
}

// Extracts features from every file listed in audioFileData and writes one
// record per file through WriteCSV.
//
// For each file that can be opened, the whole track is read, the mean-square
// energy of windows of analysisWindowSize seconds (stepped every
// ENERGYSEARCHTIME seconds) is measured, and the window with the highest
// energy is re-read, converted to normalised float samples and passed to the
// feature extractor.
//
// audioFileData      - supplies the file names and their labels (index-aligned).
// poolTimeSecs       - forwarded unchanged to the FeatureData record.
// analysisWindowSize - analysis window length in seconds; clamped to the file
//                      length when the file is shorter.
//
// Returns a vector that this function never adds to; the extracted data only
// leaves the function through writeCSV.Write().
// NOTE(review): presumably the vector is left empty on purpose to limit memory
// use - confirm against callers.
std::vector<FeatureData> AudioReader::Read(AudioFileData audioFileData, float poolTimeSecs, int analysisWindowSize)
{
    WriteCSV writeCSV = WriteCSV();
    std::vector<FeatureData> featureData = std::vector<FeatureData>();

    // NOTE(review): the backslash separator is Windows-specific; on other
    // platforms it becomes part of the file name - confirm the intended path.
    std::string CSVFileName = "..\\FeatureData" + writeCSV.currentDateTime();

    const std::vector<std::string> audioFileNames = audioFileData.GetFileNames();
    const std::vector<std::string> labels = audioFileData.GetLabels();

    AudioFormatManager m_formatManager;
    m_formatManager.registerBasicFormats();

    m_AudioSourceFeatureExtractor.Initialise(SAMPLERATE);

    for (size_t i = 0; i < audioFileNames.size(); i++)
    {
        const float percentComplete = static_cast<float>(i) / static_cast<float>(audioFileNames.size()) * 100.0f;
        const std::string outputStr = "Extracting features for " + audioFileNames[i] + "\n" + std::to_string(percentComplete) + "% complete...";
        std::cout << outputStr;

        // Create file from our audio data. createReaderFor() hands back a
        // single heap object, so it is owned by a unique_ptr (scalar delete)
        // rather than being released with delete[].
        File audioFile(audioFileNames[i].c_str());
        std::unique_ptr<AudioFormatReader> reader(m_formatManager.createReaderFor(audioFile));

        if (reader == nullptr)
        {
            std::cout << "\n\n\nERROR: Could not find file!!!\n\n\n";
            continue;
        }

        m_fSampleRate = static_cast<float>(reader->sampleRate);
        m_iLengthInSamples = static_cast<int>(reader->lengthInSamples);
        m_iNumOfChannels = reader->numChannels;

        if (m_fSampleRate != SAMPLERATE)
        {
            // Reported only; processing carries on with the file's own rate.
            std::cout << "\n\n\nERROR: File is not the required 22050 Hz sample rate.!!!\n\n\n";
        }

        // Number of samples in one analysis window, limited to the file length.
        int numOfSamplesToCollect = static_cast<int>(analysisWindowSize * m_fSampleRate);
        if (m_iLengthInSamples <= numOfSamplesToCollect)
        {
            numOfSamplesToCollect = m_iLengthInSamples;
        }

        // Buffer for the full track. Both channel slots point at the same
        // storage, exactly as before.
        // NOTE(review): because the slots alias, the "sum to mono" below
        // averages the buffer with itself instead of mixing two distinct
        // channels - confirm whether separate buffers were intended.
        std::vector<int> fullTrack(static_cast<size_t>(m_iLengthInSamples), 0);
        int* destSamples[2] = { fullTrack.data(), fullTrack.data() };

        // Float samples of the window currently being examined.
        std::vector<float> windowSamples(static_cast<size_t>(numOfSamplesToCollect), 0.0f);

        // Number of candidate windows to measure.
        int timesToLoop = 0;
        if (m_iLengthInSamples == numOfSamplesToCollect)
        {
            timesToLoop = 1;
        }
        else
        {
            timesToLoop = static_cast<int>((m_iLengthInSamples - numOfSamplesToCollect) / (ENERGYSEARCHTIME * m_fSampleRate));
        }

        std::vector<float> windowEnergy;

        reader->readSamples(destSamples, 2, 0, 0, m_iLengthInSamples);

        const bool sumChannels = m_iNumOfChannels > 1;

        for (int j = 0; j < timesToLoop; j++)
        {
            const int windowStart = int(j * ENERGYSEARCHTIME * m_fSampleRate);
            float fSum = 0.f;

            for (int n = 0; n < numOfSamplesToCollect; n++)
            {
                // Sum to mono if needed and accumulate the energy of this window.
                const float sample = NormalisedMonoSample(destSamples[0][windowStart + n],
                                                          destSamples[1][windowStart + n],
                                                          sumChannels);
                windowSamples[static_cast<size_t>(n)] = sample;
                fSum += sample * sample;
            }

            // Normalise by the window length and record the result.
            fSum /= numOfSamplesToCollect;
            windowEnergy.push_back(fSum);
        }

        // Index of the window with the most energy (0 when no window was measured).
        const int maxIdx = static_cast<int>(std::distance(windowEnergy.begin(),
                                                          std::max_element(windowEnergy.begin(), windowEnergy.end())));

        // Re-read just the selected window from the file. Held in a vector so
        // the buffer is released on every path (it used to leak once per file).
        std::vector<int> loudestWindow(static_cast<size_t>(numOfSamplesToCollect), 0);
        int* thirtySecSamples[2] = { loudestWindow.data(), loudestWindow.data() };

        reader->readSamples(thirtySecSamples, 2, 0, int(maxIdx * ENERGYSEARCHTIME * m_fSampleRate), numOfSamplesToCollect);

        // Every element is overwritten here, so no separate clearing pass is needed.
        for (int n = 0; n < numOfSamplesToCollect; n++)
        {
            windowSamples[static_cast<size_t>(n)] = NormalisedMonoSample(thirtySecSamples[0][n],
                                                                         thirtySecSamples[1][n],
                                                                         sumChannels);
        }

        std::vector<ObservationData> newObs = m_AudioSourceFeatureExtractor.Process(windowSamples.data(), numOfSamplesToCollect);

        FeatureData newFeature = FeatureData(newObs, labels[i], audioFileNames[i], m_fSampleRate, FFTSIZE, static_cast<float>(numOfSamplesToCollect), poolTimeSecs);

        writeCSV.Write(CSVFileName, newFeature);

        // Update the screen information: emit one backspace per character of
        // the progress message. (The stream object itself used to be inserted
        // into the output here as well.)
        std::cout << std::string(outputStr.length(), '\b');

        // The buffers and the reader are released automatically at the end of
        // each iteration.
    }

    m_AudioSourceFeatureExtractor.Finalize();

    return featureData;
}