Mercurial > hg > mirex-meta-analysis
view 1-get_mirex_estimates.rb @ 2:624231da830b
Removed name from comments. Updated readme extensively. Renamed 2 files without significant changes. Added EP data as a bonus.
author | Jordan Smith <jordan.smith@eecs.qmul.ac.uk> |
---|---|
date | Fri, 20 Sep 2013 17:05:34 +0100 |
parents | 818a4b5f3384 |
children | 92b5a46bc67b |
line wrap: on
line source
require "CSV" require "open-uri" # require "simplexml" mirex_path = "/Users/me/Desktop/MIREX_data" # EDIT THIS TO BE YOUR OWN DESIRED PATH. # IT WILL NEED TO HOLD ROUGHLY 70 MB OF DATA. def url_download(uri, filename=".") tmp = File.open(filename,'w') tmptxt = [] open(uri) {|f| f.each_line {|line| tmptxt.push(line)} } tmp.write(tmptxt) tmp.close end def convert_file(filename) ann_out_file = filename[0..-4] + "_gt.txt" alg_out_file = filename[0..-4] + "_pred.txt" ann_out = File.open(ann_out_file,'w') alg_out = File.open(alg_out_file,'w') text = File.open(filename,'r').readlines[1..-4].join("").split(/[\[\]]/) ann = text[1] alg = text[3] ann_out.write(json_2_text(ann)) alg_out.write(json_2_text(alg)) ann_out.close alg_out.close end def json_2_text(json) txt = [] json = json.split("\n") json.each do |line| els = line.split(",") # Make a LAB-style annotation (3-column): # txt.push([els[0].split(" ")[-1].to_f, els[1].split(" ")[-1].to_f, els[2].split("\"")[-1]].join("\t")) # Make a TXT-style annotation (2-column): txt.push([els[0].split(" ")[-1].to_f, els[2].split("\"")[-1]].join("\t")) end txt.push([json[-1].split(",")[1].split(" ")[-1].to_f, "End"].join("\t")) return txt.join("\n") end # # # # PART 1: DOWNLOAD ALL THE STRUCTURAL ANALYSIS EVALUTION DATA PUBLISHED BY MIREX # Define list of algorithms and datasets: algos = ["SP1", "SMGA2", "MHRAF1", "SMGA1", "SBV1", "KSP2", "OYZS1", "KSP3", "KSP1"] datasets = ["mrx09", "mrx10_1", "mrx10_2", "sal"] # Create appropriate directory tree and download CSV files: puts("Downloading CSV files...\n") datasets.each do |dset| # Make dataset directory: dir_path = File.join(mirex_path,dset) Dir.mkdir(dir_path) unless File.directory?(dir_path) algos.each do |algo| # Make algorithm directory: algo_path = File.join(mirex_path,dset,algo) Dir.mkdir(algo_path) unless File.directory?(algo_path) # Download the CSV file to this directory: algocsvpath = File.join(mirex_path,dset,algo,"per_track_results.csv") csv_path = File.join("http://nema.lis.illinois.edu/nema_out/mirex2012/results/struct",dset,algo,"per_track_results.csv") url_download(csv_path, algocsvpath) end end # Read each CSV file and download all the json files it points to: datasets.each do |dset| algos.each do |algo| puts( "Starting to download "+dset+ " dataset for " + algo + " algorithm...\n") algocsvpath = File.join(mirex_path,dset,algo,"per_track_results.csv") csv_data = CSV.read(algocsvpath) header = csv_data.delete_at(0) download_folder = File.join(mirex_path,dset,algo) # For each line in the spreadsheet, extract the songid and download the corresponding json document. csv_data.each do |line| song_id = line[1] url = "http://nema.lis.illinois.edu/nema_out/mirex2012/results/struct/" + dset + "/" + algo.downcase + "segments" + song_id.delete("_") + ".js" download_path = File.join(download_folder,song_id + ".js") # download_path = download_folder + "/" + song_id + ".js" url_download(url, download_path) end end puts("Done with " + dset + " dataset!\n") end # Scan for all the json files, and convert each one into two text files, one for the algorithm output, one for the annotation: all_json_files = Dir.glob(File.join(mirex_path,"*","*","*.js")) all_json_files.each do |file| convert_file(file) end # # # # PART 2: GET (AND CONVERT) THE ANNOTATION DATA PUBLISHED BY OTHERS # Download and unzip all public annotations list_of_db_urls = ["http://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORUS.zip", "http://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.CHORUS.zip", "http://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-J-2001.CHORUS.zip", "http://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-G-2001.CHORUS.zip", "http://www.music.mcgill.ca/~jordan/salami/releases/SALAMI_data_v1.2.zip", "http://www.ifs.tuwien.ac.at/mir/audiosegmentation/dl/ep_groundtruth_excl_Paulus.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_full.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2001.BLOCKS_v001.zip", "http://www.isophonics.net/files/annotations/The%20Beatles%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Carole%20King%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Queen%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Michael%20Jackson%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Zweieck%20Annotations.tar.gz", "http://www.cs.tut.fi/sgn/arg/paulus/beatles_sections_TUT.zip", "http://www.iua.upf.edu/~perfe/annotations/sections/beatles/structure_Beatles.rar"] public_data_path = File.join(mirex_path,"public_data") Dir.mkdir(public_data_path) unless File.directory?(public_data_path) list_of_db_urls.each do |db_url| open(File.join(public_data_path,File.basename(db_url)), 'wb') do |foo| foo.print open(db_url).read end end # # # # NOW, PLEASE EXIT THE SCRIPT, AND UNZIP ALL THOSE PACKAGES. # # # # WHEN YOU'RE DONE, GO ONTO THE PARENT MATLAB FILE TO RUN THE ANALYSES.