mirex-meta-analysis: 1_get_mirex_estimates.rb annotate

annotate 1_get_mirex_estimates.rb @ 6:e2337cd691b1 tip

Finished writing the Matlab code to replicate all observations made in the article. Added the article to the repository. Renamed the two main scripts ("1-get_mirex_estimates.rb" and "2-generate_smith2013_ismir.m") to not have dashes (since this was annoying within Matlab). Added new Michael Jackson figure.

author	Jordan Smith <jordan.smith@eecs.qmul.ac.uk>
date	Wed, 05 Mar 2014 01:02:26 +0000
parents
children

rev	line source
jordan@6	1 require "CSV"
jordan@6	2 require "open-uri"
jordan@6	3 # require "simplexml"
# Root directory for everything this script downloads. It will need to hold
# roughly 70 MB of data.
# Set the MIREX_DATA_PATH environment variable to pick your own location, or
# edit the fallback path below.
mirex_path = ENV.fetch("MIREX_DATA_PATH", "/Users/jordan/Desktop/MIREX_data")
jordan@6	6
jordan@6	7
jordan@6	8 # tmp = File.open(filename,'w')
jordan@6	9 # tmptxt = []
jordan@6	10 # open(uri) {\|f\|
jordan@6	11 # f.each_line {\|line\| tmptxt.push(line)}
jordan@6	12 # }
jordan@6	13 # tmp.write(tmptxt)
jordan@6	14 # tmp.close
jordan@6	15 #
jordan@6	16
# Fetches the resource at +uri+ and saves its contents to +filename+.
#
# uri      - location to read from, handed to URI.open (provided by open-uri).
# filename - path of the file to write. The default "." is kept only for
#            interface compatibility; it names a directory, so callers are
#            expected to always pass a real file path.
#
# The resource is read in full before the destination is opened, so a failed
# fetch does not leave an empty or truncated file behind. Both handles are
# closed by the block forms. Errors from the fetch or the write propagate to
# the caller.
def url_download(uri, filename=".")
  # URI.open is used instead of bare `open`: since Ruby 3.0, Kernel#open no
  # longer opens URLs even when open-uri is loaded.
  URI.open(uri) do |remote|
    File.open(filename, 'w') do |local|
      local.print remote.read
    end
  end
end
jordan@6	22
# Converts one downloaded ".js" result file into two plain-text files written
# next to it: "<name>_gt.txt" and "<name>_pred.txt".
#
# filename - path to the file; its last three characters (the ".js"
#            extension) are dropped to form the output base name.
#
# The file's text is split on square brackets. Piece 2 is treated as the
# annotation segment list and piece 4 as the algorithm's segment list; each is
# then split on curly braces and rendered by json_2_text.
#
# Raises ArgumentError when the file does not contain both bracketed lists.
# Nothing is written unless both lists were found and converted.
# Returns nil.
def convert_file(filename)
  base = filename[0..-4]
  text = File.read(filename).split(/[\[\]]/)
  ann_list = text[2]
  alg_list = text[4]
  if ann_list.nil? || alg_list.nil?
    raise ArgumentError, "#{filename}: expected two bracketed segment lists"
  end

  # Convert both lists before writing either file, so a parse failure cannot
  # leave a lone or half-written output behind.
  ann_text = json_2_text(ann_list.split(/[\{\}]/))
  alg_text = json_2_text(alg_list.split(/[\{\}]/))
  File.write(base + "_gt.txt", ann_text)
  File.write(base + "_pred.txt", alg_text)
  nil
end
jordan@6	37
# Renders one segment list as a two-column, tab-separated text annotation.
#
# json - array obtained by splitting a segment list on "{" and "}", so the
#        segment descriptions sit at the odd indices (1, 3, 5, ...) and the
#        text between them at the even ones.
#
# Each description is split on commas: the last whitespace-separated token of
# field 0 and of field 1 are read as numbers (the segment's first and second
# time values), and the text after the last double quote in field 2 is the
# label. A label that itself contains a comma is therefore not supported.
#
# Returns a string with one "<first time>\t<label>" line per segment followed
# by a closing "<second time of the last segment>\tEnd" line, joined with
# newlines. Returns an empty string when there are no segments.
def json_2_text(json)
  # Only odd indices that actually exist: trailing text after the final "}"
  # adds an extra even-indexed piece that must not be read as a segment.
  segments = 1.step(json.length - 1, 2).map { |indx| json[indx] }
  return "" if segments.empty?

  txt = segments.map do |segment|
    els = segment.split(",")
    # Make a TXT-style annotation (2-column):
    [els[0].split(" ")[-1].to_f, els[2].split("\"")[-1]].join("\t")
  end
  # Close the annotation with the second time value of the last segment.
  txt.push([segments.last.split(",")[1].split(" ")[-1].to_f, "End"].join("\t"))
  txt.join("\n")
end
jordan@6	51
jordan@6	52
# # # #   PART 1:  DOWNLOAD ALL THE STRUCTURAL ANALYSIS EVALUATION DATA PUBLISHED BY MIREX

# Algorithms and datasets to fetch, and the MIREX year they belong to:
algos = %w[SP1 SMGA2 MHRAF1 SMGA1 SBV1 KSP2 OYZS1 KSP3 KSP1]
datasets = %w[mrx09 mrx10_1 mrx10_2 sal]
year = "2012"
puts "Thanks for starting the script! Stay tuned for periodic updates."

# Build the <mirex_path>/<dataset>/<algorithm> directory tree, saving each
# algorithm's per-track results CSV into its own directory along the way.
Dir.mkdir(mirex_path) unless File.directory?(mirex_path)
puts("Downloading CSV files...\n")
results_root = "http://nema.lis.illinois.edu/nema_out/mirex" + year
datasets.each do |dataset_name|
  dataset_dir = File.join(mirex_path, dataset_name)
  Dir.mkdir(dataset_dir) unless File.directory?(dataset_dir)

  algos.each do |algo_name|
    target_dir = File.join(dataset_dir, algo_name)
    Dir.mkdir(target_dir) unless File.directory?(target_dir)

    # Remote CSV for this dataset/algorithm pair, and where to store it:
    remote_csv = File.join(results_root, "/results/struct", dataset_name, algo_name, "per_track_results.csv")
    local_csv = File.join(target_dir, "per_track_results.csv")
    url_download(remote_csv, local_csv)
  end
end

puts "..done with that."
jordan@6	80
puts "Now we will download all the files output by each algorithm. This could take a while depending on your connection."
puts "Since this script points to #{datasets.length} datasets and #{algos.length} algorithms, you should expect to wait however long it takes between each of the next lines to appear, times #{datasets.length * algos.length}."

# Read each CSV file and download all the json files it points to:
datasets.each do |dset|
  algos.each do |algo|
    puts "Starting to download #{dset} dataset for #{algo} algorithm...\n"
    download_folder = File.join(mirex_path, dset, algo)
    csv_rows = File.read(File.join(download_folder, "per_track_results.csv")).split("\n")
    csv_rows.shift # the first line is the column header, not a track

    # For each line in the spreadsheet, extract the song id (second
    # comma-separated field) and download the corresponding json document.
    csv_rows.each do |row|
      song_id = row.split(",")[1]
      # Blank or truncated rows carry no song id; skip them instead of
      # failing on nil partway through a long download.
      next if song_id.nil? || song_id.empty?

      # The remote file name uses the song id with its underscores removed.
      remote_id = song_id.delete("_")
      url = "http://nema.lis.illinois.edu/nema_out/mirex#{year}/results/struct/#{dset}/#{algo.downcase}segments#{remote_id}.js"
      download_path = File.join(download_folder, song_id + ".js")
      url_download(url, download_path)
    end
  end
  puts "Done with #{dset} dataset!\n"
end

puts "..done with that."
jordan@6	106
puts "Now, a much faster step: turning all the json files you downloaded into simpler text files."
# Scan for all the json files, and convert each one into two text files, one
# for the algorithm output, one for the annotation.
# The files were saved two levels down, in <mirex_path>/<dataset>/<algorithm>/,
# so the pattern needs one "*" wildcard per directory level; joining empty
# strings instead collapses to <mirex_path>/*.js, which matches none of them.
all_json_files = Dir.glob(File.join(mirex_path, "*", "*", "*.js"))
all_json_files.each do |file|
  convert_file(file)
  puts file
end

puts "..done with that."
jordan@6	116
puts "Now, PART 2 of the script: we download all the zip files (from various websites) that contain the public collections of ground truth files. This will only take a couple minutes, depending on connection speed (it's about 4 MB total)."


# # # #   PART 2:  GET THE ANNOTATION DATA PUBLISHED BY OTHERS

# Archives of public annotations to download. They are only downloaded here;
# unpacking them is left to the user (see the closing messages).
list_of_db_urls = [
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-J-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-G-2001.CHORUS.zip",
  "http://www.music.mcgill.ca/~jordan/salami/releases/SALAMI_data_v1.2.zip",
  "http://www.ifs.tuwien.ac.at/mir/audiosegmentation/dl/ep_groundtruth_excl_Paulus.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_full.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2001.BLOCKS_v001.zip",
  "http://www.isophonics.net/files/annotations/The%20Beatles%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Carole%20King%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Queen%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Michael%20Jackson%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Zweieck%20Annotations.tar.gz",
  "http://www.cs.tut.fi/sgn/arg/paulus/beatles_sections_TUT.zip",
  "http://www.iua.upf.edu/~perfe/annotations/sections/beatles/structure_Beatles.rar"
]

public_data_path = File.join(mirex_path, "public_data")
Dir.mkdir(public_data_path) unless File.directory?(public_data_path)
list_of_db_urls.each do |db_url|
  # Each archive keeps the last path component of its URL as its file name.
  archive_path = File.join(public_data_path, File.basename(db_url))
  # URI.open rather than bare `open`: since Ruby 3.0, Kernel#open no longer
  # opens URLs. The block forms close both handles, and the archive is written
  # in binary mode so its bytes are stored untouched.
  URI.open(db_url) do |remote|
    File.open(archive_path, 'wb') do |local|
      local.print remote.read
    end
  end
end

# # # #   NOW, PLEASE EXIT THE SCRIPT, AND UNZIP ALL THOSE PACKAGES.
# # # #   WHEN YOU'RE DONE, GO ONTO THE PARENT MATLAB FILE TO RUN THE ANALYSES.
puts "..done with that.\n\n"
puts "Script apppears to have ended successfully. All files were downloaded and saved to " + public_data_path +"."
# The MATLAB script was renamed to use an underscore instead of a dash.
puts "To continue please unpack all zip files, start MATLAB, and run 2_generate_smith2013_ismir.m. You can read more on README."
puts "Important: be sure that the zip files unpack into the correct file structure. Again, see README for details."

Mercurial > hg > mirex-meta-analysis

annotate 1_get_mirex_estimates.rb @ 6:e2337cd691b1 tip