Mercurial > hg > mirex-meta-analysis
comparison 1_get_mirex_estimates.rb @ 6:e2337cd691b1 tip
Finishing writing the matlab code to replicate all observations made in the article.
Added the article to the repository.
Renamed the two main scripts ("1-get_mirex_estimates.rb" and "2-generate_smith2013_ismir.m") to not have dashes (since this was annoying within Matlab)
Added new Michael Jackson figure.
author | Jordan Smith <jordan.smith@eecs.qmul.ac.uk> |
---|---|
date | Wed, 05 Mar 2014 01:02:26 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:8d896eec680e | 6:e2337cd691b1 |
---|---|
# Dependencies. NOTE: the require name must be lowercase "csv" — 'require "CSV"'
# only works on case-insensitive filesystems (macOS) and fails on Linux.
require "csv"
require "open-uri"
# require "simplexml"

mirex_path = "/Users/jordan/Desktop/MIREX_data" # EDIT THIS TO BE YOUR OWN DESIRED PATH.
# IT WILL NEED TO HOLD ROUGHLY 70 MB OF DATA.


# Scratch draft of the download helper (superseded by url_download below),
# kept for reference:
# tmp = File.open(filename,'w')
# tmptxt = []
# open(uri) {|f|
#   f.each_line {|line| tmptxt.push(line)}
# }
# tmp.write(tmptxt)
# tmp.close
#
# Download the resource at +uri+ and save its body to the local file +filename+.
# Uses URI.open explicitly: on Ruby 3.0+ Kernel#open no longer accepts URLs
# (open-uri stopped monkey-patching it), so the original `open(uri)` would fail.
# Block forms guarantee both handles are closed even if the transfer raises.
def url_download(uri, filename=".")
  URI.open(uri) do |remote|
    File.open(filename, 'w') do |local|
      local.print remote.read
    end
  end
end
22 | |
# Convert one downloaded MIREX result file (<song_id>.js) into two plain-text
# boundary annotations next to it:
#   <song_id>_gt.txt   -- the ground-truth segmentation
#   <song_id>_pred.txt -- the algorithm's predicted segmentation
# filename[0..-4] strips the 3-character ".js" extension.
def convert_file(filename)
  ann_out_file = filename[0..-4] + "_gt.txt"
  alg_out_file = filename[0..-4] + "_pred.txt"
  # Read the file as comma-terminated records and split on the JSON brackets.
  # (The original had a second, dead read using readlines[1..-4] that was
  # immediately overwritten and leaked its file handle — removed.)
  text = File.readlines(filename, ",").join("").split(/[\[\]]/)
  ann = text[2].split(/[\{\}]/)  # annotation segments live in the 3rd chunk
  alg = text[4].split(/[\{\}]/)  # algorithm segments live in the 5th chunk
  File.open(ann_out_file, 'w') { |out| out.write(json_2_text(ann)) }
  File.open(alg_out_file, 'w') { |out| out.write(json_2_text(alg)) }
end
37 | |
# Turn a brace-split segment list into a 2-column TXT-style annotation:
# one "onset<TAB>label" row per segment, closed by a final "offset<TAB>End" row.
# The odd-numbered entries of +json+ hold the segment bodies
# (e.g. '"onset_time": 0.0, "offset_time": 10.5, "label": "verse"').
def json_2_text(json)
  rows = []
  idx = 1
  while idx <= json.length
    fields = json[idx].split(",")
    onset = fields[0].split(" ").last.to_f      # numeric token after the key
    label = fields[2].split("\"").last          # quoted label text
    # A LAB-style (3-column) row would also include fields[1]'s offset time.
    rows << "#{onset}\t#{label}"
    idx += 2
  end
  # The last segment's offset time marks the end of the piece.
  final_offset = json.last.split(",")[1].split(" ").last.to_f
  rows << "#{final_offset}\tEnd"
  rows.join("\n")
end
51 | |
52 | |
# # # # PART 1: DOWNLOAD ALL THE STRUCTURAL ANALYSIS EVALUATION DATA PUBLISHED BY MIREX

# Algorithms and datasets evaluated in the MIREX structural segmentation task:
algos = %w[SP1 SMGA2 MHRAF1 SMGA1 SBV1 KSP2 OYZS1 KSP3 KSP1]
datasets = %w[mrx09 mrx10_1 mrx10_2 sal]
year = "2012"
puts "Thanks for starting the script! Stay tuned for periodic updates."

# Create appropriate directory tree and download CSV files:
Dir.mkdir(mirex_path) unless File.directory?(mirex_path)
puts("Downloading CSV files...\n")
# For every dataset/algorithm pair: create the local directory, then fetch the
# per-track results spreadsheet from the NEMA results server.
datasets.each do |dset|
  dataset_dir = File.join(mirex_path, dset)
  Dir.mkdir(dataset_dir) unless File.directory?(dataset_dir)
  algos.each do |algo|
    algorithm_dir = File.join(mirex_path, dset, algo)
    Dir.mkdir(algorithm_dir) unless File.directory?(algorithm_dir)
    # Download the CSV file to this directory:
    local_csv = File.join(mirex_path, dset, algo, "per_track_results.csv")
    remote_csv = File.join("http://nema.lis.illinois.edu/nema_out/mirex" + year,
                           "/results/struct", dset, algo, "per_track_results.csv")
    url_download(remote_csv, local_csv)
  end
end

puts "..done with that."
80 | |
puts "Now we will download all the files output by each algorithm. This could take a while depending on your connection."
puts "Since this script points to #{datasets.length} datasets and #{algos.length} algorithms, you should expect to wait however long it takes between each of the next lines to appear, times #{datasets.length * algos.length}."

# Read each CSV file and download all the json files it points to:
datasets.each do |dset|
  algos.each do |algo|
    puts("Starting to download #{dset} dataset for #{algo} algorithm...\n")
    results_csv = File.join(mirex_path, dset, algo, "per_track_results.csv")
    rows = File.read(results_csv).split("\n")
    rows.shift # discard the header row
    download_folder = File.join(mirex_path, dset, algo)
    # Each remaining row names one song; fetch its json result document.
    # NOTE(review): there is deliberately no "/" between the algorithm name and
    # "segments" — presumably the NEMA server's naming scheme; confirm if URLs 404.
    rows.each do |row|
      song_id = row.split(",")[1]
      url = "http://nema.lis.illinois.edu/nema_out/mirex#{year}/results/struct/#{dset}/#{algo.downcase}segments#{song_id.delete('_')}.js"
      url_download(url, File.join(download_folder, "#{song_id}.js"))
    end
  end
  puts("Done with #{dset} dataset!\n")
end

puts "..done with that."
106 | |
puts "Now, a much faster step: turning all the json files you downloaded into simpler text files."
# Every downloaded file lives at <mirex_path>/<dataset>/<algo>/<song_id>.js;
# convert each one into two text files (algorithm output and annotation).
Dir.glob(File.join(mirex_path, "*", "*", "*.js")).each do |json_file|
  convert_file(json_file)
  puts json_file
end

puts "..done with that."
116 | |
puts "Now, PART 2 of the script: we download all the zip files (from various websites) that contain the public collections of ground truth files. This will only take a couple minutes, depending on connection speed (it's about 4 MB total)."


# # # # PART 2: GET (AND CONVERT) THE ANNOTATION DATA PUBLISHED BY OTHERS

# Download and unzip all public annotations
list_of_db_urls = ["https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORUS.zip", "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.CHORUS.zip", "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-J-2001.CHORUS.zip", "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-G-2001.CHORUS.zip", "http://www.music.mcgill.ca/~jordan/salami/releases/SALAMI_data_v1.2.zip", "http://www.ifs.tuwien.ac.at/mir/audiosegmentation/dl/ep_groundtruth_excl_Paulus.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_full.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced.zip", "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2001.BLOCKS_v001.zip", "http://www.isophonics.net/files/annotations/The%20Beatles%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Carole%20King%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Queen%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Michael%20Jackson%20Annotations.tar.gz", "http://www.isophonics.net/files/annotations/Zweieck%20Annotations.tar.gz", "http://www.cs.tut.fi/sgn/arg/paulus/beatles_sections_TUT.zip", "http://www.iua.upf.edu/~perfe/annotations/sections/beatles/structure_Beatles.rar"]

public_data_path = File.join(mirex_path, "public_data")
Dir.mkdir(public_data_path) unless File.directory?(public_data_path)
list_of_db_urls.each do |db_url|
  # Kernel#open no longer accepts URLs on Ruby 3+, so fetch with URI.open
  # explicitly; write the archive in binary mode, closing both handles via blocks.
  URI.open(db_url) do |remote|
    File.open(File.join(public_data_path, File.basename(db_url)), 'wb') do |local|
      local.print remote.read
    end
  end
end
132 | |
# # # # NOW, PLEASE EXIT THE SCRIPT, AND UNZIP ALL THOSE PACKAGES.
# # # # WHEN YOU'RE DONE, GO ONTO THE PARENT MATLAB FILE TO RUN THE ANALYSES.
puts "..done with that.\n\n"
# Fixed output typo ("apppears" -> "appears") and the follow-up script name:
# per the changeset description, the MATLAB script was renamed to use an
# underscore (2_generate_smith2013_ismir.m), since dashes are awkward in MATLAB.
puts "Script appears to have ended successfully. All files were downloaded and saved to " + public_data_path + "."
puts "To continue please unpack all zip files, start MATLAB, and run 2_generate_smith2013_ismir.m. You can read more on README."
puts "Important: be sure that the zip files unpack into the correct file structure. Again, see README for details."