comparison 1_get_mirex_estimates.rb @ 6:e2337cd691b1 tip

Finished writing the MATLAB code to replicate all observations made in the article. Added the article to the repository. Renamed the two main scripts ("1-get_mirex_estimates.rb" and "2-generate_smith2013_ismir.m") so that their names no longer contain dashes (the dashes were annoying to work with in MATLAB). Added a new Michael Jackson figure.
author Jordan Smith <jordan.smith@eecs.qmul.ac.uk>
date Wed, 05 Mar 2014 01:02:26 +0000
parents
children
comparison
equal deleted inserted replaced
5:8d896eec680e 6:e2337cd691b1
# Standard-library dependencies. The feature name must be lowercase:
# `require "CSV"` only resolves on case-insensitive file systems and raises
# LoadError everywhere else.
require "csv"
require "open-uri"
# require "simplexml"

# Root directory that receives everything this script downloads.
mirex_path = "/Users/jordan/Desktop/MIREX_data" # EDIT THIS TO BE YOUR OWN DESIRED PATH.
# IT WILL NEED TO HOLD ROUGHLY 70 MB OF DATA.
6
7
8 # tmp = File.open(filename,'w')
9 # tmptxt = []
10 # open(uri) {|f|
11 # f.each_line {|line| tmptxt.push(line)}
12 # }
13 # tmp.write(tmptxt)
14 # tmp.close
15 #
16
# Fetch the resource at +uri+ and save it to the local file +filename+.
#
# uri      - String naming the resource; anything URI.open accepts.
# filename - String path of the file to create or overwrite. The default "."
#            names a directory and cannot be written, so callers are expected
#            to pass a real file path.
#
# Returns nil. Errors raised while opening or reading the resource propagate.
def url_download(uri, filename=".")
  # URI.open is the supported open-uri entry point; since Ruby 3.0 a plain
  # Kernel#open no longer understands URLs. The resource is opened before the
  # destination so a failed fetch does not leave an empty file behind, and the
  # block form closes the remote handle.
  URI.open(uri) do |remote|
    # Binary mode: store the bytes exactly as received (no newline translation).
    File.open(filename, "wb") do |local|
      local.print remote.read
    end
  end
end
22
# Convert one downloaded ".js" result file into two plain-text files written
# next to it: "<base>_gt.txt" (from the first segment list) and
# "<base>_pred.txt" (from the second), where <base> is +filename+ without its
# last three characters (the ".js" extension).
#
# filename - String path of the ".js" file to convert.
#
# Returns nil.
# Raises ArgumentError when the file does not contain the two bracketed lists.
def convert_file(filename)
  base = filename[0..-4]

  # Splitting on "[" and "]" puts the first inner list at index 2 and the
  # second at index 4.
  # NOTE(review): assumes the layout `... [ [annotation], [estimate] ] ...` — confirm against a real file.
  sections = File.read(filename).split(/[\[\]]/)
  ann_src = sections[2]
  alg_src = sections[4]
  unless ann_src && alg_src
    raise ArgumentError, "#{filename}: expected two bracketed segment lists"
  end

  # Parse both lists before writing anything, so a malformed input does not
  # leave empty or half-written output files behind.
  ann_text = json_2_text(ann_src.split(/[\{\}]/))
  alg_text = json_2_text(alg_src.split(/[\{\}]/))

  File.write(base + "_gt.txt", ann_text)
  File.write(base + "_pred.txt", alg_text)
  nil
end
37
# Turn the pieces of one segment list (already split on "{" and "}") into a
# two-column, tab-separated text annotation.
#
# json - Array of Strings. Odd positions hold the entry bodies; even positions
#        hold whatever sat between the braces (separators, whitespace).
#
# Each entry body is split on ","; a row is built from the last
# space-separated token of the first field (as a Float) and the text after the
# last double quote in the third field. A closing row pairs the last token of
# the final entry's second field with the label "End".
# NOTE(review): the fields are presumably onset, offset and label — confirm
# against a real file. A label containing a comma would be truncated.
#
# Returns the rows joined by newlines, or "" when there are no entries.
def json_2_text(json)
  # Keep only the odd positions. Unlike an inclusive 1..length range this
  # never reads past the end, and it ignores trailing text after the last "}".
  entries = json.each_slice(2).map { |_between, body| body }.compact
  return "" if entries.empty?

  rows = entries.map do |entry|
    fields = entry.split(",")
    # For a LAB-style (3-column) row, also emit fields[1].split(" ")[-1].to_f
    # between these two values.
    [fields[0].split(" ")[-1].to_f, fields[2].split("\"")[-1]].join("\t")
  end

  # The end time comes from the last real entry, not from json[-1], which may
  # be a trailing separator.
  rows.push([entries.last.split(",")[1].split(" ")[-1].to_f, "End"].join("\t"))
  rows.join("\n")
end
51
52
# # # # PART 1: DOWNLOAD ALL THE STRUCTURAL ANALYSIS EVALUATION DATA PUBLISHED BY MIREX

# Algorithms and datasets whose results are fetched, and the MIREX year used in the URLs:
algos = ["SP1", "SMGA2", "MHRAF1", "SMGA1", "SBV1", "KSP2", "OYZS1", "KSP3", "KSP1"]
datasets = ["mrx09", "mrx10_1", "mrx10_2", "sal"]
year = "2012"
puts "Thanks for starting the script! Stay tuned for periodic updates."

# Build the mirex_path/DATASET/ALGORITHM directory tree and save each
# algorithm's per-track results CSV into its own directory:
Dir.mkdir(mirex_path) unless File.directory?(mirex_path)
puts("Downloading CSV files...\n")
results_root = File.join(("http://nema.lis.illinois.edu/nema_out/mirex"+year),"/results/struct")
datasets.each do |dataset_name|
  # One directory per dataset...
  dataset_dir = File.join(mirex_path, dataset_name)
  Dir.mkdir(dataset_dir) unless File.directory?(dataset_dir)
  algos.each do |algo_name|
    # ...and inside it one directory per algorithm.
    algo_dir = File.join(dataset_dir, algo_name)
    Dir.mkdir(algo_dir) unless File.directory?(algo_dir)
    # Fetch this dataset/algorithm pair's CSV into that directory.
    local_csv = File.join(algo_dir, "per_track_results.csv")
    remote_csv = File.join(results_root, dataset_name, algo_name, "per_track_results.csv")
    url_download(remote_csv, local_csv)
  end
end

puts "..done with that."
80
puts "Now we will download all the files output by each algorithm. This could take a while depending on your connection."
puts "Since this script points to " + datasets.length.to_s + " datasets and " + algos.length.to_s + " algorithms, you should expect to wait however long it takes between each of the next lines to appear, times " + (datasets.length*algos.length).to_s + "."

# Read each CSV file and download all the json files it points to:
datasets.each do |dset|
  algos.each do |algo|
    puts( "Starting to download "+dset+ " dataset for " + algo + " algorithm...\n")
    download_folder = File.join(mirex_path,dset,algo)
    rows = File.read(File.join(download_folder,"per_track_results.csv")).split("\n")
    # The first row is the header; every other row names one song in its second column.
    rows.drop(1).each do |row|
      song_id = row.split(",")[1]
      # Blank or truncated rows carry no song id, so there is nothing to fetch for them.
      next if song_id.nil? || song_id.empty?
      # The remote file name is the lowercased algorithm name, the word
      # "segments", and the song id with its underscores removed.
      url = "http://nema.lis.illinois.edu/nema_out/mirex#{year}/results/struct/#{dset}/#{algo.downcase}segments#{song_id.delete("_")}.js"
      url_download(url, File.join(download_folder, song_id + ".js"))
    end
  end
  puts("Done with " + dset + " dataset!\n")
end

puts "..done with that."
106
puts "Now, a much faster step: turning all the json files you downloaded into simpler text files."
# Walk every DATASET/ALGORITHM/*.js file under mirex_path; each one is converted
# into two text files (annotation and algorithm output) and its path is echoed
# as a progress indicator:
Dir.glob(File.join(mirex_path,"*","*","*.js")).each do |js_file|
  convert_file(js_file)
  puts js_file
end

puts "..done with that."
116
puts "Now, PART 2 of the script: we download all the zip files (from various websites) that contain the public collections of ground truth files. This will only take a couple minutes, depending on connection speed (it's about 4 MB total)."


# # # # PART 2: GET (AND CONVERT) THE ANNOTATION DATA PUBLISHED BY OTHERS

# Download all public annotation archives (unpacking them is left to the user):
list_of_db_urls = [
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-P-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-C-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-J-2001.CHORUS.zip",
  "https://staff.aist.go.jp/m.goto/RWC-MDB/AIST-Annotation/AIST.RWC-MDB-G-2001.CHORUS.zip",
  "http://www.music.mcgill.ca/~jordan/salami/releases/SALAMI_data_v1.2.zip",
  "http://www.ifs.tuwien.ac.at/mir/audiosegmentation/dl/ep_groundtruth_excl_Paulus.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_full.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced.zip",
  "http://musicdata.gforge.inria.fr/IRISA.RWC-MDB-P-2001.BLOCKS_v001.zip",
  "http://www.isophonics.net/files/annotations/The%20Beatles%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Carole%20King%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Queen%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Michael%20Jackson%20Annotations.tar.gz",
  "http://www.isophonics.net/files/annotations/Zweieck%20Annotations.tar.gz",
  "http://www.cs.tut.fi/sgn/arg/paulus/beatles_sections_TUT.zip",
  "http://www.iua.upf.edu/~perfe/annotations/sections/beatles/structure_Beatles.rar"
]

public_data_path = File.join(mirex_path,"public_data")
Dir.mkdir(public_data_path) unless File.directory?(public_data_path)
list_of_db_urls.each do |db_url|
  # Each archive keeps the last path component of its URL as its file name.
  archive_path = File.join(public_data_path,File.basename(db_url))
  # URI.open is the supported open-uri entry point; since Ruby 3.0 a plain
  # Kernel#open no longer understands URLs. Opening the remote side first
  # means a failed fetch does not leave an empty archive behind.
  URI.open(db_url) do |remote|
    # Binary mode: archives must be stored byte-for-byte.
    File.open(archive_path, 'wb') do |archive|
      archive.print remote.read
    end
  end
end
132
133 # # # # NOW, PLEASE EXIT THE SCRIPT, AND UNZIP ALL THOSE PACKAGES.
134 # # # # WHEN YOU'RE DONE, GO ONTO THE PARENT MATLAB FILE TO RUN THE ANALYSES.
135 puts "..done with that.\n\n"
136 puts "Script apppears to have ended successfully. All files were downloaded and saved to " + public_data_path +"."
137 puts "To continue please unpack all zip files, start MATLAB, and run 2-generate_smith2013_ismir.m. You can read more on README."
138 puts "Important: be sure that the zip files unpack into the correct file structure. Again, see README for details."