comparison runner/JsonLDFeatureWriter.cpp @ 223:f4315a0ade89 json-ld

added JSON-LD feature writer files
author alo
date Mon, 22 Feb 2016 14:27:19 +0000
parents
children c5fc82b8caab
comparison
equal deleted inserted replaced
222:6153429ebf89 223:f4315a0ade89
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Sonic Annotator
5 A utility for batch feature extraction from audio files.
6 Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
7 Copyright 2007-2014 QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "JsonLDFeatureWriter.h"
17
18 using namespace std;
19 using Vamp::Plugin;
20 using Vamp::PluginBase;
21
22 #include "base/Exceptions.h"
23 #include "rdf/PluginRDFIndexer.h"
24
25 #include <QFileInfo>
26 #include <QTextCodec>
27 #include <QUuid>
28
29 #include "version.h"
30
31 JsonLDFeatureWriter::JsonLDFeatureWriter() :
32 FileFeatureWriter(SupportOneFilePerTrackTransform |
33 SupportOneFilePerTrack |
34 SupportOneFileTotal |
35 SupportStdOut,
36 "json"),
37 m_network(false),
38 m_networkRetrieved(false),
39 m_n(1),
40 m_m(1),
41 m_digits(6)
42 {
43 }
44
45 JsonLDFeatureWriter::~JsonLDFeatureWriter()
46 {
47 }
48
49 string
50 JsonLDFeatureWriter::getDescription() const
51 {
52 return "Write features to JSON files in JSON-LD format. WARNING: This is a provisional implementation! The output format may change in future releases to comply more effectively with the specification. Please report any problems you find with the current implementation.";
53 }
54
55 JsonLDFeatureWriter::ParameterList
56 JsonLDFeatureWriter::getSupportedParameters() const
57 {
58 ParameterList pl = FileFeatureWriter::getSupportedParameters();
59 Parameter p;
60
61 p.name = "digits";
62 p.description = "Specify the number of significant digits to use when printing transform outputs. Outputs are represented internally using single-precision floating-point, so digits beyond the 8th or 9th place are usually meaningless. The default is 6.";
63 p.hasArg = true;
64 pl.push_back(p);
65
66 p.name = "network";
67 p.description = "Attempt to retrieve RDF descriptions of plugins from network, if not available locally.";
68 p.hasArg = false;
69 pl.push_back(p);
70
71 return pl;
72 }
73
74 void
75 JsonLDFeatureWriter::setParameters(map<string, string> &params)
76 {
77 FileFeatureWriter::setParameters(params);
78
79 for (map<string, string>::iterator i = params.begin();
80 i != params.end(); ++i) {
81 if (i->first == "network") {
82 m_network = true;
83 } else if (i->first == "digits") {
84 int digits = atoi(i->second.c_str());
85 if (digits <= 0 || digits > 100) {
86 cerr << "JsonLDFeatureWriter: ERROR: Invalid or out-of-range value for number of significant digits: " << i->second << endl;
87 cerr << "JsonLDFeatureWriter: NOTE: Continuing with default settings" << endl;
88 } else {
89 m_digits = digits;
90 }
91 }
92 }
93 }
94
95 void
96 JsonLDFeatureWriter::setTrackMetadata(QString trackId, TrackMetadata metadata)
97 {
98 m_trackMetadata[trackId] = metadata;
99 }
100
101 static double
102 realTime2Sec(const Vamp::RealTime &r)
103 {
104 return r / Vamp::RealTime(1, 0);
105 }
106
107 void
108 JsonLDFeatureWriter::write(QString trackId,
109 const Transform &transform,
110 const Plugin::OutputDescriptor& ,
111 const Plugin::FeatureList& features,
112 std::string /* summaryType */)
113 {
114 QString transformId = transform.getIdentifier();
115
116 QTextStream *sptr = getOutputStream
117 (trackId, transformId, QTextCodec::codecForName("UTF-8"));
118 if (!sptr) {
119 throw FailedToOpenOutputStream(trackId, transformId);
120 }
121
122 DataId did(trackId, transform);
123
124 if (m_data.find(did) == m_data.end()) {
125 identifyTask(transform);
126 m_streamTracks[sptr].insert(trackId);
127 m_streamTasks[sptr].insert(m_tasks[transformId]);
128 m_streamData[sptr].insert(did);
129 }
130
131 if (m_trackTimelineGuids.find(trackId) == m_trackTimelineGuids.end()) {
132 QUuid uuid = QUuid::createUuid();
133 m_trackTimelineGuids[trackId] = QString(uuid.toString().replace("{", "").replace("}", ""));
134 }
135
136 QString d = m_data[did];
137
138 for (int i = 0; i < int(features.size()); ++i) {
139
140 if (d != "") {
141 d += ",\n";
142 }
143
144 d += "\t\t\t{ ";
145
146 Plugin::Feature f(features[i]);
147
148 QString timestr = f.timestamp.toString().c_str();
149 timestr.replace(QRegExp("^ +"), "");
150
151 QString durstr = "0.0";
152 if (f.hasDuration) {
153 durstr = f.duration.toString().c_str();
154 durstr.replace(QRegExp("^ +"), "");
155 d += " \"@type\": \"tl:Interval\", ";
156 }
157 else{
158 d += " \"@type\": \"tl:Instant\", ";
159 }
160
161 d += QString("\"tl:at\": %1 ")
162 .arg(timestr);
163
164 d += QString(", \"tl:timeline\": \"%1\" ")
165 .arg(m_trackTimelineGuids[trackId]);
166
167 if (f.hasDuration) {
168 d += QString(", \"tl:duration\": %2")
169 .arg(durstr);
170 }
171
172 if (f.label != "") {
173 if (f.values.empty()) {
174 d += QString(", \"afo:value\": \"%2\"").arg(f.label.c_str());
175 } else {
176 d += QString(", \"rdfs:label\": \"%2\"").arg(f.label.c_str());
177 }
178 }
179
180 if (!f.values.empty()) {
181 d += QString(", \"afo:value\": ");
182 if (f.values.size() > 1) {
183 d += "[ ";
184 }
185 for (int j = 0; j < int(f.values.size()); ++j) {
186 if (isnan(f.values[j])) {
187 d += "\"NaN\"";
188 } else if (isinf(f.values[j])) {
189 d += "\"Inf\"";
190 } else {
191 d += QString("%1").arg(f.values[j], 0, 'g', m_digits);
192 }
193 if (j + 1 < int(f.values.size())) {
194 d += ", ";
195 }
196 }
197 if (f.values.size() > 1) {
198 d += " ]";
199 }
200 }
201
202 d += " }";
203 }
204
205 m_data[did] = d;
206 }
207
208 void
209 JsonLDFeatureWriter::setNofM(int n, int m)
210 {
211 if (m_singleFileName != "" || m_stdout) {
212 m_n = n;
213 m_m = m;
214 } else {
215 m_n = 1;
216 m_m = 1;
217 }
218 }
219
220 void
221 JsonLDFeatureWriter::finish()
222 {
223 for (FileStreamMap::const_iterator stri = m_streams.begin();
224 stri != m_streams.end(); ++stri) {
225
226 QTextStream *sptr = stri->second;
227 QTextStream &stream = *sptr;
228
229 bool firstInStream = true;
230
231 for (TrackIds::const_iterator tri = m_streamTracks[sptr].begin();
232 tri != m_streamTracks[sptr].end(); ++tri) {
233
234 TrackId trackId = *tri;
235
236 if (firstInStream) {
237 if (m_streamTracks[sptr].size() > 1 || (m_m > 1 && m_n == 1)) {
238 stream << "[\n";
239 }
240 }
241
242 if (!firstInStream || (m_m > 1 && m_n > 1)) {
243 stream << ",\n";
244 }
245
246 stream << "{\n" << writeContext();
247 stream << "\t\"@type\": \"mo:Track\",\n"
248 << QString("\t\"mo:available_as\": \"%1\"").arg(QFileInfo(trackId).filePath());
249
250 if (m_trackMetadata.find(trackId) != m_trackMetadata.end()) {
251
252 if (m_trackMetadata[trackId].title != "") {
253 stream << QString(",\n\t\"dc:title\": \"%1\"")
254 .arg(m_trackMetadata[trackId].title);
255 }
256
257 if (m_trackMetadata[trackId].maker != "") {
258 stream << QString(",\n\t\"mo:artist\": { "
259 "\t\t\"@type\": \"mo:MusicArtist\",\n"
260 "\t\t\"foaf:name\": \"%1\" "
261 "\t}")
262 .arg(m_trackMetadata[trackId].maker);
263 }
264
265 QString durstr = m_trackMetadata[trackId].duration.toString().c_str();
266 durstr.replace(QRegExp("^ +"), "");
267 stream << QString(",\n\t\"mo:encodes\": {\n"
268 "\t\t\"@type\": \"mo:Signal\",\n"
269 "\t\t\"mo:time\": {\n "
270 "\t\t\t\"@type\": \"tl:Interval\",\n"
271 "\t\t\t\"tl:duration\": \"PT%1S\",\n"
272 "\t\t\t\"tl:timeline\": { \"@type\": \"tl:Timeline\", \"@id\": \"%2\" } "
273 "\n\t\t}").arg(durstr).arg(m_trackTimelineGuids[trackId]);
274 }
275
276 stream << "\n\t},\n";
277 stream << "\t\"afo:features\": [\n";
278
279 bool firstInTrack = true;
280
281 for (Tasks::const_iterator ti = m_streamTasks[sptr].begin();
282 ti != m_streamTasks[sptr].end(); ++ti) {
283
284 Task task = *ti;
285
286 for (DataIds::const_iterator di = m_streamData[sptr].begin();
287 di != m_streamData[sptr].end(); ++di) {
288
289 DataId did = *di;
290
291 QString trackId = did.first;
292 Transform transform = did.second;
293
294 if (m_tasks[transform.getIdentifier()] != task) continue;
295
296 QString data = m_data[did];
297
298 if (!firstInTrack) {
299 stream << ",\n";
300 }
301
302 stream << QString
303 ("\t{\n"
304 "\t\t\"@type\": \"afv:%1\",\n"
305 "\t\t\"afo:computed_by\": {\n"
306 "%2\t\t},\n"
307 "\t\t\"afo:values\": [\n")
308 .arg(transform.getOutput().replace(0, 1, transform.getOutput().at(0).toUpper()))
309 .arg(writeTransformToObjectContents(transform));
310
311 stream << data;
312
313 stream << "\n\t\t]\n\t}";
314 firstInTrack = false;
315 }
316 }
317
318 stream << "\n\t]";
319
320 stream << "\n}";
321 firstInStream = false;
322 }
323
324 if (!firstInStream) {
325 if (m_streamTracks[sptr].size() > 1 || (m_m > 1 && m_n == m_m)) {
326 stream << "\n\t]";
327 }
328 stream << "\n";
329 }
330 }
331
332 m_streamTracks.clear();
333 m_streamTasks.clear();
334 m_streamData.clear();
335 m_data.clear();
336
337 FileFeatureWriter::finish();
338 }
339
340 void
341 JsonLDFeatureWriter::loadRDFDescription(const Transform &transform)
342 {
343 QString pluginId = transform.getPluginIdentifier();
344 if (m_rdfDescriptions.find(pluginId) != m_rdfDescriptions.end()) return;
345
346 if (m_network && !m_networkRetrieved) {
347 PluginRDFIndexer::getInstance()->indexConfiguredURLs();
348 m_networkRetrieved = true;
349 }
350
351 m_rdfDescriptions[pluginId] = PluginRDFDescription(pluginId);
352
353 if (m_rdfDescriptions[pluginId].haveDescription()) {
354 cerr << "NOTE: Have RDF description for plugin ID \""
355 << pluginId << "\"" << endl;
356 } else {
357 cerr << "NOTE: No RDF description for plugin ID \""
358 << pluginId << "\"" << endl;
359 if (!m_network) {
360 cerr << " Consider using the --jams-network option to retrieve plugin descriptions" << endl;
361 cerr << " from the network where possible." << endl;
362 }
363 }
364 }
365
366 void
367 JsonLDFeatureWriter::identifyTask(const Transform &transform)
368 {
369 QString transformId = transform.getIdentifier();
370 if (m_tasks.find(transformId) != m_tasks.end()) return;
371
372 loadRDFDescription(transform);
373
374 Task task = UnknownTask;
375
376 QString pluginId = transform.getPluginIdentifier();
377 QString outputId = transform.getOutput();
378
379 const PluginRDFDescription &desc = m_rdfDescriptions[pluginId];
380
381 if (desc.haveDescription()) {
382
383 PluginRDFDescription::OutputDisposition disp =
384 desc.getOutputDisposition(outputId);
385
386 QString af = "http://purl.org/ontology/af/";
387
388 if (disp == PluginRDFDescription::OutputSparse) {
389
390 QString eventUri = desc.getOutputEventTypeURI(outputId);
391
392 //!!! todo: allow user to prod writer for task type
393
394 if (eventUri == af + "Note") {
395 task = NoteTask;
396 } else if (eventUri == af + "Beat") {
397 task = BeatTask;
398 } else if (eventUri == af + "ChordSegment") {
399 task = ChordTask;
400 } else if (eventUri == af + "KeyChange") {
401 task = KeyTask;
402 } else if (eventUri == af + "KeySegment") {
403 task = KeyTask;
404 } else if (eventUri == af + "Onset") {
405 task = OnsetTask;
406 } else if (eventUri == af + "NonTonalOnset") {
407 task = OnsetTask;
408 } else if (eventUri == af + "Segment") {
409 task = SegmentTask;
410 } else if (eventUri == af + "SpeechSegment") {
411 task = SegmentTask;
412 } else if (eventUri == af + "StructuralSegment") {
413 task = SegmentTask;
414 } else {
415 cerr << "WARNING: Unsupported event type URI <"
416 << eventUri << ">, proceeding with UnknownTask type"
417 << endl;
418 }
419
420 } else {
421
422 cerr << "WARNING: Cannot currently write dense or track-level outputs to JAMS format (only sparse ones). Will proceed using UnknownTask type, but this probably isn't going to work" << endl;
423 }
424 }
425
426 m_tasks[transformId] = task;
427 }
428
429 QString
430 JsonLDFeatureWriter::getTaskKey(Task task)
431 {
432 switch (task) {
433 case UnknownTask: return "unknown";
434 case BeatTask: return "beat";
435 case OnsetTask: return "onset";
436 case ChordTask: return "chord";
437 case SegmentTask: return "segment";
438 case KeyTask: return "key";
439 case NoteTask: return "note";
440 case MelodyTask: return "melody";
441 case PitchTask: return "pitch";
442 }
443 return "unknown";
444 }
445
446 QString
447 JsonLDFeatureWriter::writeTransformToObjectContents(const Transform &t)
448 {
449 QString json;
450 QString stpl("\t\t\t\"%1\": \"%2\",\n");
451 QString ntpl("\t\t\t\"%1\": %2,\n");
452
453 json += stpl.arg("@type").arg("vamp:Transform");
454 json += stpl.arg("vamp:plugin_id").arg(t.getPluginIdentifier());
455 json += stpl.arg("vamp:output_id").arg(t.getOutput());
456
457 if (t.getSummaryType() != Transform::NoSummary) {
458 json += stpl.arg("vamp:summary_type")
459 .arg(Transform::summaryTypeToString(t.getSummaryType()));
460 }
461
462 if (t.getPluginVersion() != QString()) {
463 json += stpl.arg("vamp:plugin_version").arg(t.getPluginVersion());
464 }
465
466 if (t.getProgram() != QString()) {
467 json += stpl.arg("vamp:program").arg(t.getProgram());
468 }
469
470 if (t.getStepSize() != 0) {
471 json += ntpl.arg("vamp:step_size").arg(t.getStepSize());
472 }
473
474 if (t.getBlockSize() != 0) {
475 json += ntpl.arg("vamp:block_size").arg(t.getBlockSize());
476 }
477
478 if (t.getWindowType() != HanningWindow) {
479 json += stpl.arg("vamp:window_type")
480 .arg(Window<float>::getNameForType(t.getWindowType()).c_str());
481 }
482
483 if (t.getStartTime() != RealTime::zeroTime) {
484 json += ntpl.arg("tl:start")
485 .arg(t.getStartTime().toDouble(), 0, 'g', 9);
486 }
487
488 if (t.getDuration() != RealTime::zeroTime) {
489 json += ntpl.arg("tl:duration")
490 .arg(t.getDuration().toDouble(), 0, 'g', 9);
491 }
492
493 if (t.getSampleRate() != 0) {
494 json += ntpl.arg("vamp:sample_rate").arg(t.getSampleRate());
495 }
496
497 if (!t.getParameters().empty()) {
498 json += QString("\t\t\t\"vamp:parameter_binding\": [\n");
499 Transform::ParameterMap parameters = t.getParameters();
500 for (Transform::ParameterMap::const_iterator i = parameters.begin();
501 i != parameters.end(); ++i) {
502 if (i != parameters.begin()) {
503 json += ",\n";
504 }
505 QString name = i->first;
506 float value = i->second;
507 json += QString("\t\t\t\t{\n");
508 json += QString("\t\t\t\t\t\"@type\": \"vamp:Parameter\",\n");
509 json += QString("\t\t\t\t\t\"vamp:identifier\": \"%1\",\n").arg(name);
510 json += QString("\t\t\t\t\t\"vamp:value\": %1").arg(value, 0, 'g', 8);
511 json += QString("\n\t\t\t\t}");
512 }
513 json += QString("\n\t\t\t],\n");
514 }
515
516 // no trailing comma on final property:
517 json += QString("\t\t\t\"vamp:transform_id\": \"%1\",\n").arg(t.getIdentifier());
518 json += QString("\t\t\t\"afo:implemented_in\": {\n");
519 json += QString("\t\t\t\t\"@type\": \"afo:SoftwareAgent\",\n");
520 json += QString("\t\t\t\t\"afo:name\": \"Sonic Annotator\",\n");
521 json += QString("\t\t\t\t\"afo:version\": \"%1\" \n").arg(RUNNER_VERSION);
522 json += QString("\t\t\t}\n");
523
524 return json;
525 }
526
527 QString
528 JsonLDFeatureWriter::writeContext() {
529 QString context;
530 context += QString("\t\"@context\": {\n");
531 context += QString("\t\t\"foaf\": \"http://xmlns.com/foaf/0.1/\",\n");
532 context += QString("\t\t\"afo\": \"http://sovarr.c4dm.eecs.qmul.ac.uk/af/ontology/1.1#\",\n");
533 context += QString("\t\t\"afv\": \"http://sovarr.c4dm.eecs.qmul.ac.uk/af/vocabulary/1.1#\",\n");
534 context += QString("\t\t\"mo\": \"http://purl.org/ontology/mo/\",\n");
535 context += QString("\t\t\"dc\": \"http://purl.org/dc/elements/1.1/\",\n");
536 context += QString("\t\t\"tl\": \"http://purl.org/NET/c4dm/timeline.owl#\",\n");
537 context += QString("\t\t\"vamp\": \"http://purl.org/ontology/vamp/\"\n");
538 context += QString("\t},\n");
539 return context;
540 }