comparison transform/FeatureExtractionModelTransformer.cpp @ 441:288f45533041

* Add region model and layer; improve assignment of model types to feature extraction transforms with duration
author Chris Cannam
date Thu, 18 Sep 2008 16:08:14 +0000
parents cff476cfce77
children 55cff2c6e4a0
comparison
equal deleted inserted replaced
440:5746c559af15 441:288f45533041
25 #include "data/model/SparseOneDimensionalModel.h" 25 #include "data/model/SparseOneDimensionalModel.h"
26 #include "data/model/SparseTimeValueModel.h" 26 #include "data/model/SparseTimeValueModel.h"
27 #include "data/model/EditableDenseThreeDimensionalModel.h" 27 #include "data/model/EditableDenseThreeDimensionalModel.h"
28 #include "data/model/DenseTimeValueModel.h" 28 #include "data/model/DenseTimeValueModel.h"
29 #include "data/model/NoteModel.h" 29 #include "data/model/NoteModel.h"
30 #include "data/model/RegionModel.h"
30 #include "data/model/FFTModel.h" 31 #include "data/model/FFTModel.h"
31 #include "data/model/WaveFileModel.h" 32 #include "data/model/WaveFileModel.h"
32 33
33 #include "TransformFactory.h" 34 #include "TransformFactory.h"
34 35
152 for (size_t i = 0; i < outputs.size(); ++i) { 153 for (size_t i = 0; i < outputs.size(); ++i) {
153 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl; 154 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl;
154 if (m_transform.getOutput() == "" || 155 if (m_transform.getOutput() == "" ||
155 outputs[i].identifier == m_transform.getOutput().toStdString()) { 156 outputs[i].identifier == m_transform.getOutput().toStdString()) {
156 m_outputFeatureNo = i; 157 m_outputFeatureNo = i;
157 m_descriptor = new Vamp::Plugin::OutputDescriptor 158 m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]);
158 (outputs[i]);
159 break; 159 break;
160 } 160 }
161 } 161 }
162 162
163 if (!m_descriptor) { 163 if (!m_descriptor) {
205 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: 205 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
206 modelRate = size_t(m_descriptor->sampleRate + 0.001); 206 modelRate = size_t(m_descriptor->sampleRate + 0.001);
207 break; 207 break;
208 } 208 }
209 209
210 if (binCount == 0) { 210 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
211
212 if (binCount == 0 &&
213 (preDurationPlugin || !m_descriptor->hasDuration)) {
211 214
212 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, 215 m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
213 false); 216 false);
214 217
215 } else if (binCount == 1) { 218 } else if ((preDurationPlugin && binCount > 1 &&
219 (m_descriptor->sampleType ==
220 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
221 (!preDurationPlugin && m_descriptor->hasDuration)) {
222
223 // For plugins using the old v1 API without explicit duration,
224 // we treat anything that has multiple bins (i.e. that has the
225 // potential to have value and duration) and a variable sample
226 // rate as a note model, taking its values as pitch, duration
227 // and velocity (if present) respectively. This is the same
228 // behaviour as always applied by SV to these plugins in the
229 // past.
230
231 // For plugins with the newer API, we treat anything with
232 // duration as either a note model with pitch and velocity, or
233 // a region model.
234
235 // How do we know whether it's an interval or note model?
236 // What's the essential difference? Is a note model any
237 // interval model using a Hz or "MIDI pitch" scale? There
238 // isn't really a reliable test for "MIDI pitch"... Does a
239 // note model always have velocity? This is a good question
240 // to be addressed by accompanying RDF, but for the moment we
241 // will do the following...
242
243 bool isNoteModel = false;
244
245 // Regions have only value (and duration -- we can't extract a
246 // region model from an old-style plugin that doesn't support
247 // duration)
248 if (binCount > 1) isNoteModel = true;
249
250 // Regions do not have units of Hz (a sweeping assumption!)
251 if (m_descriptor->unit == "Hz") isNoteModel = true;
252
253 // If we had a "sparse 3D model", we would have the additional
254 // problem of determining whether to use that here (if bin
255 // count > 1). But we don't.
256
257 if (isNoteModel) {
258
259 NoteModel *model;
260 if (haveExtents) {
261 model = new NoteModel
262 (modelRate, modelResolution, minValue, maxValue, false);
263 } else {
264 model = new NoteModel
265 (modelRate, modelResolution, false);
266 }
267 model->setScaleUnits(m_descriptor->unit.c_str());
268 m_output = model;
269
270 } else {
271
272 RegionModel *model;
273 if (haveExtents) {
274 model = new RegionModel
275 (modelRate, modelResolution, minValue, maxValue, false);
276 } else {
277 model = new RegionModel
278 (modelRate, modelResolution, false);
279 }
280 model->setScaleUnits(m_descriptor->unit.c_str());
281 m_output = model;
282 }
283
284 } else if (binCount == 1 ||
285 (m_descriptor->sampleType ==
286 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
287
288 // Anything that is not a 1D, note, or interval model and that
289 // has only one value per result must be a sparse time value
290 // model.
291
292 // Anything that is not a 1D, note, or interval model and that
293 // has a variable sample rate is also treated as a sparse time
294 // value model regardless of its bin count, because we lack a
295 // sparse 3D model.
216 296
217 SparseTimeValueModel *model; 297 SparseTimeValueModel *model;
218 if (haveExtents) { 298 if (haveExtents) {
219 model = new SparseTimeValueModel 299 model = new SparseTimeValueModel
220 (modelRate, modelResolution, minValue, maxValue, false); 300 (modelRate, modelResolution, minValue, maxValue, false);
224 } 304 }
225 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); 305 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
226 306
227 m_output = model; 307 m_output = model;
228 308
229 } else if (m_descriptor->sampleType ==
230 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
231
232 // We don't have a sparse 3D model, so interpret this as a
233 // note model. There's nothing to define which values to use
234 // as which parameters of the note -- for the moment let's
235 // treat the first as pitch, second as duration in frames,
236 // third (if present) as velocity. (Our note model doesn't
237 // yet store velocity.)
238 //!!! todo: ask the user!
239
240 NoteModel *model;
241 if (haveExtents) {
242 model = new NoteModel
243 (modelRate, modelResolution, minValue, maxValue, false);
244 } else {
245 model = new NoteModel
246 (modelRate, modelResolution, false);
247 }
248 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());
249
250 m_output = model;
251
252 } else { 309 } else {
310
311 // Anything that is not a 1D, note, or interval model and that
312 // has a fixed sample rate and more than one value per result
313 // must be a dense 3D model.
253 314
254 EditableDenseThreeDimensionalModel *model = 315 EditableDenseThreeDimensionalModel *model =
255 new EditableDenseThreeDimensionalModel 316 new EditableDenseThreeDimensionalModel
256 (modelRate, modelResolution, binCount, false); 317 (modelRate, modelResolution, binCount, false);
257 318
539 } else { 600 } else {
540 frame = m_output->getEndFrame(); 601 frame = m_output->getEndFrame();
541 } 602 }
542 } 603 }
543 604
544 if (binCount == 0) { 605 // Rather than repeat the complicated tests from the constructor
545 606 // to determine what sort of model we must be adding the features
546 SparseOneDimensionalModel *model = 607 // to, we instead test what sort of model the constructor decided
608 // to create.
609
610 if (isOutput<SparseOneDimensionalModel>()) {
611
612 SparseOneDimensionalModel *model =
547 getConformingOutput<SparseOneDimensionalModel>(); 613 getConformingOutput<SparseOneDimensionalModel>();
548 if (!model) return; 614 if (!model) return;
549 615
550 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); 616 model->addPoint(SparseOneDimensionalModel::Point
617 (frame, feature.label.c_str()));
551 618
552 } else if (binCount == 1) { 619 } else if (isOutput<SparseTimeValueModel>()) {
553 620
554 float value = 0.0; 621 float value = 0.0;
555 if (feature.values.size() > 0) value = feature.values[0]; 622 if (feature.values.size() > 0) value = feature.values[0];
556 623
557 SparseTimeValueModel *model = 624 SparseTimeValueModel *model =
558 getConformingOutput<SparseTimeValueModel>(); 625 getConformingOutput<SparseTimeValueModel>();
559 if (!model) return; 626 if (!model) return;
560 627
561 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); 628 model->addPoint(SparseTimeValueModel::Point
562 // std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl; 629 (frame, value, feature.label.c_str()));
563 630
564 } else if (m_descriptor->sampleType == 631 } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) {
565 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { 632
566 633 int index = 0;
567 float pitch = 0.0; 634
568 if (feature.values.size() > 0) pitch = feature.values[0]; 635 float value = 0.0;
636 if (feature.values.size() > index) {
637 value = feature.values[index++];
638 }
569 639
570 float duration = 1; 640 float duration = 1;
571 if (feature.values.size() > 1) duration = feature.values[1]; 641 if (feature.hasDuration) {
642 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
643 } else {
644 if (feature.values.size() > index) {
645 duration = feature.values[index++];
646 }
647 }
572 648
573 float velocity = 100; 649 if (isOutput<NoteModel>()) {
574 if (feature.values.size() > 2) velocity = feature.values[2]; 650
575 if (velocity < 0) velocity = 127; 651 float velocity = 100;
576 if (velocity > 127) velocity = 127; 652 if (feature.values.size() > index) {
577 653 velocity = feature.values[index++];
578 NoteModel *model = getConformingOutput<NoteModel>(); 654 }
579 if (!model) return; 655 if (velocity < 0) velocity = 127;
580 656 if (velocity > 127) velocity = 127;
581 model->addPoint(NoteModel::Point(frame, pitch, 657
582 lrintf(duration), 658 NoteModel *model = getConformingOutput<NoteModel>();
583 velocity / 127.f, 659 if (!model) return;
584 feature.label.c_str())); 660 model->addPoint(NoteModel::Point(frame, value, // value is pitch
661 lrintf(duration),
662 velocity / 127.f,
663 feature.label.c_str()));
664 } else {
665 RegionModel *model = getConformingOutput<RegionModel>();
666 if (model) {
667 model->addPoint(RegionModel::Point(frame, value,
668 lrintf(duration),
669 feature.label.c_str()));
670 } else return;
671 }
585 672
586 } else { 673 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
587 674
588 DenseThreeDimensionalModel::Column values = feature.values; 675 DenseThreeDimensionalModel::Column values = feature.values;
589 676
590 EditableDenseThreeDimensionalModel *model = 677 EditableDenseThreeDimensionalModel *model =
591 getConformingOutput<EditableDenseThreeDimensionalModel>(); 678 getConformingOutput<EditableDenseThreeDimensionalModel>();
592 if (!model) return; 679 if (!model) return;
593 680
594 model->setColumn(frame / model->getResolution(), values); 681 model->setColumn(frame / model->getResolution(), values);
682
683 } else {
684 std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl;
595 } 685 }
596 } 686 }
597 687
598 void 688 void
599 FeatureExtractionModelTransformer::setCompletion(int completion) 689 FeatureExtractionModelTransformer::setCompletion(int completion)
604 } 694 }
605 695
606 // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" 696 // std::cerr << "FeatureExtractionModelTransformer::setCompletion("
607 // << completion << ")" << std::endl; 697 // << completion << ")" << std::endl;
608 698
609 if (binCount == 0) { 699 if (isOutput<SparseOneDimensionalModel>()) {
610 700
611 SparseOneDimensionalModel *model = 701 SparseOneDimensionalModel *model =
612 getConformingOutput<SparseOneDimensionalModel>(); 702 getConformingOutput<SparseOneDimensionalModel>();
613 if (!model) return; 703 if (!model) return;
614 model->setCompletion(completion, true); //!!!m_context.updates); 704 model->setCompletion(completion, true);
615 705
616 } else if (binCount == 1) { 706 } else if (isOutput<SparseTimeValueModel>()) {
617 707
618 SparseTimeValueModel *model = 708 SparseTimeValueModel *model =
619 getConformingOutput<SparseTimeValueModel>(); 709 getConformingOutput<SparseTimeValueModel>();
620 if (!model) return; 710 if (!model) return;
621 model->setCompletion(completion, true); //!!!m_context.updates); 711 model->setCompletion(completion, true);
622 712
623 } else if (m_descriptor->sampleType == 713 } else if (isOutput<NoteModel>()) {
624 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { 714
625 715 NoteModel *model = getConformingOutput<NoteModel>();
626 NoteModel *model =
627 getConformingOutput<NoteModel>();
628 if (!model) return; 716 if (!model) return;
629 model->setCompletion(completion, true); //!!!m_context.updates); 717 model->setCompletion(completion, true);
630 718
631 } else { 719 } else if (isOutput<RegionModel>()) {
720
721 RegionModel *model = getConformingOutput<RegionModel>();
722 if (!model) return;
723 model->setCompletion(completion, true);
724
725 } else if (isOutput<EditableDenseThreeDimensionalModel>()) {
632 726
633 EditableDenseThreeDimensionalModel *model = 727 EditableDenseThreeDimensionalModel *model =
634 getConformingOutput<EditableDenseThreeDimensionalModel>(); 728 getConformingOutput<EditableDenseThreeDimensionalModel>();
635 if (!model) return; 729 if (!model) return;
636 model->setCompletion(completion, true); //!!!m_context.updates); 730 model->setCompletion(completion, true); //!!!m_context.updates);