Mercurial > hg > svcore
comparison transform/FeatureExtractionModelTransformer.cpp @ 441:288f45533041
* Add region model and layer; improve assignment of model types to
feature extraction transforms with duration
author | Chris Cannam |
---|---|
date | Thu, 18 Sep 2008 16:08:14 +0000 |
parents | cff476cfce77 |
children | 55cff2c6e4a0 |
Comparison legend (line states used in the table below): equal, deleted, inserted, replaced
440:5746c559af15 | 441:288f45533041 |
---|---|
25 #include "data/model/SparseOneDimensionalModel.h" | 25 #include "data/model/SparseOneDimensionalModel.h" |
26 #include "data/model/SparseTimeValueModel.h" | 26 #include "data/model/SparseTimeValueModel.h" |
27 #include "data/model/EditableDenseThreeDimensionalModel.h" | 27 #include "data/model/EditableDenseThreeDimensionalModel.h" |
28 #include "data/model/DenseTimeValueModel.h" | 28 #include "data/model/DenseTimeValueModel.h" |
29 #include "data/model/NoteModel.h" | 29 #include "data/model/NoteModel.h" |
30 #include "data/model/RegionModel.h" | |
30 #include "data/model/FFTModel.h" | 31 #include "data/model/FFTModel.h" |
31 #include "data/model/WaveFileModel.h" | 32 #include "data/model/WaveFileModel.h" |
32 | 33 |
33 #include "TransformFactory.h" | 34 #include "TransformFactory.h" |
34 | 35 |
152 for (size_t i = 0; i < outputs.size(); ++i) { | 153 for (size_t i = 0; i < outputs.size(); ++i) { |
153 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl; | 154 // std::cerr << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput().toStdString() << "\"" << std::endl; |
154 if (m_transform.getOutput() == "" || | 155 if (m_transform.getOutput() == "" || |
155 outputs[i].identifier == m_transform.getOutput().toStdString()) { | 156 outputs[i].identifier == m_transform.getOutput().toStdString()) { |
156 m_outputFeatureNo = i; | 157 m_outputFeatureNo = i; |
157 m_descriptor = new Vamp::Plugin::OutputDescriptor | 158 m_descriptor = new Vamp::Plugin::OutputDescriptor(outputs[i]); |
158 (outputs[i]); | |
159 break; | 159 break; |
160 } | 160 } |
161 } | 161 } |
162 | 162 |
163 if (!m_descriptor) { | 163 if (!m_descriptor) { |
205 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: | 205 case Vamp::Plugin::OutputDescriptor::FixedSampleRate: |
206 modelRate = size_t(m_descriptor->sampleRate + 0.001); | 206 modelRate = size_t(m_descriptor->sampleRate + 0.001); |
207 break; | 207 break; |
208 } | 208 } |
209 | 209 |
210 if (binCount == 0) { | 210 bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2); |
211 | |
212 if (binCount == 0 && | |
213 (preDurationPlugin || !m_descriptor->hasDuration)) { | |
211 | 214 |
212 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, | 215 m_output = new SparseOneDimensionalModel(modelRate, modelResolution, |
213 false); | 216 false); |
214 | 217 |
215 } else if (binCount == 1) { | 218 } else if ((preDurationPlugin && binCount > 1 && |
219 (m_descriptor->sampleType == | |
220 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) || | |
221 (!preDurationPlugin && m_descriptor->hasDuration)) { | |
222 | |
223 // For plugins using the old v1 API without explicit duration, | |
224 // we treat anything that has multiple bins (i.e. that has the | |
225 // potential to have value and duration) and a variable sample | |
226 // rate as a note model, taking its values as pitch, duration | |
227 // and velocity (if present) respectively. This is the same | |
228 // behaviour as always applied by SV to these plugins in the | |
229 // past. | |
230 | |
231 // For plugins with the newer API, we treat anything with | |
232 // duration as either a note model with pitch and velocity, or | |
233 // a region model. | |
234 | |
235 // How do we know whether it's an interval or note model? | |
236 // What's the essential difference? Is a note model any | |
237 // interval model using a Hz or "MIDI pitch" scale? There | |
238 // isn't really a reliable test for "MIDI pitch"... Does a | |
239 // note model always have velocity? This is a good question | |
240 // to be addressed by accompanying RDF, but for the moment we | |
241 // will do the following... | |
242 | |
243 bool isNoteModel = false; | |
244 | |
245 // Regions have only value (and duration -- we can't extract a | |
246 // region model from an old-style plugin that doesn't support | |
247 // duration) | |
248 if (binCount > 1) isNoteModel = true; | |
249 | |
250 // Regions do not have units of Hz (a sweeping assumption!) | |
251 if (m_descriptor->unit == "Hz") isNoteModel = true; | |
252 | |
253 // If we had a "sparse 3D model", we would have the additional | |
254 // problem of determining whether to use that here (if bin | |
255 // count > 1). But we don't. | |
256 | |
257 if (isNoteModel) { | |
258 | |
259 NoteModel *model; | |
260 if (haveExtents) { | |
261 model = new NoteModel | |
262 (modelRate, modelResolution, minValue, maxValue, false); | |
263 } else { | |
264 model = new NoteModel | |
265 (modelRate, modelResolution, false); | |
266 } | |
267 model->setScaleUnits(m_descriptor->unit.c_str()); | |
268 m_output = model; | |
269 | |
270 } else { | |
271 | |
272 RegionModel *model; | |
273 if (haveExtents) { | |
274 model = new RegionModel | |
275 (modelRate, modelResolution, minValue, maxValue, false); | |
276 } else { | |
277 model = new RegionModel | |
278 (modelRate, modelResolution, false); | |
279 } | |
280 model->setScaleUnits(m_descriptor->unit.c_str()); | |
281 m_output = model; | |
282 } | |
283 | |
284 } else if (binCount == 1 || | |
285 (m_descriptor->sampleType == | |
286 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) { | |
287 | |
288 // Anything that is not a 1D, note, or interval model and that | |
289 // has only one value per result must be a sparse time value | |
290 // model. | |
291 | |
292 // Anything that is not a 1D, note, or interval model and that | |
293 // has a variable sample rate is also treated as a sparse time | |
294 // value model regardless of its bin count, because we lack a | |
295 // sparse 3D model. | |
216 | 296 |
217 SparseTimeValueModel *model; | 297 SparseTimeValueModel *model; |
218 if (haveExtents) { | 298 if (haveExtents) { |
219 model = new SparseTimeValueModel | 299 model = new SparseTimeValueModel |
220 (modelRate, modelResolution, minValue, maxValue, false); | 300 (modelRate, modelResolution, minValue, maxValue, false); |
224 } | 304 } |
225 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | 305 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); |
226 | 306 |
227 m_output = model; | 307 m_output = model; |
228 | 308 |
229 } else if (m_descriptor->sampleType == | |
230 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | |
231 | |
232 // We don't have a sparse 3D model, so interpret this as a | |
233 // note model. There's nothing to define which values to use | |
234 // as which parameters of the note -- for the moment let's | |
235 // treat the first as pitch, second as duration in frames, | |
236 // third (if present) as velocity. (Our note model doesn't | |
237 // yet store velocity.) | |
238 //!!! todo: ask the user! | |
239 | |
240 NoteModel *model; | |
241 if (haveExtents) { | |
242 model = new NoteModel | |
243 (modelRate, modelResolution, minValue, maxValue, false); | |
244 } else { | |
245 model = new NoteModel | |
246 (modelRate, modelResolution, false); | |
247 } | |
248 model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str()); | |
249 | |
250 m_output = model; | |
251 | |
252 } else { | 309 } else { |
310 | |
311 // Anything that is not a 1D, note, or interval model and that | |
312 // has a fixed sample rate and more than one value per result | |
313 // must be a dense 3D model. | |
253 | 314 |
254 EditableDenseThreeDimensionalModel *model = | 315 EditableDenseThreeDimensionalModel *model = |
255 new EditableDenseThreeDimensionalModel | 316 new EditableDenseThreeDimensionalModel |
256 (modelRate, modelResolution, binCount, false); | 317 (modelRate, modelResolution, binCount, false); |
257 | 318 |
539 } else { | 600 } else { |
540 frame = m_output->getEndFrame(); | 601 frame = m_output->getEndFrame(); |
541 } | 602 } |
542 } | 603 } |
543 | 604 |
544 if (binCount == 0) { | 605 // Rather than repeat the complicated tests from the constructor |
545 | 606 // to determine what sort of model we must be adding the features |
546 SparseOneDimensionalModel *model = | 607 // to, we instead test what sort of model the constructor decided |
608 // to create. | |
609 | |
610 if (isOutput<SparseOneDimensionalModel>()) { | |
611 | |
612 SparseOneDimensionalModel *model = | |
547 getConformingOutput<SparseOneDimensionalModel>(); | 613 getConformingOutput<SparseOneDimensionalModel>(); |
548 if (!model) return; | 614 if (!model) return; |
549 | 615 |
550 model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str())); | 616 model->addPoint(SparseOneDimensionalModel::Point |
617 (frame, feature.label.c_str())); | |
551 | 618 |
552 } else if (binCount == 1) { | 619 } else if (isOutput<SparseTimeValueModel>()) { |
553 | 620 |
554 float value = 0.0; | 621 float value = 0.0; |
555 if (feature.values.size() > 0) value = feature.values[0]; | 622 if (feature.values.size() > 0) value = feature.values[0]; |
556 | 623 |
557 SparseTimeValueModel *model = | 624 SparseTimeValueModel *model = |
558 getConformingOutput<SparseTimeValueModel>(); | 625 getConformingOutput<SparseTimeValueModel>(); |
559 if (!model) return; | 626 if (!model) return; |
560 | 627 |
561 model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str())); | 628 model->addPoint(SparseTimeValueModel::Point |
562 // std::cerr << "SparseTimeValueModel::addPoint(" << frame << ", " << value << "), " << feature.label.c_str() << std::endl; | 629 (frame, value, feature.label.c_str())); |
563 | 630 |
564 } else if (m_descriptor->sampleType == | 631 } else if (isOutput<NoteModel>() || isOutput<RegionModel>()) { |
565 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | 632 |
566 | 633 int index = 0; |
567 float pitch = 0.0; | 634 |
568 if (feature.values.size() > 0) pitch = feature.values[0]; | 635 float value = 0.0; |
636 if (feature.values.size() > index) { | |
637 value = feature.values[index++]; | |
638 } | |
569 | 639 |
570 float duration = 1; | 640 float duration = 1; |
571 if (feature.values.size() > 1) duration = feature.values[1]; | 641 if (feature.hasDuration) { |
642 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate); | |
643 } else { | |
644 if (feature.values.size() > index) { | |
645 duration = feature.values[index++]; | |
646 } | |
647 } | |
572 | 648 |
573 float velocity = 100; | 649 if (isOutput<NoteModel>()) { |
574 if (feature.values.size() > 2) velocity = feature.values[2]; | 650 |
575 if (velocity < 0) velocity = 127; | 651 float velocity = 100; |
576 if (velocity > 127) velocity = 127; | 652 if (feature.values.size() > index) { |
577 | 653 velocity = feature.values[index++]; |
578 NoteModel *model = getConformingOutput<NoteModel>(); | 654 } |
579 if (!model) return; | 655 if (velocity < 0) velocity = 127; |
580 | 656 if (velocity > 127) velocity = 127; |
581 model->addPoint(NoteModel::Point(frame, pitch, | 657 |
582 lrintf(duration), | 658 NoteModel *model = getConformingOutput<NoteModel>(); |
583 velocity / 127.f, | 659 if (!model) return; |
584 feature.label.c_str())); | 660 model->addPoint(NoteModel::Point(frame, value, // value is pitch |
661 lrintf(duration), | |
662 velocity / 127.f, | |
663 feature.label.c_str())); | |
664 } else { | |
665 RegionModel *model = getConformingOutput<RegionModel>(); | |
666 if (model) { | |
667 model->addPoint(RegionModel::Point(frame, value, | |
668 lrintf(duration), | |
669 feature.label.c_str())); | |
670 } else return; | |
671 } | |
585 | 672 |
586 } else { | 673 } else if (isOutput<EditableDenseThreeDimensionalModel>()) { |
587 | 674 |
588 DenseThreeDimensionalModel::Column values = feature.values; | 675 DenseThreeDimensionalModel::Column values = feature.values; |
589 | 676 |
590 EditableDenseThreeDimensionalModel *model = | 677 EditableDenseThreeDimensionalModel *model = |
591 getConformingOutput<EditableDenseThreeDimensionalModel>(); | 678 getConformingOutput<EditableDenseThreeDimensionalModel>(); |
592 if (!model) return; | 679 if (!model) return; |
593 | 680 |
594 model->setColumn(frame / model->getResolution(), values); | 681 model->setColumn(frame / model->getResolution(), values); |
682 | |
683 } else { | |
684 std::cerr << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << std::endl; | |
595 } | 685 } |
596 } | 686 } |
597 | 687 |
598 void | 688 void |
599 FeatureExtractionModelTransformer::setCompletion(int completion) | 689 FeatureExtractionModelTransformer::setCompletion(int completion) |
604 } | 694 } |
605 | 695 |
606 // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" | 696 // std::cerr << "FeatureExtractionModelTransformer::setCompletion(" |
607 // << completion << ")" << std::endl; | 697 // << completion << ")" << std::endl; |
608 | 698 |
609 if (binCount == 0) { | 699 if (isOutput<SparseOneDimensionalModel>()) { |
610 | 700 |
611 SparseOneDimensionalModel *model = | 701 SparseOneDimensionalModel *model = |
612 getConformingOutput<SparseOneDimensionalModel>(); | 702 getConformingOutput<SparseOneDimensionalModel>(); |
613 if (!model) return; | 703 if (!model) return; |
614 model->setCompletion(completion, true); //!!!m_context.updates); | 704 model->setCompletion(completion, true); |
615 | 705 |
616 } else if (binCount == 1) { | 706 } else if (isOutput<SparseTimeValueModel>()) { |
617 | 707 |
618 SparseTimeValueModel *model = | 708 SparseTimeValueModel *model = |
619 getConformingOutput<SparseTimeValueModel>(); | 709 getConformingOutput<SparseTimeValueModel>(); |
620 if (!model) return; | 710 if (!model) return; |
621 model->setCompletion(completion, true); //!!!m_context.updates); | 711 model->setCompletion(completion, true); |
622 | 712 |
623 } else if (m_descriptor->sampleType == | 713 } else if (isOutput<NoteModel>()) { |
624 Vamp::Plugin::OutputDescriptor::VariableSampleRate) { | 714 |
625 | 715 NoteModel *model = getConformingOutput<NoteModel>(); |
626 NoteModel *model = | |
627 getConformingOutput<NoteModel>(); | |
628 if (!model) return; | 716 if (!model) return; |
629 model->setCompletion(completion, true); //!!!m_context.updates); | 717 model->setCompletion(completion, true); |
630 | 718 |
631 } else { | 719 } else if (isOutput<RegionModel>()) { |
720 | |
721 RegionModel *model = getConformingOutput<RegionModel>(); | |
722 if (!model) return; | |
723 model->setCompletion(completion, true); | |
724 | |
725 } else if (isOutput<EditableDenseThreeDimensionalModel>()) { | |
632 | 726 |
633 EditableDenseThreeDimensionalModel *model = | 727 EditableDenseThreeDimensionalModel *model = |
634 getConformingOutput<EditableDenseThreeDimensionalModel>(); | 728 getConformingOutput<EditableDenseThreeDimensionalModel>(); |
635 if (!model) return; | 729 if (!model) return; |
636 model->setCompletion(completion, true); //!!!m_context.updates); | 730 model->setCompletion(completion, true); //!!!m_context.updates); |