changeset 197:2b142542990b

More docs
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 10 Feb 2017 10:56:24 +0000
parents 9a36c8850b1b
children ab7f270ae453
files capnp/piper.capnp
diffstat 1 files changed, 44 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/capnp/piper.capnp	Thu Feb 09 19:00:11 2017 +0000
+++ b/capnp/piper.capnp	Fri Feb 10 10:56:24 2017 +0000
@@ -1,3 +1,11 @@
+
+# Piper audio feature extraction: schema for low-level operation
+#
+# This file is formatted to a width of 130 characters, in order to fit the
+# comments next to the schema definitions.
+#
+# Copyright (c) 2015-2017 Queen Mary, University of London, provided
+# under a BSD-style licence. See the file COPYING for details.
 
 @0xc4b1c6c44c999206;
 
@@ -91,7 +99,7 @@
     minChannelCount    @6  :Int32;                # Minimum number of input channels of audio this extractor can accept.
     maxChannelCount    @7  :Int32;                # Maximum number of input channels of audio this extractor can accept.
     parameters         @8  :List(ParameterDescriptor);    # List of configurable parameter properties for the feature extractor.
-    programs           @9  :List(Text);           # ??? review
+    programs           @9  :List(Text);           # List of predefined programs. For backward compatibility; not recommended.
     inputDomain        @10 :InputDomain;          # Whether the extractor requires time-domain or frequency-domain input audio.
     basicOutputInfo    @11 :List(Basic);          # Basic metadata about all of the outputs of the extractor.
 }
@@ -107,14 +115,14 @@
 struct ProcessInput {
     # Audio and timing input data provided to a process request.
 
-    inputBuffers       @0  :List(List(Float32));  # For each input channel, a single block of audio data (time or frequency domain).
-    timestamp          @1  :RealTime;             # Time of start of input block (time-domain) or "centre" of it (frequency-domain).
+    inputBuffers       @0  :List(List(Float32));  # A single block of audio data (time or frequency domain) for each channel.
+    timestamp          @1  :RealTime;             # Time of start of block (time-domain) or "centre" of it (frequency-domain).
 }
 
 struct Feature {
     # A single feature calculated and returned from a process or finish request.
 
-    hasTimestamp       @0  :Bool        = false;  # True if feature has a timestamp. Must be true if on a variableSampleRate output.
+    hasTimestamp       @0  :Bool        = false;  # True if feature has a timestamp. Must be true for a variableSampleRate output.
     timestamp          @1  :RealTime;             # Timestamp of feature, if hasTimestamp.
     hasDuration        @2  :Bool        = false;  # True if feature has a duration. Must be true if output's hasDuration is true.
     duration           @3  :RealTime;             # Duration of feature, if hasDuration.
@@ -123,27 +131,48 @@
 }
 
 struct FeatureSet {
+    # The set of all features, across all outputs, calculated and returned from
+    # a single process or finish request.
+
     struct FSPair {
-        output         @0  :Text;
-        features       @1  :List(Feature) = [];
+        # A mapping from an output identifier to the ordered list of features
+        # for that output.
+
+        output         @0  :Text;                 # Output id, matching the basic identifier in the output's descriptor.
+        features       @1  :List(Feature) = [];   # Features calculated for that output during the current request, in time order.
     }
-    featurePairs       @0  :List(FSPair);
+    
+    featurePairs       @0  :List(FSPair);         # The feature lists for all outputs for which any features have been calculated.
 }
 
 struct Framing {
-    stepSize           @0  :Int32;
-    blockSize          @1  :Int32;
+    # Determines how audio should be split up into frames (blocks, chunks,
+    # buffers, etc.) for input. If the feature extractor accepts frequency-domain
+    # input, then this framing applies prior to the STFT.
+    #
+    # These values are sometimes mandatory, but in other contexts one or both may
+    # be set to zero to mean "don't care". See documentation for structures that
+    # include a framing field for details.
+    
+    blockSize          @0  :Int32;                # Number of time-domain audio samples per frame (on each channel).
+    stepSize           @1  :Int32;                # Number of samples to advance between frames: equals blockSize for no overlap.
 }
 
 struct Configuration {
+    # Bundle of parameter values and other configuration data for a
+    # feature-extraction procedure.
+
     struct PVPair {
-        parameter      @0  :Text;
-        value          @1  :Float32;
+        # A mapping from a parameter identifier to the value to set for it.
+
+        parameter      @0  :Text;                 # Parameter id, matching the basic identifier in the parameter's descriptor.
+        value          @1  :Float32;              # Value to set parameter to (within constraints given in parameter descriptor).
     }
-    parameterValues    @0  :List(PVPair);
-    currentProgram     @1  :Text;
-    channelCount       @2  :Int32;
-    framing            @3  :Framing;
+    
+    parameterValues    @0  :List(PVPair);         # Values for all parameters, or at least any that are to change from defaults.
+    currentProgram     @1  :Text;                 # Selection of predefined program. For backward compatibility; not recommended.
+    channelCount       @2  :Int32;                # Number of audio channels of input.
+    framing            @3  :Framing;              # Step and block size for framing the input.
 }
 
 enum AdapterFlag {