annotate capnp/piper.capnp @ 199:01700726e02e

Copyright -> rights, and document
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 10 Feb 2017 17:12:48 +0000
parents ab7f270ae453
children 4635c5c2c4fc
rev   line source
cannam@197 1
cannam@197 2 # Piper audio feature extraction: schema for low-level operation
cannam@197 3 #
cannam@197 4 # This file is formatted to 130 characters width, in order to fit the
cannam@197 5 # comments next to the schema definitions.
cannam@197 6 #
cannam@197 7 # Copyright (c) 2015-2017 Queen Mary, University of London, provided
cannam@197 8 # under a BSD-style licence. See the file COPYING for details.
c@174 9
c@174 10 @0xc4b1c6c44c999206;
c@174 11
c@174 12 using Cxx = import "/capnp/c++.capnp";
c@174 13 $Cxx.namespace("piper");
c@174 14
c@174 15 struct Basic {
cannam@195 16 # Basic metadata common to many Piper structures (parameter descriptors, output descriptors, extractor static data).
cannam@195 17
cannam@196 18 identifier @0 :Text; # A computer-readable string, e.g. as referred to by PVPair.parameter or FSPair.output. Must match the regex /^[a-zA-Z0-9_-]+$/.
cannam@196 19 name @1 :Text; # A short human-readable name or label. Must be present.
cannam@196 20 description @2 :Text; # Optional, longer human-readable text that may accompany the name.
c@174 21 }
c@174 22
c@174 23 struct ParameterDescriptor {
cannam@196 24 # Properties of an adjustable parameter. A parameter's value is just a single
cannam@195 25 # float, but the descriptor explains how to interpret and present that value.
cannam@196 26 # A Piper feature extractor has a static list of parameters. The properties of
cannam@196 27 # a given parameter never change, in contrast to output descriptors, which
cannam@196 28 # may have different properties depending on the configuration of the extractor.
cannam@195 29
cannam@196 30 basic @0 :Basic; # Basic metadata about the parameter; its identifier is what Configuration.PVPair.parameter refers to.
cannam@196 31 unit @1 :Text; # Human-recognisable unit of the parameter (e.g. Hz). May be left empty.
cannam@196 32 minValue @2 :Float32 = 0.0; # Minimum value. Must be provided.
cannam@196 33 maxValue @3 :Float32 = 0.0; # Maximum value. Must be provided.
cannam@196 34 defaultValue @4 :Float32 = 0.0; # Value used if the parameter is not set to anything else. Must be provided.
cannam@196 35 isQuantized @5 :Bool = false; # True if parameter values are quantized to a particular resolution.
cannam@196 36 quantizeStep @6 :Float32 = 0.0; # Quantization resolution; only meaningful if isQuantized is true.
cannam@196 37 valueNames @7 :List(Text) = []; # Optional human-readable labels for the values; only meaningful if isQuantized is true.
c@174 38 }
c@174 39
c@174 40 enum SampleType {
cannam@195 41 # How returned features are spaced on the input timeline (see ConfiguredOutputDescriptor.sampleType).
cannam@195 42
cannam@196 43 oneSamplePerStep @0; # Each process input returns a feature aligned with that input's timestamp.
cannam@196 44 fixedSampleRate @1; # Features are equally spaced at a given sample rate (the output descriptor's sampleRate).
cannam@196 45 variableSampleRate @2; # Features have their own individual timestamps (so Feature.hasTimestamp must be true).
c@174 46 }
c@174 47
c@174 48 struct ConfiguredOutputDescriptor {
cannam@196 49 # Properties of an output, that is, a single stream of features produced
cannam@196 50 # in response to process and finish requests. A feature extractor may
cannam@196 51 # have any number of outputs, and it always calculates and returns features
cannam@196 52 # from all of them when processing; this is useful in cases where more
cannam@196 53 # than one feature can be easily calculated using a single method.
cannam@196 54 # This structure contains only the properties of an output that are not static,
cannam@196 55 # i.e. that may depend on the parameter values provided at configuration.
cannam@196 56
cannam@196 57 unit @0 :Text; # Human-recognisable unit of the bin values in output features. May be empty.
cannam@196 58 hasFixedBinCount @1 :Bool = false; # True if this output has an equal number of values in each returned feature.
cannam@196 59 binCount @2 :Int32 = 0; # Number of values per feature (size of Feature.featureValues), if hasFixedBinCount.
cannam@196 60 binNames @3 :List(Text) = []; # Optional human-readable labels for the value bins, if hasFixedBinCount.
cannam@196 61 hasKnownExtents @4 :Bool = false; # True if all feature values fall within the same fixed min/max range.
cannam@196 62 minValue @5 :Float32 = 0.0; # Minimum value in range for any value from this output, if hasKnownExtents.
cannam@196 63 maxValue @6 :Float32 = 0.0; # Maximum value in range for any value from this output, if hasKnownExtents.
cannam@196 64 isQuantized @7 :Bool = false; # True if feature values are quantized to a particular resolution.
cannam@196 65 quantizeStep @8 :Float32 = 0.0; # Quantization resolution, if isQuantized.
cannam@196 66 sampleType @9 :SampleType; # How returned features from this output are spaced on the input timeline.
cannam@196 67 sampleRate @10 :Float32 = 0.0; # Sample rate (features per second) if sampleType == fixedSampleRate.
cannam@196 68 hasDuration @11 :Bool = false; # True if features returned from this output will have a duration (see Feature.hasDuration).
c@174 69 }
c@174 70
c@174 71 struct OutputDescriptor {
cannam@196 72 # All the properties of an output, both static (the basic metadata) and
cannam@196 73 # potentially dependent on configuration parameters (the configured descriptor). Returned in ConfigurationResponse.outputs.
cannam@196 74
cannam@196 75 basic @0 :Basic; # Basic metadata about the output; its identifier is what FeatureSet.FSPair.output refers to.
cannam@196 76 configured @1 :ConfiguredOutputDescriptor; # Properties of the output that may depend on configuration parameters.
c@174 77 }
c@174 78
c@174 79 enum InputDomain {
cannam@196 80 # Whether a feature extractor requires time-domain audio input (i.e.
cannam@196 81 # "normal" or "unprocessed" audio samples) or frequency-domain input
cannam@196 82 # (i.e. resulting from windowed, usually overlapping, short-time
cannam@196 83 # Fourier transforms).
cannam@196 84
cannam@196 85 timeDomain @0; # The extractor requires time-domain audio samples as input.
cannam@196 86 frequencyDomain @1; # The extractor requires input to have been pre-processed using windowed STFTs.
c@174 87 }
c@174 88
c@174 89 struct ExtractorStaticData {
cannam@196 90 # Static properties of a feature extractor. That is, metadata about the
cannam@196 91 # extractor that are the same regardless of how you configure or run it.
cannam@196 92
cannam@199 93 key @0 :Text; # String that "globally" identifies the extractor; passed as LoadRequest.key to load it (see docs).
cannam@196 94 basic @1 :Basic; # Basic metadata about the extractor.
cannam@196 95 maker @2 :Text; # Human-readable text naming the author or vendor of the extractor.
cannam@199 96 rights @3 :Text; # Human-readable summary of copyright and/or licensing terms for the extractor.
cannam@199 97 version @4 :Int32; # Version number of extractor; must increase if new algorithm changes results.
cannam@199 98 category @5 :List(Text); # List of general->specific category labels for this extractor (see docs).
cannam@196 99 minChannelCount @6 :Int32; # Minimum number of input channels of audio this extractor can accept.
cannam@196 100 maxChannelCount @7 :Int32; # Maximum number of input channels of audio this extractor can accept.
cannam@196 101 parameters @8 :List(ParameterDescriptor); # List of configurable parameter properties for the feature extractor.
cannam@197 102 programs @9 :List(Text); # List of predefined programs (see Configuration.currentProgram). For backward-compatibility, not recommended.
cannam@196 103 inputDomain @10 :InputDomain; # Whether the extractor requires time-domain or frequency-domain input audio.
cannam@196 104 basicOutputInfo @11 :List(Basic); # Basic (static) metadata about all outputs; full output properties come from ConfigurationResponse.outputs.
c@174 105 }
c@174 106
c@174 107 struct RealTime {
cannam@196 108 # Time structure, used for both timestamps and durations. When used as a timestamp, this is relative to "start
cannam@196 109 # of audio".
cannam@196 110
cannam@196 111 sec @0 :Int32 = 0; # Number of whole seconds.
cannam@196 112 nsec @1 :Int32 = 0; # Number of nanoseconds. Must have same sign as sec unless sec == 0.
c@174 113 }
c@174 114
c@174 115 struct ProcessInput {
cannam@196 116 # Audio and timing input data provided to a process request (see ProcessRequest.processInput).
cannam@196 117
cannam@197 118 inputBuffers @0 :List(List(Float32)); # One inner list per channel, each a single block of audio data (time or frequency domain).
cannam@197 119 timestamp @1 :RealTime; # Time of start of block (time-domain) or "centre" of it (frequency-domain).
c@174 120 }
c@174 121
c@174 122 struct Feature {
cannam@196 123 # A single feature calculated and returned from a process or finish request.
cannam@196 124
cannam@197 125 hasTimestamp @0 :Bool = false; # True if feature has a timestamp. Must be true for a variableSampleRate output.
cannam@196 126 timestamp @1 :RealTime; # Timestamp of feature; only meaningful if hasTimestamp is true.
cannam@196 127 hasDuration @2 :Bool = false; # True if feature has a duration. Must be true if output's hasDuration is true.
cannam@196 128 duration @3 :RealTime; # Duration of feature; only meaningful if hasDuration is true.
cannam@196 129 label @4 :Text; # Optional human-readable text attached to feature.
cannam@196 130 featureValues @5 :List(Float32) = []; # The feature values themselves (of size binCount, if output hasFixedBinCount).
c@174 131 }
c@174 132
c@174 133 struct FeatureSet {
cannam@197 134 # The set of all features, across all outputs, calculated and returned from
cannam@197 135 # a single process or finish request (see ProcessResponse.features and FinishResponse.features).
cannam@197 136
c@174 137 struct FSPair {
cannam@197 138 # A mapping between one output's identifier and the ordered list of features for
cannam@197 139 # that output.
cannam@197 140
cannam@197 141 output @0 :Text; # Output id, matching the output's descriptor's basic identifier.
cannam@197 142 features @1 :List(Feature) = []; # Features calculated for that output during the current request, in time order.
c@174 143 }
cannam@197 144
cannam@197 145 featurePairs @0 :List(FSPair); # The feature lists for all outputs for which any features have been calculated.
c@174 146 }
c@174 147
cannam@191 148 struct Framing {
cannam@198 149 # Determines how audio should be split up into individual buffers for input.
cannam@198 150 # If the feature extractor accepts frequency-domain input, then this
cannam@198 151 # applies prior to the STFT transform.
cannam@197 152 #
cannam@197 153 # These values are sometimes mandatory, but in other contexts one or both may
cannam@197 154 # be set to zero to mean "don't care". See documentation for structures that
cannam@197 155 # include a framing field (Configuration, ConfigurationResponse) for details.
cannam@197 156
cannam@198 157 blockSize @0 :Int32; # Number of time-domain audio samples per buffer (on each channel).
cannam@198 158 stepSize @1 :Int32; # Number of samples to advance between buffers: equals blockSize for no overlap.
cannam@191 159 }
cannam@191 160
c@174 161 struct Configuration {
cannam@197 162 # Bundle of parameter values and other configuration data for a feature-
cannam@197 163 # extraction procedure. Appears in LoadResponse (as defaults) and in ConfigurationRequest.
cannam@197 164
c@174 165 struct PVPair {
cannam@197 166 # A mapping between one parameter's identifier and the value to set it to.
cannam@197 167
cannam@197 168 parameter @0 :Text; # Parameter id, matching the parameter's descriptor's basic identifier.
cannam@197 169 value @1 :Float32; # Value to set parameter to (within constraints given in parameter descriptor).
c@174 170 }
cannam@197 171
cannam@197 172 parameterValues @0 :List(PVPair); # Values for all parameters, or at least any that are to change from defaults.
cannam@197 173 currentProgram @1 :Text; # Selection of predefined program. For backward-compatibility, not recommended.
cannam@197 174 channelCount @2 :Int32; # Number of audio channels of input.
cannam@197 175 framing @3 :Framing; # Step and block size for framing the input (see Framing).
c@174 176 }
c@174 177
c@174 178 enum AdapterFlag {
cannam@198 179 # Flags that may be used when requesting a server to load a feature
cannam@198 180 # extractor (see LoadRequest.adapterFlags), to ask the server to do some of the work of framing and input
cannam@198 181 # conversion instead of leaving it to the client side. These affect the
cannam@198 182 # apparent behaviour of the loaded extractor. See also the constants adaptAllSafe and adaptAll.
cannam@198 183
cannam@198 184 adaptInputDomain @0; # Input-domain conversion, so the extractor always expects time-domain input.
cannam@198 185 adaptChannelCount @1; # Channel mixing or duplication, so any number of input channels is acceptable.
cannam@198 186 adaptBufferSize @2; # Framing, so the extractor accepts any blockSize of non-overlapping buffers.
c@174 187 }
c@174 188
c@174 189 const adaptAllSafe :List(AdapterFlag) =
cannam@198 190 [ adaptInputDomain, adaptChannelCount ];
cannam@198 191 # The set of adapter flags that can always be applied, leaving results unchanged. Omits adaptBufferSize (compare adaptAll).
c@174 192
c@174 193 const adaptAll :List(AdapterFlag) =
cannam@198 194 [ adaptInputDomain, adaptChannelCount, adaptBufferSize ];
cannam@198 195 # Every adapter flag. This is the set of adapter flags that may cause "equivalent" results to be returned (see documentation).
c@174 196
c@174 197 struct ListRequest {
cannam@198 198 # Request a server to provide a list of available feature extractors. Answered by a ListResponse.
cannam@198 199
cannam@198 200 from @0 :List(Text); # If non-empty, provide only extractors found in the given list of "libraries".
c@174 201 }
c@174 202
c@174 203 struct ListResponse {
cannam@198 204 # Response to a successful list request.
cannam@198 205
cannam@198 206 available @0 :List(ExtractorStaticData); # Static data about each available feature extractor, one entry per extractor.
c@174 207 }
c@174 208
c@174 209 struct LoadRequest {
cannam@198 210 # Request a server to load a feature extractor and return a handle to it.
cannam@198 211
cannam@198 212 key @0 :Text; # Key as found in the extractor's static data structure (ExtractorStaticData.key).
cannam@198 213 inputSampleRate @1 :Float32; # Sample rate for input audio. Properties of the extractor may depend on this.
cannam@198 214 adapterFlags @2 :List(AdapterFlag); # Optional set of flags asking the server to carry out framing and/or input conversion.
c@174 215 }
c@174 216
c@174 217 struct LoadResponse {
cannam@198 218 # Response to a successful load request.
cannam@198 219
cannam@198 220 handle @0 :Int32; # Handle used to refer to the loaded extractor in later configure, process and finish requests.
cannam@198 221 staticData @1 :ExtractorStaticData; # Static data about this feature extractor, identical to that in list response.
cannam@198 222 defaultConfiguration @2 :Configuration; # Extractor's default parameter values and preferred input framing.
c@174 223 }
c@174 224
c@174 225 struct ConfigurationRequest {
cannam@198 226 # Request a server to configure a loaded feature extractor and prepare
cannam@198 227 # it for use. This request must be carried out on a feature extractor
cannam@198 228 # before any process request can be made on it.
cannam@198 229
cannam@198 230 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
cannam@198 231 configuration @1 :Configuration; # Bundle of parameter values to set, and client's preferred input framing.
c@174 232 }
c@174 233
c@174 234 struct ConfigurationResponse {
cannam@198 235 # Response to a successful configuration request.
cannam@198 236
cannam@198 237 handle @0 :Int32; # Handle of extractor, as passed in the configuration request.
cannam@198 238 outputs @1 :List(OutputDescriptor); # Full set of properties (static and configured) of all outputs following configuration.
cannam@198 239 framing @2 :Framing; # Input framing that must be used for subsequent process requests.
c@174 240 }
c@174 241
c@174 242 struct ProcessRequest {
cannam@198 243 # Request a server to process a buffer of audio using a loaded and
cannam@198 244 # configured feature extractor. Answered by a ProcessResponse.
cannam@198 245
cannam@198 246 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
cannam@198 247 processInput @1 :ProcessInput; # Audio in the input domain, with framing as in the configuration response.
c@174 248 }
c@174 249
c@174 250 struct ProcessResponse {
cannam@198 251 # Response to a successful process request.
cannam@198 252
cannam@198 253 handle @0 :Int32; # Handle of extractor, as passed in the process request.
cannam@198 254 features @1 :FeatureSet; # All features, across all outputs, calculated during this process request.
c@174 255 }
c@174 256
c@174 257 struct FinishRequest {
cannam@198 258 # Request a server to finish processing and unload a loaded feature
cannam@198 259 # extractor. This request may be made at any time -- the extractor does
cannam@198 260 # not have to have been configured or used. The extractor handle cannot
cannam@198 261 # be used again with this server afterwards. Answered by a FinishResponse.
cannam@198 262
cannam@198 263 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
c@174 264 }
c@174 265
c@174 266 struct FinishResponse {
cannam@198 267 # Response to a successful finish request.
cannam@198 268
cannam@198 269 handle @0 :Int32; # Handle of extractor, as passed in the finish request. Must not be used again.
cannam@198 270 features @1 :FeatureSet; # Features the extractor has calculated now that it knows all input has ended.
c@174 271 }
c@174 272
c@174 273 struct Error {
cannam@198 274 # Response to any request that fails (carried as the error member of RpcResponse.response).
cannam@198 275
cannam@198 276 code @0 :Int32; # Error code (specific values are not enumerated in this file).
cannam@198 277 message @1 :Text; # Error message.
c@174 278 }
c@174 279
c@174 280 struct RpcRequest {
c@174 281 # Request bundle for use when using Cap'n Proto serialisation without
c@174 282 # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.
cannam@198 283
c@175 284 id :union {
cannam@198 285 # Identifier used solely to associate a response packet with its
cannam@198 286 # originating request. Server does not examine the contents of this,
cannam@198 287 # it just copies the request id into the response.
cannam@198 288
c@175 289 number @0 :Int32; # Request id given as a number.
c@175 290 tag @1 :Text; # Request id given as a text tag.
c@175 291 none @2 :Void; # No request id supplied.
c@175 292 }
cannam@198 293
c@174 294 request :union {
cannam@198 295 # For more details, see the documentation for the individual request structures.
cannam@198 296 # Ordinals continue from @2 because both unions number their members within the same enclosing struct.
cannam@198 297
cannam@198 298 list @3 :ListRequest; # Provide a list of available feature extractors.
cannam@198 299 load @4 :LoadRequest; # Load a feature extractor and return a handle to it.
cannam@198 300 configure @5 :ConfigurationRequest; # Configure a loaded feature extractor, set parameters, and prepare it for use.
cannam@198 301 process @6 :ProcessRequest; # Process a single fixed-size buffer of audio and return calculated features.
cannam@198 302 finish @7 :FinishRequest; # Get any remaining features and unload the extractor.
c@174 303 }
c@174 304 }
c@174 305
c@174 306 struct RpcResponse {
c@174 307 # Response bundle for use when using Cap'n Proto serialisation without
c@174 308 # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.
cannam@198 309
c@175 310 id :union {
cannam@198 311 # Identifier used solely to associate a response packet with its
cannam@198 312 # originating request. Server does not examine the contents of this,
cannam@198 313 # it just copies the request id into the response.
cannam@198 314
c@175 315 number @0 :Int32; # Numeric id, copied from the request.
c@175 316 tag @1 :Text; # Text tag id, copied from the request.
c@175 317 none @2 :Void; # No id (the request did not supply one).
c@175 318 }
cannam@198 319
c@174 320 response :union {
cannam@198 321 # For more details, see the documentation for the individual response structures.
cannam@198 322 # Ordinals continue from @2 because both unions number their members within the same enclosing struct.
cannam@198 323
cannam@198 324 error @3 :Error; # The request (of whatever type) failed.
cannam@198 325 list @4 :ListResponse; # List succeeded, here is static data about the requested extractors.
cannam@198 326 load @5 :LoadResponse; # Load succeeded, here is a handle for the loaded extractor.
cannam@198 327 configure @6 :ConfigurationResponse; # Configure succeeded, ready to process, here are values such as block size.
cannam@198 328 process @7 :ProcessResponse; # Process succeeded, here are all features calculated from this input block.
cannam@198 329 finish @8 :FinishResponse; # Finish succeeded, extractor unloaded, here are all remaining features.
c@174 330 }
c@174 331 }
c@174 332