annotate capnp/piper.capnp @ 216:72a3b8faba89 tip master

programParameters should not have a pattern enforced on its keys - a program name can be anything
author Chris Cannam <cannam@all-day-breakfast.com>
date Wed, 08 Apr 2020 14:57:24 +0100
parents 8923b382c055
children
rev   line source
cannam@197 1
cannam@197 2 # Piper audio feature extraction: schema for low-level operation
cannam@197 3 #
cannam@197 4 # This file is formatted to 130 characters width, in order to fit the
cannam@197 5 # comments next to the schema definitions.
cannam@197 6 #
cannam@197 7 # Copyright (c) 2015-2017 Queen Mary, University of London, provided
cannam@197 8 # under a BSD-style licence. See the file COPYING for details.
c@174 9
c@174 10 @0xc4b1c6c44c999206;
c@174 11
c@174 12 using Cxx = import "/capnp/c++.capnp";
c@174 13 $Cxx.namespace("piper");
c@174 14
c@174 15 struct Basic {
cannam@195 16 # Basic metadata common to many Piper structures (parameters, outputs and extractors each carry one).
cannam@195 17
cannam@196 18 identifier @0 :Text; # A computer-readable string. Must be non-empty and match the regex /^[a-zA-Z0-9_-]+$/.
cannam@196 19 name @1 :Text; # A short human-readable name or label. Must be present.
cannam@196 20 description @2 :Text; # Optional human-readable descriptive text that may accompany the name.
c@174 21 }
c@174 22
c@174 23 struct ParameterDescriptor {
cannam@196 24 # Properties of an adjustable parameter. A parameter's value is just a single
cannam@195 25 # float, but the descriptor explains how to interpret and present that value.
cannam@196 26 # A Piper feature extractor has a static list of parameters. The properties of
cannam@196 27 # a given parameter never change, in contrast to output descriptors, which
cannam@196 28 # may have different properties depending on the configuration of the extractor.
cannam@195 29
cannam@196 30 basic @0 :Basic; # Basic metadata (identifier, name, description) about the parameter.
cannam@196 31 unit @1 :Text; # Human-recognisable unit of the parameter (e.g. Hz). May be left empty.
cannam@196 32 minValue @2 :Float32 = 0.0; # Minimum value. Must be provided.
cannam@196 33 maxValue @3 :Float32 = 0.0; # Maximum value. Must be provided.
cannam@196 34 defaultValue @4 :Float32 = 0.0; # Default if the parameter is not set to anything else. Must be provided.
cannam@196 35 isQuantized @5 :Bool = false; # True if parameter values are quantized to a particular resolution.
cannam@196 36 quantizeStep @6 :Float32 = 0.0; # Quantization resolution. Only meaningful if isQuantized.
cannam@196 37 valueNames @7 :List(Text) = []; # Optional human-readable labels for the values. Only meaningful if isQuantized.
c@174 38 }
c@174 39
c@174 40 enum SampleType {
cannam@195 41 # How returned features are spaced on the input timeline (see ConfiguredOutputDescriptor.sampleType).
cannam@195 42
cannam@196 43 oneSamplePerStep @0; # Each process input returns a feature aligned with that input's timestamp.
cannam@196 44 fixedSampleRate @1; # Features are equally spaced at a given sample rate.
cannam@196 45 variableSampleRate @2; # Features have their own individual timestamps.
c@174 46 }
c@174 47
cannam@206 48 struct StaticOutputDescriptor {
cannam@206 49
cannam@206 50 # Properties of an output, that is, a single stream of features
cannam@206 51 # produced in response to process and finish requests. A feature
cannam@206 52 # extractor may have any number of outputs, and it always
cannam@206 53 # calculates and returns features from all of them when
cannam@206 54 # processing; this is useful in cases where more than one feature
cannam@206 55 # can be easily calculated using a single method.
cannam@206 56 #
cannam@206 57 # This structure contains the properties of an output that are
cannam@206 58 # static, i.e. that do not depend on the parameter values provided
cannam@206 59 # at configuration, excluding the Basic struct properties like identifier
cannam@206 60 # and description. The Basic struct properties are not included
cannam@206 61 # for historical reasons: they were already referenced separately
cannam@206 62 # in the OutputDescriptor and ExtractorStaticData before this
cannam@206 63 # struct was introduced.
cannam@206 64
cannam@206 65 typeURI @0 :Text; # URI indicating the sort of feature that this output returns (see docs).
cannam@206 66 }
cannam@206 67
c@174 68 struct ConfiguredOutputDescriptor {
cannam@196 69 # Properties of an output, that is, a single stream of features produced
cannam@196 70 # in response to process and finish requests. A feature extractor may
cannam@196 71 # have any number of outputs, and it always calculates and returns features
cannam@196 72 # from all of them when processing; this is useful in cases where more
cannam@196 73 # than one feature can be easily calculated using a single method.
cannam@196 74 # This structure contains the properties of an output that are not static,
cannam@196 75 # i.e. that may depend on the parameter values provided at configuration. (Compare StaticOutputDescriptor.)
cannam@196 76
cannam@196 77 unit @0 :Text; # Human-recognisable unit of the bin values in output features. May be empty.
cannam@196 78 hasFixedBinCount @1 :Bool = false; # True if this output has an equal number of values in each returned feature.
cannam@196 79 binCount @2 :Int32 = 0; # Number of values per feature for this output, if hasFixedBinCount.
cannam@196 80 binNames @3 :List(Text) = []; # Optional human-readable labels for the value bins, if hasFixedBinCount.
cannam@196 81 hasKnownExtents @4 :Bool = false; # True if all feature values fall within the same fixed min/max range.
cannam@196 82 minValue @5 :Float32 = 0.0; # Minimum value in range for any value from this output, if hasKnownExtents.
cannam@196 83 maxValue @6 :Float32 = 0.0; # Maximum value in range for any value from this output, if hasKnownExtents.
cannam@196 84 isQuantized @7 :Bool = false; # True if feature values are quantized to a particular resolution.
cannam@196 85 quantizeStep @8 :Float32 = 0.0; # Quantization resolution, if isQuantized.
cannam@196 86 sampleType @9 :SampleType; # How returned features from this output are spaced on the input timeline.
cannam@196 87 sampleRate @10 :Float32 = 0.0; # Sample rate (features per second) if sampleType == fixedSampleRate.
cannam@196 88 hasDuration @11 :Bool = false; # True if features returned from this output will have a duration (see Feature.hasDuration).
c@174 89 }
c@174 90
c@174 91 struct OutputDescriptor {
cannam@206 92 # All the properties of an output, both static (the basic metadata and static
cannam@206 93 # descriptor) and potentially dependent on configuration parameters (the
cannam@206 94 # configured descriptor).
cannam@196 95
cannam@196 96 basic @0 :Basic; # Basic metadata about the output.
cannam@196 97 configured @1 :ConfiguredOutputDescriptor; # Properties of the output that may depend on configuration parameters.
cannam@206 98 static @2 :StaticOutputDescriptor; # Properties (other than the basic metadata) that do not depend on configuration parameters.
c@174 99 }
c@174 100
c@174 101 enum InputDomain {
cannam@196 102 # Whether a feature extractor requires time-domain audio input (i.e.
cannam@196 103 # "normal" or "unprocessed" audio samples) or frequency-domain input
cannam@196 104 # (i.e. resulting from windowed, usually overlapping, short-time
cannam@196 105 # Fourier transforms).
cannam@196 106
cannam@196 107 timeDomain @0; # The extractor requires time-domain audio samples as input.
cannam@196 108 frequencyDomain @1; # The extractor requires input to have been pre-processed using windowed STFTs.
c@174 109 }
c@174 110
c@174 111 struct ExtractorStaticData {
cannam@196 112 # Static properties of a feature extractor. That is, metadata about the
cannam@196 113 # extractor that are the same regardless of how you configure or run it.
cannam@196 114
cannam@199 115 key @0 :Text; # String that "globally" identifies the extractor, used to load it (see docs).
cannam@196 116 basic @1 :Basic; # Basic metadata about the extractor.
cannam@196 117 maker @2 :Text; # Human-readable text naming the author or vendor of the extractor.
cannam@199 118 rights @3 :Text; # Human-readable summary of copyright and/or licensing terms for the extractor.
cannam@199 119 version @4 :Int32; # Version number of extractor; must increase if new algorithm changes results.
cannam@199 120 category @5 :List(Text); # List of general->specific category labels for this extractor (see docs).
cannam@196 121 minChannelCount @6 :Int32; # Minimum number of input channels of audio this extractor can accept.
cannam@196 122 maxChannelCount @7 :Int32; # Maximum number of input channels of audio this extractor can accept.
cannam@196 123 parameters @8 :List(ParameterDescriptor); # List of configurable parameter properties for the feature extractor.
cannam@215 124 programs @9 :List(Text); # List of names of predefined programs.
cannam@196 125 inputDomain @10 :InputDomain; # Whether the extractor requires time-domain or frequency-domain input audio.
cannam@196 126 basicOutputInfo @11 :List(Basic); # Basic metadata about all of the outputs of the extractor.
cannam@206 127
cannam@206 128 struct SOPair {
cannam@206 129 # A mapping between output identifier and static descriptor for
cannam@206 130 # that output.
cannam@206 131
cannam@206 132 output @0 :Text; # Output id, matching the basic identifier in the output's descriptor.
cannam@206 133 static @1 :StaticOutputDescriptor; # Static descriptor for that output.
cannam@206 134 }
cannam@206 135
cannam@206 136 staticOutputInfo @12 :List(SOPair); # Static descriptors for all outputs that have any static metadata.
c@174 137 }
c@174 138
c@174 139 struct RealTime {
cannam@196 140 # Time structure, expressed as seconds plus nanoseconds. When used as a
cannam@196 141 # timestamp, this is relative to "start of audio".
cannam@196 142
cannam@196 143 sec @0 :Int32 = 0; # Number of seconds.
cannam@196 144 nsec @1 :Int32 = 0; # Number of nanoseconds. Must have same sign as sec unless sec == 0.
c@174 145 }
c@174 146
c@174 147 struct ProcessInput {
cannam@196 148 # Audio and timing input data provided to a process request (see ProcessRequest).
cannam@196 149
cannam@197 150 inputBuffers @0 :List(List(Float32)); # A single block of audio data (time or frequency domain) for each channel.
cannam@197 151 timestamp @1 :RealTime; # Time of start of block (time-domain) or "centre" of it (frequency-domain).
c@174 152 }
c@174 153
c@174 154 struct Feature {
cannam@196 155 # A single feature calculated and returned from a process or finish request.
cannam@196 156
cannam@197 157 hasTimestamp @0 :Bool = false; # True if feature has a timestamp. Must be true for a variableSampleRate output.
cannam@196 158 timestamp @1 :RealTime; # Timestamp of feature. Only meaningful if hasTimestamp.
cannam@196 159 hasDuration @2 :Bool = false; # True if feature has a duration. Must be true if output's hasDuration is true.
cannam@196 160 duration @3 :RealTime; # Duration of feature. Only meaningful if hasDuration.
cannam@196 161 label @4 :Text; # Optional human-readable text attached to the feature.
cannam@196 162 featureValues @5 :List(Float32) = []; # The feature values themselves (of size binCount, if output hasFixedBinCount).
c@174 163 }
c@174 164
c@174 165 struct FeatureSet {
cannam@197 166 # The set of all features, across all outputs, calculated and returned from
cannam@197 167 # a single process or finish request.
cannam@197 168
c@174 169 struct FSPair {
cannam@197 170 # A mapping between output identifier and ordered list of features for
cannam@197 171 # that output.
cannam@197 172
cannam@197 173 output @0 :Text; # Output id, matching the basic identifier in the output's descriptor.
cannam@197 174 features @1 :List(Feature) = []; # Features calculated for that output during the current request, in time order.
c@174 175 }
cannam@197 176
cannam@197 177 featurePairs @0 :List(FSPair); # The feature lists for all outputs for which any features have been calculated.
c@174 178 }
c@174 179
cannam@191 180 struct Framing {
cannam@198 181 # Determines how audio should be split up into individual buffers for input.
cannam@198 182 # If the feature extractor accepts frequency-domain input, then this
cannam@198 183 # applies prior to the STFT.
cannam@197 184 #
cannam@197 185 # These values are sometimes mandatory, but in other contexts one or both may
cannam@197 186 # be set to zero to mean "don't care". See documentation for structures that
cannam@197 187 # include a framing field for details.
cannam@197 188
cannam@198 189 blockSize @0 :Int32; # Number of time-domain audio samples per buffer (on each channel).
cannam@198 190 stepSize @1 :Int32; # Number of samples to advance between buffers: equals blockSize for no overlap.
cannam@191 191 }
cannam@191 192
c@174 193 struct Configuration {
cannam@197 194 # Bundle of parameter values and other configuration data for a feature-
cannam@197 195 # extraction procedure.
cannam@197 196
c@174 197 struct PVPair {
cannam@197 198 # A mapping between parameter identifier and value.
cannam@197 199
cannam@197 200 parameter @0 :Text; # Parameter id, matching the basic identifier in the parameter's descriptor.
cannam@197 201 value @1 :Float32; # Value to set parameter to (within constraints given in parameter descriptor).
c@174 202 }
cannam@197 203
cannam@197 204 parameterValues @0 :List(PVPair); # Values for all parameters, or at least any that are to change from defaults.
cannam@197 205 currentProgram @1 :Text; # Selection of predefined program. Present for backward compatibility; not recommended.
cannam@197 206 channelCount @2 :Int32; # Number of audio channels of input.
cannam@197 207 framing @3 :Framing; # Step and block size for framing the input.
c@174 208 }
c@174 209
c@174 210 enum AdapterFlag {
cannam@198 211 # Flags that may be used when requesting a server to load a feature
cannam@198 212 # extractor, to ask the server to do some of the work of framing and input
cannam@198 213 # conversion instead of leaving it to the client side. These affect the
cannam@198 214 # apparent behaviour of the loaded extractor. (Passed in LoadRequest.adapterFlags.)
cannam@198 215
cannam@198 216 adaptInputDomain @0; # Input-domain conversion, so the extractor always expects time-domain input.
cannam@198 217 adaptChannelCount @1; # Channel mixing or duplication, so any number of input channels is acceptable.
cannam@198 218 adaptBufferSize @2; # Framing, so the extractor accepts any blockSize of non-overlapping buffers.
c@174 219 }
c@174 220
c@174 221 const adaptAllSafe :List(AdapterFlag) =
cannam@198 222 [ adaptInputDomain, adaptChannelCount ];
cannam@198 223 # The set of adapter flags that can always be applied, leaving results unchanged (omits adaptBufferSize).
c@174 224
c@174 225 const adaptAll :List(AdapterFlag) =
cannam@198 226 [ adaptInputDomain, adaptChannelCount, adaptBufferSize ];
cannam@198 227 # All of the adapter flags: a set that may cause "equivalent" results to be returned (see documentation).
c@174 228
c@174 229 struct ListRequest {
cannam@198 230 # Request a server to provide a list of available feature extractors.
cannam@198 231 # A successful request is answered with a ListResponse.
cannam@198 232 from @0 :List(Text); # If non-empty, provide only extractors found in the given list of "libraries".
c@174 233 }
c@174 234
c@174 235 struct ListResponse {
cannam@198 236 # Response to a successful list request (see ListRequest).
cannam@198 237
cannam@198 238 available @0 :List(ExtractorStaticData); # List of static data about available feature extractors.
c@174 239 }
c@174 240
c@174 241 struct LoadRequest {
cannam@198 242 # Request a server to load a feature extractor and return a handle to it.
cannam@198 243 # A successful request is answered with a LoadResponse.
cannam@198 244 key @0 :Text; # Key as found in the extractor's static data structure (ExtractorStaticData.key).
cannam@198 245 inputSampleRate @1 :Float32; # Sample rate for input audio. Properties of the extractor may depend on this.
cannam@198 246 adapterFlags @2 :List(AdapterFlag); # Set of optional flags to make any framing and input conversion requests.
c@174 247 }
c@174 248
c@174 249 struct LoadResponse {
cannam@198 250 # Response to a successful load request.
cannam@198 251
cannam@198 252 handle @0 :Int32; # Handle to be used to refer to the loaded feature extractor in future requests.
cannam@198 253 staticData @1 :ExtractorStaticData; # Static data about this feature extractor, identical to that in list response.
cannam@198 254 defaultConfiguration @2 :Configuration; # Extractor's default parameter values and preferred input framing.
cannam@215 255
cannam@215 256 struct PPPair {
cannam@215 257 # A mapping between program name and parameter values for that program.
cannam@215 258
cannam@215 259 program @0 :Text; # Program name, one of those listed in the static data. May be any text: no pattern is enforced.
cannam@215 260 parameters @1 :List(Configuration.PVPair);
cannam@215 261 # Parameter values for all parameters changed from defaults by that program setting.
cannam@215 262 }
cannam@215 263
cannam@215 264 programParameters @3 :List(PPPair); # Program-name-to-parameter-values mappings (see PPPair).
c@174 265 }
c@174 266
c@174 267 struct ConfigurationRequest {
cannam@198 268 # Request a server to configure a loaded feature extractor and prepare
cannam@198 269 # it for use. This request must be carried out on a feature extractor
cannam@198 270 # before any process request can be made. A successful request is
cannam@198 271 # answered with a ConfigurationResponse.
cannam@198 272 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
cannam@198 273 configuration @1 :Configuration; # Bundle of parameter values to set, and client's preferred input framing.
c@174 274 }
c@174 275
c@174 276 struct ConfigurationResponse {
cannam@198 277 # Response to a successful configuration request (see ConfigurationRequest).
cannam@198 278
cannam@198 279 handle @0 :Int32; # Handle of extractor, as passed in the configuration request.
cannam@198 280 outputs @1 :List(OutputDescriptor); # Full set of properties of all outputs following configuration.
cannam@198 281 framing @2 :Framing; # Input framing that must be used for subsequent process requests.
c@174 282 }
c@174 283
c@174 284 struct ProcessRequest {
cannam@198 285 # Request a server to process a buffer of audio using a loaded and
cannam@198 286 # configured feature extractor. A successful request is answered with a ProcessResponse.
cannam@198 287
cannam@198 288 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
cannam@198 289 processInput @1 :ProcessInput; # Audio in the input domain, with framing as in the configuration response.
c@174 290 }
c@174 291
c@174 292 struct ProcessResponse {
cannam@198 293 # Response to a successful process request (see ProcessRequest).
cannam@198 294
cannam@198 295 handle @0 :Int32; # Handle of extractor, as passed in the process request.
cannam@198 296 features @1 :FeatureSet; # All features across all outputs calculated during this process request.
c@174 297 }
c@174 298
c@174 299 struct FinishRequest {
cannam@198 300 # Request a server to finish processing and unload a loaded feature
cannam@198 301 # extractor. This request may be made at any time -- the extractor does
cannam@198 302 # not have to have been configured or used. The extractor handle cannot
cannam@198 303 # be used again with this server afterwards. A successful request is answered with a FinishResponse.
cannam@198 304
cannam@198 305 handle @0 :Int32; # Handle as returned in the load response from the loading of this extractor.
c@174 306 }
c@174 307
c@174 308 struct FinishResponse {
cannam@198 309 # Response to a successful finish request (see FinishRequest).
cannam@198 310
cannam@198 311 handle @0 :Int32; # Handle of extractor, as passed in the finish request. May not be used again.
cannam@198 312 features @1 :FeatureSet; # Features the extractor has calculated now that it knows all input has ended.
c@174 313 }
c@174 314
c@174 315 struct Error {
cannam@198 316 # Response to any request that fails (carried as the error member of RpcResponse's response union).
cannam@198 317
cannam@198 318 code @0 :Int32; # Error code.
cannam@198 319 message @1 :Text; # Error message.
c@174 320 }
c@174 321
c@174 322 struct RpcRequest {
c@174 323 # Request bundle for use when using Cap'n Proto serialisation without
c@174 324 # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.
cannam@198 325
c@175 326 id :union {
cannam@198 327 # Identifier used solely to associate a response packet with its
cannam@198 328 # originating request. Server does not examine the contents of this,
cannam@200 329 # it just copies the request id structure into the response.
cannam@198 330
c@175 331 number @0 :Int32; # Request id given as a number.
c@175 332 tag @1 :Text; # Request id given as text.
c@175 333 none @2 :Void; # No request id given.
c@175 334 }
cannam@198 335
c@174 336 request :union {
cannam@198 337 # For more details, see the documentation for the individual
cannam@198 338 # request structures.
cannam@198 339
cannam@198 340 list @3 :ListRequest; # Provide a list of available feature extractors.
cannam@198 341 load @4 :LoadRequest; # Load a feature extractor and return a handle to it.
cannam@198 342 configure @5 :ConfigurationRequest; # Configure a loaded feature extractor, set parameters, and prepare it for use.
cannam@198 343 process @6 :ProcessRequest; # Process a single fixed-size buffer of audio and return calculated features.
cannam@198 344 finish @7 :FinishRequest; # Get any remaining features and unload the extractor.
c@174 345 }
c@174 346 }
c@174 347
c@174 348 struct RpcResponse {
c@174 349 # Response bundle for use when using Cap'n Proto serialisation without
c@174 350 # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.
cannam@198 351
c@175 352 id :union {
cannam@198 353 # Identifier used solely to associate a response packet with its
cannam@198 354 # originating request. Server does not examine the contents of this,
cannam@200 355 # it just copies the request id structure into the response.
cannam@198 356
c@175 357 number @0 :Int32; # Id given as a number, copied from the request.
c@175 358 tag @1 :Text; # Id given as text, copied from the request.
c@175 359 none @2 :Void; # No id given, copied from the request.
c@175 360 }
cannam@198 361
c@174 362 response :union {
cannam@198 363 # For more details, see the documentation for the individual
cannam@198 364 # response structures.
cannam@198 365
cannam@198 366 error @3 :Error; # The request (of whatever type) failed.
cannam@200 367 list @4 :ListResponse; # List succeeded: here is static data about the requested extractors.
cannam@200 368 load @5 :LoadResponse; # Load succeeded: here is a handle for the loaded extractor.
cannam@200 369 configure @6 :ConfigurationResponse;# Configure succeeded: ready to process, here are values such as block size.
cannam@200 370 process @7 :ProcessResponse; # Process succeeded: here are all features calculated from this input block.
cannam@200 371 finish @8 :FinishResponse; # Finish succeeded: extractor unloaded, here are all remaining features.
c@174 372 }
c@174 373 }
c@174 374