To check out this repository, please `hg clone` the following URL, or open it using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/piper-audio/piper .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Revision:

root / capnp / piper.capnp @ 208:01b5abe6f170

History | View | Annotate | Download (19.3 KB)

# Piper audio feature extraction: schema for low-level operation
#
# This file is formatted to 130 characters width, in order to fit the
# comments next to the schema definitions.
#
# Copyright (c) 2015-2017 Queen Mary, University of London, provided
# under a BSD-style licence. See the file COPYING for details.
9

    
10
@0xc4b1c6c44c999206;                              # Unique 64-bit ID of this schema file.

using Cxx = import "/capnp/c++.capnp";            # Cap'n Proto's C++ code-generation annotations.
$Cxx.namespace("piper");                          # Generated C++ code goes in the "piper" namespace.
14

    
15
struct Basic {
    # Basic metadata common to many Piper structures.

    identifier         @0  :Text;                 # A computer-readable string. Must match the regex /^[a-zA-Z0-9_-]+$/.
    name               @1  :Text;                 # A short human-readable name or label. Must be present.
    description        @2  :Text;                 # An optional human-readable descriptive text that may accompany the name.
}
22

    
23
struct ParameterDescriptor {
    # Properties of an adjustable parameter. A parameter's value is just a single
    # float, but the descriptor explains how to interpret and present that value.
    # A Piper feature extractor has a static list of parameters. The properties of
    # a given parameter never change, in contrast to output descriptors, which
    # may have different properties depending on the configuration of the extractor.

    basic              @0  :Basic;                # Basic metadata about the parameter.
    unit               @1  :Text;                 # Human-recognisable unit of the parameter (e.g. Hz). May be left empty.
    minValue           @2  :Float32     = 0.0;    # Minimum value. Must be provided.
    maxValue           @3  :Float32     = 0.0;    # Maximum value. Must be provided.
    defaultValue       @4  :Float32     = 0.0;    # Default if the parameter is not set to anything else. Must be provided.
    isQuantized        @5  :Bool        = false;  # True if parameter values are quantized to a particular resolution.
    quantizeStep       @6  :Float32     = 0.0;    # Quantization resolution, if isQuantized.
    valueNames         @7  :List(Text)  = [];     # Optional human-readable labels for the values, if isQuantized.
}
39

    
40
enum SampleType {
    # How returned features are spaced on the input timeline.

    oneSamplePerStep   @0;                        # Each process input returns a feature aligned with that input's timestamp.
    fixedSampleRate    @1;                        # Features are equally spaced at a given sample rate.
    variableSampleRate @2;                        # Features have their own individual timestamps.
}
47

    
48
struct StaticOutputDescriptor {
    # Properties of an output, that is, a single stream of features produced
    # in response to process and finish requests. A feature extractor may
    # have any number of outputs, and it always calculates and returns features
    # from all of them when processing; this is useful in cases where more
    # than one feature can be easily calculated using a single method.
    #
    # This structure contains the properties of an output that are static,
    # i.e. that do not depend on the parameter values provided at
    # configuration, excluding the Basic struct properties such as identifier
    # and description. The Basic struct properties are not included for
    # historical reasons: they were already referenced separately in the
    # OutputDescriptor and ExtractorStaticData before this struct was
    # introduced.

    typeURI            @0  :Text;                 # URI indicating the sort of feature that this output returns (see docs).
}
67

    
68
struct ConfiguredOutputDescriptor {
    # Properties of an output, that is, a single stream of features produced
    # in response to process and finish requests. A feature extractor may
    # have any number of outputs, and it always calculates and returns features
    # from all of them when processing; this is useful in cases where more
    # than one feature can be easily calculated using a single method.
    #
    # This structure contains the properties of an output that are not static,
    # i.e. that may depend on the parameter values provided at configuration.

    unit               @0  :Text;                 # Human-recognisable unit of the bin values in output features. May be empty.
    hasFixedBinCount   @1  :Bool        = false;  # True if this output has an equal number of values in each returned feature.
    binCount           @2  :Int32       = 0;      # Number of values per feature for this output, if hasFixedBinCount.
    binNames           @3  :List(Text)  = [];     # Optional human-readable labels for the value bins, if hasFixedBinCount.
    hasKnownExtents    @4  :Bool        = false;  # True if all feature values fall within the same fixed min/max range.
    minValue           @5  :Float32     = 0.0;    # Minimum value in range for any value from this output, if hasKnownExtents.
    maxValue           @6  :Float32     = 0.0;    # Maximum value in range for any value from this output, if hasKnownExtents.
    isQuantized        @7  :Bool        = false;  # True if feature values are quantized to a particular resolution.
    quantizeStep       @8  :Float32     = 0.0;    # Quantization resolution, if isQuantized.
    sampleType         @9  :SampleType;           # How returned features from this output are spaced on the input timeline.
    sampleRate         @10 :Float32     = 0.0;    # Sample rate (features per second) if sampleType == fixedSampleRate.
    hasDuration        @11 :Bool        = false;  # True if features returned from this output will have a duration.
}
90

    
91
struct OutputDescriptor {
    # All the properties of an output, both static (the basic metadata and static
    # descriptor) and potentially dependent on configuration parameters (the
    # configured descriptor).

    basic              @0  :Basic;                # Basic metadata about the output.
    configured         @1  :ConfiguredOutputDescriptor;    # Properties of the output that may depend on configuration parameters.
    static             @2  :StaticOutputDescriptor;        # Properties (other than Basic) that do not depend on parameters.
}
100

    
101
enum InputDomain {
    # Whether a feature extractor requires time-domain audio input (i.e.
    # "normal" or "unprocessed" audio samples) or frequency-domain input
    # (i.e. resulting from windowed, usually overlapping, short-time
    # Fourier transforms).

    timeDomain         @0;                        # The extractor requires time-domain audio samples as input.
    frequencyDomain    @1;                        # The extractor requires input to have been pre-processed using windowed STFTs.
}
110

    
111
struct ExtractorStaticData {
    # Static properties of a feature extractor. That is, metadata about the
    # extractor that are the same regardless of how you configure or run it.

    key                @0  :Text;                 # String that "globally" identifies the extractor, used to load it (see docs).
    basic              @1  :Basic;                # Basic metadata about the extractor.
    maker              @2  :Text;                 # Human-readable text naming the author or vendor of the extractor.
    rights             @3  :Text;                 # Human-readable summary of copyright and/or licensing terms for the extractor.
    version            @4  :Int32;                # Version number of extractor; must increase if new algorithm changes results.
    category           @5  :List(Text);           # List of general->specific category labels for this extractor (see docs).
    minChannelCount    @6  :Int32;                # Minimum number of input channels of audio this extractor can accept.
    maxChannelCount    @7  :Int32;                # Maximum number of input channels of audio this extractor can accept.
    parameters         @8  :List(ParameterDescriptor);    # List of configurable parameter properties for the feature extractor.
    programs           @9  :List(Text);           # List of predefined programs. For backward-compatibility, not recommended.
    inputDomain        @10 :InputDomain;          # Whether the extractor requires time-domain or frequency-domain input audio.
    basicOutputInfo    @11 :List(Basic);          # Basic metadata about all of the outputs of the extractor.

    struct SOPair {
        # A mapping between output identifier and static descriptor for
        # that output.

        output         @0  :Text;                 # Output id, matching the output's descriptor's basic identifier.
        static         @1  :StaticOutputDescriptor;       # Static descriptor for that output.
    }

    staticOutputInfo   @12 :List(SOPair);         # Static descriptors for all outputs that have any static metadata.
}
138

    
139
struct RealTime {
    # Time structure. When used as a timestamp, this is relative to "start
    # of audio".

    sec                @0  :Int32       = 0;      # Number of seconds.
    nsec               @1  :Int32       = 0;      # Number of nanoseconds. Must have same sign as sec unless sec == 0.
}
146

    
147
struct ProcessInput {
    # Audio and timing input data provided to a process request.

    inputBuffers       @0  :List(List(Float32));  # A single block of audio data (time or frequency domain) for each channel.
    timestamp          @1  :RealTime;             # Time of start of block (time-domain) or "centre" of it (frequency-domain).
}
153

    
154
struct Feature {
    # A single feature calculated and returned from a process or finish request.

    hasTimestamp       @0  :Bool        = false;  # True if feature has a timestamp. Must be true for a variableSampleRate output.
    timestamp          @1  :RealTime;             # Timestamp of feature, if hasTimestamp.
    hasDuration        @2  :Bool        = false;  # True if feature has a duration. Must be true if output's hasDuration is true.
    duration           @3  :RealTime;             # Duration of feature, if hasDuration.
    label              @4  :Text;                 # Optional human-readable text attached to feature.
    featureValues      @5  :List(Float32) = [];   # The feature values themselves (of size binCount, if output hasFixedBinCount).
}
164

    
165
struct FeatureSet {
    # The set of all features, across all outputs, calculated and returned from
    # a single process or finish request.

    struct FSPair {
        # A mapping between output identifier and ordered list of features for
        # that output.

        output         @0  :Text;                 # Output id, matching the output's descriptor's basic identifier.
        features       @1  :List(Feature) = [];   # Features calculated for that output during the current request, in time order.
    }

    featurePairs       @0  :List(FSPair);         # The feature lists for all outputs for which any features have been calculated.
}
179

    
180
struct Framing {
    # Determines how audio should be split up into individual buffers for input.
    # If the feature extractor accepts frequency-domain input, then this
    # applies prior to the STFT.
    #
    # These values are sometimes mandatory, but in other contexts one or both may
    # be set to zero to mean "don't care". See documentation for structures that
    # include a framing field for details.

    blockSize          @0  :Int32;                # Number of time-domain audio samples per buffer (on each channel).
    stepSize           @1  :Int32;                # Number of samples to advance between buffers: equals blockSize for no overlap.
}
192

    
193
struct Configuration {
    # Bundle of parameter values and other configuration data for a feature-
    # extraction procedure.

    struct PVPair {
        # A mapping between parameter identifier and value.

        parameter      @0  :Text;                 # Parameter id, matching the parameter's descriptor's basic identifier.
        value          @1  :Float32;              # Value to set parameter to (within constraints given in parameter descriptor).
    }

    parameterValues    @0  :List(PVPair);         # Values for all parameters, or at least any that are to change from defaults.
    currentProgram     @1  :Text;                 # Selection of predefined program. For backward-compatibility, not recommended.
    channelCount       @2  :Int32;                # Number of audio channels of input.
    framing            @3  :Framing;              # Step and block size for framing the input.
}
209

    
210
enum AdapterFlag {
    # Flags that may be used when requesting a server to load a feature
    # extractor, to ask the server to do some of the work of framing and input
    # conversion instead of leaving it to the client side. These affect the
    # apparent behaviour of the loaded extractor.

    adaptInputDomain   @0;                        # Input-domain conversion, so the extractor always expects time-domain input.
    adaptChannelCount  @1;                        # Channel mixing or duplication, so any number of input channels is acceptable.
    adaptBufferSize    @2;                        # Framing, so the extractor accepts any blockSize of non-overlapping buffers.
}
220

    
221
const adaptAllSafe :List(AdapterFlag) =
    [ adaptInputDomain, adaptChannelCount ];
    # The set of adapter flags that can always be applied, leaving results unchanged.
224

    
225
const adaptAll :List(AdapterFlag) =
    [ adaptInputDomain, adaptChannelCount, adaptBufferSize ];
    # The set of adapter flags that may cause "equivalent" results to be returned (see documentation).
228

    
229
struct ListRequest {
    # Request a server to provide a list of available feature extractors.

    from               @0  :List(Text);           # If non-empty, provide only extractors found in the given list of "libraries".
}
234

    
235
struct ListResponse {
    # Response to a successful list request.

    available          @0  :List(ExtractorStaticData);    # List of static data about available feature extractors.
}
240

    
241
struct LoadRequest {
    # Request a server to load a feature extractor and return a handle to it.

    key                @0  :Text;                 # Key as found in the extractor's static data structure.
    inputSampleRate    @1  :Float32;              # Sample rate for input audio. Properties of the extractor may depend on this.
    adapterFlags       @2  :List(AdapterFlag);    # Set of optional flags to make any framing and input conversion requests.
}
248

    
249
struct LoadResponse {
    # Response to a successful load request.

    handle             @0  :Int32;                # Handle to be used to refer to the loaded feature extractor in future requests.
    staticData         @1  :ExtractorStaticData;  # Static data about this feature extractor, identical to that in list response.
    defaultConfiguration @2  :Configuration;      # Extractor's default parameter values and preferred input framing.
}
256

    
257
struct ConfigurationRequest {
    # Request a server to configure a loaded feature extractor and prepare
    # it for use. This request must be carried out on a feature extractor
    # before any process request can be made.

    handle             @0  :Int32;                # Handle as returned in the load response from the loading of this extractor.
    configuration      @1  :Configuration;        # Bundle of parameter values to set, and client's preferred input framing.
}
265

    
266
struct ConfigurationResponse {
    # Response to a successful configuration request.

    handle             @0  :Int32;                # Handle of extractor, as passed in the configuration request.
    outputs            @1  :List(OutputDescriptor);       # Full set of properties of all outputs following configuration.
    framing            @2  :Framing;              # Input framing that must be used for subsequent process requests.
}
273

    
274
struct ProcessRequest {
    # Request a server to process a buffer of audio using a loaded and
    # configured feature extractor.

    handle             @0  :Int32;                # Handle as returned in the load response from the loading of this extractor.
    processInput       @1  :ProcessInput;         # Audio in the input domain, with framing as in the configuration response.
}
281

    
282
struct ProcessResponse {
    # Response to a successful process request.

    handle             @0  :Int32;                # Handle of extractor, as passed in the process request.
    features           @1  :FeatureSet;           # All features across all outputs calculated during this process request.
}
288

    
289
struct FinishRequest {
    # Request a server to finish processing and unload a loaded feature
    # extractor. This request may be made at any time -- the extractor does
    # not have to have been configured or used. The extractor handle cannot
    # be used again with this server afterwards.

    handle             @0  :Int32;                # Handle as returned in the load response from the loading of this extractor.
}
297

    
298
struct FinishResponse {
    # Response to a successful finish request.

    handle             @0  :Int32;                # Handle of extractor, as passed in the finish request. May not be used again.
    features           @1  :FeatureSet;           # Features the extractor has calculated now that it knows all input has ended.
}
304

    
305
struct Error {
    # Response to any request that fails.

    code               @0  :Int32;                # Error code.
    message            @1  :Text;                 # Error message.
}
311

    
312
struct RpcRequest {
    # Request bundle for use when using Cap'n Proto serialisation without
    # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.

    id :union {
        # Identifier used solely to associate a response packet with its
        # originating request. Server does not examine the contents of this,
        # it just copies the request id structure into the response.

        number         @0  :Int32;                # Request id given as an integer.
        tag            @1  :Text;                 # Request id given as a string.
        none           @2  :Void;                 # No request id (carries no data).
    }

    request :union {
        # For more details, see the documentation for the individual
        # request structures.

        list           @3  :ListRequest;          # Provide a list of available feature extractors.
        load           @4  :LoadRequest;          # Load a feature extractor and return a handle to it.
        configure      @5  :ConfigurationRequest; # Configure a loaded feature extractor, set parameters, and prepare it for use.
        process        @6  :ProcessRequest;       # Process a single fixed-size buffer of audio and return calculated features.
        finish         @7  :FinishRequest;        # Get any remaining features and unload the extractor.
    }
}
337

    
338
struct RpcResponse {
    # Response bundle for use when using Cap'n Proto serialisation without
    # Cap'n Proto RPC layer. For Cap'n Proto RPC, see piper.rpc.capnp.

    id :union {
        # Identifier used solely to associate a response packet with its
        # originating request. Server does not examine the contents of this,
        # it just copies the request id structure into the response.

        number         @0  :Int32;                # Request id given as an integer.
        tag            @1  :Text;                 # Request id given as a string.
        none           @2  :Void;                 # No request id (carries no data).
    }

    response :union {
        # For more details, see the documentation for the individual
        # response structures.

        error          @3  :Error;                # The request (of whatever type) failed.
        list           @4  :ListResponse;         # List succeeded: here is static data about the requested extractors.
        load           @5  :LoadResponse;         # Load succeeded: here is a handle for the loaded extractor.
        configure      @6  :ConfigurationResponse; # Configure succeeded: ready to process, here are values such as block size.
        process        @7  :ProcessResponse;      # Process succeeded: here are all features calculated from this input block.
        finish         @8  :FinishResponse;       # Finish succeeded: extractor unloaded, here are all remaining features.
    }
}
364