comparison main/IMAFencoder.h @ 634:ba338234c001 imaf_enc

IMAF load code from Jesus Corral Garcia
author Chris Cannam
date Mon, 04 Nov 2013 17:15:52 +0000
parents
children
comparison
equal deleted inserted replaced
633:a8da6db5a2c9 634:ba338234c001
1 #ifndef IMAFENCODER_H
2 #define IMAFENCODER_H
3
4 //***********************************************************//
5 // Interactive Music Audio Format (IMAF) ENCODER //
6 // Version 2.0 //
7 // //
8 // Eugenio Oñate Hospital //
9 // Costantino Taglialatela & Jesus Corral Garcìa //
10 // //
11 // Copyright (c) 2013 Centre for Digital Music (C4DM) //
12 // Queen Mary University of London. All rights reserved. //
13 //***********************************************************//
14 // IM_AF Encoder.h //
15 //***********************************************************//
16
17
18 /* for FILE typedef, */
19 #include <stdio.h>
20
/*
 * Compile-time capacity limits. They size the fixed arrays inside the
 * box structures declared below.
 */
#define maxtracks  8   /* audio tracks per file; raise to support more than 8 (was 6 previously) */
#define maxgroups  2   /* entries in the GroupBox array */
#define maxpreset  10  /* entries in the PresetBox array */
#define maxrules   10  /* rule limit */
#define maxfilters 3   /* maximum number of filters in one EQ preset */
#define maxdynamic 2   /* maximum number of dynamic volume changes */
#define num_ch     2   /* number of output channels (stereo) */
28
/*
 * Short aliases for the unsigned integer types used by every box field.
 *
 * u64 used to be declared as a signed `long long`, contradicting both its
 * name and the other three aliases; it is now unsigned like the rest.
 *
 * NOTE(review): the 32/16-bit widths rely on `unsigned int` being 32 bits
 * and `unsigned short` being 16 bits on the target platform -- confirm.
 */
typedef unsigned long long u64;
typedef unsigned int       u32;
typedef unsigned short     u16;
typedef unsigned char      u8;
33
34 //typedef struct nametrack { // Stores the different titles of the tracks
35 // char title[20];
36 //}nametrack[maxtracks];
37
38 typedef struct FileTypeBox
39 {
40 u32 size;
41 u32 type; // ftyp
42 u32 major_brand; // brand identifier
43 u32 minor_version; // informative integer for the mirror version
44 u32 compatible_brands[2]; //list of brands
45 }FileTypeBox;
46
47 typedef struct MovieBox //extends Box('moov')
48 {
49 u32 size;
50 u32 type; // moov
51
52 struct MovieHeaderBox
53 {
54 u32 size;
55 u32 type; // mvhd
56 u32 version; // version + flag
57 u32 creation_time;
58 u32 modification_time;
59 u32 timescale; // specifies the time-scale
60 u32 duration;
61 u32 rate; // typically 1.0
62 u16 volume; // typically full volume
63 u16 reserved; // =0
64 u32 reserved2[2]; //=0
65 u32 matrix[9]; // information matrix for video (u,v,w)
66 u32 pre_defined[6]; // =0
67 u32 next_track_ID; //non zero value for the next track ID
68 }MovieHeaderBox;
69
70 struct TrackBox
71 {
72 u32 size;
73 u32 type;
74 struct TrackHeaderBox
75 {
76 u32 size;
77 u32 type;
78 u32 version; // version + flag
79 u32 creation_time;
80 u32 modification_time;
81 u32 track_ID;
82 u32 reserved; // =0
83 u32 duration;
84 u32 reserved2[2]; // =0
85 u16 layer; // =0 // for video
86 u16 alternate_group; // =0
87 u16 volume; // full volume is 1 = 0x0100
88 u16 reserved3;// =0
89 u32 matrix[9]; // for video
90 u32 width; // video
91 u32 height; // video
92 }TrackHeaderBox;
93
94 struct MediaBox // extends Box('mdia')
95 {
96 u32 size;
97 u32 type;
98 struct MediaHeaderBox // extends FullBox('mdhd', version,0)
99 {
100 u32 size;
101 u32 type;
102 u32 version; // version + flag
103 u32 creation_time;
104 u32 modification_time;
105 u32 timescale;
106 u32 duration;
107 u16 language; // [pad,5x3] = 16 bits and pad = 0
108 u16 pre_defined; // =0
109 }MediaHeaderBox;
110 struct HandlerBox // extends FullBox('hdlr')
111 {
112 u32 size;
113 u32 type;
114 u32 version; // version = 0 + flag
115 u32 pre_defined; // =0
116 u32 handler_type; // = 'soun' for audio track, text or hint
117 u32 reserved[3]; // =0
118 unsigned char data[5]; // Does not work! only 4 bytes
119
120 }HandlerBox;
121 struct MediaInformationBox //extends Box('minf')
122 {
123 u32 size;
124 u32 type;
125 // smhd in sound track only!!
126 struct SoundMediaHeaderBox //extends FullBox('smhd')
127 {
128 u32 size;
129 u32 type;
130 u32 version;
131 u16 balance; // =0 place mono tracks in stereo. 0 is center
132 u16 reserved; // =0
133 }SoundMediaHeaderBox;
134 struct NullMediaHeaderBox //extends FullBox('nmhd')
135 {
136 u32 size;
137 u32 type;
138 u32 flags;
139 }NullMediaHeaderBox;
140 struct DataInformationBox //extends Box('dinf')
141 {
142 u32 size;
143 u32 type;
144 struct DataReferenceBox
145 {
146 u32 size;
147 u32 type;
148 u32 flags;
149 u32 entry_count; // counts the actual entries.
150 struct DataEntryUrlBox //extends FullBox('url', version=0, flags)
151 {
152 u32 size;
153 u32 type;
154 u32 flags;
155 }DataEntryUrlBox;
156 }DataReferenceBox;
157 }DataInformationBox;
158 struct SampleTableBox // extends Box('stbl')
159 {
160 u32 size;
161 u32 type;
162 struct TimeToSampleBox{
163 u32 size;
164 u32 type;
165 u32 version;
166 u32 entry_count;
167 u32 sample_count[3000];
168 u32 sample_delta[3000];
169 }TimeToSampleBox;
170 struct SampleDescriptionBox // stsd
171 {
172 u32 size;
173 u32 type;
174 u32 version;
175 u32 entry_count; // = 1 number of entries
176 // unsigned char esds[88];
177 struct TextSampleEntry{
178 u32 size;
179 u32 type; //tx3g
180 u32 a;
181 u32 b;
182 u32 displayFlags;
183 u8 horizontaljustification;
184 u8 verticaljustification;
185 u8 backgroundcolorrgba[4];
186 u16 top;
187 u16 left;
188 u16 bottom;
189 u16 right;
190 //StyleRecord
191 u16 startChar;
192 u16 endChar;
193 u16 fontID;
194 u8 facestyleflags;
195 u8 fontsize;
196 u8 textcolorrgba[4];
197 struct FontTableBoX{
198 u32 size;
199 u32 type;
200 u16 entrycount;
201 u16 fontID;
202 u8 fontnamelenght;
203 u8 font[5]; //Serif
204 }FontTableBox;
205 }TextSampleEntry;
206 struct AudioSampleEntry{
207 u32 size;
208 u32 type; //mp4a
209 char reserved[6];
210 u16 data_reference_index; // = 1
211 u32 reserved2[2];
212 u16 channelcount; // = 2
213 u16 samplesize; // = 16
214 u32 reserved3;
215 u32 samplerate; // 44100 << 16
216 // unsigned char esds[81];
217 struct ESbox{
218 u32 size;
219 u32 type;
220 u32 version;
221 struct ES_Descriptor{
222 unsigned char tag;
223 unsigned char length;
224 u16 ES_ID;
225 unsigned char mix;
226 struct DecoderConfigDescriptor{
227 unsigned char tag;
228 unsigned char length;
229 unsigned char objectProfileInd;
230 u32 mix;
231 u32 maxBitRate;
232 u32 avgBitrate;
233 /* struct DecoderSpecificInfo{
234 unsigned char tag;
235 unsigned length;
236 // unsigned char decSpecificInfosize;
237 unsigned char decSpecificInfoData[2];
238 }DecoderSpecificInfo;
239 */ }DecoderConfigDescriptor;
240 struct SLConfigDescriptor{
241 unsigned char tag;
242 unsigned char length;
243 unsigned char predifined;
244 }SLConfigDescriptor;
245 }ES_Descriptor;
246 }ESbox;
247 }AudioSampleEntry;
248 }SampleDescriptionBox;
249 struct SampleSizeBox{
250 u32 size;
251 u32 type;
252 u32 version;
253 u32 sample_size; // =0
254 u32 sample_count;
255 u32 entry_size[9000];
256 }SampleSizeBox;
257 struct SampleToChunk{
258 u32 size;
259 u32 type;
260 u32 version;
261 u32 entry_count;
262 u32 first_chunk;
263 u32 samples_per_chunk;
264 u32 sample_description_index;
265 }SampleToChunk;
266 struct ChunkOffsetBox{
267 u32 size;
268 u32 type;
269 u32 version;
270 u32 entry_count;
271 u32 chunk_offset[maxtracks];
272 }ChunkOffsetBox;
273 }SampleTableBox;
274 }MediaInformationBox;
275 }MediaBox;
276 }TrackBox[maxtracks]; // max 10 tracks
277
278 struct PresetContainerBox // extends Box('prco')
279 {
280 u32 size;
281 u32 type;
282 unsigned char num_preset;
283 unsigned char default_preset_ID;
284 struct PresetBox //extends FullBox('prst',version=0,flags)
285 {
286 u32 size;
287 u32 type;
288 u32 flags;
289 unsigned char preset_ID;
290 unsigned char num_preset_elements;
291 struct presElemId{
292 u32 preset_element_ID;
293 }presElemId[maxtracks];
294 unsigned char preset_type;
295 unsigned char preset_global_volume;
296
297 // if (preset_type == 0) || (preset_type == 8) - Static track volume preset
298 struct StaticTrackVolume{
299 struct presVolumElem{
300 u8 preset_volume_element;
301 struct EQ{ // if preset_type == 8 (with EQ)
302 u8 num_eq_filters;
303 struct Filter{
304 u8 filter_type;
305 u16 filter_reference_frequency;
306 u8 filter_gain;
307 u8 filter_bandwidth;
308 }Filter[maxfilters];
309 }EQ;
310 }presVolumElem[maxtracks];
311 }StaticTrackVolume;
312
313 // if (preset_type == 1) || (preset_type == 9) - Static object volume preset
314 struct StaticObjectVolume{
315 struct InputCH{
316 u8 num_input_channel;
317 }InputCH[maxtracks];
318 u8 output_channel_type;
319 struct presElVol_1{
320 struct Input{
321 struct Output{
322 u8 preset_volume_element;
323 }Output[num_ch];
324 struct EQ_1{ // if preset_type == 9 (with EQ)
325 u8 num_eq_filters;
326 struct Filter_1{
327 u8 filter_type;
328 u16 filter_reference_frequency;
329 u8 filter_gain;
330 u8 filter_bandwidth;
331 }Filter[maxfilters];
332 }EQ;
333 }Input[num_ch];
334 }presElVol[maxtracks];
335 }StaticObjectVolume;
336
337 // if (preset_type == 2) || (preset_type == 10) - Dynamic track volume preset
338 struct DynamicTrackVolume{
339 u16 num_updates;
340 struct DynamicChange{
341 u16 updated_sample_number;
342 struct presVolumElem_2{
343 u8 preset_volume_element;
344 struct EQ_2{ // if preset_type == 10 (with EQ)
345 u8 num_eq_filters;
346 struct Filter_2{
347 u8 filter_type;
348 u16 filter_reference_frequency;
349 u8 filter_gain;
350 u8 filter_bandwidth;
351 }Filter[maxfilters];
352 }EQ;
353 }presVolumElem[maxtracks];
354 }DynamicChange[maxdynamic];
355 }DynamicTrackVolume;
356
357 // if (preset_type == 3) || (preset_type == 11) - Dynamic object volume preset
358 struct DynamicObjectVolume{
359 u16 num_updates;
360 struct InputCH_3{
361 u8 num_input_channel;
362 }InputCH[maxtracks];
363 u8 output_channel_type;
364 struct DynamicChange_3{
365 u16 updated_sample_number;
366 struct presElVol{
367 struct Input_3{
368 struct Output_3{
369 u8 preset_volume_element;
370 }Output[num_ch];
371 struct EQ_3{ // if preset_type == 11 (with EQ)
372 u8 num_eq_filters;
373 struct Filter_3{
374 u8 filter_type;
375 u16 filter_reference_frequency;
376 u8 filter_gain;
377 u8 filter_bandwidth;
378 }Filter[maxfilters];
379 }EQ;
380 }Input[num_ch];
381 }presElVol[maxtracks];
382 }DynamicChange[maxdynamic];
383 }DynamicObjectVolume;
384
385 // if (preset_type == 4) || (preset_type == 12) - Dynamic track approximated volume preset
386 struct DynamicTrackApproxVolume{
387 u16 num_updates;
388 struct DynamicChange_4{
389 u16 start_sample_number;
390 u16 duration_update;
391 struct presElVol_4{
392 u8 end_preset_volume_element;
393 struct EQ_4{ // if preset_type == 12 (with EQ)
394 u8 num_eq_filters;
395 struct Filter_4{
396 u8 filter_type;
397 u16 filter_reference_frequency;
398 u8 end_filter_gain;
399 u8 filter_bandwidth;
400 }Filter[maxfilters];
401 }EQ;
402 }presElVol[maxtracks];
403 }DynamicChange[maxdynamic];
404 }DynamicTrackApproxVolume;
405
406 // if (preset_type == 5) || (preset_type == 13) - Dynamic object approximated volume preset
407 // THIS STRUCTURE GIVES STACK OVERFLOW PROBLEMS - MORE STACK SIZE NEEDED -> Needs investigation
408 struct DynamicObjectApproxVolume{
409 u16 num_updates;
410 struct InputCH_5{
411 u8 num_input_channel;
412 }InputCH[maxtracks];
413 u8 output_channel_type;
414 struct DynamicChange_5{
415 u16 start_sample_number;
416 u16 duration_update;
417 struct presElVol_5{
418 struct Input_5{
419 struct Output_5{
420 u8 preset_volume_element;
421 }Output[num_ch];
422 struct EQ_5{ // if preset_type == 11 (with EQ)
423 u8 num_eq_filters;
424 struct Filter_5{
425 u8 filter_type;
426 u16 filter_reference_frequency;
427 u8 end_filter_gain;
428 u8 filter_bandwidth;
429 }Filter[maxfilters];
430 }EQ;
431 }Input[num_ch];
432 }presElVol[maxtracks];
433 }DynamicChange[maxdynamic];
434 }DynamicObjectApproxVolume;
435
436 char preset_name[50];
437
438 }PresetBox[maxpreset];
439
440 }PresetContainerBox;
441
442 struct RulesContainer{
443 u32 size;
444 u32 type;
445 u16 num_selection_rules;
446 u16 num_mixing_rules;
447 struct SelectionRules{
448 u32 size;
449 u32 type;
450 u32 version;
451 u16 selection_rule_ID;
452 unsigned char selection_rule_type;
453 u32 element_ID;
454 // Only for Min/Max Rule
455 // if (selection_rule_type==0)
456 u16 min_num_elements;
457 u16 max_num_elements;
458 // Only for Exclusion and Implication Rules
459 // if (selection_rule_type==1 || selection_rule_type==3)
460 u32 key_element_ID;
461 char rule_description[20];
462 }SelectionRules;
463 struct MixingRules{
464 u32 size;
465 u32 type;
466 u32 version;
467 u16 mixing_rule_ID;
468 unsigned char mixing_type;
469 u32 element_ID;
470 u16 min_volume;
471 u16 max_volume;
472 u32 key_elem_ID;
473 char mix_description[17];
474 }MixingRules;
475 }RulesContainer;
476 struct GroupContainerBox{ //extends Box('grco')
477 u32 size; // = 10 + sizeGRUP
478 u32 type;
479 u16 num_groups;
480 struct GroupBox{ // extends FullBox('grup')
481 u32 size; // = 21 + 22 + 32 (+2 if group_activation_mode = 2)
482 u32 type;
483 u32 version;
484 u32 group_ID;
485 u16 num_elements;
486 struct groupElemId{
487 u32 element_ID;
488 }groupElemId[maxtracks];
489 unsigned char group_activation_mode;
490 u16 group_activation_elements_number;
491 u16 group_reference_volume;
492 char group_name[22];
493 char group_description[32];
494 }GroupBox[maxgroups];
495 }GroupContainerBox;
496 }MovieBox;
497
498 typedef struct MetaBox // extends FullBox ('meta')
499 {
500 u32 size;
501 u32 type;
502 u32 version;
503 struct theHandler //extends FullBox HandlerBox('hdlr')
504 {
505 u32 size;
506 u32 type;
507 u32 version; // version = 0 + flag
508 u32 pre_defined; // =0
509 u32 handler_type; // = 'meta' for Timed Metadata track
510 u32 reserved[3]; // =0
511 unsigned char name[4];
512 }theHandler;
513 struct file_locations //extends Box DataInformationBox('dinf')
514 {
515 u32 size;
516 u32 type;
517 /* struct DataReferenceBox2
518 {
519 u32 size;
520 u32 type;
521 u32 flags;
522 u32 entry_count; // = 1
523 struct DataEntryUrlBox2 //extends FullBox('url', version=0, flags)
524 {
525 u32 size;
526 u32 type;
527 u32 flags;
528 }DataEntryUrlBox;
529 }DataReferenceBox; */
530 }file_locations;
531 struct item_locations //extends FullBox ItemLocationBox('iloc')
532 {
533 u32 size;
534 u32 type;
535 u32 version; // version = 0 + flags
536 unsigned char offset_size; // = 4 bytes
537 unsigned char lenght_size; // = 4 bytes
538 unsigned char base_offset_size; // = 4 bytes
539 unsigned char reserved; // = 0
540 u16 item_count; // = 1
541 u16 item_ID; // = 1
542 u16 data_reference_index; // = 0 (this file)
543 u32 base_offset; // size=(base_offset_size*8)=4*8
544 u16 extent_count; // = 1
545 u32 extent_offset; // size=(offset_size*8)=4*8
546 u32 extent_length; // size=(lenght_size*8)=4*8
547 }item_locations;
548 struct item_infos //extends FullBox ItemInfoBox('iinf')
549 {
550 u32 size;
551 u32 type;
552 u32 version; // version = 0 + flag
553 u16 entry_count; // = 1
554 struct info_entry// extends FullBox ItemInfoEntry('infe')
555 {
556 u32 size;
557 u32 type;
558 u32 version; // = 0
559 u16 item_ID; // = 1
560 u16 item_protection_index; // = 0 for "unprotected"
561 char item_name[6]; // name with max 5 characters
562 char content_type[18]; // = 'application/other' -> 17 characters
563 char content_encoding[4]; // = 'jpg' for JPEG image -> 3 characters
564 }info_entry;
565 }item_infos;
566 struct XMLBox // extends FullBox('xml ')
567 {
568 u32 size;
569 u32 type;
570 u32 version;
571 char string[2000];
572 }XMLBox;
573 }MetaBox;
574
575 typedef struct MediaDataBox // extends Box('mdat')
576 {
577 u32 size;
578 u32 type;
579 unsigned char data;
580 }MediaDataBox;
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609 #endif // IMAFENCODER_H