mas01cr@498
|
1 extern "C" {
|
mas01cr@498
|
2 #include "audioDB_API.h"
|
mas01cr@498
|
3 }
|
mas01cr@498
|
4 #include "audioDB-internals.h"
|
mas01cr@239
|
5
|
mas01cr@498
|
6 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
|
mas01cr@498
|
7 adb_header_t *header = adb->header;
|
mas01cr@509
|
8 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@498
|
9 return true;
|
mas01cr@498
|
10 } else {
|
mas01cr@498
|
11 /* FIXME: timesTableOffset isn't necessarily the next biggest
|
mas01cr@498
|
12 * offset after dataOffset. Maybe make the offsets into an array
|
mas01cr@498
|
13 * that we can iterate over... */
|
mas01cr@498
|
14 return (header->timesTableOffset >
|
mas01cr@498
|
15 (header->dataOffset + header->length + size));
|
mas01cr@239
|
16 }
|
mas01cr@239
|
17 }
|
mas01cr@239
|
18
|
mas01cr@498
|
19 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
|
mas01cr@498
|
20 /* FIXME: the comment above about the ordering of the tables applies
|
mas01cr@498
|
21 here too. */
|
mas01cr@498
|
22 adb_header_t *header = adb->header;
|
mas01cr@498
|
23 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
|
mas01cr@498
|
24 off_t track_table_length = header->dataOffset - header->trackTableOffset;
|
mas01cr@509
|
25 int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE;
|
mas01cr@509
|
26 int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@498
|
27 /* maxfiles is the _minimum_ of the two. Do not be confused... */
|
mas01cr@498
|
28 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@509
|
29 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@498
|
30 /* by default, these tables are created with the same size as the
|
mas01cr@498
|
31 * fileTable (which should be called key_table); relying on that
|
mas01cr@498
|
32 * always being the case, though, smacks of optimism, so instead
|
mas01cr@498
|
33 * we code defensively... */
|
mas01cr@498
|
34 off_t data_table_length = header->timesTableOffset - header->dataOffset;
|
mas01cr@498
|
35 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
|
mas01cr@498
|
36 off_t power_table_length = header->dbSize - header->powerTableOffset;
|
mas01cr@509
|
37 int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE;
|
mas01cr@509
|
38 int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE;
|
mas01cr@509
|
39 int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE;
|
mas01cr@498
|
40 /* ... even though it means a certain amount of tedium. */
|
mas01cr@498
|
41 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
|
mas01cr@498
|
42 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
|
mas01cr@498
|
43 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
|
mas01cr@498
|
44 }
|
mas01cr@498
|
45 return (header->numFiles < (unsigned int) maxfiles);
|
mas01cr@498
|
46 }
|
mas01cr@498
|
47
|
mas01cr@498
|
48 /*
|
mas01cr@498
|
49 * Hey, look, a comment. Normally I wouldn't bother, as the code
|
mas01cr@498
|
50 * should be self-documenting, but a lot of logic is concentrated in
|
mas01cr@498
|
51 * this one place, so let's give an overview beforehand. To insert a
|
mas01cr@498
|
52 * datum into the database, we:
|
mas01cr@498
|
53 *
|
mas01cr@498
|
54 * 1. check write permission;
|
mas01cr@498
|
55 * 2. check for enough space;
|
mas01cr@498
|
56 * 3. check that datum->dim and adb->header->dim agree (or that the
|
mas01cr@498
|
57 * header dimension is zero, in which case write datum->dim to
|
mas01cr@498
|
58 * adb->header->dim).
|
mas01cr@498
|
59 * 4. check for presence of datum->key in adb->keymap;
|
mas01cr@509
|
60 * 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
|
mas01cr@509
|
61 * and times and ADB_HEADER_FLAG_TIMES;
|
mas01cr@498
|
62 * 6. write in data, power, times as appropriate; add to track
|
mas01cr@498
|
63 * and key tables too;
|
mas01cr@509
|
64 * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
|
mas01cr@509
|
65 * compute norms and fill in table;
|
mas01cr@498
|
66 * 8. update adb->keys, adb->keymap, adb->track_lengths,
|
mas01cr@498
|
67 * adb->track_offsets and adb->header;
|
mas01cr@498
|
68 * 9. sync adb->header with disk.
|
mas01cr@498
|
69 *
|
mas01cr@498
|
70 * Step 9 essentially commits the transaction; until we update
|
mas01cr@498
|
71 * header->length, nothing will recognize the newly-written data. In
|
mas01cr@498
|
72 * principle, if it fails, we should roll back, which we can in fact
|
mas01cr@498
|
73 * do on the assumption that nothing in step 8 can ever fail; on the
|
mas01cr@498
|
74 * other hand, if it's failed, then it's unlikely that rolling back by
|
mas01cr@498
|
75 * syncing the original header back to disk is going to work
|
mas01cr@498
|
76 * desperately well. We should perhaps take an operating-system lock
|
mas01cr@498
|
77 * around step 9, so that we can't be interrupted part-way through
|
mas01cr@498
|
78 * (except of course for SIGKILL, but if we're hit with that we will
|
mas01cr@498
|
79 * always lose).
|
mas01cr@498
|
80 */
|
mas01cr@498
|
81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
|
mas01cr@498
|
82
|
mas01cr@498
|
83 off_t size, offset, nfiles;
|
mas01cr@498
|
84 double *l2norm_buffer = NULL;
|
mas01cr@498
|
85
|
mas01cr@498
|
86 /* 1. check write permission; */
|
mas01cr@498
|
87 if(!(adb->flags & O_RDWR)) {
|
mas01cr@498
|
88 return 1;
|
mas01cr@498
|
89 }
|
mas01cr@498
|
90 /* 2. check for enough space; */
|
mas01cr@498
|
91 size = sizeof(double) * datum->nvectors * datum->dim;
|
mas01cr@498
|
92 if(!audiodb_enough_data_space_free(adb, size)) {
|
mas01cr@498
|
93 return 1;
|
mas01cr@498
|
94 }
|
mas01cr@498
|
95 if(!audiodb_enough_per_file_space_free(adb)) {
|
mas01cr@498
|
96 return 1;
|
mas01cr@498
|
97 }
|
mas01cr@498
|
98 /* 3. check that datum->dim and adb->header->dim agree (or that the
|
mas01cr@498
|
99 * header dimension is zero, in which case write datum->dim to
|
mas01cr@498
|
100 * adb->header->dim).
|
mas01cr@498
|
101 */
|
mas01cr@498
|
102 if(adb->header->dim == 0) {
|
mas01cr@498
|
103 adb->header->dim = datum->dim;
|
mas01cr@498
|
104 } else if (adb->header->dim != datum->dim) {
|
mas01cr@498
|
105 return 1;
|
mas01cr@498
|
106 }
|
mas01cr@498
|
107 /* 4. check for presence of datum->key in adb->keymap; */
|
mas01cr@498
|
108 if(adb->keymap->count(datum->key)) {
|
mas01cr@498
|
109 /* not part of an explicit API/ABI, but we need a distinguished
|
mas01cr@498
|
110 value in this circumstance to preserve somewhat wonky behaviour
|
mas01cr@498
|
111 of audioDB::batchinsert. */
|
mas01cr@498
|
112 return 2;
|
mas01cr@498
|
113 }
|
mas01cr@509
|
114 /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
|
mas01cr@509
|
115 * and times and ADB_HEADER_FLAG_TIMES;
|
mas01cr@498
|
116 */
|
mas01cr@509
|
117 if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) ||
|
mas01cr@509
|
118 ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) {
|
mas01cr@498
|
119 return 1;
|
mas01cr@498
|
120 }
|
mas01cr@509
|
121 if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
|
mas01cr@498
|
122 if(adb->header->numFiles == 0) {
|
mas01cr@509
|
123 adb->header->flags |= ADB_HEADER_FLAG_TIMES;
|
mas01cr@498
|
124 } else {
|
mas01cr@498
|
125 return 1;
|
mas01cr@239
|
126 }
|
mas01cr@509
|
127 } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) {
|
mas01cr@498
|
128 return 1;
|
mas01cr@498
|
129 }
|
mas01cr@498
|
130 /* 6. write in data, power, times as appropriate; add to track
|
mas01cr@498
|
131 * and key tables too;
|
mas01cr@498
|
132 */
|
mas01cr@498
|
133 offset = adb->header->length;
|
mas01cr@498
|
134 nfiles = adb->header->numFiles;
|
mas01cr@498
|
135
|
mas01cr@498
|
136 /* FIXME: checking for all these lseek()s */
|
mas01cr@509
|
137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@498
|
138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
|
mas01cr@509
|
139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@509
|
140 write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE);
|
mas01cr@509
|
141 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@498
|
142 char cwd[PATH_MAX];
|
mas01cr@498
|
143 char slash = '/';
|
mas01cr@498
|
144
|
mas01cr@498
|
145 if(!getcwd(cwd, PATH_MAX)) {
|
mas01cr@498
|
146 goto error;
|
mas01cr@498
|
147 }
|
mas01cr@509
|
148 lseek(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@498
|
149 if(*((char *) datum->data) != '/') {
|
mas01cr@498
|
150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
|
mas01cr@498
|
151 write_or_goto_error(adb->fd, &slash, 1);
|
mas01cr@498
|
152 }
|
mas01cr@498
|
153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
|
mas01cr@498
|
154 if(datum->power) {
|
mas01cr@509
|
155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@498
|
156 if(*((char *) datum->power) != '/') {
|
mas01cr@498
|
157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
|
mas01cr@498
|
158 write_or_goto_error(adb->fd, &slash, 1);
|
mas01cr@498
|
159 }
|
mas01cr@498
|
160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
|
mas01cr@498
|
161 }
|
mas01cr@498
|
162 if(datum->times) {
|
mas01cr@509
|
163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@498
|
164 if(*((char *) datum->times) != '/') {
|
mas01cr@498
|
165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
|
mas01cr@498
|
166 write_or_goto_error(adb->fd, &slash, 1);
|
mas01cr@498
|
167 }
|
mas01cr@498
|
168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
|
mas01cr@498
|
169 }
|
mas01cr@498
|
170 } else {
|
mas01cr@498
|
171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
|
mas01cr@498
|
172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
|
mas01cr@498
|
173 if(datum->power) {
|
mas01cr@498
|
174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
|
mas01cr@498
|
175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
|
mas01cr@498
|
176 }
|
mas01cr@498
|
177 if(datum->times) {
|
mas01cr@498
|
178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
|
mas01cr@498
|
179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
|
mas01cr@498
|
180 }
|
mas01cr@498
|
181 }
|
mas01cr@498
|
182
|
mas01cr@509
|
183 /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
|
mas01cr@509
|
184 * compute norms and fill in table;
|
mas01cr@498
|
185 */
|
mas01cr@509
|
186 if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) &&
|
mas01cr@509
|
187 !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
|
mas01cr@498
|
188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
|
mas01mc@324
|
189
|
mas01cr@498
|
190 audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer);
|
mas01cr@498
|
191 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
|
mas01cr@498
|
192 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
|
mas01cr@498
|
193 free(l2norm_buffer);
|
mas01cr@498
|
194 l2norm_buffer = NULL;
|
mas01cr@498
|
195 }
|
mas01cr@498
|
196
|
mas01cr@498
|
197 /* 8. update adb->keys, adb->keymap, adb->track_lengths,
|
mas01cr@498
|
198 * adb->track_offsets and adb->header;
|
mas01cr@498
|
199 */
|
mas01cr@498
|
200 adb->keys->push_back(datum->key);
|
mas01cr@498
|
201 (*adb->keymap)[datum->key] = adb->header->numFiles;
|
mas01cr@498
|
202 adb->track_lengths->push_back(datum->nvectors);
|
mas01cr@498
|
203 adb->track_offsets->push_back(offset);
|
mas01cr@498
|
204 adb->header->numFiles += 1;
|
mas01cr@498
|
205 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
|
mas01cr@498
|
206
|
mas01cr@498
|
207 /* 9. sync adb->header with disk. */
|
mas01cr@498
|
208 return audiodb_sync_header(adb);
|
mas01cr@498
|
209
|
mas01cr@498
|
210 error:
|
mas01cr@498
|
211 if(l2norm_buffer) {
|
mas01cr@498
|
212 free(l2norm_buffer);
|
mas01cr@498
|
213 }
|
mas01cr@498
|
214 return 1;
|
mas01cr@498
|
215 }
|
mas01cr@498
|
216
|
mas01cr@498
|
217 int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) {
|
mas01cr@509
|
218 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@498
|
219 return 1;
|
mas01cr@498
|
220 } else {
|
mas01cr@498
|
221 adb_datum_internal_t d;
|
mas01cr@498
|
222 d.nvectors = datum->nvectors;
|
mas01cr@498
|
223 d.dim = datum->dim;
|
mas01cr@498
|
224 d.key = datum->key;
|
mas01cr@498
|
225 d.data = datum->data;
|
mas01cr@498
|
226 d.times = datum->times;
|
mas01cr@498
|
227 d.power = datum->power;
|
mas01cr@498
|
228 return audiodb_insert_datum_internal(adb, &d);
|
mas01cr@498
|
229 }
|
mas01cr@498
|
230 }
|
mas01cr@498
|
231
|
mas01cr@498
|
232 int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) {
|
mas01cr@509
|
233 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
|
mas01cr@498
|
234 return 1;
|
mas01cr@498
|
235 } else {
|
mas01cr@498
|
236 adb_datum_internal_t d;
|
mas01cr@498
|
237 struct stat st;
|
mas01cr@498
|
238 int fd;
|
mas01cr@498
|
239 off_t size;
|
mas01mc@324
|
240
|
mas01cr@498
|
241 if((fd = open(reference->features, O_RDONLY)) == -1) {
|
mas01cr@498
|
242 return 1;
|
mas01cr@239
|
243 }
|
mas01cr@498
|
244 if(fstat(fd, &st)) {
|
mas01cr@498
|
245 goto error;
|
mas01cr@239
|
246 }
|
mas01cr@498
|
247 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
|
mas01cr@498
|
248 close(fd);
|
mas01cr@498
|
249 fd = 0;
|
mas01cr@498
|
250 size = st.st_size - sizeof(uint32_t);
|
mas01cr@498
|
251 d.nvectors = size / (sizeof(double) * d.dim);
|
mas01cr@498
|
252 d.data = (void *) reference->features;
|
mas01cr@498
|
253 if(reference->power) {
|
mas01cr@498
|
254 if(stat(reference->power, &st)) {
|
mas01cr@498
|
255 return 1;
|
mas01cr@498
|
256 }
|
mas01cr@498
|
257 }
|
mas01cr@498
|
258 d.power = (void *) reference->power;
|
mas01cr@498
|
259 if(reference->times) {
|
mas01cr@498
|
260 if(stat(reference->times, &st)) {
|
mas01cr@498
|
261 return 1;
|
mas01cr@498
|
262 }
|
mas01cr@498
|
263 }
|
mas01cr@498
|
264 d.times = (void *) reference->times;
|
mas01cr@498
|
265 d.key = reference->key ? reference->key : reference->features;
|
mas01cr@498
|
266 return audiodb_insert_datum_internal(adb, &d);
|
mas01cr@498
|
267 error:
|
mas01cr@498
|
268 if(fd) {
|
mas01cr@498
|
269 close(fd);
|
mas01cr@498
|
270 }
|
mas01cr@498
|
271 return 1;
|
mas01cr@498
|
272 }
|
mas01cr@498
|
273 }
|
mas01cr@498
|
274
|
mas01cr@498
|
275 int audiodb_free_datum(adb_datum_t *datum) {
|
mas01cr@498
|
276 if(datum->data) {
|
mas01cr@498
|
277 free(datum->data);
|
mas01cr@498
|
278 datum->data = NULL;
|
mas01cr@498
|
279 }
|
mas01cr@498
|
280 if(datum->power) {
|
mas01cr@498
|
281 free(datum->power);
|
mas01cr@498
|
282 datum->power = NULL;
|
mas01cr@498
|
283 }
|
mas01cr@498
|
284 if(datum->times) {
|
mas01cr@498
|
285 free(datum->times);
|
mas01cr@498
|
286 datum->times = NULL;
|
mas01cr@498
|
287 }
|
mas01cr@498
|
288 return 0;
|
mas01cr@498
|
289 }
|
mas01cr@498
|
290
|
mas01mc@541
|
291 int audiodb_free_datum_cache(adb_fd_cache_t *cache){
|
mas01mc@541
|
292 if(cache){
|
mas01mc@541
|
293 if(cache->fname){
|
mas01mc@541
|
294 free(cache->fname);
|
mas01mc@541
|
295 cache->fname = NULL;
|
mas01mc@541
|
296 }
|
mas01mc@541
|
297 if(cache->data_fd){
|
mas01mc@541
|
298 close(cache->data_fd);
|
mas01mc@541
|
299 cache->data_fd = 0;
|
mas01mc@541
|
300 }
|
mas01mc@541
|
301 if(cache->power_fd){
|
mas01mc@541
|
302 close(cache->power_fd);
|
mas01mc@541
|
303 cache->power_fd = 0;
|
mas01mc@541
|
304 }
|
mas01mc@541
|
305 if(cache->times_file){
|
mas01mc@541
|
306 fclose(cache->times_file);
|
mas01mc@541
|
307 cache->times_file = NULL;
|
mas01mc@541
|
308 }
|
mas01mc@541
|
309 if(cache->reference){
|
mas01mc@541
|
310 audiodb_free_datum_reference(cache->reference);
|
mas01mc@542
|
311 free(cache->reference);
|
mas01mc@541
|
312 cache->reference = NULL;
|
mas01mc@541
|
313 }
|
mas01mc@541
|
314 }
|
mas01mc@541
|
315 return 0;
|
mas01mc@541
|
316 }
|
mas01mc@541
|
317
|
mas01mc@541
|
318 int audiodb_free_datum_reference(adb_reference_t * reference){
|
mas01mc@541
|
319 if(reference){
|
mas01mc@541
|
320 if(reference->features){
|
mas01mc@541
|
321 free((char *)reference->features);
|
mas01mc@541
|
322 reference->features = 0;
|
mas01mc@541
|
323 }
|
mas01mc@541
|
324 if(reference->power){
|
mas01mc@541
|
325 free((char *)reference->power);
|
mas01mc@541
|
326 reference->power = 0;
|
mas01mc@541
|
327 }
|
mas01mc@541
|
328 if(reference->times){
|
mas01mc@541
|
329 free((char *)reference->times);
|
mas01mc@541
|
330 reference->times = 0;
|
mas01mc@541
|
331 }
|
mas01mc@541
|
332 }
|
mas01mc@541
|
333 return 0;
|
mas01mc@541
|
334 }
|
mas01mc@541
|
335
|
mas01mc@545
|
336 int audiodb_insert_create_datum(adb_insert_t * insert, adb_datum_t *datum){
|
mas01mc@545
|
337 return audiodb_insert_create_datum_offset(insert, datum, 0, 0, 0);
|
mas01mc@545
|
338 }
|
mas01mc@545
|
339
|
mas01mc@546
|
340 int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache) {
|
mas01cr@498
|
341 int fd = 0;
|
mas01cr@498
|
342 FILE *file = NULL;
|
mas01cr@498
|
343 struct stat st;
|
mas01cr@498
|
344 off_t size;
|
mas01mc@541
|
345 bool clear_cache = false;
|
mas01cr@498
|
346
|
mas01mc@541
|
347 if(!cache){
|
mas01mc@541
|
348 datum->data = NULL;
|
mas01mc@541
|
349 datum->power = NULL;
|
mas01mc@541
|
350 datum->times = NULL;
|
mas01cr@498
|
351 }
|
mas01mc@541
|
352
|
mas01mc@541
|
353 // STEP 1 check if we need to clear the cache
|
mas01mc@546
|
354 if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0)){
|
mas01mc@541
|
355 clear_cache = true;
|
mas01mc@546
|
356 }
|
mas01mc@541
|
357
|
mas01mc@541
|
358 // STEP 2. Clear the cache if necessary
|
mas01mc@541
|
359 if(cache && clear_cache){
|
mas01mc@541
|
360 close(cache->data_fd);
|
mas01mc@541
|
361 cache->data_fd = 0;
|
mas01mc@541
|
362 free(cache->fname);
|
mas01mc@541
|
363 cache->fname = 0;
|
mas01mc@541
|
364 }
|
mas01mc@541
|
365
|
mas01mc@541
|
366 // STEP 3. Use the cached file descriptor or open a new file descriptor
|
mas01mc@541
|
367 if (cache && cache->data_fd ){
|
mas01mc@541
|
368 fd = cache->data_fd;
|
mas01mc@541
|
369 }
|
mas01mc@541
|
370 else{
|
mas01mc@541
|
371 if ((fd = open(insert->features, O_RDONLY)) == -1) {
|
mas01mc@541
|
372 goto error;
|
mas01mc@541
|
373 }
|
mas01mc@541
|
374 if(cache){
|
mas01mc@541
|
375 cache->fname = (char*) malloc(strlen(insert->features));
|
mas01mc@541
|
376 strncpy(cache->fname, insert->features, strlen(insert->features));
|
mas01mc@541
|
377 }
|
mas01mc@541
|
378 }
|
mas01mc@541
|
379
|
mas01cr@498
|
380 if(fstat(fd, &st)) {
|
mas01cr@498
|
381 goto error;
|
mas01cr@498
|
382 }
|
mas01mc@541
|
383
|
mas01mc@541
|
384 // STEP 4. If file descriptor is new, read the dimensionality, maybe cache the file descriptor
|
mas01mc@541
|
385 if( !( cache && cache->data_fd ) ){
|
mas01mc@541
|
386 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
|
mas01mc@541
|
387 if(cache)
|
mas01mc@541
|
388 cache->data_fd = fd;
|
mas01mc@541
|
389 }
|
mas01mc@541
|
390
|
mas01mc@541
|
391 // STEP 5. Allocate data memory if necessary, read the requested amount of data
|
mas01mc@546
|
392 if(num_vectors){
|
mas01mc@546
|
393 size = num_vectors*datum->dim*sizeof(double);
|
mas01mc@546
|
394 }
|
mas01mc@546
|
395 else{
|
mas01mc@539
|
396 size = st.st_size - sizeof(uint32_t);
|
mas01mc@546
|
397 }
|
mas01mc@541
|
398
|
mas01cr@498
|
399 datum->nvectors = size / (sizeof(double) * datum->dim);
|
mas01mc@541
|
400
|
mas01mc@541
|
401 if(!datum->data){
|
mas01mc@541
|
402 datum->data = (double *) malloc(size);
|
mas01mc@541
|
403 }
|
mas01mc@541
|
404
|
mas01cr@498
|
405 if(!datum->data) {
|
mas01cr@498
|
406 goto error;
|
mas01cr@498
|
407 }
|
mas01mc@541
|
408
|
mas01mc@546
|
409 if(vector_offset){
|
mas01mc@546
|
410 lseek(fd, sizeof(uint32_t) + vector_offset*datum->dim*sizeof(double), SEEK_SET);
|
mas01mc@546
|
411 }
|
mas01cr@498
|
412 read_or_goto_error(fd, datum->data, size);
|
mas01mc@541
|
413
|
mas01mc@541
|
414 // STEP 6. Close the file descriptor, unless we are caching it
|
mas01mc@546
|
415 if(!cache){
|
mas01mc@541
|
416 close(fd);
|
mas01mc@546
|
417 }
|
mas01mc@541
|
418 fd = 0; // we're done with the data
|
mas01mc@541
|
419
|
mas01cr@498
|
420 if(insert->power) {
|
mas01cr@498
|
421 int dim;
|
mas01mc@541
|
422
|
mas01mc@541
|
423 // Clear the cache if necessary
|
mas01mc@541
|
424 if(clear_cache){
|
mas01mc@541
|
425 close(cache->power_fd);
|
mas01mc@541
|
426 cache->power_fd = 0;
|
mas01mc@541
|
427 }
|
mas01mc@541
|
428
|
mas01mc@541
|
429 // Use the cached file descriptor or open a new file descriptor
|
mas01mc@546
|
430 if (cache && cache->power_fd){
|
mas01mc@541
|
431 fd = cache->power_fd;
|
mas01mc@546
|
432 }
|
mas01mc@541
|
433 else if((fd = open(insert->power, O_RDONLY)) == -1) {
|
mas01cr@498
|
434 goto error;
|
mas01cr@498
|
435 }
|
mas01mc@541
|
436
|
mas01cr@498
|
437 if(fstat(fd, &st)) {
|
mas01cr@498
|
438 goto error;
|
mas01cr@498
|
439 }
|
mas01mc@541
|
440
|
mas01cr@498
|
441 /* This cast is so non-trivial that it deserves a comment.
|
mas01cr@498
|
442 *
|
mas01cr@498
|
443 * The data types in this expression, left to right, are: off_t,
|
mas01cr@498
|
444 * size_t, off_t, uint32_t. The rules for conversions in
|
mas01cr@498
|
445 * arithmetic expressions with mixtures of integral types are
|
mas01cr@498
|
446 * essentially that the widest type wins, with unsigned types
|
mas01cr@498
|
447 * winning on a tie-break.
|
mas01cr@498
|
448 *
|
mas01cr@498
|
449 * Because we are enforcing (through the use of sufficient
|
mas01cr@498
|
450 * compiler flags, if necessary) that off_t be a (signed) 64-bit
|
mas01cr@498
|
451 * type, the only variability in this set of types is in fact the
|
mas01cr@498
|
452 * size_t. On 32-bit machines, size_t is uint32_t and so the
|
mas01cr@498
|
453 * coercions on both sides of the equality end up promoting
|
mas01cr@498
|
454 * everything to int64_t, which is fine. On 64-bit machines,
|
mas01cr@498
|
455 * however, the left hand side is promoted to a uint64_t, while
|
mas01cr@498
|
456 * the right hand side remains int64_t.
|
mas01cr@498
|
457 *
|
mas01cr@498
|
458 * The mixture of signed and unsigned types in comparisons is Evil
|
mas01cr@498
|
459 * Bad and Wrong, and gcc complains about it. (It's right to do
|
mas01cr@498
|
460 * so, actually). Of course in this case it will never matter
|
mas01cr@498
|
461 * because of the particular relationships between all of these
|
mas01cr@498
|
462 * numbers, so we just cast the left hand side to off_t, which
|
mas01cr@498
|
463 * will do the right thing for us on all platforms.
|
mas01cr@498
|
464 *
|
mas01cr@498
|
465 * I hate C.
|
mas01cr@498
|
466 */
|
mas01mc@541
|
467
|
mas01mc@546
|
468 if( (!num_vectors) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
|
mas01cr@498
|
469 goto error;
|
mas01cr@498
|
470 }
|
mas01mc@541
|
471
|
mas01mc@541
|
472 // If file descriptor is new, read the dimensionality, maybe cache the file descriptor
|
mas01mc@541
|
473 if( !( cache && cache->power_fd ) ){
|
mas01mc@541
|
474 read_or_goto_error(fd, &dim, sizeof(uint32_t));
|
mas01mc@541
|
475 if(dim != 1) {
|
mas01mc@541
|
476 goto error;
|
mas01mc@541
|
477 }
|
mas01mc@546
|
478 if(cache){
|
mas01mc@541
|
479 cache->power_fd = fd;
|
mas01mc@546
|
480 }
|
mas01cr@498
|
481 }
|
mas01mc@541
|
482
|
mas01mc@541
|
483 // Allocate data memory if necessary, read the requested amount of data
|
mas01mc@541
|
484 if(!datum->power)
|
mas01mc@541
|
485 datum->power = (double *) malloc(size / datum->dim);
|
mas01cr@498
|
486 if(!datum->power) {
|
mas01cr@498
|
487 goto error;
|
mas01cr@498
|
488 }
|
mas01mc@541
|
489
|
mas01mc@546
|
490 if(vector_offset){
|
mas01mc@546
|
491 lseek(fd, sizeof(uint32_t) + vector_offset*sizeof(double), SEEK_SET);
|
mas01mc@546
|
492 }
|
mas01mc@541
|
493
|
mas01cr@498
|
494 read_or_goto_error(fd, datum->power, size / datum->dim);
|
mas01mc@541
|
495
|
mas01mc@546
|
496 if(!cache){
|
mas01mc@541
|
497 close(fd);
|
mas01mc@546
|
498 }
|
mas01mc@541
|
499 fd = 0;
|
mas01cr@498
|
500 }
|
mas01mc@541
|
501
|
mas01cr@498
|
502 if(insert->times) {
|
mas01cr@498
|
503 double t, *tp;
|
mas01mc@541
|
504
|
mas01mc@541
|
505 // Clear the cache if necessary
|
mas01mc@541
|
506 if(clear_cache){
|
mas01mc@541
|
507 fclose(cache->times_file);
|
mas01mc@541
|
508 cache->times_file = 0;
|
mas01cr@498
|
509 }
|
mas01mc@541
|
510
|
mas01mc@541
|
511 // Use the cached file descriptor or open a new file descriptor and maybe cache
|
mas01mc@546
|
512 if (cache && cache->times_file){
|
mas01mc@541
|
513 file = cache->times_file;
|
mas01mc@546
|
514 }
|
mas01mc@541
|
515 else{
|
mas01mc@541
|
516 if(!(file = fopen(insert->times, "r"))) {
|
mas01mc@541
|
517 goto error;
|
mas01mc@541
|
518 }
|
mas01mc@546
|
519 if(cache){
|
mas01mc@541
|
520 cache->times_file = file;
|
mas01mc@546
|
521 }
|
mas01mc@541
|
522 }
|
mas01mc@541
|
523
|
mas01mc@541
|
524 // Allocate data memory if necessary, read the requested amount of data
|
mas01mc@546
|
525 if(!datum->times){
|
mas01mc@541
|
526 datum->times = (double *) malloc(2 * size / datum->dim);
|
mas01mc@546
|
527 }
|
mas01cr@498
|
528 if(!datum->times) {
|
mas01cr@498
|
529 goto error;
|
mas01cr@498
|
530 }
|
mas01mc@541
|
531
|
mas01mc@541
|
532 rewind(file);
|
mas01mc@546
|
533
|
mas01cr@498
|
534 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@498
|
535 goto error;
|
mas01cr@498
|
536 }
|
mas01mc@546
|
537 if(vector_offset){
|
mas01mc@546
|
538 while(vector_offset-- != 1 ){
|
mas01mc@546
|
539 if(fscanf(file, " %lf", &t) != 1){
|
mas01mc@539
|
540 goto error;
|
mas01mc@546
|
541 }
|
mas01mc@546
|
542 }
|
mas01mc@546
|
543 }
|
mas01cr@498
|
544 tp = datum->times;
|
mas01cr@498
|
545 *tp++ = t;
|
mas01cr@498
|
546 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
|
mas01cr@498
|
547 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@498
|
548 goto error;
|
mas01cr@498
|
549 }
|
mas01cr@498
|
550 *tp++ = t;
|
mas01cr@498
|
551 *tp++ = t;
|
mas01cr@498
|
552 }
|
mas01cr@498
|
553 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@498
|
554 goto error;
|
mas01cr@498
|
555 }
|
mas01cr@498
|
556 *tp = t;
|
mas01mc@541
|
557 if(!cache){
|
mas01mc@541
|
558 fclose(file);
|
mas01mc@541
|
559 file=0;
|
mas01mc@541
|
560 }
|
mas01cr@498
|
561 }
|
mas01cr@498
|
562 datum->key = insert->key ? insert->key : insert->features;
|
mas01cr@498
|
563 return 0;
|
mas01cr@498
|
564
|
mas01cr@498
|
565 error:
|
mas01cr@498
|
566 if(fd > 0) {
|
mas01cr@498
|
567 close(fd);
|
mas01cr@498
|
568 }
|
mas01cr@498
|
569 if(file) {
|
mas01cr@498
|
570 fclose(file);
|
mas01cr@498
|
571 }
|
mas01cr@498
|
572 audiodb_free_datum(datum);
|
mas01mc@541
|
573 if(cache)
|
mas01mc@541
|
574 audiodb_free_datum_cache(cache);
|
mas01cr@498
|
575 return 1;
|
mas01mc@541
|
576 }
|
mas01cr@498
|
577
|
mas01cr@498
|
578 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
|
mas01cr@509
|
579 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@498
|
580 adb_reference_t *reference = insert;
|
mas01cr@498
|
581 int err;
|
mas01cr@498
|
582 err = audiodb_insert_reference(adb, reference);
|
mas01cr@498
|
583
|
mas01cr@498
|
584 if(err == 2) {
|
mas01cr@498
|
585 return 0;
|
mas01cr@498
|
586 } else {
|
mas01cr@498
|
587 return err;
|
mas01cr@498
|
588 }
|
mas01cr@498
|
589 } else {
|
mas01cr@498
|
590 adb_datum_t datum;
|
mas01cr@498
|
591 int err;
|
mas01cr@498
|
592
|
mas01cr@498
|
593 if(audiodb_insert_create_datum(insert, &datum)) {
|
mas01cr@498
|
594 return 1;
|
mas01cr@498
|
595 }
|
mas01cr@498
|
596 err = audiodb_insert_datum(adb, &datum);
|
mas01cr@498
|
597 audiodb_free_datum(&datum);
|
mas01cr@498
|
598
|
mas01cr@498
|
599 if(err == 2) {
|
mas01cr@498
|
600 return 0;
|
mas01cr@498
|
601 } else {
|
mas01cr@498
|
602 return err;
|
mas01cr@239
|
603 }
|
mas01cr@239
|
604 }
|
mas01cr@239
|
605 }
|
mas01cr@239
|
606
|
mas01cr@498
|
607 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
|
mas01cr@498
|
608 int err;
|
mas01cr@498
|
609 for(unsigned int n = 0; n < size; n++) {
|
mas01cr@498
|
610 if((err = audiodb_insert(adb, &(insert[n])))) {
|
mas01cr@498
|
611 return err;
|
mas01cr@498
|
612 }
|
mas01mc@324
|
613 }
|
mas01cr@498
|
614 return 0;
|
mas01cr@239
|
615 }
|