mas01cr@239
|
1 #include "audioDB.h"
|
mas01cr@404
|
2 extern "C" {
|
mas01cr@404
|
3 #include "audioDB_API.h"
|
mas01cr@404
|
4 }
|
mas01cr@404
|
5 #include "audioDB-internals.h"
|
mas01cr@404
|
6
|
mas01cr@404
|
7 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
|
mas01cr@404
|
8 adb_header_t *header = adb->header;
|
mas01cr@404
|
9 /* FIXME: timesTableOffset isn't necessarily the next biggest offset
|
mas01cr@404
|
10 after dataOffset. Maybe make the offsets into an array that we
|
mas01cr@404
|
11 can iterate over... */
|
mas01cr@404
|
12 return (header->timesTableOffset >
|
mas01cr@404
|
13 header->dataOffset + header->length + size);
|
mas01cr@404
|
14 }
|
mas01cr@404
|
15
|
mas01cr@404
|
16 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
|
mas01cr@404
|
17 /* FIXME: the comment above about the ordering of the tables applies
|
mas01cr@404
|
18 here too. */
|
mas01cr@404
|
19 adb_header_t *header = adb->header;
|
mas01cr@404
|
20 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
|
mas01cr@404
|
21 off_t track_table_length = header->dataOffset - header->trackTableOffset;
|
mas01cr@404
|
22 int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE;
|
mas01cr@404
|
23 int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@404
|
24 /* maxfiles is the _minimum_ of the two. Do not be confused... */
|
mas01cr@404
|
25 unsigned int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@404
|
26 return (header->numFiles < maxfiles);
|
mas01cr@404
|
27 }
|
mas01cr@404
|
28
|
mas01cr@404
|
29 /*
|
mas01cr@404
|
30 * Hey, look, a comment. Normally I wouldn't bother, as the code
|
mas01cr@404
|
31 * should be self-documenting, but a lot of logic is concentrated in
|
mas01cr@404
|
32 * this one place, so let's give an overview beforehand. To insert a
|
mas01cr@404
|
33 * datum into the database, we:
|
mas01cr@404
|
34 *
|
mas01cr@404
|
35 * 1. check write permission;
|
mas01cr@404
|
36 * 2. check !O2_FLAG_LARGE_ADB;
|
mas01cr@404
|
37 * 3. check for enough space;
|
mas01cr@404
|
38 * 4. check that datum->dim and adb->header->dim agree (or that the
|
mas01cr@404
|
39 * header dimension is zero, in which case write datum->dim to
|
mas01cr@404
|
40 * adb->header->dim).
|
mas01cr@404
|
41 * 5. check for presence of datum->key in adb->keys;
|
mas01cr@404
|
42 * 6. check for consistency between power and O2_FLAG_POWER, and
|
mas01cr@404
|
43 * times and O2_FLAG_TIMES;
|
mas01cr@404
|
44 * 7. write in data, power, times as appropriate; add to track
|
mas01cr@404
|
45 * and key tables too;
|
mas01cr@404
|
46 * 8. if O2_FLAG_L2NORM, compute norms and fill in table;
|
mas01cr@404
|
47 * 9. update adb->keys and adb->header;
|
mas01cr@404
|
48 * 10. sync adb->header with disk.
|
mas01cr@404
|
49 *
|
mas01cr@404
|
50 * Step 10 essentially commits the transaction; until we update
|
mas01cr@404
|
51 * header->length, nothing will recognize the newly-written data.
|
mas01cr@404
|
52 * In principle, if it fails, we should roll back, which we can in
|
mas01cr@404
|
53 * fact do on the assumption that nothing in step 9 can ever fail;
|
mas01cr@404
|
54 * on the other hand, if it's failed, then it's unlikely that
|
mas01cr@404
|
55 * rolling back by syncing the original header back to disk is going
|
mas01cr@404
|
56 * to work desperately well.
|
mas01cr@404
|
57 */
|
mas01cr@404
|
58 int audiodb_insert_datum(adb_t *adb, adb_datum_t *datum) {
|
mas01cr@404
|
59
|
mas01cr@404
|
60 off_t size, offset, nfiles;
|
mas01cr@404
|
61 double *l2norm_buffer, *lp, *dp;
|
mas01cr@404
|
62
|
mas01cr@404
|
63 /* 1. check write permission; */
|
mas01cr@404
|
64 if(!(adb->flags & O_RDWR)) {
|
mas01cr@404
|
65 return 1;
|
mas01cr@404
|
66 }
|
mas01cr@404
|
67 /* 2. check !O2_FLAG_LARGE_ADB; */
|
mas01cr@404
|
68 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
|
mas01cr@404
|
69 return 1;
|
mas01cr@404
|
70 }
|
mas01cr@404
|
71 /* 3. check for enough space; */
|
mas01cr@404
|
72 size = sizeof(double) * datum->nvectors * datum->dim;
|
mas01cr@404
|
73 if(!audiodb_enough_data_space_free(adb, size)) {
|
mas01cr@404
|
74 return 1;
|
mas01cr@404
|
75 }
|
mas01cr@404
|
76 if(!audiodb_enough_per_file_space_free(adb)) {
|
mas01cr@404
|
77 return 1;
|
mas01cr@404
|
78 }
|
mas01cr@404
|
79 /* 4. check that datum->dim and adb->header->dim agree (or that the
|
mas01cr@404
|
80 * header dimension is zero, in which case write datum->dim to
|
mas01cr@404
|
81 * adb->header->dim).
|
mas01cr@404
|
82 */
|
mas01cr@404
|
83 if(adb->header->dim == 0) {
|
mas01cr@404
|
84 adb->header->dim = datum->dim;
|
mas01cr@404
|
85 } else if (adb->header->dim != datum->dim) {
|
mas01cr@404
|
86 return 1;
|
mas01cr@404
|
87 }
|
mas01cr@404
|
88 /* 5. check for presence of datum->key in adb->keys; */
|
mas01cr@404
|
89 if(adb->keys->count(datum->key)) {
|
mas01cr@404
|
90 /* not part of an explicit API/ABI, but we need a distinguished
|
mas01cr@404
|
91 value in this circumstance to preserve somewhat wonky behaviour
|
mas01cr@404
|
92 of audioDB::batchinsert. */
|
mas01cr@404
|
93 return 2;
|
mas01cr@404
|
94 }
|
mas01cr@404
|
95 /* 6. check for consistency between power and O2_FLAG_POWER, and
|
mas01cr@404
|
96 * times and O2_FLAG_TIMES;
|
mas01cr@404
|
97 */
|
mas01cr@404
|
98 if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
|
mas01cr@404
|
99 ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) {
|
mas01cr@404
|
100 return 1;
|
mas01cr@404
|
101 }
|
mas01cr@404
|
102 if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) {
|
mas01cr@404
|
103 if(adb->header->numFiles == 0) {
|
mas01cr@404
|
104 adb->header->flags |= O2_FLAG_TIMES;
|
mas01cr@404
|
105 } else {
|
mas01cr@404
|
106 return 1;
|
mas01cr@404
|
107 }
|
mas01cr@404
|
108 } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
|
mas01cr@404
|
109 return 1;
|
mas01cr@404
|
110 }
|
mas01cr@404
|
111 /* 7. write in data, power, times as appropriate; add to track
|
mas01cr@404
|
112 * and key tables too;
|
mas01cr@404
|
113 */
|
mas01cr@404
|
114 offset = adb->header->length;
|
mas01cr@404
|
115 nfiles = adb->header->numFiles;
|
mas01cr@404
|
116
|
mas01cr@404
|
117 /* FIXME: checking for all these lseek()s and write()s */
|
mas01cr@404
|
118 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
|
mas01cr@404
|
119 write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
|
mas01cr@404
|
120 if(datum->power) {
|
mas01cr@404
|
121 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
|
mas01cr@404
|
122 write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
|
mas01cr@404
|
123 }
|
mas01cr@404
|
124 if(datum->times) {
|
mas01cr@404
|
125 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
|
mas01cr@404
|
126 write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
|
mas01cr@404
|
127 }
|
mas01cr@404
|
128 lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@404
|
129 write(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
|
mas01cr@404
|
130 lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
|
mas01cr@404
|
131 write(adb->fd, datum->key, strlen(datum->key)+1);
|
mas01cr@404
|
132
|
mas01cr@404
|
133 /* 8. if O2_FLAG_L2NORM, compute norms and fill in table; */
|
mas01cr@404
|
134 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
|
mas01cr@404
|
135
|
mas01cr@404
|
136 /* FIXME: shared code with audiodb_norm_existing() */
|
mas01cr@404
|
137 dp = datum->data;
|
mas01cr@404
|
138 lp = l2norm_buffer;
|
mas01cr@404
|
139 for(size_t i = 0; i < datum->nvectors; i++) {
|
mas01cr@404
|
140 *lp = 0;
|
mas01cr@404
|
141 for(unsigned int k = 0; k < datum->dim; k++) {
|
mas01cr@404
|
142 *lp += (*dp)*(*dp);
|
mas01cr@404
|
143 dp++;
|
mas01cr@404
|
144 }
|
mas01cr@404
|
145 lp++;
|
mas01cr@404
|
146 }
|
mas01cr@404
|
147 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
|
mas01cr@404
|
148 write(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
|
mas01cr@404
|
149 free(l2norm_buffer);
|
mas01cr@404
|
150
|
mas01cr@404
|
151 adb->keys->insert(datum->key);
|
mas01cr@404
|
152 adb->header->numFiles += 1;
|
mas01cr@404
|
153 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
|
mas01cr@404
|
154
|
mas01cr@404
|
155 return audiodb_sync_header(adb);
|
mas01cr@404
|
156
|
mas01cr@404
|
157 error:
|
mas01cr@404
|
158 return 1;
|
mas01cr@404
|
159 }
|
mas01cr@239
|
160
|
mas01cr@251
|
161 bool audioDB::enough_per_file_space_free() {
|
mas01cr@251
|
162 unsigned int fmaxfiles, tmaxfiles;
|
mas01cr@251
|
163 unsigned int maxfiles;
|
mas01cr@251
|
164
|
mas01cr@256
|
165 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
|
mas01cr@256
|
166 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@251
|
167 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@251
|
168 return(dbH->numFiles < maxfiles);
|
mas01cr@251
|
169 }
|
mas01cr@251
|
170
|
mas01cr@405
|
171 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
|
mas01cr@404
|
172 adb_datum_t datum;
|
mas01cr@405
|
173 int fd = 0;
|
mas01cr@405
|
174 FILE *file = NULL;
|
mas01cr@405
|
175 struct stat st;
|
mas01cr@404
|
176 off_t size;
|
mas01cr@404
|
177 int err;
|
mas01cr@405
|
178
|
mas01cr@405
|
179 datum.data = NULL;
|
mas01cr@405
|
180 datum.power = NULL;
|
mas01cr@405
|
181 datum.times = NULL;
|
mas01cr@405
|
182 if((fd = open(insert->features, O_RDONLY)) == -1) {
|
mas01cr@405
|
183 goto error;
|
mas01cr@370
|
184 }
|
mas01cr@405
|
185 if(fstat(fd, &st)) {
|
mas01cr@405
|
186 goto error;
|
mas01cr@404
|
187 }
|
mas01cr@404
|
188 read(fd, &(datum.dim), sizeof(uint32_t));
|
mas01cr@405
|
189 size = st.st_size - sizeof(uint32_t);
|
mas01cr@404
|
190 datum.nvectors = size / (sizeof(double) * datum.dim);
|
mas01cr@404
|
191 datum.data = (double *) malloc(size);
|
mas01cr@404
|
192 if(!datum.data) {
|
mas01cr@405
|
193 goto error;
|
mas01cr@404
|
194 }
|
mas01cr@404
|
195 read(fd, datum.data, size);
|
mas01cr@404
|
196 close(fd);
|
mas01cr@405
|
197 fd = 0;
|
mas01cr@405
|
198 if(insert->power) {
|
mas01cr@405
|
199 int dim;
|
mas01cr@405
|
200 if((fd = open(insert->power, O_RDONLY)) == -1) {
|
mas01cr@405
|
201 goto error;
|
mas01cr@405
|
202 }
|
mas01cr@405
|
203 if(fstat(fd, &st)) {
|
mas01cr@405
|
204 goto error;
|
mas01cr@405
|
205 }
|
mas01cr@405
|
206 if((st.st_size - sizeof(uint32_t)) != (size / datum.dim)) {
|
mas01cr@405
|
207 goto error;
|
mas01cr@405
|
208 }
|
mas01cr@405
|
209 read(fd, &dim, sizeof(uint32_t));
|
mas01cr@405
|
210 if(dim != 1) {
|
mas01cr@405
|
211 goto error;
|
mas01cr@405
|
212 }
|
mas01cr@405
|
213 datum.power = (double *) malloc(size / datum.dim);
|
mas01cr@405
|
214 if(!datum.power) {
|
mas01cr@405
|
215 goto error;
|
mas01cr@405
|
216 }
|
mas01cr@405
|
217 read(fd, datum.power, size / datum.dim);
|
mas01cr@405
|
218 close(fd);
|
mas01cr@405
|
219 }
|
mas01cr@405
|
220 if(insert->times) {
|
mas01cr@405
|
221 double t, *tp;
|
mas01cr@405
|
222 if(!(file = fopen(insert->times, "r"))) {
|
mas01cr@405
|
223 goto error;
|
mas01cr@405
|
224 }
|
mas01cr@405
|
225 datum.times = (double *) malloc(2 * size / datum.dim);
|
mas01cr@404
|
226 if(!datum.times) {
|
mas01cr@405
|
227 goto error;
|
mas01cr@404
|
228 }
|
mas01cr@405
|
229 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@405
|
230 goto error;
|
mas01cr@405
|
231 }
|
mas01cr@405
|
232 tp = datum.times;
|
mas01cr@405
|
233 *tp++ = t;
|
mas01cr@405
|
234 for(unsigned int n = 0; n < datum.nvectors - 1; n++) {
|
mas01cr@405
|
235 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@405
|
236 goto error;
|
mas01cr@405
|
237 }
|
mas01cr@405
|
238 *tp++ = t;
|
mas01cr@405
|
239 *tp++ = t;
|
mas01cr@405
|
240 }
|
mas01cr@405
|
241 if(fscanf(file, " %lf", &t) != 1) {
|
mas01cr@405
|
242 goto error;
|
mas01cr@405
|
243 }
|
mas01cr@405
|
244 *tp = t;
|
mas01cr@405
|
245 fclose(file);
|
mas01cr@404
|
246 }
|
mas01cr@405
|
247 datum.key = insert->key ? insert->key : insert->features;
|
mas01cr@405
|
248 err = audiodb_insert_datum(adb, &datum);
|
mas01cr@405
|
249 free(datum.data);
|
mas01cr@405
|
250 if(datum.power) {
|
mas01cr@405
|
251 free(datum.power);
|
mas01cr@405
|
252 }
|
mas01cr@405
|
253 if(datum.times) {
|
mas01cr@405
|
254 free(datum.times);
|
mas01cr@405
|
255 }
|
mas01cr@405
|
256 if(err == 2) {
|
mas01cr@405
|
257 return 0;
|
mas01cr@405
|
258 }
|
mas01cr@405
|
259 else {
|
mas01cr@405
|
260 return err;
|
mas01cr@405
|
261 }
|
mas01cr@405
|
262
|
mas01cr@405
|
263 error:
|
mas01cr@405
|
264 if(fd > 0) {
|
mas01cr@405
|
265 close(fd);
|
mas01cr@405
|
266 }
|
mas01cr@405
|
267 if(file) {
|
mas01cr@405
|
268 fclose(file);
|
mas01cr@405
|
269 }
|
mas01cr@405
|
270 if(datum.data) {
|
mas01cr@405
|
271 free(datum.data);
|
mas01cr@405
|
272 }
|
mas01cr@405
|
273 if(datum.power) {
|
mas01cr@405
|
274 free(datum.power);
|
mas01cr@405
|
275 }
|
mas01cr@405
|
276 if(datum.times) {
|
mas01cr@405
|
277 free(datum.times);
|
mas01cr@405
|
278 }
|
mas01cr@405
|
279 return 1;
|
mas01cr@405
|
280 }
|
mas01cr@405
|
281
|
mas01cr@405
|
282 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
|
mas01cr@405
|
283 int err;
|
mas01cr@405
|
284 for(unsigned int n = 0; n < size; n++) {
|
mas01cr@405
|
285 if((err = audiodb_insert(adb, &(insert[n])))) {
|
mas01cr@405
|
286 return err;
|
mas01cr@404
|
287 }
|
mas01cr@404
|
288 }
|
mas01cr@405
|
289 return 0;
|
mas01cr@239
|
290 }
|
mas01cr@239
|
291
|
mas01cr@239
|
292 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@404
|
293 if(!adb) {
|
mas01cr@404
|
294 if(!(adb = audiodb_open(dbName, O_RDWR))) {
|
mas01cr@404
|
295 error("failed to open database", dbName);
|
mas01cr@404
|
296 }
|
mas01cr@251
|
297 }
|
mas01cr@404
|
298 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
|
mas01cr@404
|
299
|
mas01cr@404
|
300 } else {
|
mas01cr@404
|
301 /* at this point, we have powerfd (an fd), timesFile (a
|
mas01cr@405
|
302 * std::ifstream *) and inFile (a char *). Wacky, huh? Ignore
|
mas01cr@405
|
303 * the wackiness and just use the names. */
|
mas01cr@405
|
304 adb_insert_t insert;
|
mas01cr@405
|
305 insert.features = inFile;
|
mas01cr@405
|
306 insert.times = timesFileName;
|
mas01cr@405
|
307 insert.power = powerFileName;
|
mas01cr@405
|
308 insert.key = key;
|
mas01cr@405
|
309 if(audiodb_insert(adb, &insert)) {
|
mas01cr@405
|
310 error("insertion failure", inFile);
|
mas01cr@405
|
311 }
|
mas01cr@239
|
312 }
|
mas01cr@239
|
313 status(dbName);
|
mas01cr@239
|
314 }
|
mas01cr@239
|
315
|
mas01cr@239
|
316 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
317 forWrite = true;
|
mas01cr@239
|
318 initDBHeader(dbName);
|
mas01cr@239
|
319
|
mas01mc@324
|
320 // Treat large ADB instances differently
|
mas01mc@324
|
321 if( dbH->flags & O2_FLAG_LARGE_ADB ){
|
mas01mc@324
|
322 batchinsert_large_adb(dbName, inFile) ;
|
mas01mc@324
|
323 return;
|
mas01mc@324
|
324 }
|
mas01mc@324
|
325
|
mas01cr@239
|
326 if(!key)
|
mas01cr@239
|
327 key=inFile;
|
mas01cr@239
|
328 std::ifstream *filesIn = 0;
|
mas01cr@239
|
329 std::ifstream *keysIn = 0;
|
mas01cr@239
|
330
|
mas01cr@239
|
331 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@239
|
332 error("Could not open batch in file", inFile);
|
mas01cr@239
|
333 if(key && key!=inFile)
|
mas01cr@239
|
334 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@239
|
335 error("Could not open batch key file",key);
|
mas01cr@239
|
336
|
mas01cr@239
|
337 unsigned totalVectors=0;
|
mas01cr@239
|
338 char *thisFile = new char[MAXSTR];
|
mas01cr@262
|
339 char *thisKey = 0;
|
mas01cr@262
|
340 if (key && (key != inFile)) {
|
mas01cr@262
|
341 thisKey = new char[MAXSTR];
|
mas01cr@262
|
342 }
|
mas01cr@239
|
343 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@239
|
344 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@302
|
345
|
mas01cr@302
|
346 do {
|
mas01cr@239
|
347 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@262
|
348 if(key && key!=inFile) {
|
mas01cr@239
|
349 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@262
|
350 } else {
|
mas01cr@239
|
351 thisKey = thisFile;
|
mas01cr@262
|
352 }
|
mas01cr@262
|
353 if(usingTimes) {
|
mas01cr@262
|
354 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@262
|
355 }
|
mas01cr@262
|
356 if(usingPower) {
|
mas01cr@239
|
357 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@262
|
358 }
|
mas01cr@239
|
359
|
mas01cr@262
|
360 if(filesIn->eof()) {
|
mas01cr@239
|
361 break;
|
mas01cr@262
|
362 }
|
mas01cr@404
|
363 if(usingTimes){
|
mas01cr@404
|
364 if(timesFile->eof()) {
|
mas01cr@404
|
365 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@404
|
366 }
|
mas01cr@251
|
367 }
|
mas01cr@404
|
368 if (usingPower) {
|
mas01cr@404
|
369 if(powerFile->eof()) {
|
mas01cr@404
|
370 error("not enough power files in powerList", powerFileName);
|
mas01cr@239
|
371 }
|
mas01cr@239
|
372 }
|
mas01cr@405
|
373 adb_insert_t insert;
|
mas01cr@405
|
374 insert.features = thisFile;
|
mas01cr@405
|
375 insert.times = usingTimes ? thisTimesFileName : NULL;
|
mas01cr@405
|
376 insert.power = usingPower ? thisPowerFileName : NULL;
|
mas01cr@405
|
377 insert.key = thisKey;
|
mas01cr@405
|
378 if(audiodb_insert(adb, &insert)) {
|
mas01cr@405
|
379 error("insertion failure", thisFile);
|
mas01cr@405
|
380 }
|
mas01cr@239
|
381 } while(!filesIn->eof());
|
mas01cr@239
|
382
|
mas01cr@239
|
383 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01cr@262
|
384
|
mas01cr@262
|
385 delete [] thisPowerFileName;
|
mas01cr@262
|
386 if(key && (key != inFile)) {
|
mas01cr@262
|
387 delete [] thisKey;
|
mas01cr@262
|
388 }
|
mas01cr@262
|
389 delete [] thisFile;
|
mas01cr@262
|
390 delete [] thisTimesFileName;
|
mas01cr@239
|
391
|
mas01cr@262
|
392 delete filesIn;
|
mas01cr@262
|
393 delete keysIn;
|
mas01cr@262
|
394
|
mas01cr@239
|
395 // Report status
|
mas01cr@239
|
396 status(dbName);
|
mas01cr@239
|
397 }
|
mas01mc@324
|
398
|
mas01mc@324
|
399
|
mas01mc@324
|
400 // BATCHINSERT_LARGE_ADB
|
mas01mc@324
|
401 //
|
mas01mc@324
|
402 // This method inserts file pointers into the ADB instance rather than the actual feature data
|
mas01mc@324
|
403 //
|
mas01mc@324
|
404 // This method is intended for databases that are large enough to only support indexed query
|
mas01mc@324
|
405 // So exhaustive searching across all feature vectors will not be performed
|
mas01mc@324
|
406 //
|
mas01mc@324
|
407 // We insert featureFileName, [powerFileName], [timesFileName]
|
mas01mc@324
|
408 //
|
mas01mc@324
|
409 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
|
mas01mc@324
|
410 //
|
mas01mc@324
|
411 // LIMITS:
|
mas01mc@324
|
412 //
|
mas01mc@324
|
413 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
|
mas01mc@324
|
414 //
|
mas01mc@324
|
415 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
|
mas01mc@324
|
416
|
mas01mc@324
|
417 if(!key)
|
mas01mc@324
|
418 key=inFile;
|
mas01mc@324
|
419 std::ifstream *filesIn = 0;
|
mas01mc@324
|
420 std::ifstream *keysIn = 0;
|
mas01mc@324
|
421 std::ifstream* thisTimesFile = 0;
|
mas01mc@324
|
422 int thispowerfd = 0;
|
mas01mc@324
|
423
|
mas01mc@324
|
424 if(!(filesIn = new std::ifstream(inFile)))
|
mas01mc@324
|
425 error("Could not open batch in file", inFile);
|
mas01mc@324
|
426 if(key && key!=inFile)
|
mas01mc@324
|
427 if(!(keysIn = new std::ifstream(key)))
|
mas01mc@324
|
428 error("Could not open batch key file",key);
|
mas01mc@324
|
429
|
mas01mc@324
|
430 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01mc@324
|
431 error("Must use timestamps with timestamped database","use --times");
|
mas01mc@324
|
432
|
mas01mc@324
|
433 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01mc@324
|
434 error("Must use power with power-enabled database", dbName);
|
mas01mc@324
|
435
|
mas01cr@380
|
436 char *cwd = new char[PATH_MAX];
|
mas01cr@380
|
437
|
mas01cr@380
|
438 if ((getcwd(cwd, PATH_MAX)) == 0) {
|
mas01cr@380
|
439 error("error getting working directory", "", "getcwd");
|
mas01cr@380
|
440 }
|
mas01cr@380
|
441
|
mas01mc@324
|
442 unsigned totalVectors=0;
|
mas01mc@324
|
443 char *thisFile = new char[MAXSTR];
|
mas01mc@324
|
444 char *thisKey = 0;
|
mas01mc@324
|
445 if (key && (key != inFile)) {
|
mas01mc@324
|
446 thisKey = new char[MAXSTR];
|
mas01mc@324
|
447 }
|
mas01mc@324
|
448 char *thisTimesFileName = new char[MAXSTR];
|
mas01mc@324
|
449 char *thisPowerFileName = new char[MAXSTR];
|
mas01mc@324
|
450
|
mas01mc@324
|
451 std::set<std::string> s;
|
mas01mc@324
|
452
|
mas01mc@324
|
453 for (unsigned k = 0; k < dbH->numFiles; k++) {
|
mas01mc@324
|
454 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
|
mas01mc@324
|
455 }
|
mas01mc@324
|
456
|
mas01mc@324
|
457 do {
|
mas01mc@324
|
458 filesIn->getline(thisFile,MAXSTR);
|
mas01mc@324
|
459 if(key && key!=inFile) {
|
mas01mc@324
|
460 keysIn->getline(thisKey,MAXSTR);
|
mas01mc@324
|
461 } else {
|
mas01mc@324
|
462 thisKey = thisFile;
|
mas01mc@324
|
463 }
|
mas01mc@324
|
464 if(usingTimes) {
|
mas01mc@324
|
465 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01mc@324
|
466 }
|
mas01mc@324
|
467 if(usingPower) {
|
mas01mc@324
|
468 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01mc@324
|
469 }
|
mas01mc@324
|
470
|
mas01mc@324
|
471 if(filesIn->eof()) {
|
mas01mc@324
|
472 break;
|
mas01mc@324
|
473 }
|
mas01mc@324
|
474
|
mas01mc@324
|
475 initInputFile(thisFile, false);
|
mas01mc@324
|
476
|
mas01mc@324
|
477 if(!enough_per_file_space_free()) {
|
mas01mc@324
|
478 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01mc@324
|
479 }
|
mas01mc@324
|
480
|
mas01mc@324
|
481 if(s.count(thisKey)) {
|
mas01mc@324
|
482 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01mc@324
|
483 } else {
|
mas01mc@324
|
484 s.insert(thisKey);
|
mas01mc@324
|
485 // Make a track index table of features to file indexes
|
mas01mc@324
|
486 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01mc@324
|
487 if(!numVectors) {
|
mas01mc@324
|
488 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01mc@324
|
489 }
|
mas01mc@324
|
490 else{
|
mas01mc@324
|
491 // Check that time-stamp file exists
|
mas01mc@324
|
492 if(usingTimes){
|
mas01mc@324
|
493 if(timesFile->eof()) {
|
mas01mc@324
|
494 error("not enough timestamp files in timesList", timesFileName);
|
mas01mc@324
|
495 }
|
mas01mc@324
|
496 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01mc@324
|
497 if(!thisTimesFile->is_open()) {
|
mas01mc@324
|
498 error("Cannot open timestamp file", thisTimesFileName);
|
mas01mc@324
|
499 }
|
mas01mc@324
|
500 if(thisTimesFile)
|
mas01mc@324
|
501 delete thisTimesFile;
|
mas01mc@324
|
502 }
|
mas01mc@324
|
503
|
mas01mc@324
|
504 // Check that power file exists
|
mas01mc@324
|
505 if (usingPower) {
|
mas01mc@324
|
506 if(powerFile->eof()) {
|
mas01mc@324
|
507 error("not enough power files in powerList", powerFileName);
|
mas01mc@324
|
508 }
|
mas01mc@324
|
509 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01mc@324
|
510 if (thispowerfd < 0) {
|
mas01mc@324
|
511 error("failed to open power file", thisPowerFileName);
|
mas01mc@324
|
512 }
|
mas01mc@324
|
513 if (0 < thispowerfd) {
|
mas01mc@324
|
514 close(thispowerfd);
|
mas01mc@324
|
515 }
|
mas01mc@324
|
516 }
|
mas01mc@324
|
517
|
mas01mc@324
|
518 // persist links to the feature files for reading from filesystem later
|
mas01mc@324
|
519
|
mas01mc@324
|
520 // Primary Keys
|
mas01mc@324
|
521 INSERT_FILETABLE_STRING(fileTable, thisKey);
|
mas01cr@380
|
522
|
mas01cr@380
|
523 if(*thisFile != '/') {
|
mas01cr@380
|
524 /* FIXME: MAXSTR and O2_FILETABLE_ENTRY_SIZE should probably
|
mas01cr@380
|
525 be the same thing. Also, both are related to PATH_MAX,
|
mas01cr@380
|
526 which admittedly is not always defined or a
|
mas01cr@380
|
527 constant... */
|
mas01cr@380
|
528 char tmp[MAXSTR];
|
mas01cr@380
|
529 strncpy(tmp, thisFile, MAXSTR);
|
mas01cr@380
|
530 snprintf(thisFile, MAXSTR, "%s/%s", cwd, tmp);
|
mas01cr@380
|
531 }
|
mas01mc@324
|
532 // Feature Vector fileNames
|
mas01mc@324
|
533 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
|
mas01mc@324
|
534
|
mas01mc@324
|
535 // Time Stamp fileNames
|
mas01cr@380
|
536 if(usingTimes) {
|
mas01cr@380
|
537 if(*thisTimesFileName != '/') {
|
mas01cr@380
|
538 char tmp[MAXSTR];
|
mas01cr@380
|
539 strncpy(tmp, thisTimesFileName, MAXSTR);
|
mas01cr@380
|
540 snprintf(thisTimesFileName, MAXSTR, "%s/%s", cwd, tmp);
|
mas01cr@380
|
541 }
|
mas01mc@324
|
542 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
|
mas01cr@380
|
543 }
|
mas01mc@324
|
544
|
mas01mc@324
|
545 // Power fileNames
|
mas01cr@380
|
546 if(usingPower) {
|
mas01cr@380
|
547 if(*thisPowerFileName != '/') {
|
mas01cr@380
|
548 char tmp[MAXSTR];
|
mas01cr@380
|
549 strncpy(tmp, thisPowerFileName, MAXSTR);
|
mas01cr@380
|
550 snprintf(thisPowerFileName, MAXSTR, "%s/%s", cwd, tmp);
|
mas01cr@380
|
551 }
|
mas01mc@324
|
552 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
|
mas01cr@380
|
553 }
|
mas01mc@324
|
554
|
mas01mc@324
|
555 // Increment file count
|
mas01mc@324
|
556 dbH->numFiles++;
|
mas01mc@324
|
557
|
mas01mc@324
|
558 // Update Header information
|
mas01mc@324
|
559 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01mc@324
|
560
|
mas01mc@324
|
561 // Update track to file index map
|
mas01mc@324
|
562 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01mc@324
|
563
|
mas01mc@324
|
564 totalVectors+=numVectors;
|
mas01mc@324
|
565
|
mas01mc@324
|
566 // Copy the header back to the database
|
mas01mc@324
|
567 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01mc@324
|
568 }
|
mas01mc@324
|
569 }
|
mas01mc@324
|
570 // CLEAN UP
|
mas01mc@324
|
571 if(indata)
|
mas01mc@324
|
572 munmap(indata,statbuf.st_size);
|
mas01mc@324
|
573 if(infid>0)
|
mas01mc@324
|
574 close(infid);
|
mas01mc@324
|
575 } while(!filesIn->eof());
|
mas01mc@324
|
576
|
mas01mc@324
|
577 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01mc@324
|
578
|
mas01mc@324
|
579 delete [] thisPowerFileName;
|
mas01mc@324
|
580 if(key && (key != inFile)) {
|
mas01mc@324
|
581 delete [] thisKey;
|
mas01mc@324
|
582 }
|
mas01mc@324
|
583 delete [] thisFile;
|
mas01mc@324
|
584 delete [] thisTimesFileName;
|
mas01mc@324
|
585
|
mas01mc@324
|
586 delete filesIn;
|
mas01mc@324
|
587 delete keysIn;
|
mas01mc@324
|
588
|
mas01mc@324
|
589 // Report status
|
mas01mc@324
|
590 status(dbName);
|
mas01mc@324
|
591 }
|