mas01cr@243
|
1 #include "audioDB.h"
|
mas01cr@243
|
2
|
mas01cr@277
|
3 bool audioDB::enough_per_file_space_free() {
|
mas01cr@277
|
4 unsigned int fmaxfiles, tmaxfiles;
|
mas01cr@277
|
5 unsigned int maxfiles;
|
mas01cr@277
|
6
|
mas01cr@277
|
7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
|
mas01cr@277
|
8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@277
|
9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@277
|
10 return(dbH->numFiles < maxfiles);
|
mas01cr@277
|
11 }
|
mas01cr@277
|
12
|
mas01cr@243
|
13 bool audioDB::enough_data_space_free(off_t size) {
|
mas01cr@243
|
14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
|
mas01cr@243
|
15 }
|
mas01cr@243
|
16
|
mas01cr@243
|
17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
|
mas01cr@243
|
18 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
|
mas01cr@243
|
19 write(dbfid, buffer, size);
|
mas01cr@243
|
20 }
|
mas01cr@243
|
21
|
mas01cr@243
|
22 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@243
|
23 forWrite = true;
|
mas01cr@243
|
24 initTables(dbName, inFile);
|
mas01cr@243
|
25
|
mas01cr@243
|
26 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@243
|
27 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@243
|
28
|
mas01cr@243
|
29 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@243
|
30 error("Must use power with power-enabled database", dbName);
|
mas01cr@243
|
31
|
mas01cr@277
|
32 if(!enough_per_file_space_free()) {
|
mas01cr@277
|
33 error("Insert failed: no more room for metadata", inFile);
|
mas01cr@277
|
34 }
|
mas01cr@277
|
35
|
mas01cr@243
|
36 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@243
|
37 error("Insert failed: no more room in database", inFile);
|
mas01cr@243
|
38 }
|
mas01cr@243
|
39
|
mas01cr@243
|
40 if(!key)
|
mas01cr@243
|
41 key=inFile;
|
mas01cr@243
|
42 // Linear scan of filenames check for pre-existing feature
|
mas01cr@243
|
43 unsigned alreadyInserted=0;
|
mas01cr@243
|
44 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@277
|
45 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
|
mas01cr@243
|
46 alreadyInserted=1;
|
mas01cr@243
|
47 break;
|
mas01cr@243
|
48 }
|
mas01cr@243
|
49
|
mas01cr@243
|
50 if(alreadyInserted) {
|
mas01cr@243
|
51 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
|
mas01cr@243
|
52 return;
|
mas01cr@243
|
53 }
|
mas01cr@243
|
54
|
mas01cr@243
|
55 // Make a track index table of features to file indexes
|
mas01cr@243
|
56 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@243
|
57 if(!numVectors) {
|
mas01cr@243
|
58 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
|
mas01cr@243
|
59
|
mas01cr@243
|
60 // CLEAN UP
|
mas01cr@243
|
61 munmap(indata,statbuf.st_size);
|
mas01cr@243
|
62 munmap(db,dbH->dbSize);
|
mas01cr@243
|
63 close(infid);
|
mas01cr@243
|
64 return;
|
mas01cr@243
|
65 }
|
mas01cr@243
|
66
|
mas01cr@277
|
67 strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, key, strlen(key));
|
mas01cr@243
|
68
|
mas01cr@243
|
69 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@243
|
70
|
mas01cr@243
|
71 // Check times status and insert times from file
|
mas01cr@243
|
72 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
|
mas01cr@243
|
73 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@243
|
74
|
mas01cr@243
|
75 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@243
|
76 error("out of space for times", key);
|
mas01cr@243
|
77 }
|
mas01cr@243
|
78
|
mas01cr@243
|
79 if (usingTimes) {
|
mas01cr@243
|
80 insertTimeStamps(numVectors, timesFile, timesdata);
|
mas01cr@243
|
81 }
|
mas01cr@243
|
82
|
mas01cr@243
|
83 double *powerdata = powerTable + indexoffset;
|
mas01cr@243
|
84 insertPowerData(numVectors, powerfd, powerdata);
|
mas01cr@243
|
85
|
mas01cr@243
|
86 // Increment file count
|
mas01cr@243
|
87 dbH->numFiles++;
|
mas01cr@243
|
88
|
mas01cr@243
|
89 // Update Header information
|
mas01cr@243
|
90 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@243
|
91
|
mas01cr@243
|
92 // Update track to file index map
|
mas01cr@243
|
93 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
|
mas01cr@243
|
94
|
mas01cr@243
|
95 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@243
|
96
|
mas01cr@243
|
97 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@243
|
98 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@243
|
99 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@243
|
100
|
mas01cr@243
|
101 // Report status
|
mas01cr@243
|
102 status(dbName);
|
mas01cr@243
|
103 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
|
mas01cr@243
|
104
|
mas01cr@243
|
105 // Copy the header back to the database
|
mas01cr@243
|
106 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@243
|
107
|
mas01cr@243
|
108 // CLEAN UP
|
mas01cr@243
|
109 munmap(indata,statbuf.st_size);
|
mas01cr@243
|
110 close(infid);
|
mas01cr@243
|
111 }
|
mas01cr@243
|
112
|
mas01cr@243
|
113 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
|
mas01cr@243
|
114 assert(usingTimes);
|
mas01cr@243
|
115
|
mas01cr@243
|
116 unsigned numtimes = 0;
|
mas01cr@243
|
117
|
mas01cr@243
|
118 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
|
mas01cr@243
|
119 dbH->flags=dbH->flags|O2_FLAG_TIMES;
|
mas01cr@243
|
120 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
|
mas01cr@243
|
121 error("Timestamp file used with non-timestamped database", timesFileName);
|
mas01cr@243
|
122 }
|
mas01cr@243
|
123
|
mas01cr@243
|
124 if(!timesFile->is_open()) {
|
mas01cr@243
|
125 error("problem opening times file on timestamped database", timesFileName);
|
mas01cr@243
|
126 }
|
mas01cr@243
|
127
|
mas01cr@243
|
128 double timepoint, next;
|
mas01cr@243
|
129 *timesFile >> timepoint;
|
mas01cr@243
|
130 if (timesFile->eof()) {
|
mas01cr@243
|
131 error("no entries in times file", timesFileName);
|
mas01cr@243
|
132 }
|
mas01cr@243
|
133 numtimes++;
|
mas01cr@243
|
134 do {
|
mas01cr@243
|
135 *timesFile >> next;
|
mas01cr@243
|
136 if (timesFile->eof()) {
|
mas01cr@243
|
137 break;
|
mas01cr@243
|
138 }
|
mas01cr@243
|
139 numtimes++;
|
mas01cr@243
|
140 timesdata[0] = timepoint;
|
mas01cr@243
|
141 timepoint = (timesdata[1] = next);
|
mas01cr@243
|
142 timesdata += 2;
|
mas01cr@243
|
143 } while (numtimes < numVectors + 1);
|
mas01cr@243
|
144
|
mas01cr@243
|
145 if (numtimes < numVectors + 1) {
|
mas01cr@243
|
146 error("too few timepoints in times file", timesFileName);
|
mas01cr@243
|
147 }
|
mas01cr@243
|
148
|
mas01cr@243
|
149 *timesFile >> next;
|
mas01cr@243
|
150 if (!timesFile->eof()) {
|
mas01cr@243
|
151 error("too many timepoints in times file", timesFileName);
|
mas01cr@243
|
152 }
|
mas01cr@243
|
153 }
|
mas01cr@243
|
154
|
mas01cr@243
|
155 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
|
mas01cr@243
|
156 if (usingPower) {
|
mas01cr@243
|
157 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01cr@243
|
158 error("Cannot insert power data on non-power DB", dbName);
|
mas01cr@243
|
159 }
|
mas01cr@243
|
160
|
mas01cr@243
|
161 int one;
|
mas01cr@243
|
162 unsigned int count;
|
mas01cr@243
|
163
|
mas01cr@243
|
164 count = read(powerfd, &one, sizeof(unsigned int));
|
mas01cr@243
|
165 if (count != sizeof(unsigned int)) {
|
mas01cr@243
|
166 error("powerfd read failed", "int", "read");
|
mas01cr@243
|
167 }
|
mas01cr@243
|
168 if (one != 1) {
|
mas01cr@243
|
169 error("dimensionality of power file not 1", powerFileName);
|
mas01cr@243
|
170 }
|
mas01cr@243
|
171
|
mas01cr@243
|
172 // FIXME: should check that the powerfile is the right size for
|
mas01cr@243
|
173 // this. -- CSR, 2007-10-30
|
mas01cr@243
|
174 count = read(powerfd, powerdata, numVectors * sizeof(double));
|
mas01cr@243
|
175 if (count != numVectors * sizeof(double)) {
|
mas01cr@243
|
176 error("powerfd read failed", "double", "read");
|
mas01cr@243
|
177 }
|
mas01cr@243
|
178 }
|
mas01cr@243
|
179 }
|
mas01cr@243
|
180
|
mas01cr@243
|
181 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@243
|
182
|
mas01cr@243
|
183 forWrite = true;
|
mas01cr@243
|
184 initDBHeader(dbName);
|
mas01cr@243
|
185
|
mas01cr@243
|
186 if(!key)
|
mas01cr@243
|
187 key=inFile;
|
mas01cr@243
|
188 std::ifstream *filesIn = 0;
|
mas01cr@243
|
189 std::ifstream *keysIn = 0;
|
mas01cr@243
|
190 std::ifstream* thisTimesFile = 0;
|
mas01cr@243
|
191 int thispowerfd = 0;
|
mas01cr@243
|
192
|
mas01cr@243
|
193 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@243
|
194 error("Could not open batch in file", inFile);
|
mas01cr@243
|
195 if(key && key!=inFile)
|
mas01cr@243
|
196 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@243
|
197 error("Could not open batch key file",key);
|
mas01cr@243
|
198
|
mas01cr@243
|
199 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@243
|
200 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@243
|
201
|
mas01cr@243
|
202 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@243
|
203 error("Must use power with power-enabled database", dbName);
|
mas01cr@243
|
204
|
mas01cr@243
|
205 unsigned totalVectors=0;
|
mas01cr@243
|
206 char *thisFile = new char[MAXSTR];
|
mas01cr@277
|
207 char *thisKey = 0;
|
mas01cr@277
|
208 if (key && (key != inFile)) {
|
mas01cr@277
|
209 thisKey = new char[MAXSTR];
|
mas01cr@277
|
210 }
|
mas01cr@243
|
211 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@243
|
212 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@243
|
213
|
mas01cr@243
|
214 do{
|
mas01cr@243
|
215 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@277
|
216 if(key && key!=inFile) {
|
mas01cr@243
|
217 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@277
|
218 } else {
|
mas01cr@243
|
219 thisKey = thisFile;
|
mas01cr@277
|
220 }
|
mas01cr@277
|
221 if(usingTimes) {
|
mas01cr@277
|
222 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@277
|
223 }
|
mas01cr@277
|
224 if(usingPower) {
|
mas01cr@243
|
225 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@277
|
226 }
|
mas01cr@243
|
227
|
mas01cr@277
|
228 if(filesIn->eof()) {
|
mas01cr@243
|
229 break;
|
mas01cr@277
|
230 }
|
mas01cr@277
|
231 initInputFile(thisFile);
|
mas01cr@243
|
232
|
mas01cr@277
|
233 if(!enough_per_file_space_free()) {
|
mas01cr@277
|
234 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01cr@277
|
235 }
|
mas01cr@243
|
236
|
mas01cr@243
|
237 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@243
|
238 error("batchinsert failed: no more room in database", thisFile);
|
mas01cr@243
|
239 }
|
mas01cr@243
|
240
|
mas01cr@243
|
241 // Linear scan of filenames check for pre-existing feature
|
mas01cr@243
|
242 unsigned alreadyInserted=0;
|
mas01cr@243
|
243
|
mas01cr@243
|
244 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@277
|
245 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)+1)==0){
|
mas01cr@243
|
246 alreadyInserted=1;
|
mas01cr@243
|
247 break;
|
mas01cr@243
|
248 }
|
mas01cr@243
|
249
|
mas01cr@243
|
250 if(alreadyInserted) {
|
mas01cr@243
|
251 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01cr@243
|
252 } else {
|
mas01cr@243
|
253 // Make a track index table of features to file indexes
|
mas01cr@243
|
254 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@243
|
255 if(!numVectors) {
|
mas01cr@243
|
256 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01cr@243
|
257 }
|
mas01cr@243
|
258 else{
|
mas01cr@243
|
259 if(usingTimes){
|
mas01cr@243
|
260 if(timesFile->eof()) {
|
mas01cr@243
|
261 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@243
|
262 }
|
mas01cr@243
|
263 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01cr@243
|
264 if(!thisTimesFile->is_open()) {
|
mas01cr@243
|
265 error("Cannot open timestamp file", thisTimesFileName);
|
mas01cr@243
|
266 }
|
mas01cr@243
|
267 off_t insertoffset = dbH->length;
|
mas01cr@243
|
268 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
|
mas01cr@243
|
269 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@243
|
270 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@243
|
271 error("out of space for times", key);
|
mas01cr@243
|
272 }
|
mas01cr@243
|
273 insertTimeStamps(numVectors, thisTimesFile, timesdata);
|
mas01cr@243
|
274 if(thisTimesFile)
|
mas01cr@243
|
275 delete thisTimesFile;
|
mas01cr@243
|
276 }
|
mas01cr@243
|
277
|
mas01cr@243
|
278 if (usingPower) {
|
mas01cr@243
|
279 if(powerFile->eof()) {
|
mas01cr@243
|
280 error("not enough power files in powerList", powerFileName);
|
mas01cr@243
|
281 }
|
mas01cr@243
|
282 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01cr@243
|
283 if (thispowerfd < 0) {
|
mas01cr@243
|
284 error("failed to open power file", thisPowerFileName);
|
mas01cr@243
|
285 }
|
mas01cr@243
|
286 off_t insertoffset = dbH->length;
|
mas01cr@243
|
287 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
|
mas01cr@243
|
288 double *powerdata = powerTable + poweroffset;
|
mas01cr@243
|
289 insertPowerData(numVectors, thispowerfd, powerdata);
|
mas01cr@243
|
290 if (0 < thispowerfd) {
|
mas01cr@243
|
291 close(thispowerfd);
|
mas01cr@243
|
292 }
|
mas01cr@243
|
293 }
|
mas01cr@277
|
294 strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey));
|
mas01cr@243
|
295
|
mas01cr@243
|
296 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@243
|
297
|
mas01cr@243
|
298 // Increment file count
|
mas01cr@243
|
299 dbH->numFiles++;
|
mas01cr@243
|
300
|
mas01cr@243
|
301 // Update Header information
|
mas01cr@243
|
302 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@243
|
303
|
mas01cr@243
|
304 // Update track to file index map
|
mas01cr@243
|
305 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01cr@243
|
306
|
mas01cr@243
|
307 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@243
|
308
|
mas01cr@243
|
309 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@243
|
310 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@243
|
311 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@243
|
312
|
mas01cr@243
|
313 totalVectors+=numVectors;
|
mas01cr@243
|
314
|
mas01cr@243
|
315 // Copy the header back to the database
|
mas01cr@243
|
316 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@243
|
317 }
|
mas01cr@243
|
318 }
|
mas01cr@243
|
319 // CLEAN UP
|
mas01cr@243
|
320 munmap(indata,statbuf.st_size);
|
mas01cr@243
|
321 close(infid);
|
mas01cr@243
|
322 } while(!filesIn->eof());
|
mas01cr@243
|
323
|
mas01cr@243
|
324 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01cr@277
|
325
|
mas01cr@277
|
326 delete [] thisPowerFileName;
|
mas01cr@277
|
327 if(key && (key != inFile)) {
|
mas01cr@277
|
328 delete [] thisKey;
|
mas01cr@277
|
329 }
|
mas01cr@277
|
330 delete [] thisFile;
|
mas01cr@277
|
331 delete [] thisTimesFileName;
|
mas01cr@243
|
332
|
mas01cr@277
|
333 delete filesIn;
|
mas01cr@277
|
334 delete keysIn;
|
mas01cr@277
|
335
|
mas01cr@243
|
336 // Report status
|
mas01cr@243
|
337 status(dbName);
|
mas01cr@243
|
338 }
|