Mercurial > hg > audiodb
comparison insert.cpp @ 239:2cc06e5b05a5
Merge refactoring branch.
Bug fixes:
* 64-bit powertable bug;
* -inf - -inf bug;
* use new times information;
* plus short track, O2_MAXFILES and structure padding ABI fixes (already
backported)
Major code changes:
* split source into functional units, known as 'files';
* Reporter class for accumulating and reporting on query results;
* much OAOOization, mostly from above: net 800 LOC (25%) shorter.
author | mas01cr |
---|---|
date | Thu, 13 Dec 2007 14:23:32 +0000 |
parents | |
children | a6c9a1c68646 abfb26e08d9c |
comparison
equal
deleted
inserted
replaced
224:3a81da6fb1d7 | 239:2cc06e5b05a5 |
---|---|
1 #include "audioDB.h" | |
2 | |
3 bool audioDB::enough_data_space_free(off_t size) { | |
4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size); | |
5 } | |
6 | |
7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) { | |
8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET); | |
9 write(dbfid, buffer, size); | |
10 } | |
11 | |
12 void audioDB::insert(const char* dbName, const char* inFile) { | |
13 forWrite = true; | |
14 initTables(dbName, inFile); | |
15 | |
16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
17 error("Must use timestamps with timestamped database","use --times"); | |
18 | |
19 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
20 error("Must use power with power-enabled database", dbName); | |
21 | |
22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
23 error("Insert failed: no more room in database", inFile); | |
24 } | |
25 | |
26 if(!key) | |
27 key=inFile; | |
28 // Linear scan of filenames check for pre-existing feature | |
29 unsigned alreadyInserted=0; | |
30 for(unsigned k=0; k<dbH->numFiles; k++) | |
31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ | |
32 alreadyInserted=1; | |
33 break; | |
34 } | |
35 | |
36 if(alreadyInserted) { | |
37 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile); | |
38 return; | |
39 } | |
40 | |
41 // Make a track index table of features to file indexes | |
42 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
43 if(!numVectors) { | |
44 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key); | |
45 | |
46 // CLEAN UP | |
47 munmap(indata,statbuf.st_size); | |
48 munmap(db,dbH->dbSize); | |
49 close(infid); | |
50 return; | |
51 } | |
52 | |
53 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | |
54 | |
55 off_t insertoffset = dbH->length;// Store current state | |
56 | |
57 // Check times status and insert times from file | |
58 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double)); | |
59 double *timesdata = timesTable + 2*indexoffset; | |
60 | |
61 if(2*(indexoffset + numVectors) > timesTableLength) { | |
62 error("out of space for times", key); | |
63 } | |
64 | |
65 if (usingTimes) { | |
66 insertTimeStamps(numVectors, timesFile, timesdata); | |
67 } | |
68 | |
69 double *powerdata = powerTable + indexoffset; | |
70 insertPowerData(numVectors, powerfd, powerdata); | |
71 | |
72 // Increment file count | |
73 dbH->numFiles++; | |
74 | |
75 // Update Header information | |
76 dbH->length+=(statbuf.st_size-sizeof(int)); | |
77 | |
78 // Update track to file index map | |
79 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned)); | |
80 | |
81 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
82 | |
83 // Norm the vectors on input if the database is already L2 normed | |
84 if(dbH->flags & O2_FLAG_L2NORM) | |
85 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
86 | |
87 // Report status | |
88 status(dbName); | |
89 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int))); | |
90 | |
91 // Copy the header back to the database | |
92 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
93 | |
94 // CLEAN UP | |
95 munmap(indata,statbuf.st_size); | |
96 close(infid); | |
97 } | |
98 | |
99 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) { | |
100 assert(usingTimes); | |
101 | |
102 unsigned numtimes = 0; | |
103 | |
104 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) { | |
105 dbH->flags=dbH->flags|O2_FLAG_TIMES; | |
106 } else if(!(dbH->flags & O2_FLAG_TIMES)) { | |
107 error("Timestamp file used with non-timestamped database", timesFileName); | |
108 } | |
109 | |
110 if(!timesFile->is_open()) { | |
111 error("problem opening times file on timestamped database", timesFileName); | |
112 } | |
113 | |
114 double timepoint, next; | |
115 *timesFile >> timepoint; | |
116 if (timesFile->eof()) { | |
117 error("no entries in times file", timesFileName); | |
118 } | |
119 numtimes++; | |
120 do { | |
121 *timesFile >> next; | |
122 if (timesFile->eof()) { | |
123 break; | |
124 } | |
125 numtimes++; | |
126 timesdata[0] = timepoint; | |
127 timepoint = (timesdata[1] = next); | |
128 timesdata += 2; | |
129 } while (numtimes < numVectors + 1); | |
130 | |
131 if (numtimes < numVectors + 1) { | |
132 error("too few timepoints in times file", timesFileName); | |
133 } | |
134 | |
135 *timesFile >> next; | |
136 if (!timesFile->eof()) { | |
137 error("too many timepoints in times file", timesFileName); | |
138 } | |
139 } | |
140 | |
141 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { | |
142 if (usingPower) { | |
143 if (!(dbH->flags & O2_FLAG_POWER)) { | |
144 error("Cannot insert power data on non-power DB", dbName); | |
145 } | |
146 | |
147 int one; | |
148 unsigned int count; | |
149 | |
150 count = read(powerfd, &one, sizeof(unsigned int)); | |
151 if (count != sizeof(unsigned int)) { | |
152 error("powerfd read failed", "int", "read"); | |
153 } | |
154 if (one != 1) { | |
155 error("dimensionality of power file not 1", powerFileName); | |
156 } | |
157 | |
158 // FIXME: should check that the powerfile is the right size for | |
159 // this. -- CSR, 2007-10-30 | |
160 count = read(powerfd, powerdata, numVectors * sizeof(double)); | |
161 if (count != numVectors * sizeof(double)) { | |
162 error("powerfd read failed", "double", "read"); | |
163 } | |
164 } | |
165 } | |
166 | |
167 void audioDB::batchinsert(const char* dbName, const char* inFile) { | |
168 | |
169 forWrite = true; | |
170 initDBHeader(dbName); | |
171 | |
172 if(!key) | |
173 key=inFile; | |
174 std::ifstream *filesIn = 0; | |
175 std::ifstream *keysIn = 0; | |
176 std::ifstream* thisTimesFile = 0; | |
177 int thispowerfd = 0; | |
178 | |
179 if(!(filesIn = new std::ifstream(inFile))) | |
180 error("Could not open batch in file", inFile); | |
181 if(key && key!=inFile) | |
182 if(!(keysIn = new std::ifstream(key))) | |
183 error("Could not open batch key file",key); | |
184 | |
185 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
186 error("Must use timestamps with timestamped database","use --times"); | |
187 | |
188 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
189 error("Must use power with power-enabled database", dbName); | |
190 | |
191 unsigned totalVectors=0; | |
192 char *thisKey = new char[MAXSTR]; | |
193 char *thisFile = new char[MAXSTR]; | |
194 char *thisTimesFileName = new char[MAXSTR]; | |
195 char *thisPowerFileName = new char[MAXSTR]; | |
196 | |
197 do{ | |
198 filesIn->getline(thisFile,MAXSTR); | |
199 if(key && key!=inFile) | |
200 keysIn->getline(thisKey,MAXSTR); | |
201 else | |
202 thisKey = thisFile; | |
203 if(usingTimes) | |
204 timesFile->getline(thisTimesFileName,MAXSTR); | |
205 if(usingPower) | |
206 powerFile->getline(thisPowerFileName, MAXSTR); | |
207 | |
208 if(filesIn->eof()) | |
209 break; | |
210 | |
211 initInputFile(thisFile); | |
212 | |
213 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
214 error("batchinsert failed: no more room in database", thisFile); | |
215 } | |
216 | |
217 // Linear scan of filenames check for pre-existing feature | |
218 unsigned alreadyInserted=0; | |
219 | |
220 for(unsigned k=0; k<dbH->numFiles; k++) | |
221 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ | |
222 alreadyInserted=1; | |
223 break; | |
224 } | |
225 | |
226 if(alreadyInserted) { | |
227 VERB_LOG(0, "key already exists in database: %s\n", thisKey); | |
228 } else { | |
229 // Make a track index table of features to file indexes | |
230 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
231 if(!numVectors) { | |
232 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey); | |
233 } | |
234 else{ | |
235 if(usingTimes){ | |
236 if(timesFile->eof()) { | |
237 error("not enough timestamp files in timesList", timesFileName); | |
238 } | |
239 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in); | |
240 if(!thisTimesFile->is_open()) { | |
241 error("Cannot open timestamp file", thisTimesFileName); | |
242 } | |
243 off_t insertoffset = dbH->length; | |
244 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double)); | |
245 double *timesdata = timesTable + 2*indexoffset; | |
246 if(2*(indexoffset + numVectors) > timesTableLength) { | |
247 error("out of space for times", key); | |
248 } | |
249 insertTimeStamps(numVectors, thisTimesFile, timesdata); | |
250 if(thisTimesFile) | |
251 delete thisTimesFile; | |
252 } | |
253 | |
254 if (usingPower) { | |
255 if(powerFile->eof()) { | |
256 error("not enough power files in powerList", powerFileName); | |
257 } | |
258 thispowerfd = open(thisPowerFileName, O_RDONLY); | |
259 if (thispowerfd < 0) { | |
260 error("failed to open power file", thisPowerFileName); | |
261 } | |
262 off_t insertoffset = dbH->length; | |
263 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double)); | |
264 double *powerdata = powerTable + poweroffset; | |
265 insertPowerData(numVectors, thispowerfd, powerdata); | |
266 if (0 < thispowerfd) { | |
267 close(thispowerfd); | |
268 } | |
269 } | |
270 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | |
271 | |
272 off_t insertoffset = dbH->length;// Store current state | |
273 | |
274 // Increment file count | |
275 dbH->numFiles++; | |
276 | |
277 // Update Header information | |
278 dbH->length+=(statbuf.st_size-sizeof(int)); | |
279 | |
280 // Update track to file index map | |
281 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
282 | |
283 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
284 | |
285 // Norm the vectors on input if the database is already L2 normed | |
286 if(dbH->flags & O2_FLAG_L2NORM) | |
287 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
288 | |
289 totalVectors+=numVectors; | |
290 | |
291 // Copy the header back to the database | |
292 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
293 } | |
294 } | |
295 // CLEAN UP | |
296 munmap(indata,statbuf.st_size); | |
297 close(infid); | |
298 } while(!filesIn->eof()); | |
299 | |
300 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double))); | |
301 | |
302 // Report status | |
303 status(dbName); | |
304 } |