Mercurial > hg > audiodb
comparison insert.cpp @ 204:2ea1908707c7 refactoring
Filewise refactor.
Break apart huge monolithic audioDB.cpp file into seven broadly
independent portions:
* SOAP
* DB creation
* insertion
* query
* dump
* common functionality
* constructor functions
Remove the "using namespace std" from the header file, though that
wasn't actually a problem: the problem in question is solved by
including adb.nsmap in only soap.cpp.
Makefile improvements.
author | mas01cr |
---|---|
date | Wed, 28 Nov 2007 15:10:28 +0000 |
parents | |
children | 0eab3ca2267d |
comparison
equal
deleted
inserted
replaced
203:4b05c5bbf06d | 204:2ea1908707c7 |
---|---|
1 #include "audioDB.h" | |
2 | |
3 bool audioDB::enough_data_space_free(off_t size) { | |
4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size); | |
5 } | |
6 | |
7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) { | |
8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET); | |
9 write(dbfid, buffer, size); | |
10 } | |
11 | |
12 void audioDB::insert(const char* dbName, const char* inFile) { | |
13 forWrite = true; | |
14 initTables(dbName, inFile); | |
15 | |
16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
17 error("Must use timestamps with timestamped database","use --times"); | |
18 | |
19 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
20 error("Must use power with power-enabled database", dbName); | |
21 | |
22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
23 error("Insert failed: no more room in database", inFile); | |
24 } | |
25 | |
26 if(!key) | |
27 key=inFile; | |
28 // Linear scan of filenames check for pre-existing feature | |
29 unsigned alreadyInserted=0; | |
30 for(unsigned k=0; k<dbH->numFiles; k++) | |
31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ | |
32 alreadyInserted=1; | |
33 break; | |
34 } | |
35 | |
36 if(alreadyInserted){ | |
37 if(verbosity) { | |
38 std::cerr << "Warning: key already exists in database, ignoring: " <<inFile << std::endl; | |
39 } | |
40 return; | |
41 } | |
42 | |
43 // Make a track index table of features to file indexes | |
44 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
45 if(!numVectors){ | |
46 if(verbosity) { | |
47 std::cerr << "Warning: ignoring zero-length feature vector file:" << key << std::endl; | |
48 } | |
49 // CLEAN UP | |
50 munmap(indata,statbuf.st_size); | |
51 munmap(db,dbH->dbSize); | |
52 close(infid); | |
53 return; | |
54 } | |
55 | |
56 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | |
57 | |
58 off_t insertoffset = dbH->length;// Store current state | |
59 | |
60 // Check times status and insert times from file | |
61 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double)); | |
62 double *timesdata = timesTable + 2*indexoffset; | |
63 | |
64 if(2*(indexoffset + numVectors) > timesTableLength) { | |
65 error("out of space for times", key); | |
66 } | |
67 | |
68 if (usingTimes) { | |
69 insertTimeStamps(numVectors, timesFile, timesdata); | |
70 } | |
71 | |
72 double *powerdata = powerTable + indexoffset; | |
73 insertPowerData(numVectors, powerfd, powerdata); | |
74 | |
75 // Increment file count | |
76 dbH->numFiles++; | |
77 | |
78 // Update Header information | |
79 dbH->length+=(statbuf.st_size-sizeof(int)); | |
80 | |
81 // Update track to file index map | |
82 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned)); | |
83 | |
84 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
85 | |
86 // Norm the vectors on input if the database is already L2 normed | |
87 if(dbH->flags & O2_FLAG_L2NORM) | |
88 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
89 | |
90 // Report status | |
91 status(dbName); | |
92 if(verbosity) { | |
93 std::cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | |
94 << (statbuf.st_size-sizeof(int)) << " bytes." << std::endl; | |
95 } | |
96 | |
97 // Copy the header back to the database | |
98 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
99 | |
100 // CLEAN UP | |
101 munmap(indata,statbuf.st_size); | |
102 close(infid); | |
103 } | |
104 | |
105 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) { | |
106 assert(usingTimes); | |
107 | |
108 unsigned numtimes = 0; | |
109 | |
110 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) { | |
111 dbH->flags=dbH->flags|O2_FLAG_TIMES; | |
112 } else if(!(dbH->flags & O2_FLAG_TIMES)) { | |
113 error("Timestamp file used with non-timestamped database", timesFileName); | |
114 } | |
115 | |
116 if(!timesFile->is_open()) { | |
117 error("problem opening times file on timestamped database", timesFileName); | |
118 } | |
119 | |
120 double timepoint, next; | |
121 *timesFile >> timepoint; | |
122 if (timesFile->eof()) { | |
123 error("no entries in times file", timesFileName); | |
124 } | |
125 numtimes++; | |
126 do { | |
127 *timesFile >> next; | |
128 if (timesFile->eof()) { | |
129 break; | |
130 } | |
131 numtimes++; | |
132 timesdata[0] = timepoint; | |
133 timepoint = (timesdata[1] = next); | |
134 timesdata += 2; | |
135 } while (numtimes < numVectors + 1); | |
136 | |
137 if (numtimes < numVectors + 1) { | |
138 error("too few timepoints in times file", timesFileName); | |
139 } | |
140 | |
141 *timesFile >> next; | |
142 if (!timesFile->eof()) { | |
143 error("too many timepoints in times file", timesFileName); | |
144 } | |
145 } | |
146 | |
147 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { | |
148 if (usingPower) { | |
149 if (!(dbH->flags & O2_FLAG_POWER)) { | |
150 error("Cannot insert power data on non-power DB", dbName); | |
151 } | |
152 | |
153 int one; | |
154 unsigned int count; | |
155 | |
156 count = read(powerfd, &one, sizeof(unsigned int)); | |
157 if (count != sizeof(unsigned int)) { | |
158 error("powerfd read failed", "int", "read"); | |
159 } | |
160 if (one != 1) { | |
161 error("dimensionality of power file not 1", powerFileName); | |
162 } | |
163 | |
164 // FIXME: should check that the powerfile is the right size for | |
165 // this. -- CSR, 2007-10-30 | |
166 count = read(powerfd, powerdata, numVectors * sizeof(double)); | |
167 if (count != numVectors * sizeof(double)) { | |
168 error("powerfd read failed", "double", "read"); | |
169 } | |
170 } | |
171 } | |
172 | |
173 void audioDB::batchinsert(const char* dbName, const char* inFile) { | |
174 | |
175 forWrite = true; | |
176 initDBHeader(dbName); | |
177 | |
178 if(!key) | |
179 key=inFile; | |
180 std::ifstream *filesIn = 0; | |
181 std::ifstream *keysIn = 0; | |
182 std::ifstream* thisTimesFile = 0; | |
183 int thispowerfd = 0; | |
184 | |
185 if(!(filesIn = new std::ifstream(inFile))) | |
186 error("Could not open batch in file", inFile); | |
187 if(key && key!=inFile) | |
188 if(!(keysIn = new std::ifstream(key))) | |
189 error("Could not open batch key file",key); | |
190 | |
191 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
192 error("Must use timestamps with timestamped database","use --times"); | |
193 | |
194 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
195 error("Must use power with power-enabled database", dbName); | |
196 | |
197 unsigned totalVectors=0; | |
198 char *thisKey = new char[MAXSTR]; | |
199 char *thisFile = new char[MAXSTR]; | |
200 char *thisTimesFileName = new char[MAXSTR]; | |
201 char *thisPowerFileName = new char[MAXSTR]; | |
202 | |
203 do{ | |
204 filesIn->getline(thisFile,MAXSTR); | |
205 if(key && key!=inFile) | |
206 keysIn->getline(thisKey,MAXSTR); | |
207 else | |
208 thisKey = thisFile; | |
209 if(usingTimes) | |
210 timesFile->getline(thisTimesFileName,MAXSTR); | |
211 if(usingPower) | |
212 powerFile->getline(thisPowerFileName, MAXSTR); | |
213 | |
214 if(filesIn->eof()) | |
215 break; | |
216 | |
217 initInputFile(thisFile); | |
218 | |
219 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
220 error("batchinsert failed: no more room in database", thisFile); | |
221 } | |
222 | |
223 // Linear scan of filenames check for pre-existing feature | |
224 unsigned alreadyInserted=0; | |
225 | |
226 for(unsigned k=0; k<dbH->numFiles; k++) | |
227 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ | |
228 alreadyInserted=1; | |
229 break; | |
230 } | |
231 | |
232 if(alreadyInserted){ | |
233 if(verbosity) { | |
234 std::cerr << "Warning: key already exists in database:" << thisKey << std::endl; | |
235 } | |
236 } | |
237 else{ | |
238 | |
239 // Make a track index table of features to file indexes | |
240 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
241 if(!numVectors){ | |
242 if(verbosity) { | |
243 std::cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << std::endl; | |
244 } | |
245 } | |
246 else{ | |
247 if(usingTimes){ | |
248 if(timesFile->eof()) { | |
249 error("not enough timestamp files in timesList", timesFileName); | |
250 } | |
251 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in); | |
252 if(!thisTimesFile->is_open()) { | |
253 error("Cannot open timestamp file", thisTimesFileName); | |
254 } | |
255 off_t insertoffset = dbH->length; | |
256 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double)); | |
257 double *timesdata = timesTable + 2*indexoffset; | |
258 if(2*(indexoffset + numVectors) > timesTableLength) { | |
259 error("out of space for times", key); | |
260 } | |
261 insertTimeStamps(numVectors, thisTimesFile, timesdata); | |
262 if(thisTimesFile) | |
263 delete thisTimesFile; | |
264 } | |
265 | |
266 if (usingPower) { | |
267 if(powerFile->eof()) { | |
268 error("not enough power files in powerList", powerFileName); | |
269 } | |
270 thispowerfd = open(thisPowerFileName, O_RDONLY); | |
271 if (thispowerfd < 0) { | |
272 error("failed to open power file", thisPowerFileName); | |
273 } | |
274 unsigned insertoffset = dbH->length; | |
275 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double)); | |
276 double *powerdata = powerTable + poweroffset; | |
277 insertPowerData(numVectors, thispowerfd, powerdata); | |
278 if (0 < thispowerfd) { | |
279 close(thispowerfd); | |
280 } | |
281 } | |
282 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | |
283 | |
284 off_t insertoffset = dbH->length;// Store current state | |
285 | |
286 // Increment file count | |
287 dbH->numFiles++; | |
288 | |
289 // Update Header information | |
290 dbH->length+=(statbuf.st_size-sizeof(int)); | |
291 | |
292 // Update track to file index map | |
293 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
294 | |
295 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
296 | |
297 // Norm the vectors on input if the database is already L2 normed | |
298 if(dbH->flags & O2_FLAG_L2NORM) | |
299 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
300 | |
301 totalVectors+=numVectors; | |
302 | |
303 // Copy the header back to the database | |
304 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
305 } | |
306 } | |
307 // CLEAN UP | |
308 munmap(indata,statbuf.st_size); | |
309 close(infid); | |
310 }while(!filesIn->eof()); | |
311 | |
312 if(verbosity) { | |
313 std::cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | |
314 << totalVectors*dbH->dim*sizeof(double) << " bytes." << std::endl; | |
315 } | |
316 | |
317 // Report status | |
318 status(dbName); | |
319 } |