comparison insert.cpp @ 204:2ea1908707c7 refactoring

Filewise refactor. Break apart the huge monolithic audioDB.cpp file into seven broadly independent portions: SOAP, DB creation, insertion, query, dump, common functionality, and constructor functions. Remove the "using namespace std" from the header file, though that wasn't actually a problem: the problem in question is solved by including adb.nsmap in only soap.cpp. Makefile improvements.
author mas01cr
date Wed, 28 Nov 2007 15:10:28 +0000
parents
children 0eab3ca2267d
comparison
equal deleted inserted replaced
203:4b05c5bbf06d 204:2ea1908707c7
1 #include "audioDB.h"
2
3 bool audioDB::enough_data_space_free(off_t size) {
4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
5 }
6
7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
9 write(dbfid, buffer, size);
10 }
11
12 void audioDB::insert(const char* dbName, const char* inFile) {
13 forWrite = true;
14 initTables(dbName, inFile);
15
16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
17 error("Must use timestamps with timestamped database","use --times");
18
19 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
20 error("Must use power with power-enabled database", dbName);
21
22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
23 error("Insert failed: no more room in database", inFile);
24 }
25
26 if(!key)
27 key=inFile;
28 // Linear scan of filenames check for pre-existing feature
29 unsigned alreadyInserted=0;
30 for(unsigned k=0; k<dbH->numFiles; k++)
31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
32 alreadyInserted=1;
33 break;
34 }
35
36 if(alreadyInserted){
37 if(verbosity) {
38 std::cerr << "Warning: key already exists in database, ignoring: " <<inFile << std::endl;
39 }
40 return;
41 }
42
43 // Make a track index table of features to file indexes
44 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
45 if(!numVectors){
46 if(verbosity) {
47 std::cerr << "Warning: ignoring zero-length feature vector file:" << key << std::endl;
48 }
49 // CLEAN UP
50 munmap(indata,statbuf.st_size);
51 munmap(db,dbH->dbSize);
52 close(infid);
53 return;
54 }
55
56 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
57
58 off_t insertoffset = dbH->length;// Store current state
59
60 // Check times status and insert times from file
61 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
62 double *timesdata = timesTable + 2*indexoffset;
63
64 if(2*(indexoffset + numVectors) > timesTableLength) {
65 error("out of space for times", key);
66 }
67
68 if (usingTimes) {
69 insertTimeStamps(numVectors, timesFile, timesdata);
70 }
71
72 double *powerdata = powerTable + indexoffset;
73 insertPowerData(numVectors, powerfd, powerdata);
74
75 // Increment file count
76 dbH->numFiles++;
77
78 // Update Header information
79 dbH->length+=(statbuf.st_size-sizeof(int));
80
81 // Update track to file index map
82 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
83
84 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
85
86 // Norm the vectors on input if the database is already L2 normed
87 if(dbH->flags & O2_FLAG_L2NORM)
88 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
89
90 // Report status
91 status(dbName);
92 if(verbosity) {
93 std::cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
94 << (statbuf.st_size-sizeof(int)) << " bytes." << std::endl;
95 }
96
97 // Copy the header back to the database
98 memcpy (db, dbH, sizeof(dbTableHeaderT));
99
100 // CLEAN UP
101 munmap(indata,statbuf.st_size);
102 close(infid);
103 }
104
105 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
106 assert(usingTimes);
107
108 unsigned numtimes = 0;
109
110 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
111 dbH->flags=dbH->flags|O2_FLAG_TIMES;
112 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
113 error("Timestamp file used with non-timestamped database", timesFileName);
114 }
115
116 if(!timesFile->is_open()) {
117 error("problem opening times file on timestamped database", timesFileName);
118 }
119
120 double timepoint, next;
121 *timesFile >> timepoint;
122 if (timesFile->eof()) {
123 error("no entries in times file", timesFileName);
124 }
125 numtimes++;
126 do {
127 *timesFile >> next;
128 if (timesFile->eof()) {
129 break;
130 }
131 numtimes++;
132 timesdata[0] = timepoint;
133 timepoint = (timesdata[1] = next);
134 timesdata += 2;
135 } while (numtimes < numVectors + 1);
136
137 if (numtimes < numVectors + 1) {
138 error("too few timepoints in times file", timesFileName);
139 }
140
141 *timesFile >> next;
142 if (!timesFile->eof()) {
143 error("too many timepoints in times file", timesFileName);
144 }
145 }
146
147 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
148 if (usingPower) {
149 if (!(dbH->flags & O2_FLAG_POWER)) {
150 error("Cannot insert power data on non-power DB", dbName);
151 }
152
153 int one;
154 unsigned int count;
155
156 count = read(powerfd, &one, sizeof(unsigned int));
157 if (count != sizeof(unsigned int)) {
158 error("powerfd read failed", "int", "read");
159 }
160 if (one != 1) {
161 error("dimensionality of power file not 1", powerFileName);
162 }
163
164 // FIXME: should check that the powerfile is the right size for
165 // this. -- CSR, 2007-10-30
166 count = read(powerfd, powerdata, numVectors * sizeof(double));
167 if (count != numVectors * sizeof(double)) {
168 error("powerfd read failed", "double", "read");
169 }
170 }
171 }
172
173 void audioDB::batchinsert(const char* dbName, const char* inFile) {
174
175 forWrite = true;
176 initDBHeader(dbName);
177
178 if(!key)
179 key=inFile;
180 std::ifstream *filesIn = 0;
181 std::ifstream *keysIn = 0;
182 std::ifstream* thisTimesFile = 0;
183 int thispowerfd = 0;
184
185 if(!(filesIn = new std::ifstream(inFile)))
186 error("Could not open batch in file", inFile);
187 if(key && key!=inFile)
188 if(!(keysIn = new std::ifstream(key)))
189 error("Could not open batch key file",key);
190
191 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
192 error("Must use timestamps with timestamped database","use --times");
193
194 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
195 error("Must use power with power-enabled database", dbName);
196
197 unsigned totalVectors=0;
198 char *thisKey = new char[MAXSTR];
199 char *thisFile = new char[MAXSTR];
200 char *thisTimesFileName = new char[MAXSTR];
201 char *thisPowerFileName = new char[MAXSTR];
202
203 do{
204 filesIn->getline(thisFile,MAXSTR);
205 if(key && key!=inFile)
206 keysIn->getline(thisKey,MAXSTR);
207 else
208 thisKey = thisFile;
209 if(usingTimes)
210 timesFile->getline(thisTimesFileName,MAXSTR);
211 if(usingPower)
212 powerFile->getline(thisPowerFileName, MAXSTR);
213
214 if(filesIn->eof())
215 break;
216
217 initInputFile(thisFile);
218
219 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
220 error("batchinsert failed: no more room in database", thisFile);
221 }
222
223 // Linear scan of filenames check for pre-existing feature
224 unsigned alreadyInserted=0;
225
226 for(unsigned k=0; k<dbH->numFiles; k++)
227 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
228 alreadyInserted=1;
229 break;
230 }
231
232 if(alreadyInserted){
233 if(verbosity) {
234 std::cerr << "Warning: key already exists in database:" << thisKey << std::endl;
235 }
236 }
237 else{
238
239 // Make a track index table of features to file indexes
240 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
241 if(!numVectors){
242 if(verbosity) {
243 std::cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << std::endl;
244 }
245 }
246 else{
247 if(usingTimes){
248 if(timesFile->eof()) {
249 error("not enough timestamp files in timesList", timesFileName);
250 }
251 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
252 if(!thisTimesFile->is_open()) {
253 error("Cannot open timestamp file", thisTimesFileName);
254 }
255 off_t insertoffset = dbH->length;
256 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
257 double *timesdata = timesTable + 2*indexoffset;
258 if(2*(indexoffset + numVectors) > timesTableLength) {
259 error("out of space for times", key);
260 }
261 insertTimeStamps(numVectors, thisTimesFile, timesdata);
262 if(thisTimesFile)
263 delete thisTimesFile;
264 }
265
266 if (usingPower) {
267 if(powerFile->eof()) {
268 error("not enough power files in powerList", powerFileName);
269 }
270 thispowerfd = open(thisPowerFileName, O_RDONLY);
271 if (thispowerfd < 0) {
272 error("failed to open power file", thisPowerFileName);
273 }
274 unsigned insertoffset = dbH->length;
275 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
276 double *powerdata = powerTable + poweroffset;
277 insertPowerData(numVectors, thispowerfd, powerdata);
278 if (0 < thispowerfd) {
279 close(thispowerfd);
280 }
281 }
282 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
283
284 off_t insertoffset = dbH->length;// Store current state
285
286 // Increment file count
287 dbH->numFiles++;
288
289 // Update Header information
290 dbH->length+=(statbuf.st_size-sizeof(int));
291
292 // Update track to file index map
293 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
294
295 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
296
297 // Norm the vectors on input if the database is already L2 normed
298 if(dbH->flags & O2_FLAG_L2NORM)
299 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
300
301 totalVectors+=numVectors;
302
303 // Copy the header back to the database
304 memcpy (db, dbH, sizeof(dbTableHeaderT));
305 }
306 }
307 // CLEAN UP
308 munmap(indata,statbuf.st_size);
309 close(infid);
310 }while(!filesIn->eof());
311
312 if(verbosity) {
313 std::cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
314 << totalVectors*dbH->dim*sizeof(double) << " bytes." << std::endl;
315 }
316
317 // Report status
318 status(dbName);
319 }