Mercurial > hg > dml-open-backendtools
comparison pyspark/ilm/assetDB.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # | |
3 # This program is free software; you can redistribute it and/or | |
4 # modify it under the terms of the GNU General Public License | |
5 # as published by the Free Software Foundation; either version 2 | |
6 # of the License, or (at your option) any later version. | |
7 # | |
8 # This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public | |
14 # License along with this library; if not, write to the Free Software | |
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
16 | |
17 #!/usr/bin/env python | |
18 # encoding: utf-8 | |
19 """ | |
20 assetDB.py | |
21 | |
22 Created by George Fazekas on 2012-01-16. Modifications by Mathieu Barthet in 2013-12, | |
23 Steven Hargreaves 22/12/2014. | |
24 Copyright (c) 2013 . All rights reserved. | |
25 """ | |
26 | |
27 import sys,os,logging | |
28 import sqlalchemy as sal | |
29 from sqlalchemy.ext.declarative import declarative_base | |
30 from sqlalchemy import Column, Integer, String, Sequence, Enum | |
31 from sqlalchemy.orm import sessionmaker | |
32 from sqlalchemy.dialects import mysql | |
33 from hashlib import md5 | |
34 | |
class assetDB(object):
    '''Look up audio assets in the commercial asset database and locate their files on disk.

    The database connection settings and the table name are read from the
    'Commercial Asset Database' section of the config object passed to the
    constructor (accessed as config.get(section, option)).

    generate_path() is a placeholder that returns an empty path; it has to be
    filled in (or overridden in a subclass) before any asset can be resolved
    to a file.
    '''

    asset_types = ['wav','mpeg/320kbps','mpeg/64kbps']
    extensions = ['wav','mp3','mp3']
    # maps each asset type to its file extension
    ext = dict(zip(asset_types,extensions))

    # config section holding the database settings
    _DB_SECTION = 'Commercial Asset Database'

    # Accepted file size ranges in bytes (limits themselves are accepted).
    # rationale: with very small files some feature extractor plugins fail or output junk
    _WAV_MAX_BYTES = 209715200   # 200 MiB
    _WAV_MIN_BYTES = 209715      # about 0.2 MiB
    # same assuming about 1:10 compression
    _MPEG_MAX_BYTES = 41943040   # 40 MiB
    _MPEG_MIN_BYTES = 65536      # 64 KiB

    def __init__(self, prefix, pref=None, config=None):
        '''Set up logging, asset type preferences and error reporting.

        prefix -- stored as self.prefix (not used by the code in this class)
        pref   -- optional list of asset types to try, in order, when the
                  requested asset type cannot be used; when empty or omitted
                  the class-level asset_types list is used
        config -- configuration object; if it has a db_errata_file attribute,
                  problem files are appended to that file
        '''
        self.log = logging.getLogger('spark_feat_extract')
        self.log.info("ORM Version: %s",sal.__version__)
        self.config = config
        self.session = None
        self.Assets = None
        self.prefix = prefix
        if pref :
            self.asset_prefs = pref
        else :
            # copy, so that changing the instance list can never alter the class attribute
            self.asset_prefs = list(assetDB.asset_types)
        # reporting errors:
        self.found_different_asset_type = 0
        self.errata_file = None
        if config and hasattr(config,"db_errata_file") :
            self.errata_file = config.db_errata_file

    def _database_urls(self):
        '''Return (url, masked_url): the connection URL and a copy that is safe to log.'''
        section = self._DB_SECTION
        user = self.config.get(section, 'user')
        passwd = self.config.get(section, 'passwd')
        host = self.config.get(section, 'host')
        name = self.config.get(section, 'name')
        template = "mysql://%s:%s@%s/%s"
        # The masked URL is built separately instead of replacing the password
        # inside the real URL: str.replace() with an empty password would insert
        # the mask between every character, and a password that also occurs in
        # the user or host name would mask those parts as well.
        return template %(user,passwd,host,name), template %(user,'*****',host,name)

    def connect(self,echo=False):
        '''Connect to the MySQL database and create a session.

        echo -- passed through to sqlalchemy.create_engine
        Returns self.
        '''
        URL, masked_URL = self._database_urls()
        self.log.info("Connecting to database server at: %s",masked_URL)
        engine=sal.create_engine(URL, echo=echo)
        Session = sessionmaker(bind=engine)
        self.session = Session()
        self.log.debug("MySQL session created successfully.")
        return self

    def close(self):
        '''Close the database session, if one is open. Returns self.'''
        if self.session :
            self.session.close()
            self.log.info("Database closed.")
        return self

    def create_mapper(self):
        '''Create an Object-Relational Mapper class and store it in self.Assets.

        The table name is read from the config. Returns self.
        '''
        Base = declarative_base()
        tablename = self.config.get(self._DB_SECTION, 'tablename')
        class Assets(Base):
            __tablename__ = tablename
            # map all table columns to variables here, e.g.
            # album_id = Column(Integer, primary_key=True)
            # song_title = Column(String)
            # genre_id = Column(Integer)
            # NOTE(review): no columns are mapped yet, while the methods of this
            # class read album_id, song_title and genre_id; SQLAlchemy is also
            # expected to reject a mapped class without a primary key -- confirm
            # and fill in the real schema.
        self.Assets = Assets
        return self

    def _resolve_asset_paths(self,assets,asset_type):
        '''Yield exactly one (path, asset) pair for each asset in assets.

        path is the validated file name for the requested asset type, or the
        first usable file among self.asset_prefs, or None if nothing usable
        was found.
        '''
        for asset in assets:
            path = self.generate_path(asset,asset_type)
            if self.validate_path(path) and self.validate_size(path,asset_type):
                yield path,asset
            elif not self.asset_prefs :
                self.log.error("Requested file for asset not found: %s. (Album ID: %i)",asset.song_title,asset.album_id)
                yield None,asset
            else :
                self.log.warning("Requested file for asset bad or not found: %s. (Album ID: %i)",asset.song_title,asset.album_id)
                self.log.warning("Trying other asset types.")
                # find_preferred_asset_path returns None when nothing is found
                yield self.find_preferred_asset_path(asset),asset

    def get_assets(self,start=0,limit=10,asset_type='audio/x-wav'):
        '''Returns some assets from the database.
        If the path given by the specified asset type does not exists,
        try to find the assets given the preference list provided in self.asset_prefs.
        If no valid path can be found for an asset, log the error and yield None for the path.

        The query is sliced as [start:limit], so limit is the end index of
        the slice, not the number of rows.
        Yields (path, asset) pairs.
        '''
        # create the ORM mapper object if doesn't exist
        if self.Assets is None :
            self.create_mapper()

        assets = self.session.query(self.Assets)[start:limit]
        for result in self._resolve_asset_paths(assets,asset_type):
            yield result

    def get_assets_by_genre(self,genre_id,start=0,limit=10,asset_type='audio/x-wav'):
        '''Returns some assets of the given genre_id from the database.
        If the path given by the specified asset type does not exists,
        try to find the assets given the preference list provided in self.asset_prefs.
        If no valid path can be found for an asset, log the error and yield None for the path.

        The query is sliced as [start:limit], so limit is the end index of
        the slice, not the number of rows.
        Yields (path, asset) pairs.
        '''
        # create the ORM mapper object if doesn't exist
        if self.Assets is None :
            self.create_mapper()

        # slice the query itself (as get_assets does) instead of loading every
        # row of the genre with all() and slicing the resulting list
        assets = self.session.query(self.Assets).filter(self.Assets.genre_id == genre_id)[start:limit]
        for result in self._resolve_asset_paths(assets,asset_type):
            yield result

    def find_preferred_asset_path(self,asset):
        '''Iteratively find a path name for each asset type in asset_prefs and return the first one available.
        Return None if not found and log this event for error management.

        Every existing file counts towards the "different asset type" counter
        and is recorded in the errata file, even if it is then rejected
        because of its size.
        '''
        for asset_type in self.asset_prefs :
            path = self.generate_path(asset,asset_type)
            if not self.validate_path(path):
                self.append_db_errata(path,"File not found.")
                continue
            self.log.info("Asset found but type is different from requested: %s. (Album ID: %i) ",asset.song_title,asset.album_id)
            self.append_db_errata(path,"Found different asset type for problem case. (%s)"%asset_type)
            self.found_different_asset_type += 1
            if self.validate_size(path,asset_type):
                return path
            self.log.error("Requested file for asset is worng size, probably corrupt: %s. (Album ID: %i)",asset.song_title,asset.album_id)
        # reaching this point means that no preferred asset type gave a usable file
        self.log.warning("Asset not found for: %s. (Album ID: %i)",asset.song_title,asset.album_id)
        return None

    def generate_path(self,asset,asset_type):
        '''Generate the path name given a asset database object and a requested asset type.

        Placeholder: always returns an empty string.
        '''
        path = '' # need to generate audio file path here
        return path

    def validate_path(self,path):
        '''Validate the generated path name: True if it names an existing regular file.'''
        return os.path.isfile(path)

    def validate_size(self,path,asset_type):
        '''Check if the file size makes sense.

        Returns False (and records the reason in the errata file) if the size
        cannot be determined, is zero, or lies outside the accepted range for
        asset types containing 'wav' or 'mpeg'. Returns True otherwise.
        '''
        try :
            size = int(os.path.getsize(path))
        except Exception as e:
            self.append_db_errata(path,"Unable to determine file size.")
            self.log.error("Unable to determine file size: %s." %path)
            self.log.error("Exception %s."%str(e))
            return False
        if size == 0 :
            self.append_db_errata(path,"File has zero size.")
            self.log.error("File has zero size: %s."%path)
            return False
        if 'wav' in asset_type :
            if size > self._WAV_MAX_BYTES or size < self._WAV_MIN_BYTES :
                self.append_db_errata(path,"Rejected file size is: %f KB" %(size/1024.0))
                return False
        if 'mpeg' in asset_type :
            if size > self._MPEG_MAX_BYTES or size < self._MPEG_MIN_BYTES :
                self.append_db_errata(path,"Rejected file size is: %f KB" %(size/1024.0))
                return False
        return True

    def get_different_asset_no(self):
        '''Return the value of the "different asset type found" counter.'''
        return self.found_different_asset_type

    def reset_different_asset_no(self):
        '''Reset the "different asset type found" counter to zero.'''
        self.found_different_asset_type = 0

    def append_db_errata(self,filename,reason,metadata=""):
        '''Append a "filename,reason[,metadata]" line to the errata file.

        Returns True if the line was written, False if no errata file is
        configured or if writing failed (the failure is logged).
        '''
        if not self.errata_file :
            return False
        if metadata :
            line = "%s,%s,%s\n" %(filename,reason,metadata)
        else:
            line = "%s,%s\n" %(filename,reason)
        try :
            with open(self.errata_file,"a") as ef:
                ef.write(line)
        except Exception:
            # best effort: a failing errata file must not stop asset processing
            self.log.error("Failed to append database errata.")
            return False
        return True
229 |