Daniel@0
|
1 # Part of DML (Digital Music Laboratory)
|
Daniel@0
|
2 #
|
Daniel@0
|
3 # This program is free software; you can redistribute it and/or
|
Daniel@0
|
4 # modify it under the terms of the GNU General Public License
|
Daniel@0
|
5 # as published by the Free Software Foundation; either version 2
|
Daniel@0
|
6 # of the License, or (at your option) any later version.
|
Daniel@0
|
7 #
|
Daniel@0
|
8 # This program is distributed in the hope that it will be useful,
|
Daniel@0
|
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
11 # GNU General Public License for more details.
|
Daniel@0
|
12 #
|
Daniel@0
|
13 # You should have received a copy of the GNU General Public
|
Daniel@0
|
14 # License along with this library; if not, write to the Free Software
|
Daniel@0
|
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
16
|
Daniel@0
|
17 #!/usr/bin/env python
|
Daniel@0
|
18 # encoding: utf-8
|
Daniel@0
|
19 """
|
Daniel@0
|
20 assetDB.py
|
Daniel@0
|
21
|
Daniel@0
|
22 Created by George Fazekas on 2012-01-16. Modifications by Mathieu Barthet in 2013-12,
|
Daniel@0
|
23 Steven Hargreaves 22/12/2014.
|
Daniel@0
|
24 Copyright (c) 2013 . All rights reserved.
|
Daniel@0
|
25 """
|
Daniel@0
|
26
|
Daniel@0
|
27 import sys,os,logging
|
Daniel@0
|
28 import sqlalchemy as sal
|
Daniel@0
|
29 from sqlalchemy.ext.declarative import declarative_base
|
Daniel@0
|
30 from sqlalchemy import Column, Integer, String, Sequence, Enum
|
Daniel@0
|
31 from sqlalchemy.orm import sessionmaker
|
Daniel@0
|
32 from sqlalchemy.dialects import mysql
|
Daniel@0
|
33 from hashlib import md5
|
Daniel@0
|
34
|
Daniel@0
|
class assetDB(object):
    '''Access layer for the commercial asset database.

    Wraps an SQLAlchemy session on a MySQL database and yields, for each
    asset row, a validated audio file path (or None when no usable file
    exists on disk). Problem cases can optionally be appended to an
    "errata" file named by config.db_errata_file.

    NOTE(review): create_mapper() and generate_path() are templates in this
    file; the table columns and the path layout still have to be filled in.
    '''

    asset_types = ['wav','mpeg/320kbps','mpeg/64kbps']
    extensions = ['wav','mp3','mp3']
    ext = dict(zip(asset_types,extensions))

    # Section of the configuration file holding the database settings.
    DB_SECTION = 'Commercial Asset Database'

    # Accepted file sizes in bytes as (minimum, maximum), both inclusive.
    # Rationale: with very small files some feature extractor plugins fail
    # or output junk. The mpeg range assumes about 1:10 compression.
    WAV_SIZE_RANGE = (209715, 209715200)
    MPEG_SIZE_RANGE = (65536, 41943040)

    def __init__(self, prefix, pref=None, config=None):
        '''Store the settings; no database connection is made here.

        prefix -- stored as self.prefix (not used elsewhere in this class).
        pref   -- ordered list of asset types to fall back on; when empty or
                  None, assetDB.asset_types is used.
        config -- configuration object; connect() and create_mapper() call
                  config.get(section, option) on it. If it has a
                  db_errata_file attribute, errata are appended to that file.
        '''
        self.log = logging.getLogger('spark_feat_extract')
        self.log.info("ORM Version: %s",sal.__version__)
        self.config = config
        self.session = None
        self.Assets = None
        self.prefix = prefix
        if pref :
            self.asset_prefs = pref
        else :
            # copy, so that changing the instance list never alters the class default
            self.asset_prefs = list(assetDB.asset_types)
        # reporting errors:
        self.found_different_asset_type = 0
        self.errata_file = None
        if config and hasattr(config,"db_errata_file") :
            self.errata_file = config.db_errata_file

    def connect(self,echo=False):
        '''Connect to the MySQL database and create a session.

        echo -- passed through to sqlalchemy.create_engine().
        Returns self, so that calls can be chained.
        '''
        section = assetDB.DB_SECTION
        user = self.config.get(section, 'user')
        passwd = self.config.get(section, 'passwd')
        host = self.config.get(section, 'host')
        name = self.config.get(section, 'name')
        URL = "mysql://%s:%s@%s/%s" %(user,passwd,host,name)
        # Build the logged URL separately instead of str.replace() on the real
        # one: an empty password, or one that also occurs in the user or host
        # name, would otherwise corrupt the logged string.
        masked_URL = "mysql://%s:%s@%s/%s" %(user,'*****',host,name)
        self.log.info("Connecting to database server at: %s",masked_URL)
        engine = sal.create_engine(URL, echo=echo)
        Session = sessionmaker(bind=engine)
        self.session = Session()
        self.log.debug("MySQL session created successfully.")
        return self

    def close(self):
        '''Close the database session, if there is one. Returns self.'''
        if self.session :
            self.session.close()
            self.log.info("Database closed.")
        return self

    def create_mapper(self):
        '''Create an Object-Relational Mapper class and store it in self.Assets.

        The table name is read from the configuration. Returns self.
        '''
        Base = declarative_base()
        tablename = self.config.get(assetDB.DB_SECTION, 'tablename')
        class Assets(Base):
            __tablename__ = tablename
            # map all table columns to variables here, e.g.
            # album_id = Column(Integer, primary_key=True)
            # song_title = Column(String)
            # genre_id = Column(Integer)
        self.Assets = Assets
        return self

    def get_assets(self,start=0,limit=10,asset_type='audio/x-wav'):
        '''Returns some assets from the database as (path, asset) pairs.
        If the path given by the specified asset type does not exists,
        try to find the assets given the preference list provided in self.asset_prefs.
        If no valid path can be found for an asset, log the error and yield None for the path.

        start and limit are used as a slice [start:limit] on the query, so
        limit is an end index rather than a row count.'''
        # create the ORM mapper object if doesn't exist
        if self.Assets is None :
            self.create_mapper()
        assets = self.session.query(self.Assets)[start:limit]
        for result in self._resolve_assets(assets,asset_type):
            yield result

    def get_assets_by_genre(self,genre_id,start=0,limit=10,asset_type='audio/x-wav'):
        '''Returns some assets of the given genre_id from the database as (path, asset) pairs.
        If the path given by the specified asset type does not exists,
        try to find the assets given the preference list provided in self.asset_prefs.
        If no valid path can be found for an asset, log the error and yield None for the path.

        start and limit are used as a slice [start:limit] on the query, so
        limit is an end index rather than a row count.'''
        # create the ORM mapper object if doesn't exist
        if self.Assets is None :
            self.create_mapper()
        # slice the query itself (as get_assets does) rather than the result of
        # .all(), so that rows outside the requested window are not loaded
        query = self.session.query(self.Assets).filter(self.Assets.genre_id == genre_id)
        for result in self._resolve_assets(query[start:limit],asset_type):
            yield result

    def _resolve_assets(self,assets,asset_type):
        '''Yield exactly one (path, asset) pair for each asset in assets.

        path is the validated file of the requested asset_type, else the first
        usable file found through self.asset_prefs, else None.'''
        for asset in assets:
            path = self.generate_path(asset,asset_type)
            if self.validate_path(path) and self.validate_size(path,asset_type):
                yield path,asset
            elif not self.asset_prefs :
                self.log.error("Requested file for asset not found: %s. (Album ID: %i)",asset.song_title,asset.album_id)
                yield None,asset
            else :
                self.log.warning("Requested file for asset bad or not found: %s. (Album ID: %i)",asset.song_title,asset.album_id)
                self.log.warning("Trying other asset types.")
                # find_preferred_asset_path() returns None when nothing usable exists
                yield self.find_preferred_asset_path(asset),asset

    def find_preferred_asset_path(self,asset):
        '''Iteratively find a path name for each asset type in asset_prefs and return the first one available.
        Return None if not found and log this event for error management.'''
        for asset_type in self.asset_prefs :
            path = self.generate_path(asset,asset_type)
            if not self.validate_path(path):
                self.append_db_errata(path,"File not found.")
                continue
            self.log.info("Asset found but type is different from requested: %s. (Album ID: %i) ",asset.song_title,asset.album_id)
            self.append_db_errata(path,"Found different asset type for problem case. (%s)"%asset_type)
            self.found_different_asset_type += 1
            if self.validate_size(path,asset_type):
                return path
            self.log.error("Requested file for asset is worng size, probably corrupt: %s. (Album ID: %i)",asset.song_title,asset.album_id)
        # Always report a miss. Previously this was only logged when the last
        # generated path happened to be an empty string.
        self.log.warning("Asset not found for: %s. (Album ID: %i)",asset.song_title,asset.album_id)
        return None

    def generate_path(self,asset,asset_type):
        '''Generate the path name given a asset database object and a requested asset type.

        Template: currently always returns an empty string.'''
        path = '' # need to generate audio file path here
        return path

    def validate_path(self,path):
        '''Validate the generated path name: True if it is an existing regular file.'''
        return os.path.isfile(path)

    def validate_size(self,path,asset_type):
        '''Check if the file size makes sense.

        Returns False (and records the reason in the errata file) when the size
        cannot be read, is zero, or is outside the range accepted for the asset
        type; returns True otherwise. The type is matched by substring: 'wav'
        selects WAV_SIZE_RANGE and 'mpeg' selects MPEG_SIZE_RANGE.'''
        try :
            size = int(os.path.getsize(path))
        except Exception as e:
            # deliberately broad: any failure to read the size rejects the file
            self.append_db_errata(path,"Unable to determine file size.")
            self.log.error("Unable to determine file size: %s." %path)
            self.log.error("Exception %s."%str(e))
            return False
        if size == 0 :
            self.append_db_errata(path,"File has zero size.")
            self.log.error("File has zero size: %s."%path)
            return False
        for marker,(smallest,largest) in (('wav',assetDB.WAV_SIZE_RANGE),('mpeg',assetDB.MPEG_SIZE_RANGE)):
            if marker in asset_type and (size > largest or size < smallest):
                self.append_db_errata(path,"Rejected file size is: %f KB" %(size/1024.0))
                return False
        return True

    def get_different_asset_no(self):
        '''Return a count of the cases where the preferred asset type was not found'''
        return self.found_different_asset_type

    def reset_different_asset_no(self):
        '''Reset the asset type was not found counter'''
        self.found_different_asset_type = 0

    def append_db_errata(self,filename,reason,metadata=""):
        '''Append to a file collecting assets present in the DB but not found on disk.

        Writes one line "filename,reason[,metadata]" to self.errata_file.
        Returns True if the line was written, False if no errata file is
        configured or the write failed. Failures are logged, never raised.'''
        if not self.errata_file :
            return False
        if metadata :
            line = "%s,%s,%s\n" %(filename,reason,metadata)
        else :
            line = "%s,%s\n" %(filename,reason)
        try :
            with open(self.errata_file,"a") as ef:
                ef.write(line)
        except Exception as e:
            # best effort: errata reporting must not interrupt asset processing
            self.log.error("Failed to append database errata.")
            self.log.error("Exception %s."%str(e))
            return False
        return True
|
Daniel@0
|
229
|