annotate adc2004/adc2004.py @ 2:86aed1f351e3 tip

uriref for adc2004 audio file ids
author alo <nothing@tehis.net>
date Wed, 05 Apr 2017 17:51:18 +0100
parents ef28c91b6bc4
children
rev   line source
nothing@1 1 from os.path import join
nothing@1 2 from rdflib import Graph, BNode, Namespace, RDF, RDFS, Literal, URIRef, XSD, OWL
nothing@1 3 import glob, wave
nothing@1 4
class Adc2004Converter:
    """Convert "*REF.txt" annotation files plus their ".wav" audio into N3 RDF.

    For every ``*REF.txt`` file found in ``data_dir`` a fresh graph is built
    from the annotation rows and the matching wave file's header, and then
    serialized into ``destination`` as ``<name>.n3``.
    """

    # Duration attached to every annotated event.
    # NOTE(review): presumably one 256-sample analysis hop at 44.1 kHz,
    # expressed in seconds -- confirm against the dataset description.
    EVENT_DURATION = 256.0 / 44100.0

    def __init__(self):
        """Set the default input and output directories."""
        self.data_dir = "./data"
        self.destination = "./rdf"

    def run(self):
        """Convert every ``*REF.txt`` file in ``data_dir`` to an N3 file.

        The audio file is looked up by replacing ``REF.txt`` with ``.wav`` in
        the annotation path.  The output file is named after the part of the
        annotation file name that precedes its first dot.
        """
        # Function-scope import: only ``join`` is imported from os.path at
        # file level.
        import os.path

        for path in glob.glob(self.data_dir + "/*REF.txt"):
            self.createGraph()
            # The ``with`` block closes the file; no explicit close() needed.
            with open(path, "r") as adc_file:
                data = adc_file.read()
            audio_data = self.get_audio_data(path.replace("REF.txt", ".wav"))
            self.convert(data, audio_data)
            # basename() copes with the platform separator that glob may
            # return, unlike a manual split on "/".
            stem = os.path.basename(path).split(".")[0]
            write_path = self.destination + "/" + stem + ".n3"
            self.graph.serialize(write_path, format="n3")

    def bindNamespaces(self):
        """Create the namespace table and bind each prefix on the graph."""
        self.ns = {
            'afv': Namespace("https://w3id.org/afo/vocab/1.1#"),
            'afo': Namespace("https://w3id.org/afo/onto/1.1#"),
            'tl': Namespace("http://purl.org/NET/c4dm/timeline.owl#"),
            'event': Namespace("http://purl.org/NET/c4dm/event.owl#"),
            'mo': Namespace("http://purl.org/ontology/mo/"),
            'sxsd': Namespace("https://www.w3.org/TR/speech-synthesis11/synthesis-nonamespace.xsd#")
        }
        for prefix, namespace in self.ns.items():
            self.graph.bind(prefix, namespace)

    def createGraph(self):
        """Start a new, empty graph with all namespaces bound."""
        self.graph = Graph()
        self.bindNamespaces()

    @staticmethod
    def _parse_rows(data):
        """Yield ``(time, freq)`` string pairs from the annotation text.

        Rows are split on any run of whitespace, so tabs, repeated spaces and
        CRLF line endings are accepted.  Blank or whitespace-only rows are
        skipped.  A row that does not hold exactly two fields raises
        ``ValueError``.
        """
        for row in data.splitlines():
            fields = row.split()
            if not fields:
                continue
            time, freq = fields
            yield time, freq

    def convert(self, data, audio_data):
        """Populate ``self.graph`` from annotation text and audio metadata.

        ``data`` is the full text of one annotation file, one
        "time frequency" pair per row.  ``audio_data`` is the dict returned
        by ``get_audio_data``.
        """
        # Function-scope import: only ``join`` is imported from os.path at
        # file level.
        import os.path

        ns = self.ns
        graph = self.graph

        self.signal = BNode()
        # The audio file is identified by its bare file name.
        self.file = URIRef(os.path.basename(audio_data['path']))
        self.timeline = BNode()
        self.interval = BNode()
        # float() keeps this a true division even where "/" on two ints
        # would truncate (Python 2).
        duration = float(audio_data['n_frames']) / audio_data['f_rate']

        # NOTE(review): the xsd:duration literals here and on each event hold
        # plain numbers rather than ISO 8601 durations ("PT1.5S"); left
        # unchanged so the emitted RDF stays the same -- confirm whether
        # consumers expect this.
        graph.add(( self.signal, RDF.type, ns['mo']['Signal'] ))
        graph.add(( self.file, RDF.type, ns['mo']['AudioFile'] ))
        graph.add(( self.timeline, RDF.type, ns['mo']['Timeline'] ))
        graph.add(( self.interval, RDF.type, ns['tl']['Interval'] ))
        graph.add(( self.file, ns['mo']['encodes'], self.signal ))
        graph.add(( self.signal, ns['mo']['sample_rate'], Literal(audio_data['f_rate']) ))
        graph.add(( self.signal, ns['mo']['channels'], Literal(audio_data['n_channels']) ))
        graph.add(( self.signal, ns['mo']['time'], self.interval ))
        graph.add(( self.interval, ns['tl']['duration'], Literal(str(duration), datatype=XSD.duration) ))
        graph.add(( self.interval, ns['tl']['timeline'], self.timeline ))

        # Loop-invariant terms, built once.
        event_duration = Literal(self.EVENT_DURATION, datatype=XSD.duration)
        hertz = ns['sxsd']['hertz.number']

        # Events are numbered consecutively over the non-blank rows only.
        for index, (time, freq) in enumerate(self._parse_rows(data)):
            event_id = BNode("event_" + str(index))
            interval_id = BNode()
            graph.add(( event_id, RDF.type, ns['afv']['FundamentalFrequency'] ))
            graph.add(( event_id, ns['afo']['value'], Literal(str(float(freq)), datatype=hertz) ))
            graph.add(( event_id, ns['event']['time'], interval_id ))
            graph.add(( interval_id, ns['tl']['at'], Literal(time, datatype=XSD.float) ))
            graph.add(( interval_id, ns['tl']['duration'], event_duration ))
            graph.add(( interval_id, ns['tl']['timeline'], self.timeline ))

    def get_audio_data(self, path):
        """Return header information of the wave file at ``path``.

        The result is a dict with the keys ``n_channels``, ``n_frames``,
        ``s_width``, ``f_rate`` and ``path``.  Errors raised by
        ``wave.open`` or the header getters propagate to the caller.
        """
        wave_read = wave.open(path, 'rb')
        try:
            return {
                'n_channels': wave_read.getnchannels(),
                'n_frames': wave_read.getnframes(),
                's_width': wave_read.getsampwidth(),
                'f_rate': wave_read.getframerate(),
                'path': path,
            }
        finally:
            # Release the file handle even if reading the header fails.
            wave_read.close()
nothing@1 79
def main():
    """Build a converter with its default directories and run it."""
    converter = Adc2004Converter()
    converter.run()
nothing@1 82
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()