mir-rdf-datasets: adc2004/adc2004.py annotate

annotate adc2004/adc2004.py @ 2:86aed1f351e3 tip

uriref for adc2004 audio file ids

author	alo <nothing@tehis.net>
date	Wed, 05 Apr 2017 17:51:18 +0100
parents	ef28c91b6bc4
children

rev	line source
import glob
import wave
from os.path import basename, join, splitext

from rdflib import Graph, BNode, Namespace, RDF, RDFS, Literal, URIRef, XSD, OWL
nothing@1	4
class Adc2004Converter:
    """Convert ``*REF.txt`` annotation files and their audio into N3 graphs.

    For every annotation file found in ``data_dir`` the converter reads the
    matching ``.wav`` file's header, builds an RDF graph describing the audio
    signal plus one fundamental-frequency event per annotation row, and
    serializes the graph to ``destination`` as ``<annotation stem>.n3``.
    """

    # File-name suffix that marks an annotation file; the matching audio file
    # has the same name with this suffix replaced by ".wav".
    ref_suffix = "REF.txt"

    # Duration (in seconds) attached to every event interval. The value was
    # previously hard-coded inline as 256/44100 -- presumably a 256-sample hop
    # at 44.1 kHz; confirm against the dataset description.
    frame_duration = 256.0 / 44100.0

    def __init__(self):
        """Set the default input and output directories."""
        self.data_dir = "./data"
        self.destination = "./rdf"

    def run(self):
        """Convert every annotation file in ``data_dir`` to an N3 file."""
        pattern = join(self.data_dir, "*" + self.ref_suffix)
        for path in glob.glob(pattern):
            # Each annotation file gets its own fresh graph.
            self.createGraph()
            with open(path, "r") as adc_file:
                data = adc_file.read()
            # Swap only the trailing suffix, never an earlier occurrence of
            # the same text elsewhere in the path.
            wav_path = path[:-len(self.ref_suffix)] + ".wav"
            audio_data = self.get_audio_data(wav_path)
            self.convert(data, audio_data)
            stem = splitext(basename(path))[0]
            write_path = join(self.destination, stem + ".n3")
            self.graph.serialize(write_path, format="n3")

    def bindNamespaces(self):
        """Create the namespace table and bind each prefix on the graph."""
        self.ns = {
            'afv': Namespace("https://w3id.org/afo/vocab/1.1#"),
            'afo': Namespace("https://w3id.org/afo/onto/1.1#"),
            'tl': Namespace("http://purl.org/NET/c4dm/timeline.owl#"),
            'event': Namespace("http://purl.org/NET/c4dm/event.owl#"),
            'mo': Namespace("http://purl.org/ontology/mo/"),
            'sxsd': Namespace("https://www.w3.org/TR/speech-synthesis11/synthesis-nonamespace.xsd#")
        }
        for key in self.ns:
            self.graph.bind(key, self.ns[key])

    def createGraph(self):
        """Replace ``self.graph`` with an empty graph with bound prefixes."""
        self.graph = Graph()
        self.bindNamespaces()

    @staticmethod
    def _parse_rows(data):
        """Split annotation text into ``(time, frequency)`` pairs.

        Blank lines are skipped and columns may be separated by any run of
        whitespace. ``time`` is returned as the original string (it is
        written to the graph verbatim); ``frequency`` is parsed to ``float``.

        Raises:
            ValueError: if a non-blank row does not have exactly two columns
                or its second column is not a number.
        """
        pairs = []
        for row in data.splitlines():
            fields = row.split()
            if not fields:
                continue
            if len(fields) != 2:
                raise ValueError("expected 'time frequency', got: %r" % row)
            time, freq = fields
            pairs.append((time, float(freq)))
        return pairs

    def convert(self, data, audio_data):
        """Populate ``self.graph`` from annotation text and audio metadata.

        Args:
            data: full text of an annotation file, one "time frequency" pair
                per line.
            audio_data: dict as returned by ``get_audio_data`` (the keys
                ``path``, ``n_frames``, ``f_rate`` and ``n_channels`` are
                read here).
        """
        ns = self.ns
        add = self.graph.add

        self.signal = BNode()
        self.file = URIRef(basename(audio_data['path']))
        self.timeline = BNode()
        self.interval = BNode()
        # float() keeps the division exact under Python 2 as well, where
        # int / int would truncate to whole seconds.
        duration = float(audio_data['n_frames']) / audio_data['f_rate']

        add((self.signal, RDF.type, ns['mo']['Signal']))
        add((self.file, RDF.type, ns['mo']['AudioFile']))
        add((self.timeline, RDF.type, ns['mo']['Timeline']))
        add((self.interval, RDF.type, ns['tl']['Interval']))
        add((self.file, ns['mo']['encodes'], self.signal))
        add((self.signal, ns['mo']['sample_rate'], Literal(audio_data['f_rate'])))
        add((self.signal, ns['mo']['channels'], Literal(audio_data['n_channels'])))
        add((self.signal, ns['mo']['time'], self.interval))
        # NOTE(review): durations are emitted as plain seconds typed as
        # xsd:duration, which is not the ISO 8601 "PT...S" lexical form.
        # Left unchanged because consumers may rely on it -- confirm.
        add((self.interval, ns['tl']['duration'], Literal(str(duration), datatype=XSD.duration)))
        add((self.interval, ns['tl']['timeline'], self.timeline))

        for index, (time, freq) in enumerate(self._parse_rows(data)):
            event_id = BNode("event_" + str(index))
            interval_id = BNode()
            add((event_id, RDF.type, ns['afv']['FundamentalFrequency']))
            add((event_id, ns['afo']['value'], Literal(str(freq), datatype=ns['sxsd']['hertz.number'])))
            add((event_id, ns['event']['time'], interval_id))
            add((interval_id, ns['tl']['at'], Literal(time, datatype=XSD.float)))
            add((interval_id, ns['tl']['duration'], Literal(self.frame_duration, datatype=XSD.duration)))
            add((interval_id, ns['tl']['timeline'], self.timeline))

    def get_audio_data(self, path):
        """Read header information from the WAV file at ``path``.

        Returns:
            dict with keys ``n_channels``, ``n_frames``, ``s_width`` (sample
            width as reported by ``getsampwidth``), ``f_rate`` (frame rate)
            and ``path`` (the argument, unchanged).
        """
        wave_read = wave.open(path, 'rb')
        try:
            audio_data = {
                'n_channels': wave_read.getnchannels(),
                'n_frames': wave_read.getnframes(),
                's_width': wave_read.getsampwidth(),
                'f_rate': wave_read.getframerate(),
                'path': path,
            }
        finally:
            # Close the reader even if one of the header getters raises.
            wave_read.close()
        return audio_data
nothing@1	79
def main():
    """Build a converter with its default directories and run it."""
    converter = Adc2004Converter()
    converter.run()


# Only convert when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Mercurial > hg > mir-rdf-datasets

annotate adc2004/adc2004.py @ 2:86aed1f351e3 tip