comparison cpack/dml/lib/bl_p2r.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:718306e29690
1 /* Part of DML (Digital Music Laboratory)
2 Copyright 2014-2015 Samer Abdallah, University of London
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 :- module(bl_p2r,[ audio_file/3, audio_link/2, scrape_audio_link/2, has_available_audio/1 ]).
20
21 :- use_module(library(semweb/rdf_db)).
22 :- use_module(library(xmlarchive)).
23 :- use_module(library(xpath)).
24 :- use_module(library(settings)).
25 :- use_module(library(sandbox)).
26 :- use_module(library(fileutils)).
27 :- use_module(library(termutils)).
28 :- use_module(library(rdfutils)).
29 :- use_module(library(insist)).
30 :- use_module(library(memo)).
31 :- use_module(entailment(p2r)).
32 :- use_module(cliopatria(hooks)).
33 :- use_module(library(http/http_client)).
34
35 :- set_prolog_flag(double_quotes,string).
36
37 /*
38 METS to RDF conversion.
39
40 Useful info:
41 0. Each top-level mets:mets element contains several sections.
42 1,4 x dmdSec .. 1 x mdWrap
43 1 x amdSec .. [ N x techMD, {0,1} x rightsMD, {0,1} x sourceMD ]
44 1 x fileSec .. 1-4 x fileGrp .. 1-36 x file
45 1 x structLink .. {1,3} x smLink
46 2-3 x structMap .. 1 x div
47
48 1. Each dmdSec only ever contains one mdWrap element
49 5. each techMD or sourceMD contains exactly 1 mdWrap
50 6. each rightsMD contains an mdRef
51 7. The rightsMD is not interesting to look at.
52 8. dmdSec mdWraps can be empty
53
54 List of all dmdSec tags in dataset
55 blapsi:id3album
56 blapsi:id3artist
57 blapsi:id3comment
58 blapsi:id3songtitle
59 blapsi:id3year
60 dc:description
61 dc:identifier
62 dc:language
63 dc:rights
64 dc:source
65 dc:subject
66 dc:title
67 dc:type
68 dcterms:created
69 dcterms:isPartOf
70 dcterms:spatial
71 dcterms:temporal
72 marcrel:CMP
73 marcrel:CND
74 marcrel:IVE
75 marcrel:IVR
76 marcrel:LYR
77 marcrel:PRF
78 marcrel:RCE
79 marcrel:SPK
80 mods:accessCondition
81 mods:identifier
82 mods:name
83 mods:recordInfo
84 mods:titleInfo
85
86 amdSec tags
87 blapsi:audioObject
88 blapsi:file_bitrate
89 blapsi:file_channels
90 blapsi:file_duration
91 blapsi:file_sample
92 blapsi:resolution
93
94 sourceMD .. blapsi:audioObject
95 blapsi:face
96 blapsi:format
97 blapsi:physicalProperties
98 blapsi:face
99 blapsi:format
100 blapsi:physicalProperties
101 blapsi:primaryIdentifier
102 blapsi:secondaryIdentifier
103 blapsi:primaryIdentifier
104 blapsi:secondaryIdentifier
105
106 would prefer to have mo predicates to foaf:Person resources for these.
107 marcrel:'CMP',dml:composer).
108 marcrel:'CND',dml:conductor).
109 marcrel:'PRF',dml:performer).
110
111 */
112
113 :- setting(audio_root,ground,nothing,'Root directory of audio file collection').
114 :- setting(archive_pattern,atom,'~/lib/datasets/mets/BL_metadata_complete.7z','Pattern to match METS metadata files').
115
116 :- rdf_register_prefix(bldata,'http://sounds.bl.uk/resource/').
117 :- rdf_register_prefix(marcrel,'http://id.loc.gov/vocabulary/relators/').
118 :- rdf_register_prefix(blapsi,'http://sounds.bl.uk/blapsi#').
119 :- rdf_register_prefix(blterms,'http://www.bl.uk/schemas/bibliographic/blterms#').
120
121 % https://code.google.com/p/libarchive/issues/detail?id=328&colspec=ID%20Type%20Status%20Priority%20Milestone%20OpSys%20Owner%20Summary
122
123 :- public import/0.
124 import :- assert_all(bl_p2r).
125
126 rdf(dml:blpage,rdfs:range,foaf:'Document') <== true.
127 rdf(dml:blpage,rdfs:subPropertyOf,foaf:page) <== true.
128 rdf(S,P,O) <== bl_archive_triple(T), once(expand_triple(T,rdf(S,P,O))).
129
130 expand_triple(rdf(SS,PP,OO),rdf(S,P,O)) :-
131 uripattern:pattern_uri(SS,S),
132 uripattern:pattern_uri(PP,P),
133 ( OO=literal(_) -> rdf_global_object(OO,O)
134 ; uripattern:pattern_uri(OO,O)
135 ).
136
137 %% import_directory(+Dir:atom, +Graph:atom) is det.
138 %
139 % Import contents of a given directory into a named RDF graph.
140 % The directory must be a subdirectory of the directory named in the
141 % bl_p2r:audio_root setting (whose value is either =|just(Dir)|= or
142 % =|nothing|=).
143 import_directory(Dir,Graph) :-
144 forall( directory_triple(Dir,T), (once(expand_triple(T,rdf(S,P,O))), rdf_assert(S,P,O,Graph))).
145
146 directory_triple(Dir,T) :-
147 Ext=txt,
148 setting(audio_root,just(AudioRoot)),
149 atom_concat(AudioRoot,'/',Prefix),
150 find_files(under(AudioRoot/Dir),FullPath),
151 atom_concat(Prefix,Path,FullPath),
152 split_path(Path,Loc,Ext),
153 with_stream(Str,open(FullPath,read,Str), ext_loc_stream_triple(Ext,Loc,Str,T)).
154
155
156 %% bl_archive_triple(-T:rdf) is nondet.
157 % This predicate generates triples from the metadata archive files whose names
158 % match the pattern stored in the setting bl_p2r:archive_pattern.
159 bl_archive_triple(T) :-
160 setting(archive_pattern, ArchivePattern),
161 find_files(like(ArchivePattern), Archive),
162 with_archive_stream(Archive, Path, path_triple_stream(Path,T)).
163
164 path_triple_stream(Path,T,S) :-
165 status('Importing ~s',[Path]),
166 catch(( insist(split_path(Path,Loc,Ext)),
167 ext_loc_stream_triple(Ext,Loc,S,T)
168 ), Ex, (nl,print_message(warning,Ex),fail)).
169
170 split_path(Path,Dirs-Base,Ext) :-
171 % split_string(Path,'/','',Parts),
172 atomic_list_concat(Parts,'/',Path),
173 exclude(ignore_dir,Parts,Parts1),
174 append(Dirs,[Name],Parts1),
175 sub_atom(Name,_,3,0,Ext), % NB this assumes three character extension
176 sub_atom(Name,0,_,4,Base).
177
178 %% ext_loc_stream_triple(+Ext:atom, +Loc:pair(list(atom), atom), +S:stream, -T:rdf) is nondet.
179 %
180 % This predicate succeeds once for each RDF triple that can be derived from
181 % an archive stream whose path in the archive Loc=Dirs-Base consist of the directories
182 % Dirs and whose name is Base,'.',Ext. It understands entries with extensions
183 % xml and txt. XML streams are decoded as full METS documents. TXT streams are decoded
184 % as the DMD section of a METS document. Other extensions generate a warning and then fail.
185
186 ext_loc_stream_triple(xml,Dirs-Base,S,T) :- !,
187 insist(load_xmlns(S,Doc)),
188 insist(member(element(mets:mets,_,METS),Doc)),
189 ( insist(get_bl_url(Base,Dirs,URL)),
190 T=rdf(bldata:Base,dml:blpage,URL)
191 ; T=rdf(bldata:Base,rdf:type,mo:'Signal')
192 ; insist(multi,member(element(Tag,_,Content),METS),no_content(mets,Dirs-Base)),
193 mets_triple(Tag,Content,Dirs,Base,T)
194 ).
195
196 ext_loc_stream_triple(txt,Dirs-Base,S,T) :- !,
197 % TXT streams appear to be LATIN-1 encoded, not UTF-8
198 Base\=combined, % exclude combined metadata files
199 set_stream(S,encoding(iso_latin_1)),
200 insist(load_xmlns(S,Doc)),
201 txt_triple(Doc,Dirs,Base,T).
202
203 ext_loc_stream_triple(Ext,Dirs-Base,_,_) :-
204 warning(unrecognised_extension(Ext,Dirs-Base)).
205
206
207 %% get_bl_url(+Name:atom,+Dirs:list(atom),-URL:atom) is det.
208 % Deduces the sounds.bl.uk URL from entry name and directory.
209 get_bl_url(Name,Dirs,URL) :-
210 sub_atom(Name,0,4,_,CatCode),
211 category(CatCode,Category),
212 append(_,[Collection],Dirs),
213 atomic_list_concat([ 'http://sounds.bl.uk',
214 Category,Collection,Name],'/',URL).
215
216 category('021M','Oral-history').
217 category('025M','World-and-traditional-music').
218 category('023M','Jazz-and-popular-music').
219 category('026M','Classical-music').
220 category('028M','Jazz-and-popular-music').
221 category('020A','Classical-music').
222
223 ignore_dir('_Metadata').
224 ignore_dir('_Non-music').
225 ignore_dir('_Audio_Metadata').
226
227 % --------------------- TOP LEVEL STRUCTURE -------------------------------
228
229 unwrap([element(mets:mdWrap,MDAttr,[element(mets:xmlData,_,XMLContent)])],MDAttr,XMLContent).
230
231 % for complete METS documents
232 mets_triple(mets:dmdSec,DMDContent,_,ID,T) :- !,
233 unwrap(DMDContent,MDAttr,XMLContent),
234 member('MDTYPE'='DC',MDAttr),
235 dmd_triple(XMLContent,bldata:ID,T).
236
237 mets_triple(mets:amdSec,AMDContent,_,ID,T) :- !,
238 member(element(Tag,Attr,Content),AMDContent),
239 amd_triple(Tag,Attr,Content,ID,T).
240
241 mets_triple(mets:fileSec,FileSecContent,Dirs,ID,T) :- !,
242 member(element(T1,GAttr,GroupContent),FileSecContent), insist(T1=mets:fileGrp),
243 member(element(T2,FAttr,FileContent),GroupContent), insist(T2=mets:file),
244 \+member('ID'='WEBRESOURCES',GAttr),
245 FileContent=[element(Tag,Attr,Content)],
246 file_triple(Tag,Attr,Content,GAttr-FAttr,Dirs,ID,T).
247
248 mets_triple(mets:metsHdr,_,_,_,_) :- !, fail.
249 mets_triple(mets:structLink,_,_,_,_) :- !, fail.
250 mets_triple(mets:structMap,_,_,_,_) :- !, fail.
251 mets_triple(Tag,_,_,_,_) :- warning(unrecognised_tag(Tag,mets:mets)).
252
253 % for txt, partial XML documents
254 txt_triple(_,_,ID,rdf(bldata:ID,rdf:type,mo:'Signal')).
255 txt_triple(Doc,Dirs,ID,T) :-
256 insist(multi,member(element(Tag,_,Content),Doc),no_content(txt)),
257 txt_tag_triple(Tag,Content,Dirs,ID,T).
258
259 identifier_file_ext(F,F1,mp3) :- sub_atom(F,Bef,_,_,'.mp3'), !, insist(sub_atom(F,0,Bef,_,F1)).
260 identifier_file_ext(F,F1,wav) :- sub_atom(F,Bef,_,0,'.wav'), !, insist(sub_atom(F,0,Bef,_,F1)).
261 identifier_file_ext(F,F1,m4a) :- sub_atom(F,Bef,_,0,'.m4a'), !, insist(sub_atom(F,0,Bef,_,F1)).
262
263 txt_tag_triple(dc:identifier, [F], Dirs, ID, rdf(bldata:ID, bldata:path, literal(Path))) :- !,
264 % NB some of the txt files have the file name written twice. Hence I am going to discard
265 % everything after the first dot. Relies on sub_atom returning matches starting from the beginning
266 ( identifier_file_ext(F,F1,Ext)
267 -> file_name_extension(F1,Ext,Name),
268 atomics_to_string(Dirs,"/",Dir),
269 directory_file_path(Dir,Name,Path)
270 ; print_message(warning,txt_triple_identifier_fail(ID,F)),
271 fail
272 ).
273
274 % !!! MUSICALS only. Should not really have mo:duration in them either...
275 txt_tag_triple(dml:rating,Content, _, ID, rdf(bldata:ID, dml:rating, literal(Content))) :- !.
276 txt_tag_triple(mo:duration,Content, _, ID, rdf(bldata:ID, mo:duration, literal(type(xsd:float,Millis)))) :- !,
277 insist(Content=[Dur],bad_content(Content,mo:duration)),
278 insist(atom_number(Dur,Millis)).
279 txt_tag_triple(blapsi:file_duration,Content, _, ID, rdf(bldata:ID, mo:duration, literal(type(xsd:float,Millis)))) :- !,
280 insist(Content=[Dur],bad_content(Content,blapsi:file_duration)),
281 % insist(parse_duration_millis(Dur,Millis)).
282 ( parse_duration_millis(Dur,Millis) -> true
283 ; warning(bad_duration(ID,Dur))
284 ).
285 txt_tag_triple(Tag,Content,_,ID,T) :- dmd_tag_triple(Tag,Content,bldata:ID,T).
286
287
288 % --------------- Document meta data --------------------------
289
290 dmd_triple(DMD,URI,T) :-
291 member(element(Tag,_,Content),DMD),
292 dmd_tag_triple(Tag,Content,URI,T).
293
294 dmd_tag_triple(dcterms:contributor,Content,ID,T) :- !, dmd_triple(Content,bldata:ID,T).
295 dmd_tag_triple(dc:contributor,Content,ID,T) :- !, dmd_triple(Content,bldata:ID,T).
296 dmd_tag_triple(marcrel:REL,Content,URI,rdf(URI,marcrel:Rel,literal(Lit))) :- !,
297 Content=[Lit],%empty_tag(marcrel:REL,Content)),
298 downcase_atom(REL,Rel).
299 dmd_tag_triple(Tag,Content,URI,rdf(URI,Tag,literal(Lit))) :- keep_tag(Tag), !, Content=[Lit].
300 dmd_tag_triple(Tag,_,_,_) :- ignore_tag(Tag), !, fail.
301 dmd_tag_triple(Tag,_Content,URI,_) :- warning(unrecognised_tag(Tag,dmd,URI)).
302
303 % !!!FIXME - sometimes dates are given in D/M/Y instead of Y-M-D
304 keep_tag(dc:title).
305 keep_tag(dc:description).
306 keep_tag(dc:source).
307 keep_tag(dc:subject).
308 keep_tag(dc:language).
309 keep_tag(dc:created).
310 keep_tag(dcterms:language).
311 keep_tag(dcterms:abstract).
312 keep_tag(dcterms:created).
313 keep_tag(dcterms:spatial).
314 keep_tag(dcterms:temporal).
315 keep_tag(dcterms:extent). % !!!FIXME need to parse this
316 keep_tag(blterms:mechanism).
317 keep_tag(dcterms:isPartOf).
318 keep_tag(blapsi:format).
319
320 ignore_tag(dc:identifier).
321 ignore_tag(blapsi:marker).
322 ignore_tag(dc:rights).
323 ignore_tag(dc:type).
324 ignore_tag(rdf:about).
325 ignore_tag('ARK').
326
327 % ------------------------- ADMINISTRATIVE METADATA SECTION -----------------------------
328
329 amd_triple(mets:sourceMD,_,SMDContent,ID,T) :-
330 insist(unwrap(SMDContent,_,XMLContent),no_xml_content(SMDContent,smd)),
331 atom_concat(ID,'#source',Src),
332 ( T=rdf(bldata:ID,dml:source,bldata:Src)
333 ; insist(multi,member(element(Tag,Attr,Content),XMLContent),no_xml_content(smd)),
334 smd_xml_triple(Tag,Attr,Content,bldata:Src,T)
335 ).
336
337 amd_triple(mets:techMD,Attr,TMDContent,ID,T) :-
338 insist(member('ID'=TMDId,Attr)),
339 unwrap(TMDContent,_,XMLContent),
340 ( T=rdf(bldata:ID/TMDId, mo:sampled_version_of, bldata:ID)
341 % ; T=rdf(bldata:ID/TMDId, dml:annotation,literal(Label)), member('LABEL'=Label,TMDAttr)
342 ; member(element(Tag,_,Content),XMLContent),
343 blapsi_triple(Tag, Content, bldata:ID/TMDId, T)
344 ).
345
346 blapsi_triple(blapsi:Tag, [Text], Signal, rdf(Signal, Pred, literal(Lit))) :-
347 insist(blapsi_info(Tag, Text, Pred, Lit)).
348
349 % ------------ Source ---------------
350
351 smd_xml_triple(blapsi:audioObject,Attr,AOContent,SrcURI,rdf(SrcURI,Pred,literal(Lit))) :- !,
352 ( member(A=Lit,Attr), A\='ID', Pred=bldata:A
353 ; insist(multi,member(element(Tag,Attr1,Content),AOContent),no_content(blapsi:audioObject,AOContent)),
354 ao_tag_info(Tag,Attr1,Content,Pred,Lit)
355 ).
356
357 smd_xml_triple(blapsi:Tag,Content,SrcURI,rdf(SrcURI,blapsi:Tag,literal(Lit))) :- !,
358 insist(Content=[Lit],bad_content(blapsi:Tag,Content,smd_xml_triple)).
359
360 ao_tag_info(blapsi:primaryIdentifier,_,_,_,_) :- !, fail.
361 ao_tag_info(blapsi:secondaryIdentifier,_,_,_,_) :- !, fail.
362 ao_tag_info(blapsi:format,_,Content,blapsi:format,Lit) :- !,
363 insist(Content=[Lit],bad_content(Content,blapsi:format)).
364 ao_tag_info(blapsi:face, Attr, Content, Pred, Lit) :- !,
365 %insist(member('ID'=ID,Attr)),
366 insist(member('label'=Label,Attr)),
367 insist(Content=[],non_empty_content(blapsi:face,Content)),
368 ( fail % Pred=bldata:face_id, Lit=ID IGNORE FOR NOW
369 ; Pred=bldata:face_label, Lit=Label
370 ).
371 ao_tag_info(blapsi:physicalProperties, _, PPContent, Tag, Lit) :- !,
372 insist(multi,member(element(Tag,_,Content),PPContent),no_content(blapsi:physicalProperties)),
373 insist(Content=[Lit],bad_content(Tag,Content)).
374 ao_tag_info(Tag,_,_,_,_) :-
375 warning(unrecognised_tag(Tag,blapsi:audioObject)).
376
377 % identifier_pred('ASR Root ID',asr_root_id).
378 % identifier_pred('Sound Archive accession number',accession_number).
379
380
381
382 % -------------- FILE SECTION ---------------------------
383
384 file_triple(mets:'FLocat',Attr,LocContent,GAttr-FAttr,Dirs,ID,T) :- !,
385 ( member('MIMETYPE'=MimeType,FAttr)
386 -> audio_mimetype(MimeType),
387 insist(member('AMDID'=TMDId1,FAttr)),
388 insist(member('LOCTYPE'='URL',Attr)),
389 insist(member((xlink:href)=Link,Attr)),
390 insist(LocContent=[],non_empty(mets:'FLocat',LocContent)),
391 insist(member('USE'=Use1,FAttr);member('USE'=Use1,GAttr)),
392 % TMDId1 is sometimes "techMDxx digiprovXX" - need to get rid of second word
393 atomic_list_concat([TMDId|_],' ',TMDId1),
394 downcase_atom(Use1,Use),
395 ( T=rdf(bldata:ID/TMDId,dml:mimetype,literal(MimeType))
396 ; file_path_triple(bldata:ID/TMDId,Dirs,Link,T)
397 ; T=rdf(bldata:ID/TMDId,bldata:use,literal(Use))
398 )
399 ; insist(\+member('AMDID'=_,FAttr)),
400 insist(member('ID'=FileID,FAttr)),
401 insist(member('LOCTYPE'='URL',Attr)),
402 insist(member((xlink:href)=Link,Attr)),
403 sub_atom(Link,_,3,0,Ext),
404 audio_extension(Ext),
405 ( T=rdf(bldata:ID/FileID,mo:sampled_version_of,bldata:ID)
406 ; file_path_triple(bldata:ID/FileID,Dirs,Link,T)
407 ; member(element(Tag,_,Content),LocContent),
408 blapsi_triple(Tag,Content,bldata:ID/FileID,T)
409 )
410 ).
411
412 file_triple(mets:'Fcontent',_,_,_,_,_,_) :- !, fail.
413 file_triple(Tag,_,_,_,_,_,_) :- warning(unrecognised_tag(Tag,file)).
414
415 file_path_triple(URI,Dirs,Link,rdf(URI,bldata:path,literal(Path))) :-
416 atomics_to_string(Parts,"/",Link),
417 atomics_to_string(Dirs,"/",Dir),
418 append(_,[Name],Parts),
419 directory_file_path(Dir,Name,Path).
420
421 audio_mimetype('sound/wav').
422 audio_mimetype('sound/wma').
423 audio_mimetype('sound/mp3').
424 audio_mimetype('sound/ogg').
425
426 audio_extension(wav).
427 audio_extension(mp3).
428 audio_extension(wma).
429
430 % ---------------------------- BLAPSI INFO ---------------------------------
431
432 blapsi_info(file_sample, X, mo:sample_rate, type(xsd:float,SampleRate)) :- atom_number(X,SampleRate).
433 blapsi_info(file_resolution, X, mo:bitsPerSample, type(xsd:int,Bits)) :- atom_number(X,Bits).
434 blapsi_info(resolution, X, mo:bitsPerSample, type(xsd:int,Bits)) :- atom_number(X,Bits).
435 blapsi_info(file_channels, X, mo:channels, type(xsd:int,Channels)) :- atom_number(X,Channels).
436 blapsi_info(file_duration, X, mo:duration, type(xsd:float,Millis)) :- parse_duration_millis(X,Millis).
437 blapsi_info(file_bitrate, X, blapsi:file_bitrate, X). % !!! FIXME should be attached to file, not to signal
438 blapsi_info(file_size, X, blapsi:file_size, type(xsd:int,Size)) :- atom_number(X,Size).
439 blapsi_info(file_length, X, blapsi:file_length, type(xsd:int,Size)) :- atom_number(X,Size).
440
441 % --------------------------- SUPPORTING PREDICATES --------------------------
442
443 :- use_module(library(async)).
444 :- public scrape_all/2.
445 :- volatile_memo scrape_all(+options:list,-count:nonneg).
446 scrape_all(Opts,Count) :-
447 option(spacing(Sleep),Opts,1),
448 option(timeout(Timeout),Opts,10),
449 findall( R, rdf(R,dml:blpage,_), Rs),
450 with_progress_stack(map_with_progress(scrape_then_sleep(Sleep,Timeout),Rs,Ss)),
451 exclude(=(ok),Ss, Failures),
452 (Failures=[] -> length(Rs,Count); throw(scrape_errors(Failures))).
453
454 scrape_then_sleep(D,T,R,Status) :-
455 ( audio_link(R,_) -> Status=ok
456 ; memo:reify(bl_p2r:slow(D,call_with_time_limit(T,scrape_audio_link(R,_))),Status),
457 (Status=ex(abort(Reason)) -> throw(abort(Reason)); true)
458 ).
459
460 slow(Delay,Goal) :- call(Goal), sleep(Delay).
461
462 %% has_available_audio(+R:uri) is semidet.
463 %% has_available_audio(-R:uri) is nondet.
464 % True when R is a recording in the BL collection whose audio is
465 % publicly available.
466 has_available_audio(R) :-
467 rdf(R,dml:blpage,_),
468 scrape_audio_link(R,_).
469
470 :- public audio_link/2.
471 audio_link(URI,AudioURL) :-
472 browse(scrape_audio_link(URI,AudioURL)).
473
474 :- public audio_file/3.
475 audio_file(URI,Path,just(mp3)) :-
476 setting(audio_root,just(Root)),
477 ( rdf(URI,bldata:path,literal(RelPath)),
478 rdf(URI,rdf:type,mo:'Signal')
479 ; rdf(URI2,mo:sampled_version_of,URI),
480 rdf(URI2,dml:mimetype,literal('sound/mp3')),
481 rdf(URI2,bldata:path,literal(RelPath))
482 ),
483 atomic_list_concat([Root,RelPath],'/',Path).
484
485 :- volatile_memo scrape_audio_link(+atom,-atom).
486
487 scrape_audio_link(URI,AudioURL) :-
488 rdf(URI,dml:blpage,PageURL),
489 debug(bl_p2r,'Scraping audio link for ~w...',[URI]),
490 atom_concat('http://sounds.bl.uk/',_,PageURL),
491 http_get(PageURL,Doc,[]),
492 xpath(Doc,//li(@class=mainTrack)/a(@id),ID),
493 string_concat("MNT-",Key,ID),
494 string_concat('http://sounds.bl.uk/GT/',Key,AudioURL).
495
496 sandbox:safe_primitive(bl_p2r:audio_link(_,_)).
497 sandbox:safe_primitive(bl_p2r:scrape_audio_link(_,_)).
498
499 xpath(Prop,E,Path,Val) :- xpath(E,Path,I), xpath(I,/self(Prop),Val).
500
501 warning(Term) :- nl, print_message(warning,Term), fail.