diff cpack/dml/lib/bl_p2r.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/lib/bl_p2r.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,501 @@
+/* Part of DML (Digital Music Laboratory)
+	Copyright 2014-2015 Samer Abdallah, University of London
+	 
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this library; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+:- module(bl_p2r,[ audio_file/3, audio_link/2, scrape_audio_link/2, has_available_audio/1 ]).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(xmlarchive)).
+:- use_module(library(xpath)).
+:- use_module(library(settings)).
+:- use_module(library(sandbox)).
+:- use_module(library(fileutils)).
+:- use_module(library(termutils)).
+:- use_module(library(rdfutils)).
+:- use_module(library(insist)).
+:- use_module(library(memo)).
+:- use_module(entailment(p2r)).
+:- use_module(cliopatria(hooks)).
+:- use_module(library(http/http_client)).
+
+:- set_prolog_flag(double_quotes,string).
+
+/*
+   METS to RDF conversion.
+
+   Useful info:
+   0. Each top-level mets:mets element contains several sections.
+      1,4 x dmdSec     .. 1 x mdWrap
+      1   x amdSec     .. [ N x techMD, {0,1} x rightsMD, {0,1} x sourceMD ]
+      1   x fileSec    .. 1-4 x fileGrp .. 1-36 x file
+      1   x structLink .. {1,3} x smLink
+      2-3 x structMap  .. 1 x div
+
+   1. Each dmdSec only ever contains one mdWrap element
+   5. each techMD or sourceMD contains exactly 1 mdWrap
+   6. each rightsMD contains an mdRef
+   7. The rightsMD is not interesting to look at.
+   8. dmdSec mdWraps can be empty
+
+   List of all dmdSec tags in dataset
+      blapsi:id3album
+      blapsi:id3artist
+      blapsi:id3comment
+      blapsi:id3songtitle
+      blapsi:id3year
+      dc:description
+      dc:identifier
+      dc:language
+      dc:rights
+      dc:source
+      dc:subject
+      dc:title
+      dc:type
+      dcterms:created
+      dcterms:isPartOf
+      dcterms:spatial
+      dcterms:temporal
+      marcrel:CMP
+      marcrel:CND
+      marcrel:IVE
+      marcrel:IVR
+      marcrel:LYR
+      marcrel:PRF
+      marcrel:RCE
+      marcrel:SPK
+      mods:accessCondition
+      mods:identifier
+      mods:name
+      mods:recordInfo
+      mods:titleInfo
+
+   amdSec tags
+      blapsi:audioObject
+      blapsi:file_bitrate
+      blapsi:file_channels
+      blapsi:file_duration
+      blapsi:file_sample
+      blapsi:resolution
+
+   sourceMD .. blapsi:audioObject
+      blapsi:face 
+      blapsi:format 
+      blapsi:physicalProperties 
+         blapsi:face 
+         blapsi:format 
+         blapsi:physicalProperties 
+         blapsi:primaryIdentifier 
+         blapsi:secondaryIdentifier
+      blapsi:primaryIdentifier 
+      blapsi:secondaryIdentifier
+
+   would prefer to have mo predicates to foaf:Person resources for these.
+      marcrel:'CMP',dml:composer).
+      marcrel:'CND',dml:conductor).
+      marcrel:'PRF',dml:performer).
+
+ */
+
+:- setting(audio_root,ground,nothing,'Root directory of audio file collection').
+:- setting(archive_pattern,atom,'~/lib/datasets/mets/BL_metadata_complete.7z','Pattern to match METS metadata files').
+
+:- rdf_register_prefix(bldata,'http://sounds.bl.uk/resource/').
+:- rdf_register_prefix(marcrel,'http://id.loc.gov/vocabulary/relators/').
+:- rdf_register_prefix(blapsi,'http://sounds.bl.uk/blapsi#').
+:- rdf_register_prefix(blterms,'http://www.bl.uk/schemas/bibliographic/blterms#').
+
+% https://code.google.com/p/libarchive/issues/detail?id=328&colspec=ID%20Type%20Status%20Priority%20Milestone%20OpSys%20Owner%20Summary
+
+:- public import/0.
+import :- assert_all(bl_p2r).
+
+rdf(dml:blpage,rdfs:range,foaf:'Document') <== true.
+rdf(dml:blpage,rdfs:subPropertyOf,foaf:page) <== true.
+rdf(S,P,O) <== bl_archive_triple(T), once(expand_triple(T,rdf(S,P,O))).
+
+expand_triple(rdf(SS,PP,OO),rdf(S,P,O)) :-
+   uripattern:pattern_uri(SS,S),
+   uripattern:pattern_uri(PP,P),
+   (  OO=literal(_) -> rdf_global_object(OO,O)
+   ;  uripattern:pattern_uri(OO,O)
+   ).
+
+%% import_directory(+Dir:atom, +Graph:atom) is det.
+%
+%  Import contents of a given directory into a named RDF graph. 
+%  The directory must be a subdirectory of the directory named in the 
+%  bl_p2r:audio_root setting (whose value is either =|just(Dir)|= or 
+%  =|nothing|=).
+import_directory(Dir,Graph) :-
+   forall( directory_triple(Dir,T), (once(expand_triple(T,rdf(S,P,O))), rdf_assert(S,P,O,Graph))).
+
+directory_triple(Dir,T) :-
+   Ext=txt,
+   setting(audio_root,just(AudioRoot)),
+   atom_concat(AudioRoot,'/',Prefix),
+   find_files(under(AudioRoot/Dir),FullPath),
+   atom_concat(Prefix,Path,FullPath),
+   split_path(Path,Loc,Ext),
+   with_stream(Str,open(FullPath,read,Str), ext_loc_stream_triple(Ext,Loc,Str,T)).
+
+
+%% bl_archive_triple(-T:rdf) is nondet.
+%  This predicate generates triples from the metadata archive files whose names
+%  match the pattern stored in the setting bl_p2r:archive_pattern.
+bl_archive_triple(T) :-
+   setting(archive_pattern, ArchivePattern),
+   find_files(like(ArchivePattern), Archive),
+   with_archive_stream(Archive, Path, path_triple_stream(Path,T)).
+
+path_triple_stream(Path,T,S) :- 
+  status('Importing ~s',[Path]),
+  catch(( insist(split_path(Path,Loc,Ext)),
+           ext_loc_stream_triple(Ext,Loc,S,T)
+        ), Ex, (nl,print_message(warning,Ex),fail)).
+
+split_path(Path,Dirs-Base,Ext) :-
+   % split_string(Path,'/','',Parts),
+   atomic_list_concat(Parts,'/',Path),
+   exclude(ignore_dir,Parts,Parts1),
+   append(Dirs,[Name],Parts1),
+   sub_atom(Name,_,3,0,Ext), % NB this assumes three character extension
+   sub_atom(Name,0,_,4,Base). 
+
+%% ext_loc_stream_triple(+Ext:atom, +Loc:pair(list(atom), atom), +S:stream, -T:rdf) is nondet.
+%
+%  This predicate succeeds once for each RDF triple that can be derived from
+%  an archive stream whose path in the archive Loc=Dirs-Base consist of the directories
+%  Dirs and whose name is Base,'.',Ext. It understands entries with extensions
+%  xml and txt. XML streams are decoded as full METS documents. TXT streams are decoded
+%  as the DMD section of a METS document. Other extensions generate a warning and then fail.
+
+ext_loc_stream_triple(xml,Dirs-Base,S,T) :- !,
+   insist(load_xmlns(S,Doc)), 
+   insist(member(element(mets:mets,_,METS),Doc)),
+   (  insist(get_bl_url(Base,Dirs,URL)),
+      T=rdf(bldata:Base,dml:blpage,URL)
+   ;  T=rdf(bldata:Base,rdf:type,mo:'Signal')
+   ;  insist(multi,member(element(Tag,_,Content),METS),no_content(mets,Dirs-Base)),
+      mets_triple(Tag,Content,Dirs,Base,T)
+   ).
+
+ext_loc_stream_triple(txt,Dirs-Base,S,T) :- !,
+   % TXT streams appear to be LATIN-1 encoded, not UTF-8
+   Base\=combined, % exclude combined metadata files
+   set_stream(S,encoding(iso_latin_1)), 
+   insist(load_xmlns(S,Doc)),
+   txt_triple(Doc,Dirs,Base,T).
+
+ext_loc_stream_triple(Ext,Dirs-Base,_,_) :-
+   warning(unrecognised_extension(Ext,Dirs-Base)). 
+
+
+%% get_bl_url(+Name:atom,+Dirs:list(atom),-URL:atom) is det.
+%  Deduces the sounds.bl.uk URL from entry name and directory.
+get_bl_url(Name,Dirs,URL) :-
+   sub_atom(Name,0,4,_,CatCode),
+   category(CatCode,Category),
+   append(_,[Collection],Dirs),
+   atomic_list_concat([ 'http://sounds.bl.uk',
+                        Category,Collection,Name],'/',URL).
+
+category('021M','Oral-history').
+category('025M','World-and-traditional-music').
+category('023M','Jazz-and-popular-music').
+category('026M','Classical-music').
+category('028M','Jazz-and-popular-music').
+category('020A','Classical-music').
+
+ignore_dir('_Metadata').
+ignore_dir('_Non-music').
+ignore_dir('_Audio_Metadata').
+
+% --------------------- TOP LEVEL STRUCTURE -------------------------------
+
+unwrap([element(mets:mdWrap,MDAttr,[element(mets:xmlData,_,XMLContent)])],MDAttr,XMLContent).
+
+% for complete METS documents
+mets_triple(mets:dmdSec,DMDContent,_,ID,T) :- !, 
+   unwrap(DMDContent,MDAttr,XMLContent),
+   member('MDTYPE'='DC',MDAttr),
+   dmd_triple(XMLContent,bldata:ID,T).
+
+mets_triple(mets:amdSec,AMDContent,_,ID,T) :- !,
+   member(element(Tag,Attr,Content),AMDContent),
+   amd_triple(Tag,Attr,Content,ID,T).
+
+mets_triple(mets:fileSec,FileSecContent,Dirs,ID,T) :- !,
+   member(element(T1,GAttr,GroupContent),FileSecContent), insist(T1=mets:fileGrp),
+   member(element(T2,FAttr,FileContent),GroupContent), insist(T2=mets:file),
+   \+member('ID'='WEBRESOURCES',GAttr),
+   FileContent=[element(Tag,Attr,Content)],
+   file_triple(Tag,Attr,Content,GAttr-FAttr,Dirs,ID,T).
+
+mets_triple(mets:metsHdr,_,_,_,_) :- !, fail.
+mets_triple(mets:structLink,_,_,_,_) :- !, fail.
+mets_triple(mets:structMap,_,_,_,_) :- !, fail.
+mets_triple(Tag,_,_,_,_) :- warning(unrecognised_tag(Tag,mets:mets)).
+
+% for txt, partial XML documents
+txt_triple(_,_,ID,rdf(bldata:ID,rdf:type,mo:'Signal')).
+txt_triple(Doc,Dirs,ID,T) :- 
+   insist(multi,member(element(Tag,_,Content),Doc),no_content(txt)),
+   txt_tag_triple(Tag,Content,Dirs,ID,T).
+
+identifier_file_ext(F,F1,mp3) :- sub_atom(F,Bef,_,_,'.mp3'), !, insist(sub_atom(F,0,Bef,_,F1)).
+identifier_file_ext(F,F1,wav) :- sub_atom(F,Bef,_,0,'.wav'), !, insist(sub_atom(F,0,Bef,_,F1)).
+identifier_file_ext(F,F1,m4a) :- sub_atom(F,Bef,_,0,'.m4a'), !, insist(sub_atom(F,0,Bef,_,F1)).
+
+txt_tag_triple(dc:identifier, [F], Dirs, ID, rdf(bldata:ID, bldata:path, literal(Path))) :- !,
+   % NB some of the txt files have the file name written twice. Hence I am going to discard
+   % everything after the first dot. Relies on sub_atom returning matches starting from the beginning
+   (  identifier_file_ext(F,F1,Ext)
+   -> file_name_extension(F1,Ext,Name),
+      atomics_to_string(Dirs,"/",Dir),
+      directory_file_path(Dir,Name,Path)
+   ;  print_message(warning,txt_triple_identifier_fail(ID,F)),
+      fail
+   ).
+
+% !!! MUSICALS only. Should not really have mo:duration in them either...
+txt_tag_triple(dml:rating,Content, _, ID, rdf(bldata:ID, dml:rating, literal(Content))) :- !.
+txt_tag_triple(mo:duration,Content, _, ID, rdf(bldata:ID, mo:duration, literal(type(xsd:float,Millis)))) :- !,
+   insist(Content=[Dur],bad_content(Content,mo:duration)),
+   insist(atom_number(Dur,Millis)).
+txt_tag_triple(blapsi:file_duration,Content, _, ID, rdf(bldata:ID, mo:duration, literal(type(xsd:float,Millis)))) :- !,
+   insist(Content=[Dur],bad_content(Content,blapsi:file_duration)),
+   % insist(parse_duration_millis(Dur,Millis)).
+   (  parse_duration_millis(Dur,Millis) -> true
+   ;  warning(bad_duration(ID,Dur))
+   ).
+txt_tag_triple(Tag,Content,_,ID,T) :- dmd_tag_triple(Tag,Content,bldata:ID,T).
+
+
+% --------------- Document meta data --------------------------
+
+dmd_triple(DMD,URI,T) :-
+   member(element(Tag,_,Content),DMD),
+   dmd_tag_triple(Tag,Content,URI,T).
+
+dmd_tag_triple(dcterms:contributor,Content,ID,T) :- !, dmd_triple(Content,bldata:ID,T).
+dmd_tag_triple(dc:contributor,Content,ID,T) :- !, dmd_triple(Content,bldata:ID,T).
+dmd_tag_triple(marcrel:REL,Content,URI,rdf(URI,marcrel:Rel,literal(Lit))) :- !, 
+   Content=[Lit],%empty_tag(marcrel:REL,Content)),
+   downcase_atom(REL,Rel).
+dmd_tag_triple(Tag,Content,URI,rdf(URI,Tag,literal(Lit))) :- keep_tag(Tag), !, Content=[Lit].
+dmd_tag_triple(Tag,_,_,_) :- ignore_tag(Tag), !, fail.
+dmd_tag_triple(Tag,_Content,URI,_) :- warning(unrecognised_tag(Tag,dmd,URI)).
+
+% !!!FIXME - sometimes dates are given in D/M/Y instead of Y-M-D
+keep_tag(dc:title).
+keep_tag(dc:description).
+keep_tag(dc:source).
+keep_tag(dc:subject).
+keep_tag(dc:language).
+keep_tag(dc:created).
+keep_tag(dcterms:language).
+keep_tag(dcterms:abstract).
+keep_tag(dcterms:created).
+keep_tag(dcterms:spatial).
+keep_tag(dcterms:temporal).
+keep_tag(dcterms:extent). % !!!FIXME need to parse this
+keep_tag(blterms:mechanism).
+keep_tag(dcterms:isPartOf).
+keep_tag(blapsi:format).
+
+ignore_tag(dc:identifier).
+ignore_tag(blapsi:marker).
+ignore_tag(dc:rights).
+ignore_tag(dc:type).
+ignore_tag(rdf:about).
+ignore_tag('ARK').
+
+% ------------------------- ADMINISTRATIVE METADATA SECTION -----------------------------
+
+amd_triple(mets:sourceMD,_,SMDContent,ID,T) :-
+   insist(unwrap(SMDContent,_,XMLContent),no_xml_content(SMDContent,smd)),
+   atom_concat(ID,'#source',Src),
+   (  T=rdf(bldata:ID,dml:source,bldata:Src)
+   ;  insist(multi,member(element(Tag,Attr,Content),XMLContent),no_xml_content(smd)),
+      smd_xml_triple(Tag,Attr,Content,bldata:Src,T)
+   ).
+
+amd_triple(mets:techMD,Attr,TMDContent,ID,T) :-
+   insist(member('ID'=TMDId,Attr)),
+   unwrap(TMDContent,_,XMLContent),
+   (  T=rdf(bldata:ID/TMDId, mo:sampled_version_of, bldata:ID)
+%  ;  T=rdf(bldata:ID/TMDId, dml:annotation,literal(Label)), member('LABEL'=Label,TMDAttr)
+   ;  member(element(Tag,_,Content),XMLContent),
+      blapsi_triple(Tag, Content, bldata:ID/TMDId, T)
+   ).
+
+blapsi_triple(blapsi:Tag, [Text], Signal, rdf(Signal, Pred, literal(Lit))) :-
+   insist(blapsi_info(Tag, Text, Pred, Lit)).
+
+% ------------ Source ---------------
+
+smd_xml_triple(blapsi:audioObject,Attr,AOContent,SrcURI,rdf(SrcURI,Pred,literal(Lit))) :- !,
+   (  member(A=Lit,Attr), A\='ID', Pred=bldata:A
+   ;  insist(multi,member(element(Tag,Attr1,Content),AOContent),no_content(blapsi:audioObject,AOContent)),
+      ao_tag_info(Tag,Attr1,Content,Pred,Lit)
+   ).
+
+smd_xml_triple(blapsi:Tag,Content,SrcURI,rdf(SrcURI,blapsi:Tag,literal(Lit))) :- !,
+   insist(Content=[Lit],bad_content(blapsi:Tag,Content,smd_xml_triple)).
+
+ao_tag_info(blapsi:primaryIdentifier,_,_,_,_) :- !, fail.
+ao_tag_info(blapsi:secondaryIdentifier,_,_,_,_) :- !, fail.
+ao_tag_info(blapsi:format,_,Content,blapsi:format,Lit) :- !, 
+   insist(Content=[Lit],bad_content(Content,blapsi:format)).
+ao_tag_info(blapsi:face, Attr, Content, Pred, Lit) :- !,
+   %insist(member('ID'=ID,Attr)),
+   insist(member('label'=Label,Attr)),
+   insist(Content=[],non_empty_content(blapsi:face,Content)),
+   (  fail % Pred=bldata:face_id, Lit=ID    IGNORE FOR NOW
+   ;  Pred=bldata:face_label, Lit=Label
+   ).
+ao_tag_info(blapsi:physicalProperties, _, PPContent, Tag, Lit) :- !,
+   insist(multi,member(element(Tag,_,Content),PPContent),no_content(blapsi:physicalProperties)),
+   insist(Content=[Lit],bad_content(Tag,Content)).
+ao_tag_info(Tag,_,_,_,_) :- 
+   warning(unrecognised_tag(Tag,blapsi:audioObject)).
+
+% identifier_pred('ASR Root ID',asr_root_id).
+% identifier_pred('Sound Archive accession number',accession_number).
+
+
+
+% -------------- FILE SECTION ---------------------------
+
+file_triple(mets:'FLocat',Attr,LocContent,GAttr-FAttr,Dirs,ID,T) :- !,
+   (  member('MIMETYPE'=MimeType,FAttr)
+   -> audio_mimetype(MimeType),
+      insist(member('AMDID'=TMDId1,FAttr)), 
+      insist(member('LOCTYPE'='URL',Attr)),
+      insist(member((xlink:href)=Link,Attr)),
+      insist(LocContent=[],non_empty(mets:'FLocat',LocContent)),
+      insist(member('USE'=Use1,FAttr);member('USE'=Use1,GAttr)),
+      % TMDId1 is sometimes "techMDxx digiprovXX" - need to get rid of second word
+      atomic_list_concat([TMDId|_],' ',TMDId1),
+      downcase_atom(Use1,Use),
+      (  T=rdf(bldata:ID/TMDId,dml:mimetype,literal(MimeType))
+      ;  file_path_triple(bldata:ID/TMDId,Dirs,Link,T)
+      ;  T=rdf(bldata:ID/TMDId,bldata:use,literal(Use))
+      )
+   ;  insist(\+member('AMDID'=_,FAttr)),
+      insist(member('ID'=FileID,FAttr)),
+      insist(member('LOCTYPE'='URL',Attr)),
+      insist(member((xlink:href)=Link,Attr)),
+      sub_atom(Link,_,3,0,Ext),
+      audio_extension(Ext),
+      (  T=rdf(bldata:ID/FileID,mo:sampled_version_of,bldata:ID)
+      ;  file_path_triple(bldata:ID/FileID,Dirs,Link,T)
+      ;  member(element(Tag,_,Content),LocContent),
+         blapsi_triple(Tag,Content,bldata:ID/FileID,T)
+      )
+   ).
+
+file_triple(mets:'Fcontent',_,_,_,_,_,_) :- !, fail.
+file_triple(Tag,_,_,_,_,_,_) :- warning(unrecognised_tag(Tag,file)).
+
+file_path_triple(URI,Dirs,Link,rdf(URI,bldata:path,literal(Path))) :-
+   atomics_to_string(Parts,"/",Link),
+   atomics_to_string(Dirs,"/",Dir),
+   append(_,[Name],Parts),
+   directory_file_path(Dir,Name,Path).
+
+audio_mimetype('sound/wav').
+audio_mimetype('sound/wma').
+audio_mimetype('sound/mp3').
+audio_mimetype('sound/ogg').
+
+audio_extension(wav).
+audio_extension(mp3).
+audio_extension(wma).
+
+% ---------------------------- BLAPSI INFO ---------------------------------
+
+blapsi_info(file_sample,     X, mo:sample_rate,   type(xsd:float,SampleRate)) :- atom_number(X,SampleRate).
+blapsi_info(file_resolution, X, mo:bitsPerSample, type(xsd:int,Bits)) :- atom_number(X,Bits).
+blapsi_info(resolution,      X, mo:bitsPerSample, type(xsd:int,Bits)) :- atom_number(X,Bits).
+blapsi_info(file_channels,   X, mo:channels,      type(xsd:int,Channels)) :- atom_number(X,Channels).
+blapsi_info(file_duration,   X, mo:duration,      type(xsd:float,Millis)) :- parse_duration_millis(X,Millis).
+blapsi_info(file_bitrate,    X, blapsi:file_bitrate, X). % !!! FIXME should be attached to file, not to signal
+blapsi_info(file_size,       X, blapsi:file_size, type(xsd:int,Size)) :- atom_number(X,Size).
+blapsi_info(file_length,     X, blapsi:file_length, type(xsd:int,Size)) :- atom_number(X,Size).
+
+% --------------------------- SUPPORTING PREDICATES --------------------------
+
+:- use_module(library(async)).
+:- public scrape_all/2.
+:- volatile_memo scrape_all(+options:list,-count:nonneg).
+scrape_all(Opts,Count) :-
+   option(spacing(Sleep),Opts,1),
+   option(timeout(Timeout),Opts,10),
+   findall( R, rdf(R,dml:blpage,_), Rs),
+   with_progress_stack(map_with_progress(scrape_then_sleep(Sleep,Timeout),Rs,Ss)),
+   exclude(=(ok),Ss, Failures),
+   (Failures=[] -> length(Rs,Count); throw(scrape_errors(Failures))).
+
+scrape_then_sleep(D,T,R,Status) :- 
+   (  audio_link(R,_) -> Status=ok
+   ;  memo:reify(bl_p2r:slow(D,call_with_time_limit(T,scrape_audio_link(R,_))),Status),
+      (Status=ex(abort(Reason)) -> throw(abort(Reason)); true)
+   ).
+
+slow(Delay,Goal) :- call(Goal), sleep(Delay).
+
+%% has_available_audio(+R:uri) is semidet.
+%% has_available_audio(-R:uri) is nondet.
+%  True when R is a recording in the BL collection whose audio is 
+%  publicly available.
+has_available_audio(R) :-
+   rdf(R,dml:blpage,_),
+   scrape_audio_link(R,_).
+
+:- public audio_link/2.
+audio_link(URI,AudioURL) :- 
+   browse(scrape_audio_link(URI,AudioURL)).
+
+:- public audio_file/3.
+audio_file(URI,Path,just(mp3)) :-
+   setting(audio_root,just(Root)),
+   (  rdf(URI,bldata:path,literal(RelPath)),
+      rdf(URI,rdf:type,mo:'Signal')
+   ;  rdf(URI2,mo:sampled_version_of,URI),
+      rdf(URI2,dml:mimetype,literal('sound/mp3')),
+      rdf(URI2,bldata:path,literal(RelPath))
+   ),
+   atomic_list_concat([Root,RelPath],'/',Path).
+
+:- volatile_memo scrape_audio_link(+atom,-atom).
+
+scrape_audio_link(URI,AudioURL) :-
+   rdf(URI,dml:blpage,PageURL),
+   debug(bl_p2r,'Scraping audio link for ~w...',[URI]),
+   atom_concat('http://sounds.bl.uk/',_,PageURL),
+   http_get(PageURL,Doc,[]),
+   xpath(Doc,//li(@class=mainTrack)/a(@id),ID),
+   string_concat("MNT-",Key,ID),
+   string_concat('http://sounds.bl.uk/GT/',Key,AudioURL).
+
+sandbox:safe_primitive(bl_p2r:audio_link(_,_)).
+sandbox:safe_primitive(bl_p2r:scrape_audio_link(_,_)).
+ 
+xpath(Prop,E,Path,Val) :- xpath(E,Path,I), xpath(I,/self(Prop),Val).
+
+warning(Term) :- nl, print_message(warning,Term), fail.