Mercurial > hg > musicweb-iswc2016
view musicweb.tex @ 13:ebb555f93f05
started MusicWeb section
author | alo |
---|---|
date | Fri, 29 Apr 2016 23:36:36 +0100 |
parents | ac5876e99049 |
children | 26c3962ac371 |
line wrap: on
line source
\documentclass{llncs} \usepackage{graphicx} \usepackage{amsmath,cite} \usepackage{pifont} \usepackage{listings} \usepackage{courier} \usepackage{adjustbox} \usepackage{url} \usepackage[ngerman,english]{babel} \usepackage[utf8]{inputenc} \newcommand{\cmark}{\fontsize{14}{14}\textbullet\selectfont} \usepackage[usenames,dvipsnames]{color} \lstdefinelanguage{sparql} { morekeywords={SELECT, WHERE, a, COUNT, GROUP, ORDER, BY, HAVING, as, DISTINCT, OPTIONAL}, sensitive=true, morecomment=[l][\color{ForestGreen}\bfseries\selectfont]{\#\ }, moredelim=[s][\color{ForestGreen}\bfseries\selectfont]{?}{\ }, %variable moredelim=[s][\color{NavyBlue}\bfseries\selectfont]{@}{\ }, %prefix moredelim=[s][\color{OrangeRed}\bfseries\selectfont]{<}{>}, %red URIs morestring=[b][\color{ForestGreen}]\", %green strings morestring=[b]", } \lstdefinestyle{sparql}{ language=sparql, basicstyle=\footnotesize\ttfamily\bfseries, % basicstyle=\ttfamily, keywordstyle=\color{blue}\bfseries, identifierstyle=, commentstyle=\color{ForestGreen}\bfseries, stringstyle=\color{BlueGreen}\ttfamily, showstringspaces=false, emph={context},emphstyle=\color{BrickRed}\bfseries, frame = lines, %aboveskip = 15pt, %framexbottommargin = 5pt, %framextopmargin = 5pt, aboveskip = 10pt, framexbottommargin = 2pt, framextopmargin = 2pt, captionpos=b, escapechar=~, breaklines=true, breakatwhitespace=true, breakindent=20pt, xleftmargin=5pt, xrightmargin=5pt, basewidth={0.55em,0.45em}, %basewidth={0.50em,0.6em}, fontadjust=true, % lineskip={-1.5pt}, % columns=[c]spaceflexible, %make the long artist URI fit %float = h!, } \renewcommand{\lstlistingname}{{\bf Listing}} \lstdefinelanguage{ttl} { morekeywords={a}, sensitive=true, % morecomment=[l][\color{ForestGreen}\bfseries\fontsize{10}{2}\selectfont]{\#\ }, % moredelim=[s][\color{ForestGreen}\bfseries\fontsize{10}{2}\selectfont]{\#\ }{\ }, % moredelim=[s][\color{NavyBlue}\bfseries\fontsize{10}{2}\selectfont]{@}{\ }, %prefix % 
moredelim=[s][\color{OrangeRed}\bfseries\fontsize{10}{2}\selectfont]{<}{>}, %red URIs morecomment=[l][\color{ForestGreen}\ttfamily\bfseries\selectfont]{\#\ }, moredelim=[s][\color{NavyBlue}\ttfamily\bfseries\selectfont]{@}{\ }, %prefix moredelim=[s][\color{OrangeRed}\ttfamily\bfseries\selectfont]{<}{>}, %red URIs morestring=[b][\color{ForestGreen}]\", %green strings morestring=[b]", } \lstdefinestyle{ttl}{ language=ttl, basicstyle=\footnotesize\ttfamily\bfseries, keywordstyle=\color{blue}\bfseries, identifierstyle=, commentstyle=\color{ForestGreen}\bfseries, stringstyle=\color{BlueGreen}\bfseries, showstringspaces=false, emph={context},emphstyle=\color{BrickRed}\bfseries, frame = lines, framexbottommargin = 5pt, framextopmargin = 5pt, captionpos=b, escapechar=~, breaklines=true, breakatwhitespace=true, breakindent=20pt, aboveskip = 15pt, xleftmargin=5pt, xrightmargin=5pt, basewidth={0.55em,0.45em}, % basewidth={0.50em,0.6em}, fontadjust=true, lineskip={-1.5pt}, % columns=[c]spaceflexible, %make the long artist URI fit % float = h!, } \lstdefinelanguage{json} { sensitive=true, %morecomment=[l]{@}, moredelim=[s][\color{ForestGreen}]{?}{\ }, %green variables moredelim=[s][\color{OrangeRed}\fontsize{7}{2}\selectfont]{<}{>}, %red URIs morestring=[b][\color{Gray}]\", morestring=[b]", } \lstdefinestyle{json}{ language=json, keywords={type,context}, basicstyle=\fontsize{7}{8}\bfseries\ttfamily, keywordstyle=\color{blue}\bfseries, identifierstyle=, commentstyle=\color{Gray}\bfseries, stringstyle=\color{OliveGreen}\ttfamily, showstringspaces=false, emph={context},emphstyle=\color{BrickRed}\bfseries, frame = lines, framexbottommargin = 5pt, framextopmargin = 5pt, tabsize=4, captionpos=b, escapechar=~, breaklines=false, xleftmargin=5pt, xrightmargin=5pt, basewidth={0.50em,0.45em}, %basewidth={0.50em,0.6em}, fontadjust=true, columns=[c]spaceflexible, %make the long artist URI fit float = ht, } \renewcommand{\labelitemi}{$\bullet$} \begin{document} \mainmatter \title{MusicWeb: 
an open linked semantic platform for music metadata} \author{Mariano Mora-Mcginity \and Alo Allik \and Gy\"orgy Fazekas \and Mark Sandler } % \institute{Queen Mary University of London, \\ \email{\{m.mora-mcginity, a.allik, g.fazekas, mark.sandler\}@qmul.ac.uk}} \maketitle \begin{abstract} % MusicWeb is a web site that provides users with a browsing, searching and linking platform for music artist and group information by integrating open linked semantic metadata from various Semantic Web, music recommendation and social media data sources, including DBpedia.org, sameas.org, MusicBrainz, the Music Ontology, Last.FM, Youtube, and Echonest. The front portal includes suggested links to selected artists and a search functionality from where users can navigate to individual artist pages. Each artist page contains a biography, links to online audio and a video player with a side menu displaying a selection of Youtube videos. Furthermore, it provides lists of YAGO categories linking each artist to other artists by various commonalities such as style, geographical location, instrumentation, record label as well as more obscure categories, for example, artists who have received the same award, have shared the same fate, or belonged to the same organisation or religion. The artist connections are further enhanced by thematic analysis of journal articles and blog posts as well as content-based music information retrieval similarity measures. This paper presents MusicWeb, a novel platform for linking music artists within a web-based application for discovering connections between them. MusicWeb provides a browsing experience using connections that are either extra-musical or tangential to music, such as the artists' political affiliation or social influence, or intra-musical, such as the artists' main instrument or most favoured musical key. 
The platform integrates open linked semantic metadata from various Semantic Web, music recommendation and social media data sources including DBpedia.org, sameas.org, MusicBrainz, the Music Ontology, Last.FM and Youtube as well as content-derived information. The front portal includes suggested links to selected artists and a search functionality from where users can navigate to individual artist pages. Each artist page contains a biography and links to online audio and video resources. Connections are made using YAGO categories linking artists by various commonalities such as style, geographical location, instrumentation, record label as well as more obscure categories, for instance, artists who have received the same award, have shared the same fate, or belonged to the same organisation or religion. These connections are further enhanced by thematic analysis of journal articles and blog posts as well as content-based similarity measures focussing on high level musical categories. \keywords{Semantic Web, Linked Open Data, music metadata, semantic audio analysis, music information retrieval } \end{abstract} \section{Introduction}\label{sec:introduction} In recent years we have witnessed an explosion of information, a consequence of millions of users producing and consuming web resources. Researchers and industry have recognised the potential of this data, and have endeavoured to develop methods to handle such a vast amount of information: to understand and manage it, to transform it into knowledge. Multimedia content providers have devoted a lot of energy to analysing consumer preference, in an effort to offer customised user experiences. Music stream services, for instance, carry out extensive analysis trying to identify patterns in users' listening habits, and researchers are striving to refine multimedia recommendation algorithms. 
There are two main approaches to music recommendation~\cite{Song2012}: the first is known as \emph{collaborative filtering}~\cite{Su2009}, which recommends music items based on the choices of similar users. The second model is based on audio content analysis, or \emph{music information retrieval}. The task here is to extract low to high-level audio features such as tempo, key, metric structure, melodic and harmonic sequences, instrument recognition and song segmentation, which are then used to measure music similarity~\cite{Aucoutourier2002}, to carry out genre classification or to identify the mood of the song~\cite{Kim2010}. Music discovery websites such as Last.fm\footnote{\url{http://www.last.fm}}, Allmusic\footnote{\url{http://www.allmusic.com}} or Pandora\footnote{\url{http://www.pandora.com}} have successfully developed hybrid systems which combine both approaches. There are, however, limitations in both approaches to music recommendation. Most users participating in (or whose data is used to analyse) collaborative filtering listen to a very small percentage of the music available, the so-called ``short-tail'', whereas the much larger ``long-tail'' remains mainly unknown~\cite{Celma2010}. These systems will show a bias towards music that is already consumed by many listeners. Suggesting already popular music will increase the likelihood of it being recommended to new users, thus creating a \emph{rich-club phenomenon}~\cite{Zhou2004} or what is known as \emph{cumulative advantage}. Also, content analysis of audio features is mainly applied to songs: systems can recommend similar tracks, but generally know nothing about similar artists. Many music listeners follow artists because of their style and would be interested in music from similar artists. It is very hard to pinpoint what exactly makes two artists ``similar'': very often notions of similarity are based on social and cultural issues, rather than a precise definition of style. 
To many music lovers discovering new music, or music they weren't aware of, is an integral part of enjoying a musical experience, and they appreciate expanding their musical taste. Lee and Price~\cite{Lee2015} identify seven different \emph{personas} which typify music service consumption. Two such personas, for instance, the ``active curator'' and the ``music epicurean'' characteristically spend a long time hunting for new music, whereas the ``wanderer'' enjoys the discovery process itself, trying out new things with an open mind. Automatic musical discovery is a very challenging problem~\cite{Jennings2007}. There are many different ways in which people are attracted to new artists: word of mouth, their network of friends, music magazines or blogs, songs heard in a movie or a T.V. commercial, they might be interested in a musician who has played with another artist or been mentioned as an influence, etc. The route from listening to one artist and discovering a new one would sometimes seem very disconcerting were it to be drawn on paper. A listener is not so much following a map as exploring new territory, with many possible forks and shortcuts. Music discovery systems generally disregard this kind of information, often because it is very nuanced and difficult to parse and interpret. All these sources of information are in fact music metadata, data about the music data itself. Pachet identifies three types of musical metadata~\cite{Pachet2005}: \begin{enumerate} \item Editorial metadata: information that is provided manually by authoritative experts. There is a wide range of potential producers of this kind of data, from record labels to collaborative schemes, as well as different kinds of data, from which musician played in which song to tour info, to artists' biography. \item Cultural metadata: information which is produced by the environment or culture. 
This is data that is not explicitly entered into some information system, but rather is contained, and must be extracted from, other information sources, such as user trends, Google searches, articles and magazines, word associations in blogs, etc. \item Acoustic metadata: data extracted from audio files using music information retrieval methods. \end{enumerate} MusicWeb is an application which offers the user the possibility of exploring editorial, cultural and musical links between artists. It gathers, extracts and manages musical metadata from many different sources and connects them in informative ways. This paper deals with the different ways in which MusicWeb collects these resources and shapes them into high-level information. We will first review various knowledge-based web resources available to MusicWeb. We will then introduce the application itself and detail the architecture to analyse and extract data. Before the final conclusions and discussion of future work we will analyse the experience of interfacing with the application and how users can explore and discover new musical paths. \section{Background}\label{sec:background} \begin{itemize} \item related work \item very brief intro to the role of music related data sources on the web and what they are \end{itemize} \section{MusicWeb architecture} MusicWeb provides a browsing experience using connections that are either extra-musical or tangential to music, such as the artists' political affiliation or social influence, or intra-musical, such as the artists' main instrument or most favoured musical key. The platform integrates open linked semantic metadata from various Semantic Web, music recommendation and social media data sources as well as content-derived information. The front portal includes suggested links to selected artists and a search functionality from where users can navigate to individual artist pages. Each artist page contains a biography and links to online audio and video resources. 
Connections are made using YAGO categories linking artists by various commonalities such as style, geographical location, instrumentation, record label as well as more obscure categories, for instance, artists who have received the same award, have shared the same fate, or belonged to the same organisation or religion. These connections are further enhanced by thematic analysis of journal articles and blog posts as well as content-based similarity measures focussing on high level musical categories. \begin{figure}[!ht] \centering \includegraphics[scale=0.5]{graphics/architecture.pdf}%\vspace{-5pt} \caption{MusicWeb architecture}\vspace{-10pt} \label{fig:layers} \end{figure} The MusicWeb API uses a number of LOD resources and Semantic Web ontologies to process and aggregate information about artists: \begin{itemize} \item[] \textbf{MusicBrainz} is an online, open, crowd-sourced music encyclopedia that provides reliable and unambiguous identifiers for entities in music publishing metadata, including artists, releases, recordings, performances, etc. Besides the identifiers, which facilitate artist linking, the artist search functionality of MusicWeb relies on the Web services provided by MusicBrainz. \item[] \textbf{DBPedia} is a crowd-sourced community effort to extract structured information from Wikipedia and make it available on the Web. MusicWeb constructs the majority of an artist profile from this resource, including the biography and most of the linking categories to other artists. \item[] \textbf{Sameas.org} manages URI co-references on the Web of Data, which is useful when a MusicBrainz artist identifier needs to be associated with the equivalent DBpedia resource. \item[] \textbf{Youtube} API is used to query associated video content for the artist panel. \item[] \textbf{Echonest} was a music metadata and information retrieval platform for developers and media companies, which has since been integrated into Spotify. 
The Echonest API is used for recommendations in MusicWeb. \item[] \textbf{Last.fm} is an online music social network and recommender system that collects information about users' listening habits and makes available crowd-sourced tagging data through an API. MusicWeb uses the Last.fm recommendation engine to enrich the user experience. \item[] \textbf{YAGO} is a semantic knowledge base that collates information and structure from Wikipedia, WordNet and GeoNames with high accuracy~\cite{Suchanek:WWW:2007}. The ontology makes use of the categories defined in Wikipedia as a principle for semantic linking of entities, while exploiting the clean taxonomy of concepts from WordNet. \item[] \textbf{the Music Ontology} \end{itemize} A user search request accesses MusicBrainz search services that return a list of MusicBrainz artist identifiers. The MusicWeb API architecture relies on sameas.org co-references to associate these identifiers with DBpedia artist links. %% - Brief description of what it is and what it does %% - Architecture (with a nice diagram) [Alo, can you make this in Omnigraffle? I can then adjust/refine] %% - More details about individual components we use (Yago, musicbrainz, sameas, dbpedia etc.) 
%% - Brief intro to components we developed for artist similarity (just to bridge to Section 4) \section{Artist similarity} \begin{enumerate} \item Socio-cultural linkage (using linked data) \item Artist similarity by NLP [needs a better subtitle] : MUSIC (picture of interface) \begin{itemize} \item Semantic analysis\cite{Landauer1998} \item Topic modeling\cite{Blei2012} \item Entity recognition \item Hierarchical bayesian modeling \item Authors, journals, keywords, tags \end{itemize} \item Artist similarity by features [i can write this part] \end{enumerate} \section{Content-based information retrieval}\label{sec:mir} \section{Discussion}\label{sec:discussion} \section{Conclusions}\label{sec:conclusions} % % ---- Bibliography ---- % \vspace{-1em}\begin{thebibliography}{5} % \bibitem{Song2012} Y.~Song, S.~Dixon and M.~Pearce. \newblock A survey of music recommendation systems and future perspectives \newblock In {\em Proceedings of the 9th International Symposium on Computer Music Modelling and Retrieval}, 2012. \bibitem{Su2009} X.~Su and T. M. ~Khoshgoftaar. \newblock A Survey of Collaborative Filtering Techniques. \newblock In {\em Advances in Artificial Intelligence,(Section 3):1–19}, 2009. \bibitem{Aucoutourier2002} J. J.~Aucouturier and F~Pachet. \newblock Music Similarity Measures: What is the Use. \newblock In {\em Proceedings of the ISMIR, pages 157–163}, 2002. \bibitem{Kim2010} Y.E.~Kim, E.M.~Schmidt, R.~Migneco, B.G.~Morton, P.~Richardson, J.~Scott, J.A.~Speck and D.~Turnbull. \newblock Music Emotion Recognition: A State of the Art Review. \newblock In {\em Proc. of the 11th Intl. Society for Music Information Retrieval (ISMIR) Conf}, 2010. \bibitem{Celma2010} \`O.~Celma \newblock Music Recommendation and Discovery:The Long Tail, Long Fail, and Long Play in the Digital Music Space. \newblock Springer Verlag, Heidelberg, 2010. \bibitem{Zhou2004} S.~Zhou and R. 
J.~Mondrag\'on \newblock The rich-club phenomenon in the Internet topology \newblock In {\em Communications Letters, IEEE}, 2004 \bibitem{Lee2015} J. H.~Lee and R.~Price \newblock Understanding users of commercial music services through personas: design implications. \newblock In {\em Proceedings of the 16th ISMIR Conference}, M\'alaga, Spain, 2015 \bibitem{Jennings2007} D.~Jennings. \newblock Net, Blogs and Rock ’n’ Rolls: How Digital Discovery Works and What It Means for Consumers. \newblock Nicholas Brealey Pub., 2007 \bibitem{Pachet2005} F.~Pachet \newblock Knowledge management and musical metadata. \newblock In {\em Encyclopedia of Knowledge Management}, Schwartz, D. Ed. Idea Group, 2005 \bibitem{FazekasRJS10_OMRAS2} G.~Fazekas, Y.~Raimond, K.~Jakobson, and M.~Sandler. \newblock An overview of semantic web activities in the {OMRAS2} project. \newblock {\em Journal of New Music Research (JNMR)}, 39(4), 2010. \bibitem{Porter:ISMIR:15} A.~Porter, D.~Bogdanov, R.~Kaye, R.~Tsukanov, and X.~Serra. \newblock Acousticbrainz: a community platform for gathering music information obtained from audio. \newblock In {\em 16th International Society for Music Information Retrieval (ISMIR) Conference}, 2015. \bibitem{DBLP:conf/ismir/RaimondASG07} Y~Raimond, S.~Abdallah, M.~Sandler, and F.~Giasson. \newblock The music ontology. \newblock In {\em Proceedings of the 8th International Conference on Music Information Retrieval, ISMIR 2007, Vienna, Austria, September 23-27}, 2007. \bibitem{Suchanek:WWW:2007} F.~Suchanek, G.~Kasneci, and G.~Weikum. \newblock YAGO: A Core of Semantic Knowledge Unifying WordNet and Wikipedia. \newblock In {\em Proceedings of the 16th international World Wide Web conference, May 8–12, 2007, Banff, Alberta, Canada.}, 2007. \bibitem{Landauer1998} T.~Landauer, P.~Folt, and D.~Laham. \newblock An introduction to latent semantic analysis \newblock In {\em Discourse processes, 25}, 1998 \bibitem{Blei2012} D.~Blei, A.~ Ng, and M.I.~Jordan. 
\newblock Latent Dirichlet Allocation. \newblock In {\em Journal of Machine Learning Research, 3(4-5), 993–1022}, 2012 \end{thebibliography} \end{document}