# view Yading/new_track_data.py @ 13:844d341cf643 tip
#
# Back up before ISMIR
# author Yading Song <yading.song@eecs.qmul.ac.uk>
# date Thu, 31 Oct 2013 13:17:06 +0000
# parents 8c29444cb5fd
# children
# line wrap: on
# line source


import os 
import sys
import pickle 

from xml.dom import minidom
import urllib2 #call url function
import time
import sqlite3
import cPickle

import pyechonest # import echonest API 
import pylast # import last.fm API
from pyechonest import artist, catalog, config, playlist # How it works instead of pyechonest


#change the path to 7-digital python library
sys.path.append("7digital-python/lib/")
import py7digital

""" API Key from echonest """
#Your API Key: SFXNKMTRAZ3ULHK6U
#Your Consumer Key: 54a06c9bd235d47f787d8cf614577a94 
#Your Shared Secret: aiPUaTToTpixW4Ttaf4O9A

"""API key from 7 digital"""
# oauth_consumer_key=7dbpa63h3y3d
# oauth_consumer_secret=zz48d4epsqmrsuvp

"""API key from last.fm"""
# Your API Key is bd1cb09de31188b43aa46f39b8e40614
# Your secret is d2537b0ce8bc859a6068833c5e2a72a7

""""*********************************************************************************"""

# Credentials for the three web services used by this script.  Every value can
# be overridden through an environment variable; when the variable is unset the
# value this script has always used is kept, so default behaviour is unchanged.
# NOTE(security): the fallback values are real-looking credentials committed to
# source control -- consider revoking them and relying on the environment only.

# Echo Nest API key.  The literal used to end with a space, which does not match
# the key recorded in the comment block above; the stray space is removed.
config.ECHO_NEST_API_KEY = os.environ.get("ECHO_NEST_API_KEY", "SFXNKMTRAZ3ULHK6U")

# 7digital API key (sent as oauth_consumer_key in the request URLs below)
DIGITAL7_API_KEY = os.environ.get("DIGITAL7_API_KEY", '7dbpa63h3y3d')

# last.fm API key and secret
API_KEY = os.environ.get("LASTFM_API_KEY", "bd1cb09de31188b43aa46f39b8e40614")
API_SECRET = os.environ.get("LASTFM_API_SECRET", "d2537b0ce8bc859a6068833c5e2a72a7")

# last.fm authentication: user name and MD5 hash of the password
username = os.environ.get("LASTFM_USERNAME", "QMYading")
password_hash = pylast.md5(os.environ.get("LASTFM_PASSWORD", "123456"))
network = pylast.LastFMNetwork(
    api_key=API_KEY,
    api_secret=API_SECRET,
    username=username,
    password_hash=password_hash,
)

def url_call(url):
    """
    ***This method is from get_preview_url.py by Thierry Bertin-Mahieux***
    Do a simple request to the 7digital API.
    We assume we don't do intense querying, this function is not robust:
    network and XML parsing errors propagate to the caller.

    url -- complete request URL, query string included
    Return the root element of the XML answer.
    """
    stream = urllib2.urlopen(url)
    try:
        # parse() reads the whole response, so the element stays usable
        # after the stream has been closed
        return minidom.parse(stream).documentElement
    finally:
        # close the connection even when the answer is not well-formed XML
        stream.close()
    
def get_trackid_from_text_search(title,artistname=''):
    """
    ***This method is from get_preview_url.py by Thierry Bertin-Mahieux***
    Search for an artist + title using 7digital search API.

    title      -- track title to search for
    artistname -- optional artist name, put in front of the title in the query

    Return the tuple (title, trackid) of the first search result, or None if
    there is a problem: status other than 'ok', no search result, or a result
    without a usable <track>/<title>/id.
    Errors raised by url_call (network, malformed XML) are not caught here.
    """
    url = 'http://api.7digital.com/1.2/track/search?'
    url += 'oauth_consumer_key='+DIGITAL7_API_KEY
    query = title
    if artistname != '':
        query = artistname + ' ' + query
    url += '&q='+urllib2.quote(query)

    xmldoc = url_call(url)
    if xmldoc.getAttribute('status') != 'ok':
        return None

    results = xmldoc.getElementsByTagName('searchResult')
    if not results:
        return None

    # Only the first hit is considered; every lookup below is guarded so an
    # incomplete answer gives None instead of IndexError/AttributeError.
    tracks = results[0].getElementsByTagName('track')
    if not tracks:
        return None
    track = tracks[0]

    titles = track.getElementsByTagName('title')
    if not titles or titles[0].firstChild is None:
        return None
    tracktitle = titles[0].firstChild.data

    try:
        trackid = int(track.getAttribute('id'))
    except ValueError:
        # getAttribute gives '' for a missing id, which int() rejects
        return None
    return (tracktitle,trackid)
    
def get_preview_from_trackid(trackid):
    """
    ***This method is from get_preview_url.py by Thierry Bertin-Mahieux***
    Ask for the preview to a particular track, get the XML answer
    After calling the API with a given track id,
    we get an XML response that looks like:
    
    <response status="ok" version="1.2" xsi:noNamespaceSchemaLocation="http://api.7digital.com/1.2/static/7digitalAPI.xsd">
      <url>
        http://previews.7digital.com/clips/34/6804688.clip.mp3
      </url>
    </response>

    We parse it for the URL that we return, or '' if a problem
    (status other than 'ok', or no usable <url> element).
    The URL is returned without the whitespace that may surround it
    inside the <url> element.
    Errors raised by url_call (network, malformed XML) are not caught here.
    """
    url = 'http://api.7digital.com/1.2/track/preview?redirect=false'
    url += '&trackid='+str(trackid)
    url += '&oauth_consumer_key='+DIGITAL7_API_KEY
    xmldoc = url_call(url)
    if xmldoc.getAttribute('status') != 'ok':
        return ''

    url_elements = xmldoc.getElementsByTagName('url')
    if not url_elements or url_elements[0].firstChild is None:
        # keep the documented contract instead of raising on an odd answer
        return ''
    preview = url_elements[0].firstChild.nodeValue
    if preview is None:
        return ''
    return preview.strip()
 
 #function that downloads a file
def download_file(file_url, file_name):
    """
    Download file_url and store its content in the local file file_name.

    file_url  -- address of the remote file
    file_name -- path of the local file; an existing file is overwritten

    Return None.  Errors from urllib2 and from writing the file propagate.
    """
    # open the url
    mp3file = urllib2.urlopen(file_url)
    try:
        # fetch everything before touching the disk, so a failed download
        # does not leave an empty local file behind
        content = mp3file.read()
    finally:
        mp3file.close()

    # open the local file for writing; 'with' closes it even if write fails
    with open(file_name, "wb") as local_file:
        local_file.write(content)




# get artist object
# artist_object = network.get_artist("System of a Down")

# get the mbid for the tracks
# print(artist_object.get_mbid())

# print(os.getcwd())  #print current working directory
# artist_object.shout("Artist object created successful!")

# get track object 
# track_object = network.get_track("Iron Maiden", "The Nomad")
# track_object.add_tags(("awesome", "favorite")) # add tags to the tracks

#get get_album object 
#album_object = network.get_album("Adele","21")

# get tags object
#os.chdir('./NEW_Tagsets/')  # change current working directory 
#tag_classification = ["Happy", "Sad", "Angry", "Relax","Sport", "Study","Entertainment","Travelling"]
#tag_classification = ["Happy", "Sad", "Angry", "Relax","Sport", "Study","Entertainment","Travel"]
#tag_classification = ["Sad", "Angry", "Relax","Sport", "Study","Entertainment","Travel"]
#tag_classification = ["Travel"]

tag_classification = ["wake me up"]

#tag_classification = ["Passionate","Rollicking","Literate","Humorous","Aggressive"]
#tag_classification = ["Joy","Fun","Cheerful","Pleasing","Pleasant"]
#tag_classification = ["Romantic","Love"]

#tag_classification = ["silly", "campy", "quirky", "whimsical", "witty", "wry"]
#tag_classification = ["fiery","tense","anxious","intense", "volatile","visceral"]
#tag_classification = ["poignant","wistful", "bittersweet", "autumnal", "brooding"]
#tag_classification = ["rousing", "confident","boisterous", "rowdy"]
#tag_classification = ["sweet", "amiable","good natured"]


# NOTE(review): `track` is not used anywhere below; the assignment is kept so
# the module namespace stays as it was -- confirm before removing it.
track = network.track


def _ensure_dir(path):
    """Create directory `path` (with parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # A directory left by an earlier run is fine; anything else
        # (permissions, a plain file in the way) is a real error.
        if not os.path.isdir(path):
            raise


def _safe_name(text):
    """Return `text` usable as a single path component ('/' becomes ' ')."""
    return text.replace('/', ' ')


def _split_artist_title(text):
    """
    Split a track text of the form "Artist - Title".

    Return (artist, title), both stripped, or None when `text` contains no
    '-' at all.  The ' - ' separator is tried first so that a hyphen inside
    the artist name does not cut the name in half.
    """
    artist_name, separator, song_title = text.partition(' - ')
    if not separator:
        artist_name, separator, song_title = text.partition('-')
    if not separator:
        return None
    return (artist_name.strip(), song_title.strip())


def _download_preview(song_title, artist_name):
    """
    Search 7digital for the track and save its preview in the current
    working directory.  Return True when a file was downloaded.
    """
    found = get_trackid_from_text_search(song_title, artist_name)
    if found is None:
        print("no 7digital match, skipping")
        return False
    (tracktitle, trackid) = found

    # fetch the url
    audio_url = get_preview_from_trackid(trackid)
    if not audio_url:
        print("no preview available, skipping")
        return False
    print(audio_url)

    # NOTE(review): the file gets a .wav extension although the sample
    # preview URL documented in get_preview_from_trackid ends in .mp3; the
    # name is left unchanged because later processing may expect it.
    file_name = tracktitle + u'.wav'
    file_name = file_name.replace(u'/', u' ')
    print(file_name)
    print("downloading")
    download_file(audio_url, './' + file_name)
    return True


def _harvest_tag(tag_entry):
    """
    Fetch the top tracks of one last.fm tag and download their previews.

    Everything is written to the current working directory:
      <tag>_results.txt  one line per top track: track text followed by
                         the second element of the result tuple
      <tag>_artist.txt   one artist per line
      <tag>_title.txt    one title per line
      <title>.wav        one preview per track found on 7digital
    """
    tag_label = _safe_name(str(tag_entry))

    # store each lexical tag as a tag_object
    tag_object = network.get_tag(tag_entry)
    tracks_results = tag_object.get_top_tracks()

    track_file_name = tag_label + "_results.txt"
    print(track_file_name)

    # Keep the track texts in memory instead of reading the results file
    # back: the file line has the second tuple element glued to the title,
    # and stripping it afterwards with rstrip('0') also ate zeros that
    # belong to the title.
    track_texts = []
    with open(track_file_name, 'wb') as tag_track_file:
        for (item, weight) in tracks_results:
            tag_track_file.write("%s%s\n" % (item, weight))
            track_texts.append("%s" % (item,))

    with open(tag_label + "_artist.txt", 'wb') as artist_data:
        with open(tag_label + "_title.txt", 'wb') as title_data:
            for track_text in track_texts:
                names = _split_artist_title(track_text)
                if names is None:
                    print("cannot split artist and title: %r" % (track_text,))
                    continue
                (artist_name, song_title) = names

                artist_data.write("%s\n" % artist_name)
                title_data.write("%s\n" % song_title)

                print(artist_name)
                print(song_title)

                try:
                    _download_preview(song_title, artist_name)
                except Exception as exc:
                    # Best effort: one failing track must not stop the run,
                    # but say what went wrong.  KeyboardInterrupt is not an
                    # Exception, so Ctrl-C still works.
                    print("download failed for %r - %r: %r"
                          % (artist_name, song_title, exc))


# Layout produced below the directory the script is started from:
#   <seed>_tags_results.txt   the tags found for a seed, one per line
#   <seed>/<tag>/...          the files written by _harvest_tag
base_dir = os.getcwd()

# search for similar tag according to the seeds in tag_classification
for tags_name in tag_classification:
    tags = network.search_for_tag(tags_name)
    # only the first page of search results is used
    tags_search = tags.get_next_page()
    tag_file_name = tags_name + "_tags_results.txt"

    # print the tag name, i.e happy_tag_results.txt
    print(tag_file_name)

    seed_dir = os.path.join(base_dir, _safe_name(str(tags_name)))
    _ensure_dir(seed_dir)

    # one list file per seed, closed as soon as the seed is done
    with open(tag_file_name, 'wb') as tag_data:
        for each_letter in tags_search:
            # store the each lexical into the file. i.e 'happy' 'very happy'
            tag_data.write("%s\n" % each_letter)

            # create a directory with tag name and work inside it
            tag_dir = os.path.join(seed_dir, _safe_name(str(each_letter)))
            _ensure_dir(tag_dir)
            os.chdir(tag_dir)
            try:
                _harvest_tag(each_letter)
            finally:
                # always come back, whatever happened inside the tag folder
                os.chdir(base_dir)

    print("successfully written in the file")

# check the directory, write to the current directory,
# if it doesn't exist, create a new one