wolffd@0: # Compute the weight vector of linear SVM based on the model file
wolffd@0: # Original Perl Author: Thorsten Joachims (thorsten@joachims.org)
wolffd@0: # Python Version: Ori Cohen (orioric@gmail.com)
wolffd@0: # Call: python svm2weights.py svm_model
wolffd@0: 
wolffd@0: import sys
wolffd@0: from operator import itemgetter
wolffd@0: 
# Psyco is an optional JIT accelerator; the script works without it.
try:
    import psyco
    psyco.full()
except ImportError:
    # Report on stderr so the notice never ends up inside the weight listing
    # when stdout is redirected.  sys.stderr.write is valid on both Python 2
    # and Python 3, unlike the print statement.
    sys.stderr.write('Psyco not installed, the program will just run slower\n')
wolffd@0: 
def sortbyvalue(d, reverse=True):
    """Return the (key, value) pairs of ``d`` as a list sorted by value.

    Follows the dictionary-sorting recipe proposed in PEP 265, using
    ``itemgetter`` as the sort key.

    d       -- dictionary to sort
    reverse -- when true (the default) the largest value comes first;
               when false the smallest value comes first
    """
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # iteritems() is Python 2 only.
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)
wolffd@0: 
def sortbykey(d, reverse=False):
    """Return the (key, value) pairs of ``d`` as a list sorted by key.

    Follows the dictionary-sorting recipe proposed in PEP 265, using
    ``itemgetter`` as the sort key.

    d       -- dictionary to sort
    reverse -- when false (the default) the smallest key comes first;
               when true the largest key comes first

    A call without ``reverse`` yields ascending key order, which is the
    order this function produced before the argument was honoured.
    """
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # iteritems() is Python 2 only.
    return sorted(d.items(), key=itemgetter(0), reverse=reverse)
wolffd@0: 
def get_file():
    """
    Open the model file named on the command line and return it.

    If no filename argument is present, the file is assumed to be
    'svm_model' (default svmLight output).  The file is opened for reading
    and the open file object is returned; the caller is responsible for
    closing it.  If the file cannot be opened, an error message is written
    to stderr and execution ends with exit status 1.
    """
    if len(sys.argv) < 2:
        # assume file to be svm_model (default svmLight output)
        sys.stderr.write("Assuming file as svm_model\n")
        filename = 'svm_model'
    else:
        filename = sys.argv[1]

    try:
        return open(filename, "r")
    except IOError:
        # Diagnostics go to stderr and the exit status is non-zero so that
        # calling scripts can detect the failure.
        sys.stderr.write(
            "Error: The file '%s' was not found on this system.\n" % filename)
        sys.exit(1)
wolffd@0: 
wolffd@0: 
wolffd@0: 
wolffd@0: 
def compute_weights(lines):
    """Compute the weight vector of a linear SVM from model-file lines.

    lines -- the lines of the model file, in order.  Line index 1 must
             declare kernel type 0 (linear), line index 10 must be the
             'threshold b' line, and every later line is a support vector
             of the form ``alpha feature:value feature:value ... #comment``.

    Returns a dictionary mapping each integer feature ID to the sum of
    alpha * value over all support vectors.  Input with fewer than twelve
    lines yields an empty dictionary.

    Raises ValueError if the kernel is not linear, if the threshold line is
    not where it is expected, or if a support-vector token is malformed.
    """
    kernel_line = 1      # index of the 'kernel type' header line
    threshold_line = 10  # index of the last header line ('threshold b')

    weights = {}
    for index, line in enumerate(lines):
        if index == kernel_line:
            # The value before the '#' comment must be exactly 0; merely
            # containing the character '0' somewhere is not enough.
            if line.split('#', 1)[0].split() != ['0']:
                raise ValueError('Not linear Kernel!')
        elif index == threshold_line:
            if 'threshold b' not in line:
                raise ValueError('Parsing error!')
        elif index > threshold_line:
            # Drop the trailing '#...' comment if there is one; a line
            # without a comment is kept whole instead of being truncated.
            tokens = line.split('#', 1)[0].split()
            if not tokens:
                continue  # blank line, e.g. at the end of the file
            alpha = float(tokens[0])
            for token in tokens[1:]:
                feature, value = token.split(':')
                feature = int(feature)
                weights[feature] = weights.get(feature, 0) + alpha * float(value)
    return weights


if __name__ == "__main__":
    f = get_file()
    try:
        lines = f.readlines()
    finally:
        # Close the model file even if reading it fails.
        f.close()

    try:
        w = compute_weights(lines)
    except ValueError as err:
        # Report format problems on stderr and signal failure to the caller.
        sys.stderr.write('%s\n' % err)
        sys.exit(1)

    # To sort the features by value rather than by feature ID, call
    # sortbyvalue(w) here instead.
    ws = sortbykey(w)
    for feature, weight in ws:
        # Single-string form prints "id : weight" on Python 2 and Python 3.
        print('%s : %s' % (feature, weight))
wolffd@0: 
wolffd@0: