# Compute the weight vector of linear SVM based on the model file
# Original Perl Author: Thorsten Joachims (thorsten@joachims.org)
# Python Version: Ori Cohen (orioric@gmail.com)
# Call: python svm2weights.py svm_model
#
# The weights are written to stdout as "<feature id> : <weight>" lines;
# every diagnostic goes to stderr so that redirected output stays clean.
# The code deliberately avoids print-statement syntax and dict.iteritems()
# so that it runs unchanged on both Python 2 and Python 3.

import sys
from operator import itemgetter

try:
    import psyco
    psyco.full()
except ImportError:
    # Psyco is an optional Python 2 JIT; its absence only affects speed.
    sys.stderr.write('Psyco not installed, the program will just run slower\n')

# Zero-based positions of the header lines inspected in the model file.
KERNEL_TYPE_LINE = 1
THRESHOLD_LINE = 10


class ModelFormatError(ValueError):
    """Raised when the model header fails one of the sanity checks."""


def sortbyvalue(d, reverse=True):
    """Return the (key, value) pairs of ``d`` sorted by value.

    Follows the recipe proposed in PEP 265 (sorting with ``itemgetter``).

    d       -- dictionary to sort
    reverse -- sort in descending order when true (the default)
    """
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)


def sortbykey(d, reverse=False):
    """Return the (key, value) pairs of ``d`` sorted by key.

    Follows the recipe proposed in PEP 265 (sorting with ``itemgetter``).

    d       -- dictionary to sort
    reverse -- sort in descending order when true; the default is
               ascending, which is what a plain ``sortbykey(d)`` call has
               always produced
    """
    return sorted(d.items(), key=itemgetter(0), reverse=reverse)


def get_file():
    """Open the model file named on the command line and return it.

    When no argument is given the file name defaults to ``svm_model``
    (the default svmLight output name).  If the file cannot be opened an
    error message is written to stderr and the program exits with a
    non-zero status.
    """
    if len(sys.argv) < 2:
        # assume file to be svm_model (default svmLight output)
        sys.stderr.write("Assuming file as svm_model\n")
        filename = 'svm_model'
    else:
        filename = sys.argv[1]

    try:
        return open(filename, "r")
    except IOError:
        sys.stderr.write(
            "Error: The file '%s' was not found on this system.\n" % filename)
        sys.exit(1)


def compute_weights(lines):
    """Accumulate the linear weight vector from the lines of a model file.

    lines -- iterable with the text lines of the model file, header first

    The line at index KERNEL_TYPE_LINE must contain a '0' and the line at
    index THRESHOLD_LINE must contain 'threshold b'; every later line is
    read as ``alpha feature:value feature:value ... [# comment]`` and adds
    ``alpha * value`` to the weight of each listed feature.

    Returns a dict mapping integer feature id to float weight.
    Raises ModelFormatError when a header check fails; a malformed
    ``feature:value`` token raises a plain ValueError.
    """
    weights = {}
    for index, line in enumerate(lines):
        if index == KERNEL_TYPE_LINE:
            if '0' not in line:
                raise ModelFormatError('Not linear Kernel!')
        elif index == THRESHOLD_LINE:
            if 'threshold b' not in line:
                raise ModelFormatError('Parsing error!')
        elif index > THRESHOLD_LINE:
            # Drop the trailing comment (if any) and tokenise on whitespace;
            # this also copes with lines that carry no '#' at all.
            tokens = line.partition('#')[0].split()
            if not tokens:
                continue  # blank line, e.g. at the end of the file
            alpha = float(tokens[0])
            for token in tokens[1:]:
                feature, value = token.split(':')
                feature = int(feature)
                weights[feature] = weights.get(feature, 0.0) + alpha * float(value)
    return weights


def main():
    """Print the weight vector of the model file; return the exit status."""
    f = get_file()
    try:
        lines = f.readlines()
    finally:
        f.close()

    try:
        weights = compute_weights(lines)
    except ModelFormatError as err:
        sys.stderr.write('%s\n\n' % err)
        return 1

    # To order the features by weight instead of by feature ID, use
    # sortbyvalue(weights) here.
    for feature, weight in sortbykey(weights):
        print('%s : %s' % (feature, weight))
    return 0


if __name__ == "__main__":
    sys.exit(main())