annotate core/tools/machine_learning/svmlight2weight.py @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 # Compute the weight vector of linear SVM based on the model file
wolffd@0 2 # Original Perl Author: Thorsten Joachims (thorsten@joachims.org)
wolffd@0 3 # Python Version: Ori Cohen (orioric@gmail.com)
wolffd@0 4 # Call: python svmlight2weight.py svm_model
wolffd@0 5
wolffd@0 6 import sys
wolffd@0 7 from operator import itemgetter
wolffd@0 8
# Psyco is an optional JIT accelerator; the script works without it, only
# slower. The notice below is printed whenever the import fails.
try:
    import psyco
    psyco.full()
except ImportError:
    # Call form with a single string prints identically on Python 2 and 3.
    print('Psyco not installed, the program will just run slower')
wolffd@0 14
def sortbyvalue(d, reverse=True):
    """
    Return the (key, value) pairs of dictionary d sorted by value.

    Uses the itemgetter approach proposed in PEP 265.

    Args:
        d: dictionary whose values are mutually comparable.
        reverse: when True (the default) the largest value comes first;
            when False the smallest value comes first.

    Returns:
        A new list of (key, value) tuples; d itself is not modified.
    """
    # items() exists on both Python 2 and 3, unlike iteritems().
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)
wolffd@0 18
def sortbykey(d, reverse=False):
    """
    Return the (key, value) pairs of dictionary d sorted by key.

    Uses the itemgetter approach proposed in PEP 265.

    Args:
        d: dictionary whose keys are mutually comparable.
        reverse: when False (the default) the smallest key comes first;
            when True the largest key comes first. Calls without this
            argument have always produced ascending order, so the default
            is False to keep that behaviour while honouring the argument.

    Returns:
        A new list of (key, value) tuples; d itself is not modified.
    """
    # items() exists on both Python 2 and 3, unlike iteritems().
    return sorted(d.items(), key=itemgetter(0), reverse=reverse)
wolffd@0 22
def get_file():
    """
    Open the model file named on the command line.

    The filename is read from sys.argv[1]. If no argument is present, the
    file is assumed to be 'svm_model' (default svmLight output) and a notice
    is printed.

    Returns:
        The file object opened for reading in text mode. The caller is
        responsible for closing it.

    If the file cannot be opened, an error message is printed and the
    program terminates with exit status 1.
    """
    if len(sys.argv) < 2:
        # assume file to be svm_model (default svmLight output)
        print("Assuming file as svm_model")
        filename = 'svm_model'
    else:
        filename = sys.argv[1]

    try:
        return open(filename, "r")
    except IOError:
        print("Error: The file '%s' was not found on this system." % filename)
        # Non-zero status lets calling scripts detect the failure.
        sys.exit(1)
wolffd@0 47
wolffd@0 48
wolffd@0 49
wolffd@0 50
class _ModelFormatError(Exception):
    """Signals a model header this script cannot process; str() is the message to print."""


def _compute_weights(lines):
    """
    Accumulate the linear weight vector from the lines of a model file.

    Layout relied upon (0-based line index):
      * line 1 holds the kernel type before its '#' comment; it must be 0.
      * line 10 must contain the text 'threshold b'.
      * every later line is read as 'alpha id:value id:value ... # comment'.

    Args:
        lines: list of text lines as returned by readlines().

    Returns:
        dict mapping int feature id to the float sum of alpha * value over
        all lines after the header. Empty if there are no such lines.

    Raises:
        _ModelFormatError: the kernel type is not 0, or line 10 lacks
            'threshold b'.
        ValueError: a token after the header is not a valid number or not of
            the form id:value.
    """
    weights = {}
    for index, line in enumerate(lines):
        if index == 1:
            # Compare the value itself, not merely the presence of a '0' character.
            if line.split('#', 1)[0].strip() != '0':
                raise _ModelFormatError('Not linear Kernel!\n')
        elif index == 10:
            if 'threshold b' not in line:
                raise _ModelFormatError("Parsing error!\n")
        elif index > 10:
            # Drop the trailing comment, then split on any whitespace so that
            # blank lines, repeated spaces or a missing '#' do not break parsing.
            tokens = line.split('#', 1)[0].split()
            if not tokens:
                continue
            alpha = float(tokens[0])
            for token in tokens[1:]:
                feature_id, value = token.split(':')
                feature_id = int(feature_id)
                weights[feature_id] = weights.get(feature_id, 0) + alpha * float(value)
    return weights


if __name__ == "__main__":
    model_file = get_file()
    try:
        model_lines = model_file.readlines()
    finally:
        # Release the handle even if reading fails.
        model_file.close()

    try:
        weights = _compute_weights(model_lines)
    except _ModelFormatError as err:
        print(err)
        # Non-zero status lets calling scripts detect the failure.
        sys.exit(1)

    # If you need to sort the features by value and not by feature ID then
    # use sortbyvalue(weights) here instead.
    for feature_id, weight in sortbykey(weights):
        print('%s : %s' % (feature_id, weight))
wolffd@0 92
wolffd@0 93