annotate core/tools/machine_learning/svmlight2weight.py @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 # Compute the weight vector of linear SVM based on the model file
Daniel@0 2 # Original Perl Author: Thorsten Joachims (thorsten@joachims.org)
Daniel@0 3 # Python Version: Ori Cohen (orioric@gmail.com)
Daniel@0 4 # Call: python svmlight2weight.py svm_model
Daniel@0 5
Daniel@0 6 import sys
Daniel@0 7 from operator import itemgetter
Daniel@0 8
# Psyco is an optional speed-up: enable it when it can be imported, otherwise
# tell the user and carry on without it.
try:
    import psyco
    psyco.full()
except ImportError:
    print('Psyco not installed, the program will just run slower')
Daniel@0 14
def sortbyvalue(d, reverse=True):
    """Return the (key, value) pairs of dictionary *d* sorted by value.

    Follows the itemgetter-based recipe proposed in PEP 265.

    d       -- dictionary whose items are to be sorted
    reverse -- when true (the default) the largest value comes first;
               when false the smallest value comes first

    Returns a new list of (key, value) tuples; *d* is not modified.
    """
    # Pass the caller's choice through instead of hard-coding descending
    # order, so that reverse=False is actually honoured.
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)
Daniel@0 18
def sortbykey(d, reverse=False):
    """Return the (key, value) pairs of dictionary *d* sorted by key.

    Follows the itemgetter-based recipe proposed in PEP 265.

    d       -- dictionary whose items are to be sorted
    reverse -- when false (the default) the smallest key comes first;
               when true the largest key comes first

    Returns a new list of (key, value) tuples; *d* is not modified.

    The argument used to be ignored and the result was always ascending.
    The default is therefore False, so calls that omit the argument keep
    their ascending order, while an explicit reverse=True is now honoured.
    """
    return sorted(d.items(), key=itemgetter(0), reverse=reverse)
Daniel@0 22
def get_file():
    """
    Open the model file named on the command line and return the file object.

    The file name is taken from sys.argv[1]. If no argument is present, the
    name 'svm_model' (default svmLight output) is assumed and a notice is
    printed.

    Returns the file opened for reading in text mode; the caller is
    responsible for closing it.

    If the file cannot be opened, an error message is printed and the
    program terminates with exit status 1 (raises SystemExit).
    """
    if len(sys.argv) < 2:
        # assume file to be svm_model (default svmLight output)
        print("Assuming file as svm_model")
        filename = 'svm_model'
    else:
        filename = sys.argv[1]

    try:
        return open(filename, "r")
    except IOError:
        print("Error: The file '%s' was not found on this system." % filename)
        # A non-zero status lets calling scripts detect the failure; the
        # previous exit status of 0 reported success.
        sys.exit(1)
Daniel@0 47
Daniel@0 48
Daniel@0 49
Daniel@0 50
# Zero-based positions of the header lines inspected in the model file.
_KERNEL_LINE = 1       # kernel type; the value 0 is treated as a linear kernel
_THRESHOLD_LINE = 10   # must mention 'threshold b'; every later line is parsed


def compute_weights(lines):
    """Compute the weight vector described by the lines of a model file.

    lines -- list of text lines of the model file, in file order

    Every line after the threshold line is parsed as
    ``<alpha> <id>:<value> <id>:<value> ... # comment``. For each feature
    id the products ``alpha * value`` are accumulated over all such lines.

    Returns a dictionary mapping integer feature id to its float weight.

    Raises ValueError with the message
      'Not linear Kernel!' -- the kernel type field is not 0
      'Parsing error!'     -- the header is too short or the threshold line
                              is missing
    or a 'Parsing error! ...' message naming the offending line when a
    line after the header is malformed.
    """
    if len(lines) <= _KERNEL_LINE:
        raise ValueError('Parsing error!')

    # Compare the kernel type field itself rather than searching the whole
    # line, so a '0' elsewhere on the line is not taken for a linear kernel.
    kernel_field = lines[_KERNEL_LINE].split('#', 1)[0].split()
    if kernel_field[:1] != ['0']:
        raise ValueError('Not linear Kernel!')

    if len(lines) <= _THRESHOLD_LINE or 'threshold b' not in lines[_THRESHOLD_LINE]:
        raise ValueError('Parsing error!')

    weights = {}
    first_sv = _THRESHOLD_LINE + 1
    for lineno, line in enumerate(lines[first_sv:], first_sv + 1):
        # Drop the trailing comment (if any) and split on any run of
        # whitespace, so lines without '#' or with repeated/trailing blanks
        # are read correctly.
        tokens = line.split('#', 1)[0].split()
        if not tokens:
            continue  # blank or comment-only line
        try:
            alpha = float(tokens[0])
            for pair in tokens[1:]:
                feature, value = pair.split(':')
                feature = int(feature)
                weights[feature] = weights.get(feature, 0) + alpha * float(value)
        except ValueError:
            raise ValueError('Parsing error! Malformed entry on line %d' % lineno)
    return weights


if __name__ == "__main__":
    f = get_file()
    try:
        lines = f.readlines()
    finally:
        # Release the file handle even if reading fails.
        f.close()

    try:
        w = compute_weights(lines)
    except ValueError as err:
        print('%s\n' % err)
        # Report the failure through the exit status as well.
        sys.exit(1)

    # If you need to sort the features by value and not by feature ID,
    # use sortbyvalue(w) here instead.
    for feature, weight in sortbykey(w):
        print('%s : %s' % (feature, weight))
Daniel@0 92
Daniel@0 93