wolffd@0
|
1 # Compute the weight vector of linear SVM based on the model file
|
wolffd@0
|
2 # Original Perl Author: Thorsten Joachims (thorsten@joachims.org)
|
wolffd@0
|
3 # Python Version: Ori Cohen (orioric@gmail.com)
|
wolffd@0
|
4 # Call: python svm2weights.py svm_model
|
wolffd@0
|
5
|
wolffd@0
|
6 import sys
|
wolffd@0
|
7 from operator import itemgetter
|
wolffd@0
|
8
|
wolffd@0
|
# Psyco is an optional JIT accelerator for old CPython 2 interpreters; the
# script produces the same result without it, only more slowly.
try:
    import psyco
    psyco.full()
except ImportError:
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print('Psyco not installed, the program will just run slower')
|
wolffd@0
|
14
|
wolffd@0
|
def sortbyvalue(d, reverse=True):
    """Return the (key, value) pairs of dictionary ``d`` sorted by value.

    Uses the itemgetter-based recipe proposed in PEP 265.

    d       -- dictionary to sort.
    reverse -- when True (the default) the largest value comes first;
               when False the smallest value comes first.

    Returns a new list of (key, value) tuples; ``d`` is not modified.
    """
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # iteritems() is Python 2 only.
    return sorted(d.items(), key=itemgetter(1), reverse=reverse)
|
wolffd@0
|
18
|
wolffd@0
|
def sortbykey(d, reverse=False):
    """Return the (key, value) pairs of dictionary ``d`` sorted by key.

    Uses the itemgetter-based recipe proposed in PEP 265.

    d       -- dictionary to sort.
    reverse -- when False (the default) the smallest key comes first;
               when True the largest key comes first.

    Returns a new list of (key, value) tuples; ``d`` is not modified.

    Note: this function has always returned ascending order when called
    without ``reverse``; the default is False so that the parameter can be
    honoured without changing that behaviour.
    """
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # iteritems() is Python 2 only.
    return sorted(d.items(), key=itemgetter(0), reverse=reverse)
|
wolffd@0
|
22
|
wolffd@0
|
def get_file():
    """
    Open the model file named on the command line.

    The filename is taken from sys.argv[1]. If no argument is present, the
    file is assumed to be svm_model (default svmLight output) and a notice
    is printed.

    Returns the file object opened for reading; the caller is responsible
    for closing it.

    If the file cannot be opened, an error message is printed and the
    program terminates with exit status 1.
    """
    # Determine the name of the model file.
    if len(sys.argv) < 2:
        # assume file to be svm_model (default svmLight output)
        print("Assuming file as svm_model")
        filename = 'svm_model'
    else:
        filename = sys.argv[1]

    try:
        return open(filename, "r")
    except IOError:
        print("Error: The file '%s' was not found on this system." % filename)
        # A non-zero status lets calling scripts detect the failure.
        sys.exit(1)
|
wolffd@0
|
47
|
wolffd@0
|
48
|
wolffd@0
|
49
|
wolffd@0
|
50
|
wolffd@0
|
class ModelFormatError(ValueError):
    """Raised when the model header is not that of a linear SVM model."""


def compute_weights(lines):
    """Compute the weight vector of a linear SVM from the lines of a model file.

    lines -- sequence of text lines of the model file, in file order.

    Layout relied upon (0-based line index):
      * line 1 starts with the kernel type, which must be 0 (linear);
      * line 10 must contain the text 'threshold b';
      * every line after line 10 is a support vector of the form
        ``alpha feature:value feature:value ... # comment``.

    Returns a dictionary mapping each integer feature id to the sum of
    alpha * value over all support vectors.

    Raises ModelFormatError if the kernel is not linear or if line 10 is
    not the threshold line. Malformed support-vector tokens raise the
    ValueError produced by int()/float()/unpacking.
    """
    weights = {}
    for index, line in enumerate(lines):
        if index == 1:
            # Compare the first token only; searching the whole line for
            # '0' would also accept a '0' that appears in the comment.
            if line.split('#', 1)[0].split()[:1] != ['0']:
                raise ModelFormatError('Not linear Kernel!')
        elif index == 10:
            if 'threshold b' not in line:
                raise ModelFormatError('Parsing error!')
        elif index > 10:
            # Drop the trailing comment if there is one. split() without an
            # argument tolerates repeated spaces, a missing comment and the
            # line terminator.
            tokens = line.split('#', 1)[0].split()
            if not tokens:
                continue  # blank line, nothing to accumulate
            alpha = float(tokens[0])
            for token in tokens[1:]:
                feature, value = token.split(':')
                feature = int(feature)
                weights[feature] = weights.get(feature, 0.0) + alpha * float(value)
    return weights


if __name__ == "__main__":
    f = get_file()
    try:
        lines = f.readlines()
    finally:
        # Close the file even if reading fails.
        f.close()

    try:
        w = compute_weights(lines)
    except ModelFormatError as err:
        print('%s\n' % err)
    else:
        # If you need to sort the features by value and not by feature ID
        # then use this line instead:
        # ws = sortbyvalue(w)
        ws = sortbykey(w)
        for feature, weight in ws:
            print('%s : %s' % (feature, weight))
|
wolffd@0
|
92
|
wolffd@0
|
93
|