comparison extra/soundsoftware/get-apache-log-stats.rb @ 979:56a38a9f6204 cannam

Various fixes
author Chris Cannam
date Thu, 25 Oct 2012 14:08:58 +0100
parents bbb88c44f805
children 9b4919de5317
comparison
equal deleted inserted replaced
978:bbb88c44f805 979:56a38a9f6204
36 @projects = Hash.new 36 @projects = Hash.new
37 37
38 parseable = 0 38 parseable = 0
39 unparseable = 0 39 unparseable = 0
40 40
41 def known_project?(project) 41 def is_public_project?(project)
42 if !project 42 if !project
43 false 43 false
44 elsif @projects.key?(project) 44 elsif @projects.key?(project)
45 true 45 @projects[project].is_public?
46 else 46 else
47 pobj = Project.find_by_identifier(project) 47 pobj = Project.find_by_identifier(project)
48 if pobj 48 if pobj
49 @projects[project] = pobj 49 @projects[project] = pobj
50 true 50 pobj.is_public?
51 else 51 else
52 print "Project not found: ", project 52 print "Project not found: ", project, "\n"
53 false 53 false
54 end 54 end
55 end 55 end
56 end 56 end
57 57
58 ARGF.each do |line| 58 STDIN.each do |line|
59 59
60 record = parser.parse(line) 60 record = parser.parse(line)
61 61
62 # most annoyingly, the parser can't handle the comma-separated list 62 # most annoyingly, the parser can't handle the comma-separated list
63 # in X-Forwarded-For where it has more than one element. If it has 63 # in X-Forwarded-For where it has more than one element. If it has
67 record = parser.parse(filtered) 67 record = parser.parse(filtered)
68 end 68 end
69 69
70 # discard, but count, unparseable lines 70 # discard, but count, unparseable lines
71 if not record 71 if not record
72 print "Line not parseable: ", line, "\n"
72 unparseable += 1 73 unparseable += 1
73 next 74 next
74 end 75 end
75 76
76 # discard everything that isn't a 200 OK response 77 # discard everything that isn't a 200 OK response
82 # pull out request e.g. GET / HTTP/1.0 83 # pull out request e.g. GET / HTTP/1.0
83 request = record["%r"] 84 request = record["%r"]
84 85
85 # split into method, path, protocol 86 # split into method, path, protocol
86 if not request =~ /^[^\s]+ ([^\s]+) [^\s]+$/ 87 if not request =~ /^[^\s]+ ([^\s]+) [^\s]+$/
88 print "Line not parseable (bad method, path, protocol): ", line, "\n"
87 unparseable += 1 89 unparseable += 1
88 next 90 next
89 end 91 end
90 92
91 # get the path e.g. /projects/weevilmatic and split on / 93 # get the path e.g. /projects/weevilmatic and split on /
93 components = path.split("/") 95 components = path.split("/")
94 96
95 # should have at least two elements unless path is "/"; first should 97 # should have at least two elements unless path is "/"; first should
96 # be empty (begins with /) 98 # be empty (begins with /)
97 if path != "/" and (components.size < 2 or components[0] != "") 99 if path != "/" and (components.size < 2 or components[0] != "")
100 print "Line not parseable (degenerate path): ", line, "\n"
98 unparseable += 1 101 unparseable += 1
99 next 102 next
100 end 103 end
101 104
102 if components[1] == "hg" 105 if components[1] == "hg"
103 106
104 # path is /hg/project?something or /hg/project/something 107 # path is /hg/project?something or /hg/project/something
105 108
106 project = components[2].split("?")[0] 109 project = components[2].split("?")[0]
107 if not known_project?(project) 110 if not is_public_project?(project)
108 next 111 next
109 end 112 end
110 113
111 if components[2] =~ /&roots=00*$/ 114 if components[2] =~ /&roots=00*$/
112 clones[project] += 1 115 clones[project] += 1
121 elsif components[1] == "projects" 124 elsif components[1] == "projects"
122 125
123 # path is /projects/project or /projects/project/something 126 # path is /projects/project or /projects/project/something
124 127
125 project = components[2] 128 project = components[2]
126 if not known_project?(project) 129 project = project.split("?")[0] if project
130 if not is_public_project?(project)
127 next 131 next
128 end 132 end
129 133
130 project = project.split("?")[0] 134 project = project.split("?")[0]
131 hits[project] += 1 135 hits[project] += 1