# Summarise Mercurial and project-page activity from an Apache access log.
#
# Reads log lines from the files named on the command line (or from
# standard input when none are given) via ARGF, and prints four hashes
# keyed by project name -- hg clones, non-clone hg pulls, zip archive
# requests and hits on /projects/<name> pages -- followed by the number
# of lines that were fully processed and the number that could not be
# parsed. Lines dropped by the status or crawler filters are counted in
# neither total.

# Use the ApacheLogRegex parser, a neat thing
# See http://www.simonecarletti.com/blog/2009/02/apache-log-regex-a-lightweight-ruby-apache-log-parser/
require 'apachelogregex'

# This is the format defined in our httpd.conf
vhost_combined_format = '%v:%p %h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"'

parser = ApacheLogRegex.new(vhost_combined_format)

# project name -> count of hg clones
clones = Hash.new(0)

# project name -> count of hg pulls
pulls = Hash.new(0)

# project name -> count of hg archive requests (i.e. Download as Zip)
zips = Hash.new(0)

# project name -> count of hits to pages under /projects/projectname
hits = Hash.new(0)

parseable = 0
unparseable = 0

ARGF.each do |line|
  record = parser.parse(line)

  # Most annoyingly, the parser can't handle the comma-separated list
  # in X-Forwarded-For where it has more than one element. If it has
  # failed, remove any IP addresses with trailing commas and try again
  record ||= parser.parse(line.gsub(/([0-9]+\.){3}[0-9]+,\s*/, ""))

  # discard, but count, unparseable lines
  unless record
    unparseable += 1
    next
  end

  # discard everything that isn't a 200 OK response
  next if record["%>s"] != "200"

  # discard anything apparently requested by a crawler
  next if record["%{User-Agent}i"] =~ /(bot|slurp|crawler|spider|Redmine)\b/i

  # pull out request e.g. GET / HTTP/1.0
  request = record["%r"]

  # split into method, path, protocol; only the path is captured
  unless request =~ /\A[^\s]+ ([^\s]+) [^\s]+\z/
    unparseable += 1
    next
  end

  # get the path e.g. /projects/weevilmatic and split on /
  path = Regexp.last_match(1)
  components = path.split("/")

  # should have at least two elements unless path is "/"; first should
  # be empty (begins with /)
  if path != "/" && (components.size < 2 || components[0] != "")
    unparseable += 1
    next
  end

  # Third path component, possibly carrying a query string. It is nil
  # for a bare "/hg" or "/projects" request, so it must be checked
  # before it is split.
  target = components[2]
  project = target && target.split("?")[0]

  if components[1] == "hg"

    # path is /hg/project?something or /hg/project/something
    if project
      if target =~ /&roots=00*$/
        clones[project] += 1
      elsif target =~ /cmd=capabilities/
        pulls[project] += 1
      elsif components[3] == "archive"
        zips[project] += 1
      end
    end

  elsif components[1] == "projects"

    # path is /projects/project or /projects/project/something
    hits[project] += 1 if project

  end

  parseable += 1
end

# Each clone is also a pull; deduct every clone from the pulls hash,
# because we want that to contain only non-clone pulls. The whole clone
# count is subtracted, not just one per project.
clones.each do |project, count|
  pulls[project] -= count
end

print clones, "\n"
print pulls, "\n"
print zips, "\n"
print hits, "\n"

print parseable, " parseable\n"
print unparseable, " unparseable\n"