# Read an Apache log file from the SoundSoftware site and produce some
# per-project stats.
#
# Invoke with e.g.
#
#   cat /var/log/apache2/code-access.log | \
#     script/runner -e production extra/soundsoftware/get-apache-log-stats.rb

# Use the ApacheLogRegex parser, a neat thing
# See http://www.simonecarletti.com/blog/2009/02/apache-log-regex-a-lightweight-ruby-apache-log-parser/
require 'apachelogregex'

# This is the format defined in our httpd.conf.
#
# The string is single-quoted, so each \" below is a literal backslash
# followed by a double quote (presumably matching the escaping used in the
# httpd.conf LogFormat directive -- confirm against the server config).
vhost_combined_format = '%v:%p %h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"'

# Parser used for every log line; parsed records are indexed by the format
# directives above, e.g. record["%r"] and record["%>s"].
parser = ApacheLogRegex.new(vhost_combined_format)

# Per-project tallies. Each table defaults missing keys to 0 so a project
# can be incremented without being initialised first.

# project name -> count of hg clones
clones = Hash.new(0)

# project name -> count of hg pulls
pulls = Hash.new(0)

# project name -> count of hg pushes
pushes = Hash.new(0)

# project name -> count of hg archive requests (i.e. Download as Zip)
zips = Hash.new(0)

# project name -> count of hits to pages under /projects/projectname
hits = Hash.new(0)

# project name -> Project object
@projects = {}

# Running totals of log lines that were counted and that were rejected
parseable = 0
unparseable = 0

# Report whether +project+ names a known, public project.
#
# project:: project identifier taken from a request path, or nil
#
# Returns false for a nil identifier and for an identifier that
# Project.find_by_identifier cannot resolve; otherwise returns the result
# of calling is_public? on the Project.
#
# Results of the lookup are remembered in @projects, including misses
# (stored as nil). Each identifier therefore costs a single
# Project.find_by_identifier call, and an unknown identifier is reported
# on standard output once rather than once per log line.
def is_public_project?(project)
  return false unless project

  unless @projects.key?(project)
    pobj = Project.find_by_identifier(project)
    print "Project not found: ", project, "\n" unless pobj
    # Cache the miss as well as the hit so it is not looked up again
    @projects[project] = pobj
  end

  pobj = @projects[project]
  pobj ? pobj.is_public? : false
end

# Read the access log from standard input one line at a time and tally
# per-project activity into clones, pulls, pushes, zips and hits.
#
# Lines that cannot be parsed are reported and counted in unparseable.
# Lines that parse but are filtered out (non-200 status, crawler user
# agent, missing, unknown or non-public project) are skipped without being
# counted in either total. Every remaining line is counted in parseable,
# whether or not it contributed to one of the per-project tables.
STDIN.each do |line|

  record = parser.parse(line)

  # most annoyingly, the parser can't handle the comma-separated list
  # in X-Forwarded-For where it has more than one element. If it has
  # failed, remove any IP addresses with trailing commas and try again
  unless record
    filtered = line.gsub(/([0-9]+\.){3}[0-9]+,\s*/, "")
    record = parser.parse(filtered)
  end

  # discard, but count, unparseable lines
  unless record
    print "Line not parseable: ", line, "\n"
    unparseable += 1
    next
  end

  # discard everything that isn't a 200 OK response
  next if record["%>s"] != "200"

  # discard anything apparently requested by a crawler
  next if record["%{User-Agent}i"] =~ /(bot|slurp|crawler|spider|Redmine)\b/i

  # pull out request e.g. GET / HTTP/1.0
  request = record["%r"]

  # split into method, path, protocol
  unless request =~ /^[^\s]+ ([^\s]+) [^\s]+$/
    print "Line not parseable (bad method, path, protocol): ", line, "\n"
    unparseable += 1
    next
  end

  # get the path e.g. /projects/weevilmatic and split on /
  path = $~[1]
  components = path.split("/")

  # should have at least two elements unless path is "/"; first should
  # be empty (begins with /)
  if path != "/" && (components.size < 2 || components[0] != "")
    print "Line not parseable (degenerate path): ", line, "\n"
    unparseable += 1
    next
  end

  if components[1] == "hg"

    # path is /hg/project?something or /hg/project/something

    # components[2] is nil for a bare "/hg" request; leave project nil in
    # that case so the line is skipped below instead of raising
    project = components[2]
    project = project.split("?")[0] if project
    next unless is_public_project?(project)

    # Classify by the query string attached to the project component, or
    # by the path element that follows it
    if components[2] =~ /&roots=00*$/
      clones[project] += 1
    elsif components[2] =~ /cmd=capabilities/
      pulls[project] += 1
    elsif components[2] =~ /cmd=unbundle/
      pushes[project] += 1
    elsif components[3] == "archive"
      zips[project] += 1
    end

  elsif components[1] == "projects"

    # path is /projects/project or /projects/project/something

    project = components[2]
    project = project.split("?")[0] if project
    next unless is_public_project?(project)

    hits[project] += 1

  end

  parseable += 1
end

# Each clone is also a pull; deduct it from the pulls hash, because we
# want that to contain only non-clone pulls.
#
# Every clone has to be deducted, so subtract the project's clone count
# rather than a flat 1 per project.

clones.each do |project, count|
  pulls[project] -= count
end

# Write the results to standard output: one line per per-project table,
# in the order clones, pulls, pushes, zips, hits, followed by the totals.
print clones, "\n"
print pulls, "\n"
print pushes, "\n"
print zips, "\n"
print hits, "\n"

print parseable, " parseable\n"
print unparseable, " unparseable\n"
