To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / extra / soundsoftware / get-apache-log-stats.rb @ 978:bbb88c44f805

History | View | Annotate | Download (3.53 KB)

# Read an Apache log file from the SoundSoftware site and produce some
# per-project stats.
#
# Invoke with e.g.
#
# cat /var/log/apache2/code-access.log | \
#   script/runner -e production extra/soundsoftware/get-apache-log-stats.rb

# Use the ApacheLogRegex parser, a neat thing
# See http://www.simonecarletti.com/blog/2009/02/apache-log-regex-a-lightweight-ruby-apache-log-parser/
require 'apachelogregex'
# This is the format defined in our httpd.conf. The string is single-quoted,
# so the backslashes before the double quotes are passed to the parser
# literally.
vhost_combined_format = '%v:%p %h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"'

# Parser used to turn each log line into a record that is indexed by the
# format directives above (e.g. record["%r"], record["%>s"])
parser = ApacheLogRegex.new(vhost_combined_format)

# Per-project tallies. Each is a hash whose default value is 0, so a project
# can be incremented the first time it is seen without initialising it.

# project name -> count of hg clones
clones = Hash.new(0)

# project name -> count of hg pulls
pulls = Hash.new(0)

# project name -> count of hg pushes
pushes = Hash.new(0)

# project name -> count of hg archive requests (i.e. Download as Zip)
zips = Hash.new(0)

# project name -> count of hits to pages under /projects/projectname
hits = Hash.new(0)

# project name -> Project object. This is an instance variable rather than a
# local because it is read and written inside known_project?, and a method
# body cannot see top-level local variables.
@projects = {}

# Number of log lines that were processed to the end of the main loop
parseable = 0

# Number of log lines rejected because the line, its request field or its
# path could not be parsed
unparseable = 0

# Report whether +project+ identifies an existing project, as determined by
# Project.find_by_identifier.
#
# Successful lookups are remembered in @projects (identifier -> Project
# object), so a project that has been found once is not looked up again.
# Failed lookups are not remembered: an unknown identifier is looked up, and
# reported on standard output, every time it is seen.
#
# project:: identifier taken from a request path, or nil if the path had none
#
# Returns true if the project exists, false otherwise (including for nil).
def known_project?(project)
  return false unless project
  return true if @projects.key?(project)

  pobj = Project.find_by_identifier(project)
  unless pobj
    # End the message with a newline so that successive reports, and the
    # statistics printed at the end of the run, each start on their own line
    print "Project not found: ", project, "\n"
    return false
  end

  @projects[project] = pobj
  true
end

# Main loop: classify every log line read from ARGF (the files named on the
# command line, or standard input) and update the per-project counters.
#
# A line ends up in exactly one of these states:
#  * unparseable -- the line, its request field or its path is malformed
#  * skipped     -- not a 200 response, requested by a crawler, or refers to
#                   a project that known_project? rejects (counted nowhere)
#  * parseable   -- everything else, whether or not it touched a counter
ARGF.each do |line|

  record = parser.parse(line)

  # most annoyingly, the parser can't handle the comma-separated list
  # in X-Forwarded-For where it has more than one element. If it has
  # failed, remove any IP addresses with trailing commas and try again
  unless record
    filtered = line.gsub(/([0-9]+\.){3}[0-9]+,\s*/, "")
    record = parser.parse(filtered)
  end

  # discard, but count, unparseable lines
  unless record
    unparseable += 1
    next
  end

  # discard everything that isn't a 200 OK response
  next if record["%>s"] != "200"

  # discard anything apparently requested by a crawler
  next if record["%{User-Agent}i"] =~ /(bot|slurp|crawler|spider|Redmine)\b/i

  # pull out request e.g. GET / HTTP/1.0
  request = record["%r"]

  # split into method, path, protocol
  unless request =~ /^[^\s]+ ([^\s]+) [^\s]+$/
    unparseable += 1
    next
  end

  # get the path e.g. /projects/weevilmatic and split on /
  path = Regexp.last_match(1)
  components = path.split("/")

  # should have at least two elements unless path is "/"; first should
  # be empty (begins with /)
  if path != "/" && (components.size < 2 || components[0] != "")
    unparseable += 1
    next
  end

  if components[1] == "hg"

    # path is /hg/project?something or /hg/project/something

    # A bare /hg request has no project component, so components[2] is nil.
    # to_s turns that into "", whose split yields no project name at all;
    # known_project? then rejects it and the line is skipped, instead of the
    # whole run aborting with NoMethodError on nil.
    target = components[2].to_s
    project = target.split("?")[0]
    next unless known_project?(project)

    if target =~ /&roots=00*$/
      clones[project] += 1
    elsif target =~ /cmd=capabilities/
      pulls[project] += 1
    elsif target =~ /cmd=unbundle/
      pushes[project] += 1
    elsif components[3] == "archive"
      zips[project] += 1
    end

  elsif components[1] == "projects"

    # path is /projects/project or /projects/project/something

    # Strip any query string before the lookup, so that a request such as
    # /projects/name?tab=x is counted against "name" rather than being
    # reported as an unknown project called "name?tab=x"
    project = components[2].to_s.split("?")[0]
    next unless known_project?(project)

    hits[project] += 1

  end

  parseable += 1
end

# Each clone is also a pull, so every clone counted for a project has also
# added one to that project's pull count. Deduct the project's whole clone
# count (not just 1 per project), because we want the pulls hash to contain
# only non-clone pulls.
clones.each do |project, count|
  pulls[project] -= count
end

# Report the per-project counters, one hash per line, in the order:
# clones, pulls, pushes, zips, hits
print clones, "\n"
print pulls, "\n"
print pushes, "\n"
print zips, "\n"
print hits, "\n"

# Report how many log lines were processed and how many were rejected
print parseable, " parseable\n"
print unparseable, " unparseable\n"