Mercurial > hg > soundsoftware-site
comparison extra/soundsoftware/get-apache-log-stats.rb @ 979:56a38a9f6204 cannam
Various fixes
author | Chris Cannam |
---|---|
date | Thu, 25 Oct 2012 14:08:58 +0100 |
parents | bbb88c44f805 |
children | 9b4919de5317 |
comparison
equal
deleted
inserted
replaced
978:bbb88c44f805 | 979:56a38a9f6204 |
---|---|
36 @projects = Hash.new | 36 @projects = Hash.new |
37 | 37 |
38 parseable = 0 | 38 parseable = 0 |
39 unparseable = 0 | 39 unparseable = 0 |
40 | 40 |
41 def known_project?(project) | 41 def is_public_project?(project) |
42 if !project | 42 if !project |
43 false | 43 false |
44 elsif @projects.key?(project) | 44 elsif @projects.key?(project) |
45 true | 45 @projects[project].is_public? |
46 else | 46 else |
47 pobj = Project.find_by_identifier(project) | 47 pobj = Project.find_by_identifier(project) |
48 if pobj | 48 if pobj |
49 @projects[project] = pobj | 49 @projects[project] = pobj |
50 true | 50 pobj.is_public? |
51 else | 51 else |
52 print "Project not found: ", project | 52 print "Project not found: ", project, "\n" |
53 false | 53 false |
54 end | 54 end |
55 end | 55 end |
56 end | 56 end |
57 | 57 |
58 ARGF.each do |line| | 58 STDIN.each do |line| |
59 | 59 |
60 record = parser.parse(line) | 60 record = parser.parse(line) |
61 | 61 |
62 # most annoyingly, the parser can't handle the comma-separated list | 62 # most annoyingly, the parser can't handle the comma-separated list |
63 # in X-Forwarded-For where it has more than one element. If it has | 63 # in X-Forwarded-For where it has more than one element. If it has |
67 record = parser.parse(filtered) | 67 record = parser.parse(filtered) |
68 end | 68 end |
69 | 69 |
70 # discard, but count, unparseable lines | 70 # discard, but count, unparseable lines |
71 if not record | 71 if not record |
72 print "Line not parseable: ", line, "\n" | |
72 unparseable += 1 | 73 unparseable += 1 |
73 next | 74 next |
74 end | 75 end |
75 | 76 |
76 # discard everything that isn't a 200 OK response | 77 # discard everything that isn't a 200 OK response |
82 # pull out request e.g. GET / HTTP/1.0 | 83 # pull out request e.g. GET / HTTP/1.0 |
83 request = record["%r"] | 84 request = record["%r"] |
84 | 85 |
85 # split into method, path, protocol | 86 # split into method, path, protocol |
86 if not request =~ /^[^\s]+ ([^\s]+) [^\s]+$/ | 87 if not request =~ /^[^\s]+ ([^\s]+) [^\s]+$/ |
88 print "Line not parseable (bad method, path, protocol): ", line, "\n" | |
87 unparseable += 1 | 89 unparseable += 1 |
88 next | 90 next |
89 end | 91 end |
90 | 92 |
91 # get the path e.g. /projects/weevilmatic and split on / | 93 # get the path e.g. /projects/weevilmatic and split on / |
93 components = path.split("/") | 95 components = path.split("/") |
94 | 96 |
95 # should have at least two elements unless path is "/"; first should | 97 # should have at least two elements unless path is "/"; first should |
96 # be empty (begins with /) | 98 # be empty (begins with /) |
97 if path != "/" and (components.size < 2 or components[0] != "") | 99 if path != "/" and (components.size < 2 or components[0] != "") |
100 print "Line not parseable (degenerate path): ", line, "\n" | |
98 unparseable += 1 | 101 unparseable += 1 |
99 next | 102 next |
100 end | 103 end |
101 | 104 |
102 if components[1] == "hg" | 105 if components[1] == "hg" |
103 | 106 |
104 # path is /hg/project?something or /hg/project/something | 107 # path is /hg/project?something or /hg/project/something |
105 | 108 |
106 project = components[2].split("?")[0] | 109 project = components[2].split("?")[0] |
107 if not known_project?(project) | 110 if not is_public_project?(project) |
108 next | 111 next |
109 end | 112 end |
110 | 113 |
111 if components[2] =~ /&roots=00*$/ | 114 if components[2] =~ /&roots=00*$/ |
112 clones[project] += 1 | 115 clones[project] += 1 |
121 elsif components[1] == "projects" | 124 elsif components[1] == "projects" |
122 | 125 |
123 # path is /projects/project or /projects/project/something | 126 # path is /projects/project or /projects/project/something |
124 | 127 |
125 project = components[2] | 128 project = components[2] |
126 if not known_project?(project) | 129 project = project.split("?")[0] if project |
130 if not is_public_project?(project) | |
127 next | 131 next |
128 end | 132 end |
129 | 133 |
130 project = project.split("?")[0] | 134 project = project.split("?")[0] |
131 hits[project] += 1 | 135 hits[project] += 1 |