Revision 1025:02ee54197879 extra/soundsoftware

View differences:

extra/soundsoftware/extract-javadoc.sh
35 35
# package declarations
36 36

  
37 37
find "$projectdir" -type f -name \*.java \
38
    -exec grep '^ *package [a-zA-Z][a-zA-Z0-9\._-]*; *$' \{\} /dev/null \; |
39
    sed -e 's/\/[^\/]*: *package */:/' -e 's/; *$//' |
38
    -exec egrep '^ *package +[a-zA-Z][a-zA-Z0-9\._-]*;.*$' \{\} /dev/null \; |
39
    sed -e 's/\/[^\/]*: *package */:/' -e 's/;.*$//' |
40 40
    sort | uniq | (
41 41
	current_prefix=
42 42
	current_packages=
extra/soundsoftware/get-apache-log-stats.rb
1

  
2
# Read an Apache log file in SoundSoftware site format from stdin and
3
# produce some per-project stats.
4
#
5
# Invoke with e.g.
6
#
7
# cat /var/log/apache2/code-access.log | \
8
#   script/runner -e production extra/soundsoftware/get-apache-log-stats.rb
9

  
10

  
11
# Use the ApacheLogRegex parser, a neat thing
12
# See http://www.simonecarletti.com/blog/2009/02/apache-log-regex-a-lightweight-ruby-apache-log-parser/
13
require 'apachelogregex'
14

  
15
# This is the format defined in our httpd.conf
16
# Log line layout; this mirrors the LogFormat defined in our httpd.conf
vhost_combined_format = '%v:%p %h %{X-Forwarded-For}i %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"'

parser = ApacheLogRegex.new(vhost_combined_format)

# Per-project tallies, keyed by project name. Each hash yields 0 for a
# project that has not been seen yet, so entries can be bumped with +=.
clones = Hash.new(0)   # hg clones
pulls = Hash.new(0)    # hg pulls
pushes = Hash.new(0)   # hg pushes
zips = Hash.new(0)     # hg archive requests (i.e. Download as Zip)
hits = Hash.new(0)     # hits to pages under /projects/projectname

# Cache: project name -> Project object
@projects = {}

# Running totals of log lines that could / could not be interpreted
parseable = 0
unparseable = 0
40

  
41
# Tell whether +project+ (a project identifier taken from a request
# path, or nil) names a public project.
#
# Project objects that have been looked up are remembered in @projects,
# so each identifier is fetched through Project.find_by_identifier at
# most once while it keeps resolving. Unknown identifiers are reported
# on stdout and treated as not public.
def is_public_project?(project)
  return false if !project

  # ignore numerical project ids, they are only used when editing projects
  return false if project =~ /^\d+$/

  # already looked up earlier in this run
  return @projects[project].is_public? if @projects.key?(project)

  found = Project.find_by_identifier(project)
  unless found
    print "Project not found: ", project, "\n"
    return false
  end

  @projects[project] = found
  found.is_public?
end
60

  
61
# Print one line per project in +h+ (project name -> count), highest
# count first, in the form "<count> <project name> [<identifier>]".
# Projects whose count is zero or negative are left out. The display
# name is read from the Project object cached in @projects.
def print_stats(h)
  ranked = h.keys.sort { |x, y| h[y] <=> h[x] }
  ranked.each do |ident|
    count = h[ident]
    next unless count > 0
    print count, " ", @projects[ident].name, " [", ident, "]\n"
  end
end
68

  
69
# Main pass: read the access log from stdin one line at a time and update
# the per-project tallies (clones, pulls, pushes, zips, hits) and the
# parseable/unparseable totals.
#
# Only 200 responses not apparently requested by a crawler are considered.
# Requests under /hg/<project> are classified by their query string (or an
# "archive" path element); requests under /projects/<project> count as
# page hits. Requests for projects that are not public are dropped
# without being counted as parseable.
STDIN.each do |line|

  record = parser.parse(line)

  # most annoyingly, the parser can't handle the comma-separated list
  # in X-Forwarded-For where it has more than one element. If it has
  # failed, remove any IP addresses or the word "unknown" with
  # trailing commas and try again
  if not record
    filtered = line.gsub(/(unknown|([0-9]+\.){3}[0-9]+),\s*/, "")
    record = parser.parse(filtered)
  end

  # discard, but count, unparseable lines
  if not record
    print "Line not parseable: ", line, "\n"
    unparseable += 1
    next
  end

  # discard everything that isn't a 200 OK response
  next if record["%>s"] != "200"

  # discard anything apparently requested by a crawler
  next if record["%{User-Agent}i"] =~ /(bot|slurp|crawler|spider|Redmine)\b/i

  # pull out request e.g. GET / HTTP/1.0
  request = record["%r"]

  # split into method, path, protocol
  if not request =~ /^[^\s]+ ([^\s]+) [^\s]+$/
    print "Line not parseable (bad method, path, protocol): ", line, "\n"
    unparseable += 1
    next
  end

  # get the path e.g. /projects/weevilmatic and split on /
  path = $~[1]
  components = path.split("/")

  # should have at least two elements unless path is "/"; first should
  # be empty (begins with /)
  if path != "/" and (components.size < 2 or components[0] != "")
    print "Line not parseable (degenerate path): ", line, "\n"
    unparseable += 1
    next
  end

  if components[1] == "hg"

    # path is /hg/project?something or /hg/project/something.
    # A bare "/hg" or "/hg/" has no project element at all; treat it like
    # the /projects branch does (nil project => not public => skipped)
    # instead of calling split on nil.
    repo = components[2]
    project = repo ? repo.split("?")[0] : nil
    if not is_public_project?(project)
      next
    end

    if repo =~ /&roots=00*$/
      clones[project] += 1
    elsif repo =~ /cmd=capabilities/
      pulls[project] += 1
    elsif repo =~ /cmd=unbundle/
      pushes[project] += 1
    elsif components[3] == "archive"
      zips[project] += 1
    end

  elsif components[1] == "projects"

    # path is /projects/project or /projects/project/something

    project = components[2]
    project = project.split("?")[0] if project
    if not is_public_project?(project)
      next
    end

    hits[project] += 1

  end

  parseable += 1
end
152

  
153
# Each clone is also a pull; deduct the clones from the pulls hash,
# because we want that to contain only non-clone pulls. The deduction is
# one pull per clone counted (not one per project), so a project that was
# cloned several times has all of those clones removed from its pulls.
clones.each do |project, clone_count|
  pulls[project] -= clone_count
end

# Summary of how many log lines could be interpreted
print parseable, " parseable\n"
print unparseable, " unparseable\n"

# Per-project tables, one per kind of access

print "\nMercurial clones:\n"
print_stats clones

print "\nMercurial pulls (excluding clones):\n"
print_stats pulls

print "\nMercurial pushes:\n"
print_stats pushes

print "\nMercurial archive (zip file) downloads:\n"
print_stats zips

print "\nProject page hits (excluding crawlers):\n"
print_stats hits
178

  
179

  
extra/soundsoftware/get-statistics.rb
1
# this script will get stats from the repo and print them to stdout
2

  
3
# USAGE: 
4

  
5
# ./script/runner -e production extra/soundsoftware/get-statistics.rb 
6
#
7

  
8
d1 = Date.parse("20100701") # => 1 Jul 2010
9
d2 = Date.today
10

  
11
# Turn a series of running totals into the step-by-step increases.
#
# The first element is compared against zero, so it is returned as-is;
# every later element becomes its difference from the element before it.
# An empty array yields an empty array.
def delta_array(iarray)
  previous = 0
  iarray.map do |value|
    step = value - previous
    previous = value
    step
  end
end
20

  
21
# List the first day of every month from the month containing +d1+ to the
# month containing +d2+, inclusive, in ascending order.
#
# d1, d2:: dates (anything responding to year/month and comparable with <=)
# Returns an Array of Date; a single element when both dates fall in the
# same month.
# Raises ArgumentError when d1 is later than d2.
def months_between(d1, d2)
  # validate before doing any work, and say what was wrong
  raise ArgumentError, "d1 (#{d1}) must not be later than d2 (#{d2})" unless d1 <= d2

  first_month = Date.civil(d1.year, d1.month, 1)
  last_month = Date.civil(d2.year, d2.month, 1)

  months = []
  current = first_month
  while current < last_month
    months << current
    current = current >> 1   # Date#>> advances by whole months
  end

  months << last_month
end
35

  
36
# List sample dates from +d1+ to +d2+ at a fixed interval of whole weeks.
#
# NOTE: despite the name, the default interval is TWO weeks, which is the
# step this method has always used; pass step_weeks to choose another.
#
# d1, d2::      dates (only year/month/day are used)
# step_weeks::  positive number of weeks between samples (default 2)
# Returns an Array of Date starting at d1, stepping by the interval while
# still before d2, and always ending with d2 itself (so the last gap may
# be shorter than the interval).
# Raises ArgumentError when d1 is later than d2 or step_weeks is not positive.
def weeks_between(d1, d2, step_weeks = 2)
  raise ArgumentError, "d1 (#{d1}) must not be later than d2 (#{d2})" unless d1 <= d2
  # a zero or negative step would never reach the end date
  raise ArgumentError, "step_weeks must be positive (got #{step_weeks})" unless step_weeks > 0

  start_date = Date.civil(d1.year, d1.month, d1.day)
  end_date = Date.civil(d2.year, d2.month, d2.day)

  # Date#+ counts in days, so no ActiveSupport duration helper is needed
  step_days = 7 * step_weeks

  weeks = []
  while start_date < end_date
    weeks << start_date
    start_date = start_date + step_days
  end

  weeks << end_date
end
50

  
51
# Print a month-by-month table of how the number of active users, QM
# users and projects has grown.
#
# d1:: first date of the period (default 1 Jul 2010)
# d2:: last date of the period (default today)
#
# The dates are parameters because locals assigned at the top level of the
# script are not visible inside a method body; reading d1/d2 from there
# raised NameError. The defaults are the values the script assigns.
#
# For the first day of every month in the period (via months_between),
# counts the rows created on or before that date, then prints one line
# per month with each total followed by its increase over the previous
# month (via delta_array). Output columns:
#   Date Users D_Users QM_Users D_QM_users Projects D_Projects
def get_user_project_evol_stats(d1 = Date.parse("20100701"), d2 = Date.today)
  dates = months_between(d1, d2)

  # running totals, one entry per date
  n_users = []
  n_projects = []
  qm_users = []

  dates.each do |date|
    # users with status '1' created on or before the date
    users = User.find_by_sql(["SELECT * FROM users WHERE users.status = '1' AND users.created_on <= ?;", date])
    projects = Project.find_by_sql(["SELECT * FROM projects WHERE projects.created_on <= ?;", date])

    # as above, restricted to a qmul mail address or institution id '99'
    qm_users_list = User.find_by_sql(["SELECT * FROM users,ssamr_user_details WHERE users.status = '1' AND ssamr_user_details.user_id = users.id AND (users.mail like '%qmul%' OR ssamr_user_details.institution_id = '99') AND users.created_on <= ?;", date])

    qm_users << qm_users_list.count
    n_users << users.count
    n_projects << projects.count
  end

  user_deltas = delta_array(n_users)
  proj_deltas = delta_array(n_projects)
  qm_user_deltas = delta_array(qm_users)

  puts "Date Users D_Users QM_Users D_QM_users Projects D_Projects"

  dates.zip(n_users, user_deltas, qm_users, qm_user_deltas, n_projects, proj_deltas).each do |a, b, c, d, e, f, g|
    puts "#{a} #{b} #{c} #{d} #{e} #{f} #{g}"
  end
end
84

  
85

  
86
# Print a one-line summary of project membership and cross-institution
# collaboration for all projects created before a cutoff date.
#
# date:: cutoff passed to the created_on condition (default "20121101",
#        the value that used to be hard-coded here)
#
# For each project, counts its users and the number of distinct
# institutions among them. A user whose institution is blank or
# "No Institution Set" is attributed to Queen Mary if the mail address
# matches, otherwise the mail address itself stands in for the
# institution. The printed fields are: project count, mean users per
# project (UpP), projects with more than 1 / more than 2 users, projects
# with more than 1 / more than 2 institutions, and mean institutions per
# project (IpP).
def get_project_status(date = "20121101")
  all_projects = Project.find(:all, :conditions => ["created_on < ?", date])
  #  all_projects = Project.find(:all, :conditions => ["is_public = ? AND created_on < ?", true, date])
  #  all_projects = Project.find(:all, :conditions => ["is_public = ? AND created_on < ?", false, date])

  collab = []           # distinct institutions per project
  users_per_proj = []   # user count per project

  all_projects.each do |proj|
    insts = []

    proj.users.each do |u|
      if u.institution == "" || u.institution == "No Institution Set"
        if u.mail.include?("qmul.ac.uk") || u.mail.include?("andrewrobertson77")
          insts << "Queen Mary, University of London"
        else
          # no institution known: the mail address stands in for it
          insts << u.mail
        end
      else
        insts << u.institution
      end
    end

    users_per_proj << proj.users.count
    collab << insts.uniq.count
  end

  puts "Projects: #{all_projects.count} UpP: #{users_per_proj.sum / users_per_proj.size.to_f} Users1+: #{users_per_proj.count{|x| x> 1}} Users2+: #{users_per_proj.count{|x| x> 2}} Collab1+: #{collab.count{|x| x > 1}} Collab2+: #{collab.count{|x| x > 2}} IpP: #{collab.sum / collab.size.to_f}"
end
124

  
125
# For every user with status 1, print one line holding two numbers: how
# many projects the user belongs to, followed by how many of those are
# public.
def get_user_projects_ratios()
  active_users = User.find(:all, :conditions => {:status => 1})

  # public-project count for each user, in the same order as active_users
  public_counts = active_users.map do |member|
    member.projects.find(:all, :conditions => {:is_public => true}).count
  end

  active_users.zip(public_counts).each do |member, n_public|
    puts "#{member.projects.count} #{n_public}"
  end
end
134

  
135
# Count how many users with status 1 belong to each institution.
#
# Returns a Hash mapping each institution value (as returned by
# user.institution) to the number of users reporting it; the hash yields
# 0 for institutions that were not seen.
def get_inst_list()
  users = User.find(:all, :conditions => {:status => 1})
  inst_list = users.map { |user| user.institution }

  # the tally is the method's result; no throwaway local is needed
  inst_list.inject(Hash.new(0)) { |tally, inst| tally[inst] += 1; tally }
end
142

  
143

  
144
# get_user_projects_ratios()
145
# get_user_project_evol_stats()
146

  
147
# Report currently enabled: the project status summary. The other report
# calls above are left commented out.
get_project_status()

Also available in: Unified diff