comparison cpack/dml/examples/system_metrics.swinb @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:718306e29690
1 <div class="notebook">
2
3 <div class="nb-cell markdown">
4 ## Analysis of collection level computation performance
5
6 This following block of code defines some predicates for browsing the database of collection level computations and tagging them by what kind of computation it was, what the implementation language was, the date and time of the computation, the size of the collection and the duration of the computation.
7
8 Scroll down the query boxes below and press the green arrow to run each query.
9 </div>
10
11 <div class="nb-cell program">
12 :- use_module(library(computations)).
13 :- use_module(library(mlserver)).
14 :- use_module(library(real)).
15 :- use_module(library(dml_c3)).
16
17 :- use_rendering(rdf,[resource_format(nslabel)]).
18 :- use_rendering(matlab,[format(svg),size(15,10)]).
19 :- use_rendering(c3).
20
21 % Enumerates collection level analysis events, providing
22 % Func=Label/Lang, collection size, and computation duration.
23 cla_op(Lab,Size,Dur) :- cla_op(label&gt;&gt;relabel-size-dur,Lab-Size-Dur).
24
25 % main CLA browser predicate. Uses Filter to determine which computations
26 % are returned and what information is provided.
27 cla_op(Filter,Out) :-
28 browse(perspectives:cla_memo(Op,CID,_),comp(_,Time,Dur)-ok),
29 dataset_size(CID,Size),
30 x(Filter,op(Op,Time,Size,Dur),Out).
31
32 % field extractor predicate (a v. small arrow interpreter!)
33 x(id,X,X).
34 x(F&gt;&gt;G,X,Y) :- x(F,X,Z), x(G,Z,Y).
35 x(fst(F),X1-Y,X2-Y) :- x(F,X1,X2).
36 x(snd(F),X-Y1,X-Y2) :- x(F,Y1,Y2).
37 x(F1-F2,X,Y1-Y2) :- x(F1,X,Y1), x(F2,X,Y2).
38 x(H,X,Y) :- defx(H,F), x(F,X,Y).
39 x(add(N),X,Y) :- Y is X+N.
40 x(arg(N),X,Y) :- arg(N,X,Y).
41 x(log,X,Y) :- Y is log10(X).
42 x(jitter(L,U),X,Y) :- random(D), Y is L+(U-L)*D+X.
43 x(divide,X-Y,Z) :- Z is X/Y.
44 x(diff,X-Y,Z) :- Z is Y-X.
45 x(quant(Q),X,Y) :- Y is Q*floor(X/Q).
46 x(event,T1-_,T1-start).
47 x(event,T1-DT,T2-stop) :- T2 is T1+DT.
48
49 % gets month number and name from timestamp
50 x(month_num_name,Time,Num-Name) :-
51 format_time(atom(Name),'%B',Time),
52 stamp_date_time(Time,Date,local),
53 date_time_value(month,Date,Num).
54
55 % Transforms collection level operation spec to Label/Lang,
56 % where Lang is in { ml, pl, r, py }.
57 x(relabel,Op,Label/Lang) :- cla_label_lang(Op,Label,Lang).
58 x(lang,_/Lang,Lang).
59
60 defx(op, arg(1)).
61 defx(time, arg(2)).
62 defx(size, arg(3)).
63 defx(dur, arg(4)).
64 defx(month, time&gt;&gt;month_num_name).
65
66 normalise_hist(Name-Pairs,Name-Pairs2) :-
67 unzip(Pairs,Vals,Counts),
68 stoch(Counts,Probs),
69 unzip(Pairs2,Vals,Probs).
70
71 stoch(Xs,Ys) :- sumlist(Xs,Total), maplist(divby(Total),Xs,Ys).
72 divby(Z,X,Y) :- Y is X/Z.
73
74 concurrency([T0-start|StartStopEvents],ConcurrencyEvents) :-
75 foldl(conc,StartStopEvents,ConcurrencyEvents,T0-1,_).
76
77 conc(T2-Event,(T1-T2)-N1,T1-N1,T2-N2) :-
78 ( Event=start -&gt; succ(N1,N2)
79 ; Event=stop -&gt; succ(N2,N1)
80 ).
81
82 % ------- tools for building C3 charts -----------
83
84 %% add_points(+Data:pair(term,list(pair(number,number))))// is det.
85 % Adds a set of points to a scatter plot
86 add_points(Name-Pairs) --&gt;
87 {unzip(Pairs,Xs,Ys)},
88 add_points(Name,Xs,Ys).
89
90 %% add_points(+Name:term,+X:list(number),+Y:list(number),+C1:c3,-C2:c3) is det.
91 % adds a named set of points to a C3 scatter plot.
92 add_points(Name1,Xs,Ys,Ch1,Ch2) :-
93 term_to_atom(Name1,Name),
94 atom_concat(Name,'_x',NameX),
95 Ch2=Ch1.put(data/columns,[[NameX|Xs],[Name|Ys]|Ch1.data.columns])
96 .put(data/xs/Name,NameX).
97 </div>
98
99 <div class="nb-cell markdown">
100 This query shows the relationship between collection size and computation time for all collection level analyses, grouped by month of computation. Note that the axis scales
101 are _logarithmic_.
102 </div>
103
104 <div class="nb-cell query">
105 setof(Size-Dur,
106 cla_op(month-size&gt;&gt;log&gt;&gt;jitter(0,0.05)-dur&gt;&gt;log,(_-M)-Size-Dur),
107 _Points),
108 call_dcg(( c3:scat('log size','log dur'),
109 add_points(all-_Points),
110 c3:legend(false),
111 c3:zoom(true)
112 ),
113 c3{},Ch0).
114 </div>
115
116 <div class="nb-cell markdown">
117 Histogram of logarithm of computation time per item, quantised to 0.1 bins (a ratio of about 1.26).
118 </div>
119
120 <div class="nb-cell query">
121 Q=0.1, % size of quantisation bin (in log domain)
122 histof(T, cla_op((dur-size)&gt;&gt;divide&gt;&gt;log&gt;&gt;quant(Q),T), _Hist),
123 call_dcg(( c3:bar('log dur','count'),
124 add_points(all-_Hist),
125 c3:put(bar/width/ratio,Q)
126 ), c3{},Chart).
127 </div>
128
129 <div class="nb-cell markdown">
130 Histogram of logarithm of computation time per item, quantised to bins of width 0.25, grouped computation label and language.
131 </div>
132
133 <div class="nb-cell query">
134 findall(L-Hist,
135 histof(T,
136 cla_op(op&gt;&gt;relabel-(dur-size)&gt;&gt;divide&gt;&gt;log&gt;&gt;quant(0.25),L-T),
137 Hist),
138 _Hists),
139 maplist(normalise_hist,_Hists,_Dists),
140 call_dcg(( c3:bar('log dur','count'),
141 foldl(add_points,_Dists),
142 c3:put(bar/width/ratio,0.5)
143 ), c3{}, Ch0).
144 </div>
145
146 <div class="nb-cell markdown">
147 Histogram of logarithm of computation time per item, quantised to bins of width 0.25, grouped language.
148 </div>
149
150 <div class="nb-cell query">
151 findall(L-Hist,
152 histof(T,
153 cla_op(op&gt;&gt;relabel&gt;&gt;lang - (dur-size)&gt;&gt;divide&gt;&gt;log&gt;&gt;quant(0.25),L-T),
154 Hist),
155 Hists),
156 maplist(normalise_hist,Hists,_Dists),
157 call_dcg(( c3:bar('log dur','fraction'),
158 foldl(add_points,_Dists),
159 c3:put(bar/width/ratio,0.45)
160 ), c3{}, Ch0).
161 </div>
162
163 <div class="nb-cell markdown">
164 This next query shows how collection size vs computation duration varies with the kind of analysis being done and the implementation language. Note that computations in Prolog have the lowest overheads, and that computations in Matlab seem to have the most variable range of durations for a given collection size.
165 </div>
166
167 <div class="nb-cell query">
168 findall(Op-_Ps,
169 setof(Size-Dur,
170 cla_op(op&gt;&gt;relabel-(size&gt;&gt;log)-dur&gt;&gt;log,Op-Size-Dur),
171 _Ps),
172 _Rs),
173 foldl(add_points,_Rs,c3{}.scat('log size','log dur'),Ch1).
174 </div>
175
176 <div class="nb-cell markdown">
177 This query is like the previous one, but grouped by language only.
178 </div>
179
180 <div class="nb-cell query">
181 findall(Op-_Ps,
182 setof(Size-Dur,
183 cla_op(op&gt;&gt;relabel&gt;&gt;lang-(size&gt;&gt;log)-dur&gt;&gt;log,Op-Size-Dur),
184 _Ps),
185 _Rs),
186 foldl(add_points,_Rs,c3{}.scat('log size','log dur'),Ch1).
187 </div>
188
189 <div class="nb-cell markdown">
190 This query breaks down the performance of analysis method by month. There does not seem to be any significant pattern in this other than the overall volume of computation done in each month.
191 </div>
192
193 <div class="nb-cell query">
194 setof(Month-_Ps,
195 setof(Size-Dur,
196 cla_op( month - op&gt;&gt;relabel - size&gt;&gt;log - dur&gt;&gt;log,
197 Month - Label - Size - Dur),
198 _Ps),
199 _Rs),
200 call(foldl(add_points,_Rs), c3{}.scat(log_size,log_dur), Ch).
201 </div>
202
203 <div class="nb-cell markdown">
204 This query analyses the degree of concurrency of collection level computations. It works by getting a complete set of point events describing the beginning and ending times of computations. Then the predicate concurrency/2 (defined in the initial code block) folds over these events and produces a list of time interval events of the form (StartTime-EndTime)-Concurrency, where Concurrency is the number of concurrent computations occuring over that interval. Then, the time intervals are mapped to durations and a histogram of concurrency weighted by duration is produced.
205 </div>
206
207 <div class="nb-cell query">
208 setof(Ev,cla_op((time-dur)&gt;&gt;event,Ev),_Evs),
209 concurrency(_Evs,_CEvs),
210 maplist(x(fst(diff)),_CEvs,_CEvs2),
211 weighted_histof(Dur,Conc,member(Dur-Conc,_CEvs2),Hist),
212 select(0-_,Hist,_Hist1),
213 unzip(_Hist1,_Values,_Durs),
214 c3_bar(concurrency-_Values,duration-_Durs,Chart).
215 </div>
216
217 </div>