Daniel@0
|
1 /* Part of DML (Digital Music Laboratory)
|
Daniel@0
|
2 Copyright 2014-2015 Samer Abdallah, University of London
|
Daniel@0
|
3
|
Daniel@0
|
4 This program is free software; you can redistribute it and/or
|
Daniel@0
|
5 modify it under the terms of the GNU General Public License
|
Daniel@0
|
6 as published by the Free Software Foundation; either version 2
|
Daniel@0
|
7 of the License, or (at your option) any later version.
|
Daniel@0
|
8
|
Daniel@0
|
9 This program is distributed in the hope that it will be useful,
|
Daniel@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
12 GNU General Public License for more details.
|
Daniel@0
|
13
|
Daniel@0
|
14 You should have received a copy of the GNU General Public
|
Daniel@0
|
15 License along with this library; if not, write to the Free Software
|
Daniel@0
|
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
17 */
|
Daniel@0
|
18
|
Daniel@0
|
19 :- module(compression,
|
Daniel@0
|
20 [ with_transducer/3
|
Daniel@0
|
21 , stream_length/1
|
Daniel@0
|
22 , print_delimited_dcg/1
|
Daniel@0
|
23 , complexity_method/1
|
Daniel@0
|
24 , complexity/3
|
Daniel@0
|
25 , conditional_complexity/4
|
Daniel@0
|
26 , min_complexity/3
|
Daniel@0
|
27 , min_conditional_complexity/4
|
Daniel@0
|
28 , ncd/4
|
Daniel@0
|
29 , conditional_ncd/5
|
Daniel@0
|
30 , write_bytes/1
|
Daniel@0
|
31 , length_prob/3
|
Daniel@0
|
32 , with_stream_encoding/3
|
Daniel@0
|
33 , with_zlib_stream/1
|
Daniel@0
|
34 , zsync//1
|
Daniel@0
|
35 ]).
|
Daniel@0
|
36
|
Daniel@0
|
37 /** <module> Tools for applying compression programs to abitrary streams.
|
Daniel@0
|
38
|
Daniel@0
|
39 This module provides a framework for applying a range of compression and
|
Daniel@0
|
40 delta compression programs to arbitrary streams generated by Prolog code.
|
Daniel@0
|
41 In particular, the predicates complexity/3 and conditional_complexity/4
|
Daniel@0
|
42 use compression programs to estimate Kolmogorov complexity and conditional
|
Daniel@0
|
43 complexity.
|
Daniel@0
|
44
|
Daniel@0
|
45 Compression programs are invoked by a collection of shell scripts included
|
Daniel@0
|
46 with this software, in the scripts/compression subdirectory of the dml cpack.
|
Daniel@0
|
47 The method to be used is determined by a term of type compression_method:
|
Daniel@0
|
48 ==
|
Daniel@0
|
49 compression_method ---> id % no compression
|
Daniel@0
|
50 ; compress(c_method)
|
Daniel@0
|
51 ; diff(d_method,pred).
|
Daniel@0
|
52 c_method ---> lzma ; gz ; bz.
|
Daniel@0
|
53
|
Daniel@0
|
54 d_method ---> bsdiff
|
Daniel@0
|
55 ; xdelta % using xdelta3 program
|
Daniel@0
|
56 ; vcdiff % using open-vcdiff
|
Daniel@0
|
57 ; vczip % using vczip tools from AT&T
|
Daniel@0
|
58 ; xdiff % using binary to text encoding and diff
|
Daniel@0
|
59 ; zzd(c_method, d_method)
|
Daniel@0
|
60 ; zzcd(c_method, d_method)
|
Daniel@0
|
61 ; dz(d_method, c_method).
|
Daniel@0
|
62 ==
|
Daniel@0
|
63
|
Daniel@0
|
64 Currently we do not subtract an known overheads associated with redundant
|
Daniel@0
|
65 information in compression program outputs, such as 'magic bytes' in a header
|
Daniel@0
|
66 used to enable tools to recognise which program was used to compress a given file.
|
Daniel@0
|
67 */
|
Daniel@0
|
68
|
Daniel@0
|
69 :- use_module(library(settings)).
|
Daniel@0
|
70 :- use_module(library(memo)).
|
Daniel@0
|
71 :- use_module(library(process)).
|
Daniel@0
|
72 :- use_module(library(fileutils)).
|
Daniel@0
|
73 :- use_module(library(swipe)).
|
Daniel@0
|
74 :- use_module(library(dcg_core)).
|
Daniel@0
|
75 :- use_module(library(dcg_codes)).
|
Daniel@0
|
76 :- use_module(library(zlib)).
|
Daniel@0
|
77
|
Daniel@0
|
78 user:file_search_path(compression,dml(scripts/compression)).
|
Daniel@0
|
79
|
Daniel@0
|
80 :- initialization time(memo_attach(memo(compression),[])).
|
Daniel@0
|
81
|
Daniel@0
|
82 %% print_delimited_dcg(+P:phrase(list(code))) is det.
|
Daniel@0
|
83 %
|
Daniel@0
|
84 % Runs the DCG phrase to generate a list of character codes, and then
|
Daniel@0
|
85 % print the textually encoded length of the resulting list, followed by
|
Daniel@0
|
86 % a carriage return, followed by the characters themselves. Thus, the resulting
|
Daniel@0
|
87 % sequence of bytes is self-delimiting.
|
Daniel@0
|
88 print_delimited_dcg(Phrase) :-
|
Daniel@0
|
89 phrase(Phrase,Codes),
|
Daniel@0
|
90 length(Codes,Length),
|
Daniel@0
|
91 format('~d\n~s',[Length,Codes]).
|
Daniel@0
|
92
|
Daniel@0
|
93 :- meta_predicate
|
Daniel@0
|
94 complexity(+,0,-),
|
Daniel@0
|
95 min_complexity(-,0,-),
|
Daniel@0
|
96 conditional_complexity(+,0,0,-),
|
Daniel@0
|
97 min_conditional_complexity(-,0,0,-),
|
Daniel@0
|
98 conditional_ncd(+,0,0,0,-),
|
Daniel@0
|
99 ncd(+,0,0,-).
|
Daniel@0
|
100
|
Daniel@0
|
101 :- meta_predicate with_zlib_stream(//).
|
Daniel@0
|
102
|
Daniel@0
|
103 %% complexity(+Method:compression_method, +Writer:pred, -K:nonneg) is det.
|
Daniel@0
|
104 %% complexity(-Method:compression_method, +Writer:pred, -K:nonneg) is nondet.
|
Daniel@0
|
105 %% complexity(-Method:compression_method, -Writer:pred, -K:nonneg) is nondet.
|
Daniel@0
|
106 %
|
Daniel@0
|
107 % Estimates the Kolmogorov complexity in bits of the sequence of bytes written to the
|
Daniel@0
|
108 % current output stream by the goal Writer. If Writer is unbound on entry, then
|
Daniel@0
|
109 % browses the set of previously computed results, which are stored in a volatile memo.
|
Daniel@0
|
110 %
|
Daniel@0
|
111 % Method can be =|id|= for no compression, or =|compress(M)|= for compression using
|
Daniel@0
|
112 % program M. If Method is a ground term of the form =|diff(M,RefWriter)|=, then
|
Daniel@0
|
113 % complexity is estimated using a delta compression with respect to the bytes written
|
Daniel@0
|
114 % by RefWriter. Methods are described in the module header. Conditional complexity
|
Daniel@0
|
115 % can more conveniently be estimated using conditional_complexity/4.
|
Daniel@0
|
116 complexity(Method,Mod:Writer,K) :-
|
Daniel@0
|
117 ( nonvar(Writer), complexity_method(Method), ground(Method)
|
Daniel@0
|
118 *->complexity1(Method,Mod:Writer,K)
|
Daniel@0
|
119 ; browse(complexity1(Method,Mod:Writer,K))
|
Daniel@0
|
120 ).
|
Daniel@0
|
121
|
Daniel@0
|
122 execable(A) :- var(A), !, fail.
|
Daniel@0
|
123 execable((A,B)) :- !, execable(A), execable(B).
|
Daniel@0
|
124 execable(_).
|
Daniel@0
|
125
|
Daniel@0
|
126 :- volatile_memo min_complexity(-ground,+callable,-number).
|
Daniel@0
|
127 min_complexity(Best,Writer,KMin) :-
|
Daniel@0
|
128 aggregate(min(K,Meth),complexity(Meth,Writer,K),min(KMin,Best)).
|
Daniel@0
|
129
|
Daniel@0
|
130 %% conditional_complexity(+M:cc_method, +RefWriter:pred, +Writer:pred, -K:nonneg) is det.
|
Daniel@0
|
131 %
|
Daniel@0
|
132 % Estimate conditional Kolmogorov complexity of byte sequence produced by Writer
|
Daniel@0
|
133 % given bit sequence produced by RefWriter, using one of several methods:
|
Daniel@0
|
134 % ==
|
Daniel@0
|
135 % cc_method ---> id % no compression, complexity is length of data from Writer
|
Daniel@0
|
136 % ; compress(c_method) % ignore reference sequence and compress
|
Daniel@0
|
137 % ; delta(d_method) % do delta compression using given method
|
Daniel@0
|
138 % ; naive(c_method). % use naive estimate using concatentation method
|
Daniel@0
|
139 % ==
|
Daniel@0
|
140 % The naive estimate of K(x|y) is C(yx) - C(y), where C(.) is compressed length and
|
Daniel@0
|
141 % xy is simply x concatenated with y. It may give nonsensical zero results.
|
Daniel@0
|
142 % It is not the length of any concrete message that can produce x given y.
|
Daniel@0
|
143 conditional_complexity(Method,Ref,Writer,K) :-
|
Daniel@0
|
144 ( Method=delta(Diff), M=diff(Diff,Ref)
|
Daniel@0
|
145 % ; Method=compress(C), M=compress(C)
|
Daniel@0
|
146 % ; Method=compress(zlib), M=zlib
|
Daniel@0
|
147 ; Method=delta(zlib), M=zlib(Ref)
|
Daniel@0
|
148 ; Method=delta(dlzma), M=dlzma(Ref)
|
Daniel@0
|
149 % ; Method=id, M=id
|
Daniel@0
|
150 ),
|
Daniel@0
|
151 complexity(M,Writer,K).
|
Daniel@0
|
152
|
Daniel@0
|
153 conditional_complexity(naive(C),Ref,Writer,K) :-
|
Daniel@0
|
154 complexity(compress(C),Ref,KY),
|
Daniel@0
|
155 complexity(compress(C),(Ref,Writer),KXY),
|
Daniel@0
|
156 K is KXY - KY.
|
Daniel@0
|
157
|
Daniel@0
|
158 ncd(naive(C),Writer1,Writer2,D) :-
|
Daniel@0
|
159 complexity(compress(C),Writer1,K1),
|
Daniel@0
|
160 complexity(compress(C),Writer2,K2),
|
Daniel@0
|
161 complexity(compress(C),(Writer1,Writer2),K12),
|
Daniel@0
|
162 D is (K12 - min(K1,K2))/max(K1,K2).
|
Daniel@0
|
163
|
Daniel@0
|
164 ncd(CM-DM,Writer1,Writer2,D) :-
|
Daniel@0
|
165 complexity(CM,Writer1,K1),
|
Daniel@0
|
166 complexity(CM,Writer2,K2),
|
Daniel@0
|
167 conditional_complexity(DM,Writer1,Writer2,K2g1),
|
Daniel@0
|
168 conditional_complexity(DM,Writer2,Writer1,K1g2),
|
Daniel@0
|
169 D is max(min(K1,K1g2)/K1, min(K2,K2g1)/K2).
|
Daniel@0
|
170
|
Daniel@0
|
171 ncd(opt,Writer1,Writer2,D) :-
|
Daniel@0
|
172 min_complexity(_,Writer1,K1),
|
Daniel@0
|
173 min_complexity(_,Writer2,K2),
|
Daniel@0
|
174 min_conditional_complexity(_,Writer1,Writer2,K2g1),
|
Daniel@0
|
175 min_conditional_complexity(_,Writer2,Writer1,K1g2),
|
Daniel@0
|
176 D is max(min(K1,K1g2)/K1, min(K2,K2g1)/K2).
|
Daniel@0
|
177
|
Daniel@0
|
178 conditional_ncd(naive(Method),Ref,Writer1,Writer2,D) :-
|
Daniel@0
|
179 conditional_complexity(Method,Ref,Writer1,K1),
|
Daniel@0
|
180 conditional_complexity(Method,Ref,Writer2,K2),
|
Daniel@0
|
181 conditional_complexity(Method,Ref,(Writer1,Writer2),K12),
|
Daniel@0
|
182 D is (K12 - min(K1,K2))/max(K1,K2).
|
Daniel@0
|
183
|
Daniel@0
|
184 conditional_ncd(DM,Ref,Writer1,Writer2,D) :-
|
Daniel@0
|
185 conditional_complexity(DM,Ref,Writer1,K1),
|
Daniel@0
|
186 conditional_complexity(DM,Ref,Writer2,K2),
|
Daniel@0
|
187 conditional_complexity(DM,(Ref,Writer1),Writer2,K2g1),
|
Daniel@0
|
188 conditional_complexity(DM,(Ref,Writer2),Writer1,K1g2),
|
Daniel@0
|
189 D is max(min(K1,K1g2)/K1, min(K2,K2g1)/K2).
|
Daniel@0
|
190
|
Daniel@0
|
191 :- volatile_memo min_conditional_complexity(-ground,+callable,+callable,-number).
|
Daniel@0
|
192 min_conditional_complexity(Best,Ref,Writer,KMin) :-
|
Daniel@0
|
193 aggregate(min(K,Meth),(conditional_complexity(Meth,Ref,Writer,K),Meth\=naive(_)), min(KMin,Best)).
|
Daniel@0
|
194
|
Daniel@0
|
195 %% complexity_method(-M:compression_method) is nondet.
|
Daniel@0
|
196 complexity_method(diff(Diff,_)) :- command(diff0(Diff,library('swipe.pl')),_,_).
|
Daniel@0
|
197 complexity_method(compress(Comp)) :- command(compress(Comp),_,_).
|
Daniel@0
|
198 complexity_method(id).
|
Daniel@0
|
199 complexity_method(zlib).
|
Daniel@0
|
200 complexity_method(zlibi).
|
Daniel@0
|
201 complexity_method(zlib(_)).
|
Daniel@0
|
202 complexity_method(dlzma(_)).
|
Daniel@0
|
203
|
Daniel@0
|
204 :- persistent_memo complexity1(+ground,+callable,-number).
|
Daniel@0
|
205 complexity1(id,Writer,K) :- with_transducer(cat,Writer,stream_length(K)).
|
Daniel@0
|
206 complexity1(compress(Meth),Writer,K) :- with_transducer(compress(Meth), Writer, stream_length(K)).
|
Daniel@0
|
207 complexity1(diff(Diff,RefWriter),Writer,K) :-
|
Daniel@0
|
208 with_temp_files([F1],
|
Daniel@0
|
209 ( with_output_to_file(F1,RefWriter),
|
Daniel@0
|
210 with_transducer(diff0(Diff,F1), Writer, stream_length(K)))).
|
Daniel@0
|
211 complexity1(zlib,Writer,K) :-
|
Daniel@0
|
212 with_zlib_stream(call_zsync(Writer,K)).
|
Daniel@0
|
213
|
Daniel@0
|
214 complexity1(zlibi,Writer,K) :-
|
Daniel@0
|
215 with_zlib_stream((call_zsync(write_bytes([255]),_), call_zsync(Writer,K))).
|
Daniel@0
|
216
|
Daniel@0
|
217 complexity1(zlib(Ref),Writer,K) :-
|
Daniel@0
|
218 with_zlib_stream((call_zsync(Ref,_), call_zsync(Writer,K))).
|
Daniel@0
|
219
|
Daniel@0
|
220 complexity1(dlzma(RefWriter),Writer,K) :-
|
Daniel@0
|
221 with_temp_files([F1],
|
Daniel@0
|
222 ( with_output_to_file(F1,RefWriter),
|
Daniel@0
|
223 with_transducer(dlzma(F1,['-b0']), Writer, read_line_to_string(current_input,KString)))),
|
Daniel@0
|
224 number_string(K,KString).
|
Daniel@0
|
225
|
Daniel@0
|
226
|
Daniel@0
|
227 subs(_,_,T1,T2) :- var(T1), !, T1=T2.
|
Daniel@0
|
228 subs(Old,New,T1,T2) :- T1==Old, !, T2=New.
|
Daniel@0
|
229 subs(Old,New,T1,T2) :-
|
Daniel@0
|
230 T1 =.. [F | Args1],
|
Daniel@0
|
231 maplist(subs(Old,New),Args1,Args2),
|
Daniel@0
|
232 T2 =.. [F | Args2].
|
Daniel@0
|
233
|
Daniel@0
|
234
|
Daniel@0
|
235 % this is hopeless...
|
Daniel@0
|
236 length_prob(Meth,L,Prob) :-
|
Daniel@0
|
237 length(X,L),
|
Daniel@0
|
238 aggregate( sum(2**(-8*B)),
|
Daniel@0
|
239 X^(maplist(between(0,255),X),
|
Daniel@0
|
240 with_transducer(compress(Meth), write_bytes(X), stream_length(B)),
|
Daniel@0
|
241 format(user_error, '\r~w : ~w', [X,B])
|
Daniel@0
|
242 ),
|
Daniel@0
|
243 Prob).
|
Daniel@0
|
244
|
Daniel@0
|
245 %% write_bytes(+Bytes:list(between(0,255))) is det.
|
Daniel@0
|
246 % Writes a sequence of bytes to current_output.
|
Daniel@0
|
247 write_bytes(Bytes) :- with_stream_encoding(current_output,octet,maplist(put_code,Bytes)).
|
Daniel@0
|
248
|
Daniel@0
|
249
|
Daniel@0
|
250 % -------------------------------- Using ZLIB ------------------------------
|
Daniel@0
|
251
|
Daniel@0
|
252
|
Daniel@0
|
253 with_zlib_stream(Phrase) :-
|
Daniel@0
|
254 setup_call_cleanup(
|
Daniel@0
|
255 open_null_stream(Out),
|
Daniel@0
|
256 setup_call_cleanup(
|
Daniel@0
|
257 zopen(Out,ZOut,[level(9),close_parent(false)]),
|
Daniel@0
|
258 with_output_to(ZOut,call_dcg(Phrase,Out-0,_)),
|
Daniel@0
|
259 close(ZOut)),
|
Daniel@0
|
260 close(Out)).
|
Daniel@0
|
261
|
Daniel@0
|
262 zpos(Pos,Out-Pos,Out-Pos).
|
Daniel@0
|
263 zsync(Bits,Out-Pos0,Out-Pos1) :-
|
Daniel@0
|
264 flush_output,
|
Daniel@0
|
265 byte_count(Out,Pos1),
|
Daniel@0
|
266 ( Pos1=Pos0 -> Bits=0 % no data since last sync
|
Daniel@0
|
267 ; Bits is 8*((Pos1-Pos0)-4)-(3+3) % subtract estimate of final type 0 block.
|
Daniel@0
|
268 ).
|
Daniel@0
|
269
|
Daniel@0
|
270 call_zsync(Goal,Delta) --> {call(Goal)}, zsync(Delta).
|
Daniel@0
|
271
|
Daniel@0
|
272
|
Daniel@0
|
273 % ------ Method comparison -----
|
Daniel@0
|
274 evaluate_delta(KRel,Method) :-
|
Daniel@0
|
275 call_with_mode(browse, setof(K-Meth,conditional_complexity(Meth,_:_,_:_,K),KMeths)),
|
Daniel@0
|
276 memberchk(_-delta(_),KMeths),
|
Daniel@0
|
277 exclude(eval_exclude,KMeths,KMeths1),
|
Daniel@0
|
278 KMeths1 = [KBest-_|_],
|
Daniel@0
|
279 member(K-Method,KMeths1),
|
Daniel@0
|
280 KRel is K/KBest.
|
Daniel@0
|
281
|
Daniel@0
|
282 :- public method_rankings/2.
|
Daniel@0
|
283 method_rankings(Method,Scores) :-
|
Daniel@0
|
284 setof( Method-KRels,
|
Daniel@0
|
285 bagof(KRel, evaluate_delta(KRel,Method), KRels),
|
Daniel@0
|
286 AllScores),
|
Daniel@0
|
287 member(Method-Scores,AllScores).
|
Daniel@0
|
288
|
Daniel@0
|
289 eval_exclude(_-Method) :- exclude_method(Method).
|
Daniel@0
|
290 exclude_method(naive(_)).
|
Daniel@0
|
291 exclude_method(delta(DMethod)) :- exclude_dmethod(DMethod).
|
Daniel@0
|
292 exclude_dmethod(xdiff).
|
Daniel@0
|
293 exclude_dmethod(dz(DM,_)) :- exclude_dmethod(DM).
|
Daniel@0
|
294 exclude_dmethod(zzcd(_,DM)) :- exclude_dmethod(DM).
|
Daniel@0
|
295 exclude_dmethod(zzd(_,DM)) :- exclude_dmethod(DM).
|
Daniel@0
|
296
|
Daniel@0
|
297 delta_method(compress(C),compress(C)).
|
Daniel@0
|
298 delta_method(delta(M),delta(M)).
|
Daniel@0
|
299 delta_method(id,id).
|
Daniel@0
|
300
|
Daniel@0
|
301
|
Daniel@0
|
302 % ----------------------- Compression framework --------------------------------------
|
Daniel@0
|
303
|
Daniel@0
|
304
|
Daniel@0
|
305 swipe:def(Pipe,Def) :- def(Pipe,Def).
|
Daniel@0
|
306
|
Daniel@0
|
307 def( findcat(Dir), sh(0 >> $_, '~s ~s',[compression(findcat)+execute, file(Dir,[file_type(directory)])])).
|
Daniel@0
|
308 def( humdump(Dir), findcat(Dir) >> sh($hum >> $hum,'rid -G')).
|
Daniel@0
|
309 def( length, sh($_ >> $number, 'wc -c')).
|
Daniel@0
|
310 def( prepend(File^T), sh($T >> $T,'cat ~s -',[File+read])).
|
Daniel@0
|
311 def( prepend(_<Pipe), Pipe * cat). % sh($T >> $T,'cat ~s -',[T<Pipe])).
|
Daniel@0
|
312 def( unprepend(File^T), sh($T >> $T, 'tail -c +$((1+~s))',[$(File^T :> length)])).
|
Daniel@0
|
313 def( unprepend(T<Pipe), sh($T >> $T, 'tail -c +$((1+~s))',[$(Pipe >> length)])).
|
Daniel@0
|
314 def( compress(Method), sh( $X >> $z(Method,X), Cmd)) :- compressor(Method,Cmd,_).
|
Daniel@0
|
315 def( decompress(Method), sh( $z(Method,X) >> $X, Cmd)) :- compressor(Method,_,Cmd).
|
Daniel@0
|
316
|
Daniel@0
|
317 def( diff0(Diff,Ref), diff(Diff,Ref)).
|
Daniel@0
|
318 % def( diff0(dz(Diff,Comp),Ref), diff(Diff,Ref) >> compress(Comp)).
|
Daniel@0
|
319 def( diff(Diff,Ref), diff(Diff, encode, Ref+read)).
|
Daniel@0
|
320 % def( diff(zzd(Comp,Diff),Ref), compress(Comp) >> buf(2,diff(Diff, encode, T<(Ref^T :> compress(Comp))))).
|
Daniel@0
|
321 def( diff(zzcd(Comp,Diff),Ref), prepend(Ref^T) >> compress(Comp) >> buf(2,diff(Diff, encode, _<(Ref^T :> compress(Comp))))).
|
Daniel@0
|
322
|
Daniel@0
|
323 def( patch(Method,Ref), diff(Method, decode, Ref+read)).
|
Daniel@0
|
324 def( patch(zzd(Comp,Diff),Ref), buf(2,diff(Diff, decode, _<(Ref^_ :> compress(Comp)))) >> decompress(Comp)).
|
Daniel@0
|
325 def( patch(zzcd(Comp,Diff),Ref), buf(2,diff(Diff, decode, _<(Ref^T :> compress(Comp)))) >> decompress(Comp) >> unprepend(Ref^T)).
|
Daniel@0
|
326
|
Daniel@0
|
327 def( diff(Method,encode,RefSource), sh( $X >> $dz(Method,X), [compression(Script)+execute, encode, RefSource | Args])) :- differ(Method,Script,Args,_).
|
Daniel@0
|
328 def( diff(Method,decode,RefSource), sh( $dz(Method,X) >> $X, [compression(Script)+execute, decode, RefSource | Args])) :- differ(Method,Script,_,Args), Method\=zvcz.
|
Daniel@0
|
329 def( buf(N,Pipe), sh(Type,'~s ~d ~s',[compression(bufs)+execute, \N, \Cmd])) :- command(Pipe,Type,Cmd).
|
Daniel@0
|
330
|
Daniel@0
|
331 def( diff1(Diff,Ref), diff2(Diff, Ref)).
|
Daniel@0
|
332 def( diff1(zzd(Comp,Diff),Ref), compress(Comp) >> diff2(Diff, Ref >> compress(Comp))).
|
Daniel@0
|
333 def( diff1(zzcd(Comp,Diff),Ref), prepend(_<Ref) >> compress(Comp) >> diff2(Diff, Ref >> compress(Comp))).
|
Daniel@0
|
334 def( patch1(Diff,Ref), patch2(Diff, Ref)).
|
Daniel@0
|
335 def( patch1(zzd(Comp,Diff),Ref), patch2(Diff, Ref >> compress(Comp)) >> decompress(Comp)).
|
Daniel@0
|
336 def( patch1(zzcd(Comp,Diff),Ref), patch2(Diff, Ref >> compress(Comp)) >> decompress(Comp) >> unprepend(_<Ref)).
|
Daniel@0
|
337
|
Daniel@0
|
338 def( diff2(Method,Ref), sh( $X >> $dz(Method,X), [compression(encode)+execute, Script, X<Ref | Args])) :- differ(Method,Script,Args,_).
|
Daniel@0
|
339 def( patch2(Method,Ref), sh( $dz(Method,X) >> $X, [compression(decode)+execute, Script, X<Ref | Args])) :- differ(Method,Script,_,Args), Method\=zvcz.
|
Daniel@0
|
340
|
Daniel@0
|
341 def( dlzma(Ref,Opts), sh($X >> $dz(dlzma,X), [compression(dlzma)+execute | Args])) :-
|
Daniel@0
|
342 append(Opts,[Ref],Args).
|
Daniel@0
|
343
|
Daniel@0
|
344 % compressor(xz,xz,xzcat).
|
Daniel@0
|
345 compressor(lzma,lzma,lzcat).
|
Daniel@0
|
346 compressor(xzraw,'xz -q -F raw','xzcat -F raw').
|
Daniel@0
|
347 compressor(gz,gzip,zcat).
|
Daniel@0
|
348 compressor(bz,bzip2,bzcat).
|
Daniel@0
|
349
|
Daniel@0
|
350 contains(Diff,Diff).
|
Daniel@0
|
351 contains(dz(D1,_),D2) :- contains(D1,D2).
|
Daniel@0
|
352 contains(zzd(_,D1),D2) :- contains(D1,D2).
|
Daniel@0
|
353 contains(zzcd(_,D1),D2) :- contains(D1,D2).
|
Daniel@0
|
354
|
Daniel@0
|
355
|
Daniel@0
|
356 % :- setting(max_vczip_chain_length, nonneg, 2, 'Maximum length of vczip processing chain').
|
Daniel@0
|
357
|
Daniel@0
|
358 differ(Method,Exec,[],[]) :- differ(Method,Exec).
|
Daniel@0
|
359 differ(Method-Opts, Exec, EncArgs, []) :- differ(Method,Exec), options_for(Method,Opts,EncArgs).
|
Daniel@0
|
360 differ(vczip(vcdiff),zvcz,['-Vcdiff'],[]).
|
Daniel@0
|
361 differ(vczip(delta),zvcz,['-mdelta'],[]).
|
Daniel@0
|
362 differ(vczip(sieve),zvcz,['-msieve.delta'],[]).
|
Daniel@0
|
363 % differ(vczip(Delta,Chain,Encode),zvcz,[\Codes],[]) :-
|
Daniel@0
|
364 % setting(max_vczip_chain_length,N),
|
Daniel@0
|
365 % between(1,N,L),
|
Daniel@0
|
366 % length(Chain,L),
|
Daniel@0
|
367 % vczip_chain(Delta,Chain,Encode,Codes,[]).
|
Daniel@0
|
368
|
Daniel@0
|
369 % differ(bsdiff,zbs).
|
Daniel@0
|
370 % differ(xdelta,zxd).
|
Daniel@0
|
371 differ(vcdiff,zvcd).
|
Daniel@0
|
372 % differ(vczip,zvcz).
|
Daniel@0
|
373 % differ(xdiff,zdiff).
|
Daniel@0
|
374
|
Daniel@0
|
375 options_for(Method,Opts,Args) :-
|
Daniel@0
|
376 Opts=[_|_],
|
Daniel@0
|
377 setof(opt(Opt,Gen,A1,A2),method_option(Method,Opt,Gen,A1,A2),Possible),
|
Daniel@0
|
378 seqmap(maybe_option,Possible,Opts-Args,[]-[]).
|
Daniel@0
|
379
|
Daniel@0
|
380 maybe_option(_) --> [].
|
Daniel@0
|
381 maybe_option(opt(Opt,Gen,A1,A2),[Opt|Opts]-A1,Opts-A2) :- call(Gen).
|
Daniel@0
|
382
|
Daniel@0
|
383 method_option(xdelta, secondary(A), member(A,[djw,fgk])) --> ['-S',A].
|
Daniel@0
|
384 method_option(vcdiff, target_matches, true) --> ['-target_matches'].
|
Daniel@0
|
385
|
Daniel@0
|
386 % vczip_chain(Delta,Chain,Encode) -->
|
Daniel@0
|
387 % "-m",
|
Daniel@0
|
388 % vczip_delta(Delta), ",",
|
Daniel@0
|
389 % seqmap_with_sep(",",vczip_transform,Chain), ",",
|
Daniel@0
|
390 % vczip_encode(Encode).
|
Daniel@0
|
391
|
Daniel@0
|
392 % vczip_delta(delta) --> "delta".
|
Daniel@0
|
393 % vczip_delta(sieve) --> "sieve.delta".
|
Daniel@0
|
394
|
Daniel@0
|
395 % vczip_transform(bwt) --> "bwt".
|
Daniel@0
|
396 % vczip_transform(mtf) --> "mtf".
|
Daniel@0
|
397 % vczip_transform(rle) --> "rle".
|
Daniel@0
|
398 % vczip_transform(rle(N)) --> "rle.", {between(0,1,N)}, at(N).
|
Daniel@0
|
399
|
Daniel@0
|
400 % vczip_encode(huffman) --> "huffman".
|
Daniel@0
|
401 % vczip_encode(huffgroup) --> "huffgroup".
|
Daniel@0
|
402 % vczip_encode(huffpart) --> "huffpart".
|
Daniel@0
|
403
|
Daniel@0
|
404
|
Daniel@0
|
405 compressor_overhead(gz,18). % see http://www.onicos.com/staff/iz/formats/gzip.html, https://tools.ietf.org/html/rfc1952
|
Daniel@0
|
406 compressor_overhead(bz,4). % http://en.wikipedia.org/wiki/Bzip2 (need to know number of blocks for better estimate)
|
Daniel@0
|
407 compressor_overhead(lzma,0). % http://svn.python.org/projects/external/xz-5.0.3/doc/lzma-file-format.txt
|
Daniel@0
|
408
|
Daniel@0
|
409 % ------------------------- Pipe and file tools ----------------------------------
|
Daniel@0
|
410
|
Daniel@0
|
411 splice_in(Pipe) :- with_pipe_input(S,Pipe,copy_stream_data(current_input,S)).
|
Daniel@0
|
412 splice_out(Pipe) :-
|
Daniel@0
|
413 with_pipe_output(S,Pipe,
|
Daniel@0
|
414 ( with_stream_encoding(S,octet,
|
Daniel@0
|
415 with_stream_encoding(current_output,octet,
|
Daniel@0
|
416 copy_stream_data(S,current_output))))).
|
Daniel@0
|
417
|
Daniel@0
|
418 :- op(1050,xfy,&).
|
Daniel@0
|
419
|
Daniel@0
|
420
|
Daniel@0
|
421 %% with_transducer(+P:pipe, +Writer:pred, +Reader:pred) is det.
|
Daniel@0
|
422 % Runs a pipeline defined using the framework provided by library(swipe),
|
Daniel@0
|
423 % while concurrently sending data written by Writer to the process
|
Daniel@0
|
424 % on its standard input and making data from the standard output of the
|
Daniel@0
|
425 % process available to Reader on current_input.
|
Daniel@0
|
426 :- meta_predicate with_transducer(+,0,0).
|
Daniel@0
|
427 with_transducer(Pipe,Writer,Reader) :-
|
Daniel@0
|
428 with_pipe_io( To-From, Pipe,
|
Daniel@0
|
429 ( call_cleanup(with_output_to(To,Writer), close(To))
|
Daniel@0
|
430 & call_cleanup(with_input_from(From,Reader), close(From))
|
Daniel@0
|
431 )).
|
Daniel@0
|
432
|
Daniel@0
|
433
|
Daniel@0
|
434 A & B :- concurrent(2, [A,B], []).
|
Daniel@0
|
435
|
Daniel@0
|
436 %% with_stream_encoding(+S:stream,+E:encoding,+G:pred) is det.
|
Daniel@0
|
437 % Call goall G with encoding of stream S temporarily set to E. Encoding
|
Daniel@0
|
438 % is restored afterwards.
|
Daniel@0
|
439 :- meta_predicate with_stream_encoding(+,+,0).
|
Daniel@0
|
440 with_stream_encoding(S,Enc,Goal) :-
|
Daniel@0
|
441 stream_property(S,encoding(Old)),
|
Daniel@0
|
442 setup_call_cleanup(set_stream(S,encoding(Enc)), Goal, set_stream(S,encoding(Old))).
|
Daniel@0
|
443
|
Daniel@0
|
444 %% stream_length(-L:natural) is det.
|
Daniel@0
|
445 % Reads all the data available on Prolog stream current_input and returns the
|
Daniel@0
|
446 % number of bits consumed.
|
Daniel@0
|
447 stream_length(S,Bits) :- with_input_from(S,stream_length(Bits)).
|
Daniel@0
|
448 stream_length(Bits) :-
|
Daniel@0
|
449 with_stream_encoding(current_input,octet,accum_length(0,Bytes)),
|
Daniel@0
|
450 Bits is 8*Bytes.
|
Daniel@0
|
451
|
Daniel@0
|
452 accum_length(L,L) :- at_end_of_stream, !.
|
Daniel@0
|
453 accum_length(L1,L3) :-
|
Daniel@0
|
454 read_pending_input(current_input, Codes, []),
|
Daniel@0
|
455 length(Codes,L),
|
Daniel@0
|
456 L2 is L1 + L,
|
Daniel@0
|
457 accum_length(L2,L3).
|
Daniel@0
|
458
|
Daniel@0
|
459 with_temp_files(Files,Goal) :-
|
Daniel@0
|
460 must_be(list(var),Files),
|
Daniel@0
|
461 setup_call_cleanup(
|
Daniel@0
|
462 maplist(tmp_file_stream(binary),Files,Streams),
|
Daniel@0
|
463 (maplist(close,Streams), call(Goal)),
|
Daniel@0
|
464 maplist(delete_file,Files)).
|