d@41
|
1 (in-package "AMUSE-UTILS")
|
d@41
|
2
|
d@41
|
3 (defun get-n-grams (state-sequence start-n &optional (finish-n nil))
|
d@41
|
4 (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK))
|
d@41
|
5 (pointer 0) (step 0)
|
d@41
|
6 (n-grams (make-array (or finish-n start-n)
|
d@41
|
7 :initial-element nil))
|
d@41
|
8 (current-sequence) (last-time))
|
d@41
|
9 (dolist (state state-sequence n-grams)
|
d@41
|
10 (setf (aref memory pointer) state
|
d@41
|
11 current-sequence nil
|
d@41
|
12 last-time nil
|
d@41
|
13 step (1+ step))
|
d@41
|
14 (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory))))
|
d@41
|
15 (last-time (setf current-sequence (reverse current-sequence)))
|
d@41
|
16 (when (= pointer i)
|
d@41
|
17 (setf last-time t))
|
d@41
|
18 (push (aref memory i) current-sequence))
|
d@41
|
19 (setf pointer (mod (1+ pointer) (length memory)))
|
d@41
|
20 (loop for i from start-n to (or finish-n start-n)
|
d@41
|
21 do (when (> step i)
|
d@41
|
22 (unless (aref n-grams (1- i))
|
d@41
|
23 (setf (aref n-grams (1- i)) (make-hash-table :test #'equal)))
|
d@41
|
24 (if (gethash (subseq current-sequence (- (or finish-n start-n) i))
|
d@41
|
25 (aref n-grams (1- i)))
|
d@41
|
26 (incf (gethash (subseq current-sequence (- (or finish-n start-n) i))
|
d@41
|
27 (aref n-grams (1- i))))
|
d@41
|
28 (setf (gethash (subseq current-sequence (- (or finish-n start-n) i))
|
d@41
|
29 (aref n-grams (1- i)))
|
d@41
|
30 1)))))))
|
d@41
|
31
|
d@41
|
32 (defun get-n-gram (state-sequence n)
|
d@41
|
33 (aref (get-n-grams state-sequence n) (1- n)))
|
d@41
|
34
|
d@41
|
35 (defun n-gram-stats (n-gram &key (alphabet-size nil))
|
d@41
|
36 (let ((count 0)
|
d@41
|
37 (frequencies)
|
d@41
|
38 (n))
|
d@41
|
39 (maphash #'(lambda (key val)
|
d@41
|
40 (push (cons key val) frequencies)
|
d@41
|
41 (incf count val)
|
d@41
|
42 (unless n
|
d@41
|
43 (setf n (length key))))
|
d@41
|
44 n-gram)
|
d@41
|
45 (if alphabet-size
|
d@41
|
46 (values count (hash-table-count n-gram) frequencies (/ (hash-table-count n-gram)
|
d@41
|
47 (expt alphabet-size n)))
|
d@41
|
48 (values count (hash-table-count n-gram) frequencies))))
|