d@41: (in-package "AMUSE-UTILS") d@41: d@41: (defun get-n-grams (state-sequence start-n &optional (finish-n nil)) d@41: (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK)) d@41: (pointer 0) (step 0) d@41: (n-grams (make-array (or finish-n start-n) d@41: :initial-element nil)) d@41: (current-sequence) (last-time)) d@41: (dolist (state state-sequence n-grams) d@41: (setf (aref memory pointer) state d@41: current-sequence nil d@41: last-time nil d@41: step (1+ step)) d@41: (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory)))) d@41: (last-time (setf current-sequence (reverse current-sequence))) d@41: (when (= pointer i) d@41: (setf last-time t)) d@41: (push (aref memory i) current-sequence)) d@41: (setf pointer (mod (1+ pointer) (length memory))) d@41: (loop for i from start-n to (or finish-n start-n) d@41: do (when (> step i) d@41: (unless (aref n-grams (1- i)) d@41: (setf (aref n-grams (1- i)) (make-hash-table :test #'equal))) d@41: (if (gethash (subseq current-sequence (- (or finish-n start-n) i)) d@41: (aref n-grams (1- i))) d@41: (incf (gethash (subseq current-sequence (- (or finish-n start-n) i)) d@41: (aref n-grams (1- i)))) d@41: (setf (gethash (subseq current-sequence (- (or finish-n start-n) i)) d@41: (aref n-grams (1- i))) d@41: 1))))))) d@41: d@41: (defun get-n-gram (state-sequence n) d@41: (aref (get-n-grams state-sequence n) (1- n))) d@41: d@41: (defun n-gram-stats (n-gram &key (alphabet-size nil)) d@41: (let ((count 0) d@41: (frequencies) d@41: (n)) d@41: (maphash #'(lambda (key val) d@41: (push (cons key val) frequencies) d@41: (incf count val) d@41: (unless n d@41: (setf n (length key)))) d@41: n-gram) d@41: (if alphabet-size d@41: (values count (hash-table-count n-gram) frequencies (/ (hash-table-count n-gram) d@41: (expt alphabet-size n))) d@41: (values count (hash-table-count n-gram) frequencies))))