# HG changeset patch # User David Lewis # Date 1182337443 -3600 # Node ID 49aae39b96dcc06fe24298d86baa24c39924547f # Parent ce12c691e661a379fe802aea9cdd61922af40958 Update to (in wrong place) n-grams.lisp darcs-hash:20070620110403-f76cc-67a5d10a8ee11b3c036448b1fb1f8a740e52bc32.gz diff -r ce12c691e661 -r 49aae39b96dc utils/n-grams.lisp --- a/utils/n-grams.lisp Fri Jun 15 13:24:08 2007 +0100 +++ b/utils/n-grams.lisp Wed Jun 20 12:04:03 2007 +0100 @@ -1,33 +1,37 @@ (in-package "AMUSE-UTILS") -(defun get-n-grams (state-sequence start-n &optional (finish-n nil)) +(defun get-n-grams (state-sequence start-n &optional (finish-n nil) (test-for-duplicates nil)) (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK)) - (pointer 0) (step 0) + (pointer 0) (step 0) (prev-state) (n-grams (make-array (or finish-n start-n) :initial-element nil)) (current-sequence) (last-time)) (dolist (state state-sequence n-grams) - (setf (aref memory pointer) state - current-sequence nil - last-time nil - step (1+ step)) - (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory)))) - (last-time (setf current-sequence (reverse current-sequence))) - (when (= pointer i) - (setf last-time t)) - (push (aref memory i) current-sequence)) - (setf pointer (mod (1+ pointer) (length memory))) - (loop for i from start-n to (or finish-n start-n) - do (when (> step i) - (unless (aref n-grams (1- i)) - (setf (aref n-grams (1- i)) (make-hash-table :test #'equal))) - (if (gethash (subseq current-sequence (- (or finish-n start-n) i)) - (aref n-grams (1- i))) - (incf (gethash (subseq current-sequence (- (or finish-n start-n) i)) - (aref n-grams (1- i)))) - (setf (gethash (subseq current-sequence (- (or finish-n start-n) i)) - (aref n-grams (1- i))) - 1))))))) + (unless (and test-for-duplicates + prev-state + (funcall test-for-duplicates state prev-state)) + (setf (aref memory pointer) state + prev-state state + current-sequence nil + last-time nil + step (1+ step)) + (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory)))) + (last-time (setf current-sequence (reverse current-sequence))) + (when (= pointer i) + (setf last-time t)) + (push (aref memory i) current-sequence)) + (setf pointer (mod (1+ pointer) (length memory))) + (loop for i from start-n to (or finish-n start-n) + do (when (> step i) + (unless (aref n-grams (1- i)) + (setf (aref n-grams (1- i)) (make-hash-table :test #'equal))) + (if (gethash (subseq current-sequence (- (or finish-n start-n) i)) + (aref n-grams (1- i))) + (incf (gethash (subseq current-sequence (- (or finish-n start-n) i)) + (aref n-grams (1- i)))) + (setf (gethash (subseq current-sequence (- (or finish-n start-n) i)) + (aref n-grams (1- i))) + 1)))))))) (defun get-n-gram (state-sequence n) (aref (get-n-grams state-sequence n) (1- n)))