changeset 50:49aae39b96dc

Update to (in wrong place) n-grams.lisp darcs-hash:20070620110403-f76cc-67a5d10a8ee11b3c036448b1fb1f8a740e52bc32.gz
author David Lewis <d.lewis@gold.ac.uk>
date Wed, 20 Jun 2007 12:04:03 +0100
parents ce12c691e661
children 894fb5156603
files utils/n-grams.lisp
diffstat 1 files changed, 27 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/utils/n-grams.lisp	Fri Jun 15 13:24:08 2007 +0100
+++ b/utils/n-grams.lisp	Wed Jun 20 12:04:03 2007 +0100
@@ -1,33 +1,37 @@
 (in-package "AMUSE-UTILS")
 
-(defun get-n-grams (state-sequence start-n &optional (finish-n nil))
+(defun get-n-grams (state-sequence start-n &optional (finish-n nil) (test-for-duplicates nil))
   (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK))
-	(pointer 0) (step 0)
+	(pointer 0) (step 0) (prev-state)
 	(n-grams (make-array (or finish-n start-n)
 			     :initial-element nil))
 	(current-sequence) (last-time))
     (dolist (state state-sequence n-grams)
-      (setf (aref memory pointer) state
-	    current-sequence nil
-	    last-time nil
-	    step (1+ step))
-      (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory))))
-	  (last-time (setf current-sequence (reverse current-sequence)))
-	(when (= pointer i)
-	  (setf last-time t))
-	(push (aref memory i) current-sequence))
-      (setf pointer (mod (1+ pointer) (length memory)))
-      (loop for i from start-n to (or finish-n start-n)
-	 do (when (> step i)
-	      (unless (aref n-grams (1- i))
-		(setf (aref n-grams (1- i)) (make-hash-table :test #'equal)))
-	      (if (gethash (subseq current-sequence (- (or finish-n start-n) i))
-			   (aref n-grams (1- i)))
-		  (incf (gethash (subseq current-sequence (- (or finish-n start-n) i))
-				 (aref n-grams (1- i))))
-		  (setf (gethash (subseq current-sequence (- (or finish-n start-n) i))
-				 (aref n-grams (1- i)))
-			1)))))))
+      (unless (and test-for-duplicates
+		   prev-state
+		   (funcall test-for-duplicates state prev-state))
+	(setf (aref memory pointer) state
+	      prev-state state
+	      current-sequence nil
+	      last-time nil
+	      step (1+ step))
+	(do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory))))
+	    (last-time (setf current-sequence (reverse current-sequence)))
+	  (when (= pointer i)
+	    (setf last-time t))
+	  (push (aref memory i) current-sequence))
+	(setf pointer (mod (1+ pointer) (length memory)))
+	(loop for i from start-n to (or finish-n start-n)
+	   do (when (> step i)
+		(unless (aref n-grams (1- i))
+		  (setf (aref n-grams (1- i)) (make-hash-table :test #'equal)))
+		(if (gethash (subseq current-sequence (- (or finish-n start-n) i))
+			     (aref n-grams (1- i)))
+		    (incf (gethash (subseq current-sequence (- (or finish-n start-n) i))
+				   (aref n-grams (1- i))))
+		    (setf (gethash (subseq current-sequence (- (or finish-n start-n) i))
+				   (aref n-grams (1- i)))
+			  1))))))))
 
 (defun get-n-gram (state-sequence n)
   (aref (get-n-grams state-sequence n) (1- n)))