Mercurial > hg > amuse
view utils/n-grams.lisp @ 41:90abdf9adb60
monodising and some n-gram utilities
darcs-hash:20070614140028-f76cc-9bdeba6db4097e425b1fee4f58a3327eeb486685.gz
author | David Lewis <d.lewis@gold.ac.uk> |
---|---|
date | Thu, 14 Jun 2007 15:00:28 +0100 |
parents | |
children | 49aae39b96dc |
line wrap: on
line source
(in-package "AMUSE-UTILS") (defun get-n-grams (state-sequence start-n &optional (finish-n nil)) (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK)) (pointer 0) (step 0) (n-grams (make-array (or finish-n start-n) :initial-element nil)) (current-sequence) (last-time)) (dolist (state state-sequence n-grams) (setf (aref memory pointer) state current-sequence nil last-time nil step (1+ step)) (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory)))) (last-time (setf current-sequence (reverse current-sequence))) (when (= pointer i) (setf last-time t)) (push (aref memory i) current-sequence)) (setf pointer (mod (1+ pointer) (length memory))) (loop for i from start-n to (or finish-n start-n) do (when (> step i) (unless (aref n-grams (1- i)) (setf (aref n-grams (1- i)) (make-hash-table :test #'equal))) (if (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i))) (incf (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i)))) (setf (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i))) 1))))))) (defun get-n-gram (state-sequence n) (aref (get-n-grams state-sequence n) (1- n))) (defun n-gram-stats (n-gram &key (alphabet-size nil)) (let ((count 0) (frequencies) (n)) (maphash #'(lambda (key val) (push (cons key val) frequencies) (incf count val) (unless n (setf n (length key)))) n-gram) (if alphabet-size (values count (hash-table-count n-gram) frequencies (/ (hash-table-count n-gram) (expt alphabet-size n))) (values count (hash-table-count n-gram) frequencies))))