Mercurial > hg > amuse
view utils/n-grams.lisp @ 330:2fbff655ba47 tip
Removed cpitch-adj and cents SQL columns
author | Jeremy Gow <jeremy.gow@gmail.com> |
---|---|
date | Mon, 21 Jan 2013 11:08:11 +0000 |
parents | 5e362d998f29 |
children |
line wrap: on
line source
(in-package "AMUSE-UTILS") (defun get-n-grams (state-sequence start-n &optional (finish-n nil) (test-for-duplicates nil)) "Takes a state sequence and an order, or the bounding indices for a range of orders, and returns an array of n-gram-frequency hash tables. The order of n-gram is used as the index for the n-grams in the returned array. N-grams are represented as equal hash tables with lists of states as the keys and counts as values. If test-for-duplicates is provided, it is assumed that a) consecutive instances of a state to not count as an interesting transition for noting and b) that (funcall test-for-duplicates state1 state2) returns true if the states are the same by a relevant measure. FIXME: Why are these optionals, not keys?" (let ((memory (make-array (or finish-n start-n) :initial-element :BLANK)) (pointer 0) (step 1) (prev-state) (n-grams (make-array (or finish-n start-n) :initial-element nil)) (current-sequence) (last-time)) (dolist (state state-sequence n-grams) (unless (and test-for-duplicates prev-state (funcall test-for-duplicates state prev-state)) (setf (aref memory pointer) state prev-state state current-sequence nil last-time nil) (do ((i (mod (1+ pointer) (length memory)) (mod (1+ i) (length memory)))) (last-time (setf current-sequence (reverse current-sequence))) (when (= pointer i) (setf last-time t)) (push (aref memory i) current-sequence)) (setf pointer (mod (1+ pointer) (length memory))) (loop for i from start-n to (or finish-n start-n) do (when (>= step i) (unless (aref n-grams (1- i)) (setf (aref n-grams (1- i)) (make-hash-table :test #'equal))) (if (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i))) (incf (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i)))) (setf (gethash (subseq current-sequence (- (or finish-n start-n) i)) (aref n-grams (1- i))) 1))))) (incf step)))) (defun get-n-gram (state-sequence n) "Uses get-n-grams to return a single order n n-gram from state-sequence" (aref (get-n-grams state-sequence n) (1- n))) (defun n-gram-stats (n-gram &key (alphabet-size nil)) "Summarises the vital statistics of an n-gram, returning values for the total number of entries, the number of different n-grams occurring and a list of the hash table contents as a list of lists. If alphabet-size is provided, an additional value is returned for the proportion of n-grams occurring as compared with the number possible with alphabet-size symbols." (let ((count 0) (frequencies) (n)) (maphash #'(lambda (key val) (push (cons key val) frequencies) (incf count val) (unless n (setf n (length key)))) n-gram) (if alphabet-size (values count (hash-table-count n-gram) frequencies (/ (hash-table-count n-gram) (expt alphabet-size n))) (values count (hash-table-count n-gram) frequencies))))