changeset 37:9aeb5bff013a

Added levenshtein distance with cost functions darcs-hash:20070516124658-f76cc-8b96080b5c4018f2077650697bec4d1213917505.gz
author David Lewis <d.lewis@gold.ac.uk>
date Wed, 16 May 2007 13:46:58 +0100
parents ad321ce17e3e
children 163c5cb24993
files utils/package.lisp utils/utils.lisp
diffstat 2 files changed, 38 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/utils/package.lisp	Fri May 11 13:09:16 2007 +0100
+++ b/utils/package.lisp	Wed May 16 13:46:58 2007 +0100
@@ -23,4 +23,5 @@
 	   #:get-velocity-for-midi
 	   #:vector-correlation
 	   #:krumhansl-key-finder
+	   #:levenshtein-distance
 	   ))
--- a/utils/utils.lisp	Fri May 11 13:09:16 2007 +0100
+++ b/utils/utils.lisp	Wed May 16 13:46:58 2007 +0100
@@ -146,3 +146,40 @@
 	       (setf key (list i :minor)
 		     best-score (aref minors i))))
     key))
+
+(defun levenshtein-distance (s1 s2 &key (insertion-cost 1)
+			  (insertion-function) (deletion-cost 1)
+			  (deletion-function) (substitution-cost 1)
+			  (substitution-test #'equal) (substitution-function))
+  ;; This is an implementation of the Levenshtein distance measure
+  ;; based on the cliki asdf package, itself based on the wikipedia
+  ;; scheme example of the same algorithm. This version is generalised
+  ;; such that operations costs may take constant or calculated
+  ;; values. If insertion-function, deletion-function or
+  ;; substitution-test are specified, the applicable cost values are
+  ;; ignored and the function output is used instead.
+  (let* ((width (1+ (length s1)))
+	 (height (1+ (length s2)))
+	 (d (make-array (list height width))))
+    (dotimes (x width)
+      (setf (aref d 0 x) (* x deletion-cost)))
+    (dotimes (y height)
+      (setf (aref d y 0) (* y insertion-cost)))
+    (dotimes (x (length s1))
+      (dotimes (y (length s2))
+	(setf (aref d (1+ y) (1+ x))
+	      (min (+ (if insertion-function
+			  (apply insertion-function (elt s1 x))
+			  insertion-cost)
+		      (aref d y (1+ x)))
+		   (+ (if deletion-function
+			  (apply deletion-function (elt s2 y))
+			  deletion-cost)
+		      (aref d (1+ y) x))
+		   (+ (aref d y x)
+		      (if substitution-function
+			  (apply substitution-function (list (elt s1 x) (elt s2 y)))
+			  (if (apply substitution-test (list (elt s1 x) (elt s2 y)))
+			      0
+			      substitution-cost)))))))
+    (aref d (1- height) (1- width))))
\ No newline at end of file