comparison utils/utils.lisp @ 37:9aeb5bff013a

Added levenshtein distance with cost functions darcs-hash:20070516124658-f76cc-8b96080b5c4018f2077650697bec4d1213917505.gz
author David Lewis <d.lewis@gold.ac.uk>
date Wed, 16 May 2007 13:46:58 +0100
parents ad321ce17e3e
children 262aa7a3d500
comparison
equal deleted inserted replaced
36:ad321ce17e3e 37:9aeb5bff013a
144 (loop for i from 0 to 11 144 (loop for i from 0 to 11
145 do (when (> (aref minors i) best-score) 145 do (when (> (aref minors i) best-score)
146 (setf key (list i :minor) 146 (setf key (list i :minor)
147 best-score (aref minors i)))) 147 best-score (aref minors i))))
148 key)) 148 key))
149
150 (defun levenshtein-distance (s1 s2 &key (insertion-cost 1)
151 (insertion-function) (deletion-cost 1)
152 (deletion-function) (substitution-cost 1)
153 (substitution-test #'equal) (substitution-function))
154 ;; This is an implementation of the Levenshtein distance measure
155 ;; based on the cliki asdf package, itself based on the wikipedia
156 ;; scheme example of the same algorithm. This version is generalised
157 ;; such that operations costs may take constant or calculated
158 ;; values. If insertion-function, deletion-function or
159 ;; substitution-test are specified, the applicable cost values are
160 ;; ignored and the function output is used instead.
161 (let* ((width (1+ (length s1)))
162 (height (1+ (length s2)))
163 (d (make-array (list height width))))
164 (dotimes (x width)
165 (setf (aref d 0 x) (* x deletion-cost)))
166 (dotimes (y height)
167 (setf (aref d y 0) (* y insertion-cost)))
168 (dotimes (x (length s1))
169 (dotimes (y (length s2))
170 (setf (aref d (1+ y) (1+ x))
171 (min (+ (if insertion-function
172 (apply insertion-function (elt s1 x))
173 insertion-cost)
174 (aref d y (1+ x)))
175 (+ (if deletion-function
176 (apply deletion-function (elt s2 y))
177 deletion-cost)
178 (aref d (1+ y) x))
179 (+ (aref d y x)
180 (if substitution-function
181 (apply substitution-function (list (elt s1 x) (elt s2 y)))
182 (if (apply substitution-test (list (elt s1 x) (elt s2 y)))
183 0
184 substitution-cost)))))))
185 (aref d (1- height) (1- width))))