;;; Functional version of EM benchmark by Jeffrey Mark Siskind. ;;; Posted 7 April 1998 to comp.lang.functional, comp.lang.lisp, comp.lang.ml, ;;; and comp.lang.scheme. ;;; Modified by Will Clinger and Lars Hansen. (define (e-step x models) (let ((z (map-vector (lambda (xi) ;; Compute for each model. (map-vector (lambda (model) (let ((log-pi (model-log-pi model)) (mu (model-mu model)) (sigma-inverse (model-sigma-inverse model)) (log-determinant-sigma (model-log-determinant-sigma model))) ;; Compute likelihoods (note: up to constant for all models). (do ((k1 0 (+ k1 1)) (t 0.0 (do ((k2 0 (+ k2 1)) (t t (let ((sigma-inverse-k1 (vector-ref sigma-inverse k1))) (+ t (* (- (vector-ref xi k1) (vector-ref mu k1)) (vector-ref sigma-inverse-k1 k2) (- (vector-ref xi k2) (vector-ref mu k2))))))) ; gratuitous inefficiency ((= k2 (vector-length xi)) t)))) ; gratuitous inefficiency ((= k1 (vector-length xi)) (- log-pi (* 0.5 (+ log-determinant-sigma t))))))) models)) x))) ;; Normalize ownerships to sum to one. (let ((s (map-vector (lambda (zi) (do ((j 0 (+ j 1)) (sj minus-infinity (add-exp sj (vector-ref zi j)))) ((= j (vector-length models)) sj))) z))) ;; Return log likelihood and ownerships matrix. (list (reduce-vector + s 0.0) (map-n-vector (lambda (i) (let ((zi (vector-ref z i)) (si (vector-ref s i))) (map-n-vector (lambda (j) (exp (- (vector-ref zi j) si))) ; gratuitous inefficiency (vector-length models)))) (vector-length x)))))) (define (m-step x z clip) ;; Returns new set of models. (let* ((ii (vector-length x)) (kk (vector-length (vector-ref x 0)))) ;; For each model, optimize parameters. (map-n-vector ; return the vector of models obtained by mapping over each column of z (lambda (j) (let* (;; Optimize values. (zj-sum (do ((i 0 (+ i 1)) (s 0.0 (+ s (vector-ref (vector-ref z i) j)))) ((= i ii) s))) (si (/ zj-sum)) (mu (map-n-vector (lambda (k) (do ((i 0 (+ i 1)) (m 0.0 (+ m (* (vector-ref (vector-ref z i) j) (vector-ref (vector-ref x i) k))))) ((= i (vector-length x)) (* si m)))) kk)) (sigma (clip-eigenvalues (map-n-vector (lambda (k1) (let ((mu-k1 (vector-ref mu k1))) (map-n-vector (lambda (k2) (let ((mu-k2 (vector-ref mu k2))) (do ((i 0 (+ i 1)) (m 0.0 (+ m (* (vector-ref (vector-ref z i) j) (* (- (vector-ref (vector-ref x i) k1) mu-k1) (- (vector-ref (vector-ref x i) k2) mu-k2)))))) ((= i (vector-length x)) (* si m))))) kk))) kk) clip))) (make-model (/ zj-sum ii) mu sigma (ieee-log (/ zj-sum ii)) (invert-matrix sigma) (ieee-log (determinant sigma))))) (matrix-columns z)))) (define (em x models clip em-kick-off-tolerance em-convergence-tolerance) (let ((jj (vector-length models))) (let loop ((models models) (old-log-likelihood minus-infinity) (starting? #t)) (let ((log-likelihood-z (e-step x models))) (if (or (and starting? (> (first log-likelihood-z) old-log-likelihood)) (> (first log-likelihood-z) (+ old-log-likelihood em-convergence-tolerance))) (loop (m-step x (second log-likelihood-z) clip) (first log-likelihood-z) (and starting? (not (= jj 1)) (or (= old-log-likelihood minus-infinity) (< (first log-likelihood-z) (+ old-log-likelihood em-kick-off-tolerance))))) (list old-log-likelihood models)))))) (define (initial-z ii jj) (map-n-vector (lambda (i) (let* ((zi (map-n-vector (lambda (j) (+ (/ (exact->inexact jj)) (noise (/ (exact->inexact jj))))) jj)) (s (do ((j 0 (+ j 1)) (s 0.0 (+ s (vector-ref zi j)))) ((= j jj) s)))) (k*v (/ s) zi))) ii)) (define (ems x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance) (let loop ((jj 1) ;; needs work: Should replace #F with ((LAMBDA ())). (old-log-likelihood-models (list minus-infinity #f))) (let* ((models (m-step x (initial-z (vector-length x) jj) clip)) (new-log-likelihood-models (em x models clip em-kick-off-tolerance em-convergence-tolerance))) (if (> (- (/ (first old-log-likelihood-models) (first new-log-likelihood-models)) 1.0) ems-convergence-tolerance) (loop (+ jj 1) new-log-likelihood-models) (second old-log-likelihood-models))))) (define (em-clusterer x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance) (let* ((z (second (e-step x (ems x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance)))) (clusters (map-n (lambda (i) (let ((zi (vector->list (vector-ref z i)))) (list i (positionv (reduce max zi minus-infinity) zi)))) (vector-length z)))) (map-n (lambda (j) (map (lambda (cluster) (vector-ref x (first cluster))) (remove-if-not (lambda (cluster) (= (second cluster) j)) clusters))) (vector-length (vector-ref z 0))))) ;;; (em-fun-benchmark n ) times n iterations, and writes the output to . ;;; If the is omitted, no output is generated. ;;; If both n and the are omitted, it defaults to 100 iterations. (define (em-fun-benchmark . rest) (let ((n (if (null? rest) 100 (car rest))) (output-file (if (or (null? rest) (null? (cdr rest))) "/dev/null" (cadr rest))) (vecs '())) (call-with-current-continuation (lambda (k) (set! panic (lambda (s) (display "PANIC: ") (display s) (newline) (k (void)))) (random:initialize) (run-benchmark "EM-FUN" (lambda () (set! vecs (cons (em-clusterer '#(#(1.0) #(2.0) #(3.0) #(11.0) #(12.0) #(13.0)) '#(1.0) 10.0 1.0 0.01) vecs))) n) (if (not (string=? output-file "/dev/null")) (call-with-output-file output-file (lambda (p) (for-each (lambda (v) (write v p) (newline p)) (reverse vecs)))))))))