;;; Imperative version of EM benchmark by Jeffrey Mark Siskind. ;;; Posted 7 April 1998 to comp.lang.functional, comp.lang.lisp, comp.lang.ml, ;;; and comp.lang.scheme. ;;; Modified by Will Clinger and Lars Hansen. (define (e-step! x z models) (do ((i 0 (+ i 1))) ((= i (vector-length x))) (let ((xi (vector-ref x i)) (zi (vector-ref z i))) (do ((j 0 (+ j 1))) ((= j (vector-length models))) ;; Compute for each model. (let* ((model (vector-ref models j)) (log-pi (model-log-pi model)) (mu (model-mu model)) (sigma-inverse (model-sigma-inverse model)) (log-determinant-sigma (model-log-determinant-sigma model)) (t 0.0)) ;; Compute likelihoods (note: up to constant for all models). (set! t 0.0) (do ((k1 0 (+ k1 1))) ((= k1 (vector-length xi))) (let ((sigma-inverse-k1 (vector-ref sigma-inverse k1))) (do ((k2 0 (+ k2 1))) ((= k2 (vector-length xi))) (set! t (+ t (* (- (vector-ref xi k1) (vector-ref mu k1)) (vector-ref sigma-inverse-k1 k2) (- (vector-ref xi k2) (vector-ref mu k2)))))))) (vector-set! zi j (- log-pi (* 0.5 (+ log-determinant-sigma t)))))))) (let ((l 0.0)) (do ((i 0 (+ i 1))) ((= i (vector-length x))) (let ((s minus-infinity) (zi (vector-ref z i))) ;; Normalize ownerships to sum to one. (do ((j 0 (+ j 1))) ((= j (vector-length models))) (set! s (add-exp s (vector-ref zi j)))) (do ((j 0 (+ j 1))) ((= j (vector-length models))) (vector-set! zi j (exp (- (vector-ref zi j) s)))) (set! l (+ l s)))) ;; Return log likelihood. l)) (define (m-step! x models z clip) (let ((kk (vector-length (vector-ref x 0)))) ;; For each model, optimize parameters. (do ((j 0 (+ j 1))) ((= j (vector-length models))) (let* ((model (vector-ref models j)) (mu (model-mu model)) (sigma (model-sigma model)) (s 0.0)) ;; Optimize values. (do ((i 0 (+ i 1))) ((= i (vector-length x))) (set! s (+ s (vector-ref (vector-ref z i) j)))) (let ((si (/ s))) (do ((k 0 (+ k 1))) ((= k kk)) (let ((m 0.0)) (do ((i 0 (+ i 1))) ((= i (vector-length x))) (set! m (+ m (* (vector-ref (vector-ref z i) j) (vector-ref (vector-ref x i) k))))) (vector-set! mu k (* si m))))) (let ((si (/ s))) (do ((k1 0 (+ k1 1))) ((= k1 kk)) (let ((sigma-k1 (vector-ref sigma k1)) (mu-k1 (vector-ref mu k1))) (do ((k2 0 (+ k2 1))) ((= k2 kk)) (let ((mu-k2 (vector-ref mu k2)) (m 0.0)) (do ((i 0 (+ i 1))) ((= i (vector-length x))) (set! m (+ m (* (vector-ref (vector-ref z i) j) (* (- (vector-ref (vector-ref x i) k1) mu-k1) (- (vector-ref (vector-ref x i) k2) mu-k2)))))) (vector-set! sigma-k1 k2 (* si m))))))) (clip-eigenvalues! sigma clip) (set-model-pi! model (/ s (vector-length x))) (set-model-log-pi! model (ieee-log (/ s (vector-length x)))) (invert-matrix! sigma (model-sigma-inverse model)) (set-model-log-determinant-sigma! model (ieee-log (determinant sigma))))))) (define (em! x z models clip em-kick-off-tolerance em-convergence-tolerance) (let loop ((old-log-likelihood minus-infinity) (starting? #t)) (let ((log-likelihood (e-step! x z models))) (cond ((or (and starting? (> log-likelihood old-log-likelihood)) (> log-likelihood (+ old-log-likelihood em-convergence-tolerance))) (m-step! x models z clip) (loop log-likelihood (and starting? (not (= (vector-length models) 1)) (or (= old-log-likelihood minus-infinity) (< log-likelihood (+ old-log-likelihood em-kick-off-tolerance)))))) (else old-log-likelihood))))) (define (initial-z! ii jj) (map-n-vector (lambda (i) (let ((zi (map-n-vector (lambda (j) (+ (/ (exact->inexact jj)) (noise (/ (exact->inexact jj))))) jj)) (s 0.0)) (do ((j 0 (+ j 1))) ((= j jj)) (set! s (+ s (vector-ref zi j)))) (let ((si (/ s))) (do ((j 0 (+ j 1))) ((= j jj)) (vector-set! zi j (* si (vector-ref zi j))))) zi)) ii)) (define (ems! x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance) (let loop ((jj 1) (old-z (void)) (old-models (void)) (old-log-likelihood minus-infinity)) (let* ((kk (vector-length (vector-ref x 0))) (z (initial-z! (vector-length x) jj)) (models (map-n-vector (lambda (j) ;; needs work: Should replace 0.0 with ((LAMBDA ())). (make-model 0.0 (make-vector kk) (make-matrix kk kk) 0.0 (make-matrix kk kk) 0.0)) jj))) (m-step! x models z clip) (let ((new-log-likelihood (em! x z models clip em-kick-off-tolerance em-convergence-tolerance))) (if (> (- (/ old-log-likelihood new-log-likelihood) 1.0) ems-convergence-tolerance) (loop (+ jj 1) z models new-log-likelihood) (list old-z old-models)))))) (define (em-clusterer! x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance) (let* ((z-models (ems! x clip em-kick-off-tolerance em-convergence-tolerance ems-convergence-tolerance)) (z (first z-models)) (models (second z-models))) (e-step! x z models) (let ((clusters (map-n (lambda (i) (let ((zi (vector->list (vector-ref z i)))) (list i (positionv (reduce max zi minus-infinity) zi)))) (vector-length z)))) (map-n (lambda (j) (map (lambda (cluster) (vector-ref x (first cluster))) (remove-if-not (lambda (cluster) (= (second cluster) j)) clusters))) (vector-length (vector-ref z 0)))))) ;;; (em-imp-benchmark n ) times n iterations, and writes the output to . ;;; If the is omitted, no output is generated. ;;; If both n and the are omitted, it defaults to 100 iterations. (define (em-imp-benchmark . rest) (let ((n (if (null? rest) 100 (car rest))) (output-file (if (or (null? rest) (null? (cdr rest))) "/dev/null" (cadr rest))) (vecs '())) (call-with-current-continuation (lambda (k) (set! panic (lambda (s) (display "PANIC: ") (display s) (newline) (k (void)))) (random:initialize) (run-benchmark "EM-IMP" (lambda () (set! vecs (cons (em-clusterer! '#(#(1.0) #(2.0) #(3.0) #(11.0) #(12.0) #(13.0)) '#(1.0) 10.0 1.0 0.01) vecs))) n) (if (not (string=? output-file "/dev/null")) (call-with-output-file output-file (lambda (p) (for-each (lambda (v) (write v p) (newline p)) (reverse vecs)))))))))