;;; Copyright 2006 William D Clinger. ;;; ;;; Please do not copy this software. It is in alpha test form. ;;; A better version will be distributed at some later date. ;;; ;;; Last modified 2 March 2006 8:15am EST ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; EQUIV? ;;; ;;; EQUIV? is a version of EQUAL? that terminates on all arguments. ;;; ;;; The basic idea of the algorithm is presented in ;;; ;;; J E Hopcroft and R M Karp. A Linear Algorithm for ;;; Testing Equivalence of Finite Automata. ;;; Cornell University Technical Report 71-114, ;;; December 1971. ;;; http://techreports.library.cornell.edu:8081/Dienst/UI/1.0/Display/cul.cs/TR71-114 ;;; ;;; The algorithm uses FIND and MERGE operations, which are ;;; called done? and equate! in the implementation below. ;;; The algorithm maintains a stack of comparisons to do, ;;; and a set of equivalences that would be implied by the ;;; comparisons yet to be done. ;;; ;;; When comparing objects x and y whose equality cannot be ;;; determined without recursion, the algorithm pushes all ;;; the recursive subgoals onto the stack, and merges the ;;; equivalence classes for x and y. If any of the subgoals ;;; involve comparing x and y, the algorithm will notice ;;; that they are in the same equivalence class and will ;;; avoid circularity by assuming x and y are equal. ;;; If all of the subgoals succeed, then x and y really are ;;; equal, so the algorithm is correct. ;;; ;;; If we had hash tables with amortized constant-time lookup on ;;; object identity, then this algorithm could be made to run in ;;; O(n) time, where n is the number of nodes in the larger of ;;; the two structures being compared. ;;; ;;; As implemented below, in portable R5RS Scheme, the algorithm ;;; should run in O(n^2) time, or close to it. ;;; ;;; This implementation uses several different techniques to ;;; reduce the cost of the algorithm for common special cases: ;;; ;;; It uses the traditional recursive algorithm to bounded depth. ;;; It handles easy cases specially. ; How long should we try the traditional recursive algorithm ; before switching to the terminating algorithm? (define equiv:bound-on-recursion 1000000) (define (equiv? x y) ; The traditional recursive algorithm, with bounded recursion. ; Returns #f or an exact integer n. ; If n > 0, then x and y are equal and the comparison involved ; bound - n recursive calls. ; If n <= 0, then the algorithm terminated before ; it could determine whether x and y are equal. (define (equal? x y bound) (cond ((eq? x y) bound) ((<= bound 0) bound) ((and (pair? x) (pair? y)) (if (eq? (car x) (car y)) (equal? (cdr x) (cdr y) (- bound 1)) (let ((result (equal? (cdr x) (cdr y) (- bound 1)))) (if result (equal? (car x) (car y) result) #f)))) ((and (vector? x) (vector? y)) (let ((nx (vector-length x)) (ny (vector-length y))) (if (= nx ny) (let loop ((i 0) (bound (- bound 1))) (if (< i nx) (let ((result (equal? (vector-ref x i) (vector-ref y i) bound))) (if result (loop (+ i 1) result) #f)) bound)) #f))) ((and (string? x) (string? y)) (if (string-equal? x y) bound #f)) ((eqv? x y) bound) (else #f))) ; Returns #t iff x and y would have the same (possibly infinite) ; printed representation. Always terminates. (define (equiv? x y) (let ((done (initial-equivalences))) ; done is a hash table that maps objects to their ; equivalence classes. ; ; Algorithmic invariant: If all of the comparisons that ; are in progress (pushed onto the control stack) come out ; equal, then all of the equivalences in done are correct. ; ; Invariant of this implementation: The equivalence classes ; omit easy cases, which are defined as cases in which eqv? ; always returns the correct answer. The equivalence classes ; also omit strings, because strings can be compared without ; risk of circularity. ; ; Invariant of this prototype: The equivalence classes include ; only pairs and vectors. If records or other things are to be ; compared recursively, then they should be added to done. ; ; Without constant-time lookups, it is important to keep ; done as small as possible. This implementation takes ; advantage of several common cases for which it is not ; necessary to keep track of a node's equivalence class. (define (equiv? x y) ;(step x y done) (cond ((eqv? x y) #t) ((and (pair? x) (pair? y)) (let ((x1 (car x)) (y1 (car y)) (x2 (cdr x)) (y2 (cdr y))) (cond ((done? x y done) #t) ((eqv? x1 y1) (equate! x y done) (equiv? x2 y2)) ((eqv? x2 y2) (equate! x y done) (equiv? x1 y1)) ((easy? x1 y1) #f) ((easy? x2 y2) #f) (else (equate! x y done) (and (equiv? x1 y1) (equiv? x2 y2)))))) ((and (vector? x) (vector? y)) (let ((n (vector-length x))) (if (= n (vector-length y)) (if (done? x y done) #t (begin (equate! x y done) (vector-equiv? x y n 0))) #f))) ((and (string? x) (string? y)) (string-equal? x y)) (else #f))) ; Like equiv? above, except x and y are known to be vectors, ; n is the length of both, and i is the first index that has ; not yet been pushed onto the todo set. (define (vector-equiv? x y n i) (if (< i n) (let ((xi (vector-ref x i)) (yi (vector-ref y i))) (if (easy? xi yi) (if (eqv? xi yi) (vector-equiv? x y n (+ i 1)) #f) (and (equiv? xi yi) (vector-equiv? x y n (+ i 1))))) #t)) (equiv? x y))) ; A comparison is easy if eqv? returns the right answer. (define (easy? x y) (cond ((eqv? x y) #t) ((pair? x) (not (pair? y))) ((pair? y) #t) ((vector? x) (not (vector? y))) ((vector? y) #t) ((not (string? x)) #t) ((not (string? y)) #t) (else #f))) (let ((result (equal? x y equiv:bound-on-recursion))) (if result (if (> result 0) #t (equiv? x y)) #f))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; An ADT of hash tables that map pairs and vectors ; to equivalence classes. ; ; The implementation below should be replaced by hash tables ; with amortized constant-time lookups on object identity. ; The implementation below was included to make this file ; self-contained within the limitations of portable R5RS code. ; ; To achieve some tiny degree of hashing, these tables ; are represented as ( #( ...)), where ; is a list of equivalence classes of pairs, and ; is a list of equivalence classes of vectors. ; The vectors can be hashed according to their length. ; Each equivalence class is represented as a non-empty list. ; ; Note: The equal? procedure implicitly assumes its arguments ; are not mutated during the comparison, so the hash table could ; use a hash function based on a shallow comparison of contents. ; This would not improve the worst-case behavior, but might ; improve performance for the average case. ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Returns the number of equivalence classes in the table. (define (table-size table) (apply + (length (car table)) (map length (vector->list (cadr table))))) (define (initial-equivalences) (list '() (make-vector vec-hash-size '()))) ; Are x and y equivalent according to the table? (define (done? x y table) #f) ; assigned below ; Merge the equivalence classes of x and y in the table, ; and return the table. Changes the table. (define (equate! x y table) table) ; assigned below ; Private helpers for the ADT of hash tables. ; FIXME: Should be bummed so far as possible within the ; limitations of R5RS portable code. (define vec-hash-size 5) (let () (define (already-done? x y classes) (cond ((null? classes) #f) ((memq x (car classes)) (memq y (car classes))) (else (already-done? x y (cdr classes))))) (define (equate-pairs! x y table) (if (not (equate-objects x y (car table))) (set-car! table (cons (list x y) (car table))))) (define (equate-vectors! x y table) (let* ((i (modulo (vector-length x) vec-hash-size)) (classes (vector-ref (cadr table) i))) (if (not (equate-objects x y classes)) (vector-set! (cadr table) i (cons (list x y) classes))))) ; Destructively merges the equivalence classes of x and y ; and returns #t, or returns #f if it cannot perform the ; merge destructively. (define (equate-objects x y classes) (cond ((null? classes) #f) ((memq x (car classes)) (if (memq y (car classes)) #t (begin (merge-classes! (car classes) y (cdr classes) classes) #t))) ((memq y (car classes)) (if (memq x (car classes)) #t (begin (merge-classes! (car classes) x (cdr classes) classes) #t))) (else (equate-objects x y (cdr classes))))) ; Destructively merges to-merge with the equivalence class ; of x, which may be found within classes. The trailer is ; the list whose cdr is classes. (define (merge-classes! to-merge x classes trailer) (cond ((null? classes) (set-cdr! to-merge (cons x (cdr to-merge)))) ((memq x (car classes)) (set-cdr! (last-pair to-merge) (car classes)) (set-cdr! trailer (cdr classes))) (else (merge-classes! to-merge x (cdr classes) classes)))) (define (last-pair x) (if (pair? (cdr x)) (last-pair (cdr x)) x)) ; End of private helper functions for the "hash table" ; of equivalence classes. (set! done? (lambda (x y table) (cond ((pair? x) (and (pair? y) (already-done? x y (car table)))) ((vector? x) (and (vector? y) (= (vector-length x) (vector-length y)) (let ((i (modulo (vector-length x) vec-hash-size))) (already-done? x y (vector-ref (cadr table) i))))) (else ???)))) (set! equate! (lambda ( x y table) (cond ((pair? x) (equate-pairs! x y table)) ((vector? x) (equate-vectors! x y table)) (else ???)))) #t) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Debugging. ; ; This stepper can be hacked to avoid printing circular structures, ; or to print them only to some finite depth. (define *stepping* #t) (define *depth* 5) (define (step x y done) (if *stepping* (let ((x (limit-depth x)) (y (limit-depth y)) (done (limit-depth done))) (display "stepping:") (newline) (write x) (newline) (write y) (newline) (write done) (newline)))) (define (limit-depth x) (define (limit-depth x depth) (cond ((pair? x) (if (<= depth 0) '... (cons (limit-depth (car x) (- depth 1)) (limit-depth (cdr x) (- depth 1))))) ((vector? x) (if (<= depth 0) '... (list->vector (map (lambda (x) (limit-depth x (- depth 1))) (vector->list x))))) (else x))) (limit-depth x *depth*)) ; Computes the number of nodes in a structure, which should ; bound the number of equivalence classes in the algorithm. (define (object-count x) (define (object-count x nodes) (cond ((memq x nodes) nodes) ((pair? x) (object-count (car x) (object-count (cdr x) (cons x nodes)))) ((vector? x) (do ((nodes (cons x nodes) (object-count (car elements) nodes)) (elements (vector->list x) (cdr elements))) ((null? elements) nodes))) (else nodes))) (length (object-count x '()))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; Tests and benchmarks. ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (make-test-list1 n x) (if (zero? n) '() (cons x (make-test-list1 (- n 1) x)))) (define (make-test-tree1 n) (if (zero? n) '() (make-test-list1 n (make-test-tree1 (- n 1))))) (define (make-test-list2 n thunk) (if (zero? n) '() (cons (thunk) (make-test-list2 (- n 1) thunk)))) (define (make-test-tree2 n) (if (zero? n) '() (make-test-list2 n (lambda () (make-test-tree2 (- n 1)))))) (define (make-test-tree5 n) (if (zero? n) '() (cons (make-test-tree5 (- n 1)) 'a))) ; A simple circular list is a worst case for R5RS equal?. (define (equality-benchmark0 same? n) (let ((x (vector->list (make-vector n 'a)))) (set-cdr! (list-tail x (- n 1)) x) (run-benchmark (string-append "equality0:" (number->string n)) n (lambda () (same? x (cdr x))) (lambda (answer) (eq? answer #t))))) ; DAG with much sharing. ; 10 is a good parameter for n. (define (equality-benchmark1 same? n) (let ((x (make-test-tree1 n)) (y (make-test-tree1 n))) (run-benchmark (string-append "equality1:" (number->string n)) n (lambda () (same? x y)) (lambda (answer) (eq? answer #t))))) ; Tree with no sharing. ; 8 is a good parameter for n. (define (equality-benchmark2 same? n) (let ((x (make-test-tree2 n)) (y (make-test-tree2 n))) (run-benchmark (string-append "equality2:" (number->string n)) n (lambda () (same? x y)) (lambda (answer) (eq? answer #t))))) ; Flat lists. ; 1000 might be a good parameter for n. (define (equality-benchmark3 same? n) (let* ((x (vector->list (make-vector n 'a))) (y (vector->list (make-vector n 'a)))) (run-benchmark (string-append "equality3:" (number->string n)) n (lambda () (same? x y)) (lambda (answer) (eq? answer #t))))) ; Shallow lists. ; 300 might be a good parameter for n. (define (equality-benchmark4 same? n) (let* ((x (vector->list (make-vector n (make-test-tree2 3)))) (y (vector->list (make-vector n (make-test-tree2 3))))) (run-benchmark (string-append "equality4:" (number->string n)) n (lambda () (same? x y)) (lambda (answer) (eq? answer #t))))) ; Possible worst case for equiv? as implemented above. ; No sharing, no proper lists, ; and deep following car chains instead of cdr. (define (equality-benchmark5 same? n . rest) (let* ((x (make-test-tree5 n)) (y (make-test-tree5 n)) (iterations (if (null? rest) n (car rest)))) (run-benchmark (string-append "equality5:" (number->string n)) iterations (lambda () (same? x y)) (lambda (answer) (eq? answer #t))))) ; A shorter form of the benchmark above. (define (equality-benchmark5short same? n) (equality-benchmark5 same? n 100)) (define (equality-benchmarks . rest) (define short (and (not (null? rest)) (eq? (car rest) 'short))) ; (equality-benchmark0 equal? 10) ; probably doesn't terminate (equality-benchmark0 equiv? 10) (equality-benchmark1 equal? 10) (equality-benchmark1 equiv? 10) (equality-benchmark3 equal? 1000) (equality-benchmark3 equiv? 1000) (equality-benchmark4 equal? 300) (equality-benchmark4 equiv? 300) (if short (begin (equality-benchmark5short equal? 500) (equality-benchmark5short equiv? 500) (equality-benchmark5short equal? 1000) (equality-benchmark5short equiv? 1000) (equality-benchmark5short equal? 2000) (equality-benchmark5short equiv? 2000)) (begin (equality-benchmark5 equal? 500) (equality-benchmark5 equiv? 500) (equality-benchmark5 equal? 1000) (equality-benchmark5 equiv? 1000) (equality-benchmark5 equal? 2000) (equality-benchmark5 equiv? 2000))) (equality-benchmark2 equal? 7) (equality-benchmark2 equiv? 7) (equality-benchmark2 equal? 8) (equality-benchmark2 equiv? 8) (equality-benchmark2 equal? 9) (equality-benchmark2 equiv? 9)) ; might run for a very long time (define (basic-equiv-tests equiv?) (call-with-current-continuation (lambda (exit) (let-syntax ((return (syntax-rules () ((return) (exit #t)))) (test (syntax-rules () ((test n exp) (if (not exp) (begin (display "*****BUG*****") (newline) (display "Failed test ") (display n) (display ":") (newline) (write 'exp) (newline) (exit #f))))))) (let ((x (list 1 2 3 'a)) (y (list 1 2 3 1 2 3 'b))) (set-cdr! (list-tail x 2) x) (set-cdr! (list-tail y 5) y) (test 1 (equiv? '() '())) (test 2 (equiv? '(a) (list 'a))) (test 3 (not (equiv? (list 'a) '(b)))) (test 4 (equiv? (vector) '#())) (test 5 (equiv? (vector 34.5) '#(34.5))) (test 6 (not (equiv? (vector 34.5) '#(a)))) (test 7 (equiv? (make-test-tree1 3) (make-test-tree2 3))) (test 8 (equiv? (make-test-tree2 3) (make-test-tree1 3))) (test 9 (equiv? x y)) (test 10 (not (equiv? (list x y 'a) (list y x 'b))))))))) (basic-equiv-tests equiv?)