;;; Copyright 2006 William D Clinger.
;;;
;;; Please do not copy this software.  It is in alpha test form.
;;; A better version will be distributed at some later date.
;;;
;;; Last modified 2 March 2006 8:15am EST
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; EQUIV?
;;;
;;; EQUIV? is a version of EQUAL? that terminates on all arguments.
;;;
;;; The basic idea of the algorithm is presented in
;;;
;;; J E Hopcroft and R M Karp.  A Linear Algorithm for
;;; Testing Equivalence of Finite Automata.
;;; Cornell University Technical Report 71-114,
;;; December 1971.
;;; http://techreports.library.cornell.edu:8081/Dienst/UI/1.0/Display/cul.cs/TR71-114
;;;
;;; The algorithm uses FIND and MERGE operations, which are
;;; called done? and equate! in the implementation below.
;;; The algorithm maintains a stack of comparisons to do,
;;; and a set of equivalences that would be implied by the
;;; comparisons yet to be done.
;;;
;;; When comparing objects x and y whose equality cannot be
;;; determined without recursion, the algorithm pushes all
;;; the recursive subgoals onto the stack, and merges the
;;; equivalence classes for x and y.  If any of the subgoals
;;; involve comparing x and y, the algorithm will notice
;;; that they are in the same equivalence class and will
;;; avoid circularity by assuming x and y are equal.
;;; If all of the subgoals succeed, then x and y really are
;;; equal, so the algorithm is correct.
;;;
;;; If we had hash tables with amortized constant-time lookup on
;;; object identity, then this algorithm could be made to run in
;;; O(n) time, where n is the number of nodes in the larger of
;;; the two structures being compared.
;;;
;;; As implemented below, in portable R5RS Scheme, the algorithm
;;; should run in O(n^2) time, or close to it.
;;;
;;; This implementation uses several different techniques to
;;; reduce the cost of the algorithm for common special cases:
;;;
;;; It uses the traditional recursive algorithm to bounded depth.
;;; It handles easy cases specially.

; How long should we try the traditional recursive algorithm
; before switching to the terminating algorithm?

(define equiv:bound-on-recursion 1000000)

(define (equiv? x y)

  ; The traditional recursive algorithm, with bounded recursion.
  ; Returns #f or an exact integer n.
  ; If n > 0, then x and y are equal and the comparison involved
  ; bound - n recursive calls.
  ; If n <= 0, then the algorithm terminated before
  ; it could determine whether x and y are equal.

  (define (equal? x y bound)
    (cond ((eq? x y)
           bound)
          ((<= bound 0)
           bound)
          ((and (pair? x) (pair? y))
           (if (eq? (car x) (car y))
               (equal? (cdr x) (cdr y) (- bound 1))
               (let ((result (equal? (cdr x) (cdr y) (- bound 1))))
                 (if result
                     (equal? (car x) (car y) result)
                     #f))))
          ((and (vector? x) (vector? y))
           (let ((nx (vector-length x))
                 (ny (vector-length y)))
             (if (= nx ny)
                 (let loop ((i 0)
                            (bound (- bound 1)))
                   (if (< i nx)
                       (let ((result (equal? (vector-ref x i)
                                             (vector-ref y i)
                                             bound)))
                         (if result
                             (loop (+ i 1) result)
                             #f))
                       bound))
                 #f)))
          ((and (string? x) (string? y))
           (if (string-equal? x y) bound #f))
          ((eqv? x y)
           bound)
          (else #f)))

  ; Returns #t iff x and y would have the same (possibly infinite)
  ; printed representation.  Always terminates.

  (define (equiv? x y)
    (let ((done (initial-equivalences)))

      ; done is a hash table that maps objects to their
      ; equivalence classes.
      ;
      ; Algorithmic invariant:  If all of the comparisons that
      ; are in progress (pushed onto the control stack) come out
      ; equal, then all of the equivalences in done are correct.
      ;
      ; Invariant of this implementation:  The equivalence classes
      ; omit easy cases, which are defined as cases in which eqv?
      ; always returns the correct answer.  The equivalence classes
      ; also omit strings, because strings can be compared without
      ; risk of circularity.
      ;
      ; Invariant of this prototype:  The equivalence classes include
      ; only pairs and vectors.  If records or other things are to be
      ; compared recursively, then they should be added to done.
      ;
      ; Without constant-time lookups, it is important to keep
      ; done as small as possible.  This implementation takes
      ; advantage of several common cases for which it is not
      ; necessary to keep track of a node's equivalence class.

      (define (equiv? x y)
        ;(step x y done)
        (cond ((eqv? x y)
               #t)
              ((and (pair? x) (pair? y))
               (let ((x1 (car x))
                     (y1 (car y))
                     (x2 (cdr x))
                     (y2 (cdr y)))
                 (cond ((done? x y done)
                        #t)
                       ((eqv? x1 y1)
                        (equate! x y done)
                        (equiv? x2 y2))
                       ((eqv? x2 y2)
                        (equate! x y done)
                        (equiv? x1 y1))
                       ((easy? x1 y1)
                        #f)
                       ((easy? x2 y2)
                        #f)
                       (else
                        (equate! x y done)
                        (and (equiv? x1 y1)
                             (equiv? x2 y2))))))
              ((and (vector? x) (vector? y))
               (let ((n (vector-length x)))
                 (if (= n (vector-length y))
                     (if (done? x y done)
                         #t
                         (begin (equate! x y done)
                                (vector-equiv? x y n 0)))
                     #f)))
              ((and (string? x) (string? y))
               (string-equal? x y))
              (else #f)))

      ; Like equiv? above, except x and y are known to be vectors,
      ; n is the length of both, and i is the first index that has
      ; not yet been pushed onto the todo set.
    
      (define (vector-equiv? x y n i)
        (if (< i n)
            (let ((xi (vector-ref x i))
                  (yi (vector-ref y i)))
              (if (easy? xi yi)
                  (if (eqv? xi yi)
                      (vector-equiv? x y n (+ i 1))
                      #f)
                  (and (equiv? xi yi)
                       (vector-equiv? x y n (+ i 1)))))
            #t))

      (equiv? x y)))

  ; A comparison is easy if eqv? returns the right answer.

  (define (easy? x y)
    (cond ((eqv? x y)
           #t)
          ((pair? x)
           (not (pair? y)))
          ((pair? y)
           #t)
          ((vector? x)
           (not (vector? y)))
          ((vector? y)
           #t)
          ((not (string? x))
           #t)
          ((not (string? y))
           #t)
          (else #f)))

  (let ((result (equal? x y equiv:bound-on-recursion)))
    (if result
        (if (> result 0)
            #t
            (equiv? x y))
        #f)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; An ADT of hash tables that map pairs and vectors
; to equivalence classes.
;
; The implementation below should be replaced by hash tables
; with amortized constant-time lookups on object identity.
; The implementation below was included to make this file
; self-contained within the limitations of portable R5RS code.
;
; To achieve some tiny degree of hashing, these tables
; are represented as (<pairs> #(<vecs> ...)), where
; <pairs> is a list of equivalence classes of pairs, and
; <vecs> is a list of equivalence classes of vectors.
; The vectors can be hashed according to their length.
; Each equivalence class is represented as a non-empty list.
;
; Note: The equal? procedure implicitly assumes its arguments
; are not mutated during the comparison, so the hash table could
; use a hash function based on a shallow comparison of contents.
; This would not improve the worst-case behavior, but might
; improve performance for the average case.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Returns the number of equivalence classes in the table.

(define (table-size table)
  (apply + (length (car table))
           (map length (vector->list (cadr table)))))

(define (initial-equivalences)
  (list '() (make-vector vec-hash-size '())))

; Are x and y equivalent according to the table?

(define (done? x y table) #f)     ; assigned below

; Merge the equivalence classes of x and y in the table,
; and return the table.  Changes the table.

(define (equate! x y table) table) ; assigned below


; Private helpers for the ADT of hash tables.
; FIXME: Should be bummed so far as possible within the
; limitations of R5RS portable code.

(define vec-hash-size 5)

(let ()

  (define (already-done? x y classes)
    (cond ((null? classes)
           #f)
          ((memq x (car classes))
           (memq y (car classes)))
          (else
           (already-done? x y (cdr classes)))))

  (define (equate-pairs! x y table)
    (if (not (equate-objects x y (car table)))
        (set-car! table (cons (list x y) (car table)))))

  (define (equate-vectors! x y table)
    (let* ((i (modulo (vector-length x) vec-hash-size))
           (classes (vector-ref (cadr table) i)))
      (if (not (equate-objects x y classes))
          (vector-set! (cadr table)
                       i
                       (cons (list x y) classes)))))

  ; Destructively merges the equivalence classes of x and y
  ; and returns #t, or returns #f if it cannot perform the
  ; merge destructively.

  (define (equate-objects x y classes)
    (cond ((null? classes)
           #f)
          ((memq x (car classes))
           (if (memq y (car classes))
               #t
               (begin (merge-classes! (car classes)
                                      y
                                      (cdr classes)
                                      classes)
                      #t)))
          ((memq y (car classes))
           (if (memq x (car classes))
               #t
               (begin (merge-classes! (car classes)
                                      x
                                      (cdr classes)
                                      classes)
                      #t)))
          (else
           (equate-objects x y (cdr classes)))))

  ; Destructively merges to-merge with the equivalence class
  ; of x, which may be found within classes.  The trailer is
  ; the list whose cdr is classes.

  (define (merge-classes! to-merge x classes trailer)
    (cond ((null? classes)
           (set-cdr! to-merge (cons x (cdr to-merge))))
          ((memq x (car classes))
           (set-cdr! (last-pair to-merge) (car classes))
           (set-cdr! trailer (cdr classes)))
          (else
           (merge-classes! to-merge x (cdr classes) classes))))

  (define (last-pair x)
    (if (pair? (cdr x))
        (last-pair (cdr x))
        x))

  ; End of private helper functions for the "hash table"
  ; of equivalence classes.

  (set! done?
    (lambda (x y table)
      (cond ((pair? x)
             (and (pair? y)
                  (already-done? x y (car table))))
            ((vector? x)
             (and (vector? y)
                  (= (vector-length x) (vector-length y))
                  (let ((i (modulo (vector-length x) vec-hash-size)))
                    (already-done? x y (vector-ref (cadr table) i)))))
            (else ???))))

  (set! equate!
    (lambda ( x y table)
      (cond ((pair? x)
             (equate-pairs! x y table))
            ((vector? x)
             (equate-vectors! x y table))
            (else ???))))

  #t)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Debugging.
;
; This stepper can be hacked to avoid printing circular structures,
; or to print them only to some finite depth.

(define *stepping* #t)
(define *depth* 5)

(define (step x y done)
  (if *stepping*
      (let ((x (limit-depth x))
            (y (limit-depth y))
            (done (limit-depth done)))
        (display "stepping:") (newline)
        (write x) (newline)
        (write y) (newline)
        (write done) (newline))))

(define (limit-depth x)
  (define (limit-depth x depth)
    (cond ((pair? x)
           (if (<= depth 0)
               '...
               (cons (limit-depth (car x) (- depth 1))
                     (limit-depth (cdr x) (- depth 1)))))
          ((vector? x)
           (if (<= depth 0)
               '...
               (list->vector
                (map (lambda (x) (limit-depth x (- depth 1)))
                     (vector->list x)))))
          (else x)))
  (limit-depth x *depth*))

; Computes the number of nodes in a structure, which should
; bound the number of equivalence classes in the algorithm.

(define (object-count x)
  (define (object-count x nodes)
    (cond ((memq x nodes)
           nodes)
          ((pair? x)
           (object-count (car x)
                         (object-count (cdr x)
                                       (cons x nodes))))
          ((vector? x)
           (do ((nodes (cons x nodes)
                       (object-count (car elements) nodes))
                (elements (vector->list x)
                          (cdr elements)))
               ((null? elements) nodes)))
          (else nodes)))
  (length (object-count x '())))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Tests and benchmarks.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (make-test-list1 n x)
  (if (zero? n)
      '()
      (cons x (make-test-list1 (- n 1) x))))

(define (make-test-tree1 n)
  (if (zero? n)
      '()
      (make-test-list1 n (make-test-tree1 (- n 1)))))

(define (make-test-list2 n thunk)
  (if (zero? n)
      '()
      (cons (thunk) (make-test-list2 (- n 1) thunk))))

(define (make-test-tree2 n)
  (if (zero? n)
      '()
      (make-test-list2 n (lambda () (make-test-tree2 (- n 1))))))

(define (make-test-tree5 n)
  (if (zero? n)
      '()
      (cons (make-test-tree5 (- n 1))
            'a)))

; A simple circular list is a worst case for R5RS equal?.

(define (equality-benchmark0 same? n)
  (let ((x (vector->list (make-vector n 'a))))
    (set-cdr! (list-tail x (- n 1)) x)
    (run-benchmark (string-append "equality0:" (number->string n))
                   n
                   (lambda () (same? x (cdr x)))
                   (lambda (answer) (eq? answer #t)))))

; DAG with much sharing.
; 10 is a good parameter for n.

(define (equality-benchmark1 same? n)
  (let ((x (make-test-tree1 n))
        (y (make-test-tree1 n)))
    (run-benchmark (string-append "equality1:" (number->string n))
                   n
                   (lambda () (same? x y))
                   (lambda (answer) (eq? answer #t)))))

; Tree with no sharing.
; 8 is a good parameter for n.

(define (equality-benchmark2 same? n)
  (let ((x (make-test-tree2 n))
        (y (make-test-tree2 n)))
    (run-benchmark (string-append "equality2:" (number->string n))
                   n
                   (lambda () (same? x y))
                   (lambda (answer) (eq? answer #t)))))

; Flat lists.
; 1000 might be a good parameter for n.

(define (equality-benchmark3 same? n)
  (let* ((x (vector->list (make-vector n 'a)))
         (y (vector->list (make-vector n 'a))))
    (run-benchmark (string-append "equality3:" (number->string n))
                   n
                   (lambda () (same? x y))
                   (lambda (answer) (eq? answer #t)))))

; Shallow lists.
; 300 might be a good parameter for n.

(define (equality-benchmark4 same? n)
  (let* ((x (vector->list (make-vector n (make-test-tree2 3))))
         (y (vector->list (make-vector n (make-test-tree2 3)))))
    (run-benchmark (string-append "equality4:" (number->string n))
                   n
                   (lambda () (same? x y))
                   (lambda (answer) (eq? answer #t)))))

; Possible worst case for equiv? as implemented above.
; No sharing, no proper lists,
; and deep following car chains instead of cdr.

(define (equality-benchmark5 same? n . rest)
  (let* ((x (make-test-tree5 n))
         (y (make-test-tree5 n))
         (iterations (if (null? rest) n (car rest))))
    (run-benchmark (string-append "equality5:" (number->string n))
                   iterations
                   (lambda () (same? x y))
                   (lambda (answer) (eq? answer #t)))))

; A shorter form of the benchmark above.

(define (equality-benchmark5short same? n)
  (equality-benchmark5 same? n 100))

(define (equality-benchmarks . rest)
  (define short (and (not (null? rest)) (eq? (car rest) 'short)))
; (equality-benchmark0 equal? 10)      ; probably doesn't terminate
  (equality-benchmark0 equiv? 10)
  (equality-benchmark1 equal? 10)
  (equality-benchmark1 equiv? 10)
  (equality-benchmark3 equal? 1000)
  (equality-benchmark3 equiv? 1000)
  (equality-benchmark4 equal? 300)
  (equality-benchmark4 equiv? 300)
  (if short
      (begin
       (equality-benchmark5short equal? 500)
       (equality-benchmark5short equiv? 500)
       (equality-benchmark5short equal? 1000)
       (equality-benchmark5short equiv? 1000)
       (equality-benchmark5short equal? 2000)
       (equality-benchmark5short equiv? 2000))
      (begin
       (equality-benchmark5 equal? 500)
       (equality-benchmark5 equiv? 500)
       (equality-benchmark5 equal? 1000)
       (equality-benchmark5 equiv? 1000)
       (equality-benchmark5 equal? 2000)
       (equality-benchmark5 equiv? 2000)))
  (equality-benchmark2 equal? 7)
  (equality-benchmark2 equiv? 7)
  (equality-benchmark2 equal? 8)
  (equality-benchmark2 equiv? 8)
  (equality-benchmark2 equal? 9)
  (equality-benchmark2 equiv? 9))      ; might run for a very long time


(define (basic-equiv-tests equiv?)
  (call-with-current-continuation
   (lambda (exit)
     (let-syntax ((return (syntax-rules ()
                           ((return) (exit #t))))
                  (test (syntax-rules ()
                         ((test n exp)
                          (if (not exp)
                              (begin (display "*****BUG*****")
                                     (newline)
                                     (display "Failed test ")
                                     (display n)
                                     (display ":")
                                     (newline)
                                     (write 'exp)
                                     (newline)
                                     (exit #f)))))))
       (let ((x (list 1 2 3 'a))
             (y (list 1 2 3 1 2 3 'b)))
         (set-cdr! (list-tail x 2) x)
         (set-cdr! (list-tail y 5) y)

         (test 1 (equiv? '() '()))
         (test 2 (equiv? '(a) (list 'a)))
         (test 3 (not (equiv? (list 'a) '(b))))
         (test 4 (equiv? (vector) '#()))
         (test 5 (equiv? (vector 34.5) '#(34.5)))
         (test 6 (not (equiv? (vector 34.5) '#(a))))
         (test 7 (equiv? (make-test-tree1 3) (make-test-tree2 3)))
         (test 8 (equiv? (make-test-tree2 3) (make-test-tree1 3)))
         (test 9 (equiv? x y))
         (test 10 (not (equiv? (list x y 'a) (list y x 'b)))))))))

(basic-equiv-tests equiv?)