;; ** * * * * * * * * * * * * * * * * ** 
;; **  Start of the AP-F Parser...    **
;; ** * * * * * * * * * * * * * * * * **

(require (lib "lex.ss" "parser-tools")
         (prefix : (lib "lex-sre.ss" "parser-tools")))

;; End-Of-File Token
(define-struct EOF ())

;; Lexer Abbreviations
(define-lex-abbrevs
 (lower-letter (:/ "a" "z"))
 (upper-letter (:/ #\A #\Z))
 (digit (:/ "0" "9")))

;; Simple Lexer... Could add String constants as well
(define the-lexer
  (lexer
   [(eof) (make-EOF)]
   [(:or #\tab #\space #\newline #\linefeed) (the-lexer input-port)]
   [(:or "=" "+" "-" "*" "/" "(" ")" "{" "}") lexeme]
   [(:: "t" "r" "u" "e") true]
   [(:: "f" "a" "l" "s" "e") false]
   [(:+ (:or lower-letter upper-letter)) (string->symbol lexeme)]
   [(:+ digit) (string->number lexeme)]
   [(:: "\"" (:+ (char-complement "\"")) "\"") lexeme]
   [(:: (:+ digit) #\. (:* digit)) (string->number lexeme)]))

;;** A Rule is either:
;;     -- Prod-Rule, or
;;     -- Sum-Rule

;; Prod-Rule is (make-prod-rule Symbol (listof Symbol-Or-String))
(define-struct prod-rule (sym goesto))
;; Sum-Rule is (make-sum-rule Symbol (listof Symbol))
(define-struct sum-rule (sym goesto))

;; rule-sym: Rule -> Symbol
;; Get the name of a Rule
(define (rule-sym r)
  (cond [(prod-rule? r) (prod-rule-sym r)]
        [(sum-rule? r) (sum-rule-sym r)]
        [else (error 'bad "BAD: ~a" r)]))

;; The EmptySet == the EmptyList
(define empty-set empty)

;; set-single: Any -> Set
;; Make a singleton Set
(define (set-single it) (list it))

;; set-contains?: Set Any -> Boolean
;; Does the given Set contain the given element?
(define (set-contains? set it)
  (if (empty? set) false
      (or (equal? (first set) it)
          (set-contains? (rest set) it))))

;; set-union: Set Set -> Set
;; Compute the Union of two Sets
(define (set-union set1 set2)
  (cond [(empty? set1) set2]
        [(empty? set2) set1]
        [(set-contains? set2 (first set1)) (set-union (rest set1) set2)]
        [else (cons (first set1) (set-union (rest set1) set2))]))

;; first-set-rules: Symbol (listof Rules) (listof Rules) -> Set
;; First set of a rule without 'empty's
(define (first-set-rules sym lor glor)
  (if (empty? lor) empty-set
      (let ((r (first lor)))
        (if (symbol=? (rule-sym r) sym)
            (cond [(prod-rule? r) (first-set (prod-rule-goesto r) glor)]
                  [(sum-rule? r) 
                   (foldl (lambda (s set)
                           (set-union (first-set-rules s glor glor)
                                      set))
                         empty-set
                         (sum-rule-goesto r))])
            ;; Keep looking...
            (first-set-rules sym (rest lor) glor)))))

;; first-set: List-Of-Token Set-of-String -> Set-of-String
;; First set without 'empty's
(define (first-set lot lor)
  (if (empty? lot) (error 'first-set "Cannot Be Empty")
      (let ((f (first lot)))
        (cond [(string? f) (set-single f)]
              [(symbol? f)
               (cond [(or (symbol=? f 'number)
                          (symbol=? f 'string)
                          (symbol=? f 'symbol)
                          (symbol=? f 'boolean)) (set-single f)]
                     [else (first-set-rules (first lot) lor lor)])]))))

;; grammar-syms: (listof Rule) -> (listof Symbol)
;; Return just the 
(define (grammar-syms lor)
  (foldl (lambda (r s) (set-union (set-single (rule-sym r)) s)) empty-set lor))

;; all-first-sets: (listof Rules) -> (listof (listof Symbol Set))
;; Compute all the first-sets of all the Rules
(define (all-first-sets lor)
  (map (lambda (r) 
         (let ((s (rule-sym r)))
           (list s (first-set-rules s lor lor)))) lor))

;; type-of: Any -> Symbol
;; Return the TypeSymbol of a Token
(define (type-of a)
  (cond [(symbol? a) 'symbol]
        [(number? a) 'number]
        [(string? a) 'string]
        [(boolean? a) 'boolean]
        [(EOF? a) 'EOF]
        [else 'unknown]))

;; current-token: Any
;; The Current input Token... '() if none
(define current-token '())

;; peek-token: Lexer -> Any
;; Peek at the first token of the input, don't remove it
(define (peek-token lex)
  (if (empty? current-token) (set! current-token (lex)))
  current-token)

;; next-token: Lexer -> Any
;; Get and remove the next Token from teh input
(define (next-token lex)
  (if (empty? current-token) (lex)
      (let ((tmp current-token))
        (set! current-token '())
        tmp)))

;; make-the-lexer: InputPort -> Lexer
;; Make a Lexer for the given InputPort
(define (make-the-lexer port)
  (lambda () (the-lexer port)))

;; parse: (listof Rule) Symbol InputPort -> Any
;; Parse an InputPort using the given Rules, starting with the given Symbol
(define (parse lor start port)
  (let* ((firsts (all-first-sets lor))
         (lexer (make-the-lexer port))
         (result (parse-sym lor firsts start lexer)))
    (begin (parse-sym lor firsts 'EOF lexer) result)))

;; parse-sym: (listof Rule) (listof FirstSets) Symbol Lexer -> Any
;; Parse a specific Type (Symbol)
(define (parse-sym lor firsts sym lexer)
  (cond [(or (symbol=? sym 'number) (symbol=? sym 'symbol)
             (symbol=? sym 'string) (symbol=? sym 'boolean)
             (symbol=? sym 'EOF))
         (let* ((tok (next-token lexer))
                (tt (type-of tok)))
           (if (not (symbol=? sym tt))
               (error 'parser " Expected <~a> got <~a>!!" sym tt)
               tok))]
        [else (let* ((r (find-rule sym lor))
                     (name (rule-sym r)))
                (cond [(prod-rule? r)
                       (parse-prod lor firsts name (prod-rule-goesto r) lexer)]
                      [(sum-rule? r)
                       (parse-choice lor firsts name (sum-rule-goesto r)
                                     (peek-token lexer) lexer)]))]))

;; parse-choice: (listof Rule) (listof FirstSets) Symbol (listof Symbol) Any Lexer -> Any
;; Parse a Union (sum type), choose a concrete type based on the given FirstSets
(define (parse-choice lor firsts name choices tok lexer)
  (if (empty? choices)
      (error 'parse-choice "No Choice Found For <~a> Starting with \"~a\" <~a>"
             name tok (type-of tok))
      (let ((fst (find-first (car choices) firsts)))
        (if (or (set-contains? fst (type-of tok))
                (set-contains? fst tok))
            (parse-sym lor firsts (first choices) lexer)
            (parse-choice lor firsts name (cdr choices) tok lexer)))))

;; find-rule: Symbol (listof Rule) -> Rule
;; Find the Parse rule for the given Symbol
(define (find-rule sym lor)
  (cond [(null? lor) (error 'find-rule "Rule Not Found For <~a>" sym)]
        [(symbol=? (rule-sym (car lor)) sym) (car lor)]
        [else (find-rule sym (cdr lor))]))

;; find-first: Symbol (listof FirstSets) -> (listof String-Or-Symbol)
;; Find the FirstSets for the given Symbol
(define (find-first sym lof)
  (cond [(null? lof) (error 'find-first "BAD")]
        [(symbol=? sym (caar lof)) (cadar lof)]
        [else (find-first sym (cdr lof))]))

;; creator-name: Symbol -> Symbol
;; What is the symbol for the creator of the given Type?
(define (creator-name sym)
  (string->symbol (string-append "make-" (symbol->string sym))))

;; parse-string: String Lexer -> Void
;; Parse a Terminal (String) by ignoring it...
(define (parse-string str lexer)
  (let ((tok (next-token lexer)))
    (if (not (and (string? tok) (string=? tok str)))
        (error 'parse-string "Expected String \"~a\" Found \"~a\"" str tok))))

;; parse-prod: (listof Rule) (listof FirstSets) Symbol (listof String-Or-Symbol) Lexer -> Any
;; Parse a Product Rule for 'name', with the body 'goesto'
(define (parse-prod lor firsts name goesto lexer)
  (apply (eval (creator-name name))
         (reverse (foldl (lambda (tt lst)
                           (cond [(symbol? tt) (cons (parse-sym lor firsts tt lexer) lst)]
                                 [(string? tt) (begin (parse-string tt lexer) lst)]
                                 [else (error 'parse-rule "Bad Parsable <~a> For ~a" tt name)]))
                         '() goesto))))


;; * * * * * * * * * * *
;; *      TESTING      *
;; * * * * * * * * * * *

;; Structures
(define-struct Leaf (n))
(define-struct True ())
(define-struct False ())
(define-struct Node (left right))


;; Grammar (Abstr and Concrete Syntax)
(define gram (list (make-sum-rule 'Tree '(Node Leaf))
                   (make-prod-rule 'Leaf '(boolean))
                   (make-prod-rule 'Node '("(" Tree Tree ")"))))

;; boolean->string: Boolean-> String
(define (boolean->string b)
  (if b "true" "false"))

;; Tree->string: Tree -> String
(define (Tree->string t)
  (cond [(Leaf? t) (string-append "(leaf " (boolean->string (Leaf-n t)) ")")]
        [else (string-append "(node " 
                             (Tree->string (Node-left t)) ", "
                             (Tree->string (Node-right t)) ")")]))
    
;;** Try everything out
(Tree->string (parse gram 'Tree (open-input-string "(true false)")))