(module regexp-front-end mzscheme (require-for-syntax "constant-analysis.ss") (require-for-syntax "regexp-core.ss") (require-for-syntax "parse-regexp.ss") (require (lib "etc.ss")) (require (lib "match.ss")) (require "regexp-core.ss") (require "parse-regexp.ss") ;; @regexp : U-regexp -> S-regexp (define-syntax (@regexp stx) (syntax-case stx () [(_ pat) (string-literal? (syntax pat)) #`(quote #,(precompile-regexp #'pat))] [(_ pat) #'(parse-regexp pat)] [_ #'parse-regexp])) (define-syntax @disjoin* (syntax-rules () [(_ ()) fail] [(_ (d1 d2)) (disjoin d1 d2)] [(_ (d1 d2 ...)) (disjoin d1 (@disjoin* (d2 ...)))])) (define-syntax @conjoin* (syntax-rules () [(_ ()) succeed] [(_ (c1 c2)) (conjoin c1 c2)] [(_ (c1 c2 ...)) (conjoin c1 (@conjoin* (c2 ...)))])) ;; =========================================================================== ;; REGEXP MATCHER - NOT PARTIALLY EVALUATED ;; =========================================================================== ;; TODO: .$^[range][^range] ;; TODO: "memoize" a subpattern by adding (make-matcher x:M-regexp) -> x ;; TODO: minimal? is not yet implemented ;; make-matcher : S-regexp -> M-regexp (define (make-matcher pat) (match pat [(':or pats ...) (disjoin* (map make-matcher pats))] [(':seq pats ...) (conjoin* (map make-matcher pats))] ;; P{0,inf} = PP{0,inf} [(':between minimal? (? zero? at-least) (? not at-most) pat1) (list-of (make-matcher pat1))] ;; P{m,inf} = PP{m-1,inf} [(':between minimal? (? positive? at-least) (? not at-most) pat1) (conjoin (make-matcher pat1) (make-matcher (list ':between minimal? (sub1 at-least) #f pat1)))] ;; P{0,0} = () [(':between minimal? (? zero? at-least) (? zero? at-most) pat1) succeed] ;; P{0,n} = PP{0,n-1}|() [(':between minimal? (? zero? at-least) (? positive? at-most) pat1) (disjoin (conjoin (make-matcher pat1) (make-matcher (list ':between minimal? 0 (sub1 at-most) pat1))) succeed)] ;; P{m,n} = PP{m-1,n-1} [(':between minimal? (? positive? at-least) (? positive? at-most) pat1) (conjoin (make-matcher pat1) (make-matcher (list ':between minimal? (sub1 at-least) (sub1 at-most) pat1)))] [(':sub index pat1) (submatch index (make-matcher pat1))] [(? char? c) (pred (lambda (cc) (and (char? cc) (char=? cc c))))])) ;; match-positions : S-regexp string {nat {nat}} -> ([(nat * nat)|#f]|#f) (define match-positions (opt-lambda (pat str [start 0] [end (string-length str)]) (let loop ([i start]) (and (<= i end) (let ([res (run (make-matcher pat) str i end (count-subs pat))]) (or res (loop (+ 1 i)))))))) ;; match-strings : S-regexp string {nat {nat}} -> ([string|#f]|#f) (define match-strings (opt-lambda (pat str [start 0] [end (string-length str)]) (let ([posns (match-positions pat str start end)]) ;(printf "posns: ~a~n" posns) (and posns (map (lambda (posn) (and posn (substring str (car posn) (cdr posn)))) posns))))) ;; =========================================================================== ;; REGEXP MATCHER - PARTIALLY EVALUATED ;; =========================================================================== (define-syntax @make-matcher (syntax-rules (:or :seq :sub :between) [(_ (:or pat1 pat2 ...)) (@disjoin* ((@make-matcher pat1) (@make-matcher pat2) ...))] [(_ (:seq pat1 pat2 ...)) (@conjoin* ((@make-matcher pat1) (@make-matcher pat2) ...))] [(_ (:between minimal? at-least at-most pat1)) (@make-matcher/between minimal? at-least at-most pat1)] [(_ (:sub index pat1)) (submatch index (@make-matcher pat1))] [(_ c) (pred (lambda (cc) (and (char? cc) (char=? cc c))))])) (define-syntax (@make-matcher/between stx) (syntax-case stx () ;; P{0,inf} = PP{0,inf} [(_ minimal? 0 #f pat1) #'(list-of (@make-matcher pat1))] ;; P{m,inf} = PP{m-1,inf} [(_ minimal? at-least #f pat1) (let ([m (syntax-object->datum #'at-least)]) #`(conjoin (@make-matcher pat1) (@make-matcher/between minimal? #,(sub1 m) #f pat1)))] ;; P{0,0} = () [(_ minimal? 0 0 pat1) succeed] ;; P{0,n} = PP{0,n-1}|() [(_ minimal? 0 at-most pat1) (let ([n (syntax-object->datum #'at-most)]) #`(disjoin (conjoin (@make-matcher pat1) (@make-matcher/between minimal? 0 #,(sub1 n) pat1)) succeed))] ;; P{m,n} = PP{m-1,n-1} [(_ minimal? at-least at-most pat1) (let ([m (syntax-object->datum #'at-least)] [n (syntax-object->datum #'at-most)]) #`(conjoin (@make-matcher pat1) (@make-matcher/between minimal? #,(sub1 m) #,(sub1 n) pat1)))] ;; TODO: default case calls dynamic make-matcher )) ;; @regexp-match : regexp string {nat {nat}} -> ([string|#f]|#f) (define-syntax @regexp-match (syntax-rules () [(_ pat str) (@regexp-match pat str 0)] [(_ pat str start) (let ([s str]) (@regexp-match pat s 0 (string-length s)))] [(_ pat str start end) (let ([posns (@regexp-match-positions pat str start end)]) (and posns (map (lambda (posn) (and posn (substring str (car posn) (cdr posn)))) posns)))])) ;; @regexp-match-positions : regexp string {nat {nat}} -> ([(nat * nat)|#f]|#f) (define-syntax @regexp-match-positions (syntax-rules () [(_ pat str) (@regexp-match-positions pat str 0)] [(_ pat str start) (let ([s str]) (@regexp-match-positions pat s 0 (string-length s)))] [(_ pat str start end) (@regexp-match-positions/aux pat str start end)])) (define-syntax (@regexp-match-positions/aux stx) (syntax-case stx () [(_ pat str start end) (regexp-constant? #'pat) (let* ([ast (precompile-regexp #'pat)] [sub-count (count-subs ast)]) #`(@match-positions #,ast str start end #,sub-count))] ;; TODO: handle usual default cases )) (define-syntax @match-positions (syntax-rules () [(_ pat str start end sub-count) (let loop ([i start]) (and (<= i end) (let ([res (run (@make-matcher pat) str i end sub-count)]) (or res (loop (+ 1 i))))))])) (provide @regexp @regexp-match @regexp-match-positions make-matcher match-positions match-strings))