;; The first three lines of this file were inserted by DrScheme. They record metadata ;; about the language level of this file in a form that our tools can easily process. #reader(lib "htdp-intermediate-lambda-reader.ss" "lang")((modname 8-2) (read-case-sensitive #t) (teachpacks ()) (htdp-settings #(#t constructor repeating-decimal #f #t none #f ()))) #| Closed Web Site data definitions URL = String (RFC 3986: parses into http:// url and ends in / or .html) Res = (make-posn [Listof Missing] [Listof Missing]) Missing = (make-posn URL URL) interpretation: A Missing (make-posn u v) means u points to v and v doesn't exist or is external to the site. |# (require "more-io.ss") ;; imports: ;; url-html-neighbors : URL -> [Listof URL] ;; url-exists? : URL -> Boolean ;; Since both access the web, we must abstract over them with functions ;; that simulate the web. ; ; ; ; ; ; ; ; ; ;;;; ;;; ;;;; ; ;;; ;;;; ; ;; ; ; ; ; ; ;; ; ; ;; ; ; ; ; ; ; ; ; ; ; ; ;;; ;;;;; ;;;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;; ; ; ; ; ; ;;;; ;;;; ;; ; ; ;;;; ; ; ; ; ; ;; URL -> Res ;; determine non-existent URLs: external ones in x coordinate; internals in y ;; FUNCTION COMPOSITION, use of abstraction (define (closed-site url) (close-site-general url url-html-neighbors url-exists?)) ;; URL (URL -> [Listof URL]) (URL -> Boolean) -> Res ;; GENERATIVE RECURSION: ;; -- the web is a graph; the URLs are nodes ;; .. WITH ABSTRACTION OVER WEB ACCESSS ;; TERMINATION: This function may diverge when the "web site" can "grow" ;; as quickly as the function can retrieve URLs. Of course, the two ;; functions may just describe an infinite graph to start with: ;; Any -> Empty (define (run-forever anything) (local (;; URL -> URL ;; extend given url with one step in the path (define (node-neighbors u) (list (string-append u "a/"))) ;; URL -> Boolean ;; all urls exist (define (node-exists? y) true)) ;; run forever, because you always get a new URL (close-site-general "http://neu.com" node-neighbors node-exists?))) (check-expect (close-site-general FUN test-neighbors test-exists?) (make-posn (list (make-posn FUN XBAD)) (list (make-posn FUN IXBAD) (make-posn FUN IBAD)))) (define (close-site-general url0 node-neighbors node-exists?) (local (;; [Listof URL] [Listof URL] ;; [Listof (make-posn URL URL)] [Listof (make-posn URL URL)] -> Res ;; GENERATIVE RECURSION with accu ;; seen: all urls encountered on the path from url0 to url ;; ex: list of external links ;; in: list of missing internal links (define (search to-go seen ex in) (if (empty? to-go) (make-posn ex in) (local ((define pr1 (first to-go)) (define url (posn-y pr1)) (define rst (rest to-go)) (define s (cons url seen))) (cond [(member url seen) (search rst seen ex in)] [(url-external? url url0) (search rst s (cons pr1 ex) in)] [(not (node-exists? url)) (search rst s ex (cons pr1 in))] [else (local ((define u* (node-neighbors url)) (define n* (map (lambda (u) (make-posn url u)) u*))) (search (append rst n*) s ex in))]))))) (search (list (make-posn "*start" url0)) '() '() '()))) ;; URL URL -> Boolean ;; does url extend base? or is it server-relative? ;; FUNCTION COMPOSITION (check-expect (url-external? XBAD FUN) true) (check-expect (url-external? IBAD FUN) false) (check-expect (url-external? FUN2 FUN) false) (define (url-external? url base) (not (string-prefix? base url))) ;; String String -> Boolean ;; is s a prefix of t? (check-expect (string-prefix? "hello" "hell") false) (check-expect (string-prefix? "hell" "hello") true) (define (string-prefix? s t) (local (;; [Listof 1String] [Listof 1String] -> Boolean ;; STRUCTURAL RECURSION simultaneous on both args, simplified (define (prefix? s t) (cond [(empty? s) true] [(empty? t) false] [else (and (string=? (first s) (first t)) (prefix? (rest s) (rest t)))]))) (prefix? (explode s) (explode t)))) ; ; ; ; ; ; ; ; ; ;;;;; ;;; ;;;; ;;;;; ;;; ; ;; ;; ; ; ; ; ; ; ; ; ;; ; ; ;; ; ; ;;;;; ;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;; ; ;;; ;;;; ;;;; ;;; ; ; ; ;; ; ; ; ; ;;;; ;; a small fake 'web' site ;; constants for testing program (define FUN "http://fun.com/") (define FUN2 (string-append FUN "second.html")) (define XBAD "http://bad.com/") (define IBAD "http://fun.com/bad.html") (define IXBAD (string-append FUN "Images/styles.html")) (define PNG (string-append FUN "styles.png")) (define EXISTS (list FUN IBAD XBAD IXBAD FUN2 PNG)) (define existing-urls (append (list (list FUN2 FUN)) (list (list PNG)) (list EXISTS))) ;; URL -> [Listof URL] ;; extract all a-href and img-src URLs from the existing url ;; DOMAIN KNOWLEDGE (check-expect (test-neighbors FUN) (rest EXISTS)) (check-error (test-neighbors "") "can't happen: ") (define (test-neighbors url) (local (;; [Listof (cons URL [Listof URL])] -> [Listof URL] ;; STRUCTURAL RECURSION on louurl (define (find lourl) (cond ; [(empty? lourl) (error (string-append "can't happen: " url))] [else (if (string=? (first (first lourl)) url) (rest (first lourl)) (find (rest lourl)))]))) (if (test-exists? url) (find existing-urls) (error (string-append "can't happen: " url))))) ;; URL -> Boolean ;; does existing URL produce a 404 message? ;; FUNCTION COMPOSITION (check-expect (test-exists? FUN) true) (check-expect (test-exists? XBAD) false) (check-expect (test-exists? IBAD) false) (define (test-exists? url) (member url (map first existing-urls))) ;; --- (define url0 "http://www.ccs.neu.edu/home/matthias/107-f09/") ; (closed-site url0)