#!/bin/perl
# validate.pl - a CGI script to validate changed HTML & CSS files
#               on a Web site
#
# The purpose of this script is to make checking the validity of a
# site's HTML & CSS easier.  It employs the online HTML & CSS
# validators offered by the World Wide Web consortium at w3.org to
# do the validation.
#
# This script:
#
#   1. Finds all files that have changed since the last time
#      the script was run.
#
#   2. Submits each file to the appropriate validator.
#
#   3. Saves each file's results, after cleaning them up a
#      little.
#
#   4. Returns an HTML page with a list of valid files and a list
#      of files with errors.  If there are files with errors, the
#      details of each error are listed.
#
# If there are no files that have changed since the last time the
# script was run, a message to that effect is printed.
#
# Requirements:
#
#   1. This script must be run from a Web server's CGI directory.  It
#      returns an HTML page viewable in any browser.
#
#   2. A dummy file whose timestamp is used to record the last time
#      the script was run.  This file must be created manually.  See
#      the "globals" section.
#
#   3. Optionally, the Unix utility 'touch' (free versions for
#      Windows are available from CygWin).  It is only used as a
#      fallback when Perl itself cannot update the timestamp file.
#      Changed files are located with the core File::Find module, so
#      the 'find' utility is no longer needed.
#
# Customization: the lines with !!! comments can be customized.
#
# The original version of this script was tested with several
# browsers, Windows 95, the CygWin utilities, Perl 5.6.0, and the
# TinyWeb server.
#
# NOTE(review): the copy of this script that was reviewed had lost its
# line structure and every HTML tag inside its string literals.  The
# visible wording of each message is preserved below, but the markup
# around it (headings, lists, anchors) has been reconstructed and
# should be checked against a pristine copy if one exists.
#
# Mark L. Irons
# 2 August 2002

use strict;
use warnings;

use File::Find;
use LWP::Simple;

#----------------------------------------------------------------------
# globals
#----------------------------------------------------------------------

# customize these for your site

my $WebHome                     = "/www/";                          # !!!
my $validationTimestampFileName = "timestamp.of.last.validation";   # !!!
my $validationTimestampFile     = $WebHome.$validationTimestampFileName;

# define validators & associated text strings to search their output for

my %Validator = (
    HTML => "http://validator.w3.org/check?uri=",
    CSS  => "http://jigsaw.w3.org/css-validator/validator?uri=",
);

# Text whose presence in a validator's response means "no problems".
# These are matched as literal substrings.
#
# NOTE(review): the CSS message was originally wrapped in markup that
# is no longer visible; only the text is matched here, which accepts
# everything the wrapped form would have accepted.
my %GoodMessage = (
    HTML => "No errors found!",
    CSS  => "No error or warning found",
);

# Literal strings that bracket the error section of each validator's
# response.  Everything from the open delimiter through the first
# following close delimiter is copied into the report.
#
# NOTE(review): !!! the original delimiter values were HTML tags and
# could not be recovered.  Until they are filled in, the report links
# to the validator's full output instead of showing inline details.
my %OpenDelimiter = (
    HTML => "",     # !!!
    CSS  => "",     # !!!
);
my %CloseDelimiter = (
    HTML => "",     # !!!
    CSS  => "",     # !!!
);

#----------------------------------------------------------------------
# Exit with message if the timestamp file doesn't exist.
#----------------------------------------------------------------------

if (!-e $validationTimestampFile) {
    my $shownFile = escapeHTML($validationTimestampFile);

    printPreamble();
    print <<EndOfNoTimestamp;
<h1>Validation Failed</h1>

<p>The timestamp file $shownFile doesn't exist. You need to create it.</p>

EndOfNoTimestamp
    printPostamble();
    exit;
}

#----------------------------------------------------------------------
# Get list of files that have changed since the last time this script
# was run
#----------------------------------------------------------------------

# Modification time of the timestamp file = time of the previous run.
my $lastValidated = (stat($validationTimestampFile))[9];
$lastValidated = 0 unless defined $lastValidated;

# Remember when this scan began.  The timestamp file is later set to
# this moment rather than to the end of the run, so a file edited while
# the validators are being queried is picked up again next time.
my $scanStarted = time;

my @URLsToValidate;

find(
    {
        no_chdir => 1,      # keep the CGI's working directory intact
        wanted   => sub {
            my $path = $File::Find::name;

            return unless defined fileType($path);

            my @info = stat($path);
            return unless @info && -f _;    # regular, readable files only

            # same test as find's -newer: strictly more recent
            push(@URLsToValidate, $path) if $info[9] > $lastValidated;
        },
    },
    $WebHome
);

#----------------------------------------------------------------------
# If there's nothing to validate, tell the user so, update the
# timestamp, and exit.
#----------------------------------------------------------------------

if (!@URLsToValidate) {
    printPreamble();
    updateTimestamp($scanStarted);
    print <<EndOfNothingToValidate;
<h1>No Files Need Validation</h1>
EndOfNothingToValidate
    printPostamble();
    exit;
}

#----------------------------------------------------------------------
# Submit each URL to its appropriate validator, and save the results
#----------------------------------------------------------------------

my @goodURLs;       # files the validator accepted
my @badURLs;        # files with errors (or whose check failed)
my %URLerrors;      # file => HTML fragment describing its errors

foreach my $url (sort @URLsToValidate) {

    # determine the file type, HTML or CSS
    my $filetype = fileType($url);
    my $shownURL = escapeHTML($url);

    # submit to validator
    #
    # NOTE(review): as in the original, the path found on disk is
    # appended to the validator address unchanged.  Confirm that the
    # validators can actually resolve that string for your site.
    my $request  = $Validator{$filetype}.$url;
    my $response = get($request);

    # LWP::Simple::get returns undef when the request fails; report that
    # explicitly instead of treating it as a validation error.
    if (!defined $response) {
        $URLerrors{$url} =
            "<p>URI : $shownURL</p>\n"
          . "<p>The validator could not be reached, so this file was not checked.</p>\n";
        push(@badURLs, $url);
        next;
    }

    # process the results
    if (index($response, $GoodMessage{$filetype}) >= 0) {
        push(@goodURLs, $url);      # add to good list
        next;
    }

    # pull out the error messages and save them
    my $open  = $OpenDelimiter{$filetype};
    my $close = $CloseDelimiter{$filetype};
    my $errorHTML;

    # Only trust the capture when the match actually succeeded; an
    # unchecked $1 would still hold the previous file's errors.
    if ($open ne "" && $close ne ""
        && $response =~ /(\Q$open\E.*?\Q$close\E)/s) {
        $errorHTML = $1;

        # remove the now-redundant "Errors" header from the CSS results
        $errorHTML =~ s{<h[1-6][^>]*>\s*Errors\s*</h[1-6]>}{}i;

        # remove line links from HTML error sections, keeping the number
        $errorHTML =~ s{<a\b[^>]*>(\d+)</a>}{$1}gi;
    }
    else {
        my $shownRequest = escapeHTML($request);
        $errorHTML =
            "<p>The error details could not be extracted from the validator's response. "
          . "<a href=\"$shownRequest\">View the full report</a>.</p>\n";
    }

    # at the top of each HTML error section, print the URL for clarity
    $URLerrors{$url} = ($filetype eq "HTML" || $errorHTML !~ /\Q$shownURL\E/)
        ? "<p>URI : $shownURL</p>\n"
        : "";

    # save the error report for each URL
    $URLerrors{$url} .= $errorHTML;
    push(@badURLs, $url);           # add to bad list
}

#----------------------------------------------------------------------
# Create the page of results
#----------------------------------------------------------------------

printPreamble();
updateTimestamp($scanStarted);

print "<h1>Validation Results</h1>\n";

if (@goodURLs) {
    print "<h2>Okay</h2>\n<ol>\n";
    foreach my $url (@goodURLs) {
        print "<li>", escapeHTML($url), "</li>\n";
    }
    print "</ol>\n";
}

if (@badURLs) {
    print "<h2><a name=\"top\"></a>Files Containing Errors</h2>\n<ol>\n";

    my $urllist = "";   # index: one entry per file, linking to its details
    my $errors  = "";   # the detail sections themselves
    my $counter = 0;    # makes each anchor name unique

    foreach my $url (@badURLs) {
        my $shownURL = escapeHTML($url);

        $urllist .= "<li>$shownURL <a href=\"#error$counter\">details</a></li>\n";

        $errors .= "<a name=\"error$counter\"></a>";
        $errors .= $URLerrors{$url};
        $errors .= " <a href=\"#top\">Back</a>\n<hr>\n";

        $counter++;
    }

    print "$urllist\n</ol>\n<hr>\n$errors\n";
}

printPostamble();

#======================================================================
# SUB fileType
#======================================================================
# Classify a path by its suffix.
#
#   $path - file name or path
#
# Returns "HTML" for names ending in .html, "CSS" for names ending in
# .css, and undef (empty list in list context) for anything else.

sub fileType {
    my ($path) = @_;

    return "HTML" if $path =~ /\.html\z/;
    return "CSS"  if $path =~ /\.css\z/;
    return;
}

#======================================================================
# SUB escapeHTML
#======================================================================
# Make arbitrary text safe to place inside HTML text or a double-quoted
# attribute value.
#
#   $text - the raw text
#
# Returns a copy with & < > " replaced by character entities.

sub escapeHTML {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;       # must come first
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

#======================================================================
# SUB updateTimestamp
#======================================================================
# Touch the validation timestamp file; write an error if the attempt
# fails
#
#   $when - optional epoch time to record; defaults to the current time
#
# Prints an HTML paragraph to STDOUT on failure, so it must be called
# after printPreamble.

sub updateTimestamp {
    my ($when) = @_;
    $when = time unless defined $when;

    return if utime($when, $when, $validationTimestampFile);

    # Setting an explicit time can be refused where a plain "touch" is
    # allowed, so fall back to the utility.  The list form of system
    # bypasses the shell, so odd characters in the path are harmless.
    return if system("touch", $validationTimestampFile) == 0;

    my $shownFile = escapeHTML($validationTimestampFile);
    print "<p>Couldn't touch the timestamp file $shownFile. Please touch this file manually.</p>\n";
    return;
}

#======================================================================
# SUB printPreamble
#======================================================================
# Print the CGI header and the opening of the HTML page.

sub printPreamble {
    print <<EndPreamble;
Content-type: text/html

<html>
<head>
<title>URL Validation Report</title>
</head>
<body>
EndPreamble
    return;
}

#======================================================================
# SUB printPostamble
#======================================================================
# Print the closing tags of the HTML page.

sub printPostamble {
    print <<EndPostamble;
</body>
</html>
EndPostamble
    return;
}