# expireIMGs.pl
#
# Process a list of HTML files, removing IMG tags that have expired.
# We know this is so if the tag has an EXPIRES attribute whose ISO
# date value has passed.
#
# We try to do it intelligently, and only rewrite the file if an
# expired image tag is found.
#
# Example:
#
#   <IMG SRC="new.gif" ALT="New!" EXPIRES="2000-07-18">
#
# will be removed on or after 18 July 2000.
#
# The EXPIRES attribute may appear anywhere in the IMG tag. The
# quotes around the date are optional.
#
# If an <IMG> tag is surrounded by <A>...</A> tags, that anchor tag
# will be removed as well.
#
# Input: a list of files to process.
#
# Mark L. Irons
# 18 July 2000
#
#--------------------------------------------------------------------
# Get current date and convert it to ISO format
#
# Build today's date as an ISO string (yyyy-mm-dd).  ISO dates order
# chronologically under plain string comparison, which is how the
# expiry test later in this file compares them.
#
# $curdate stays a package variable because the file loop reads it by
# that name; the pieces it is built from are only needed here.
my ($mday, $mon, $year) = (localtime)[3, 4, 5];
$curdate = sprintf("%04d-%02d-%02d",
                   $year + 1900,    # localtime counts years from 1900
                   $mon + 1,        # ... and months from 0 (Jan is 0)
                   $mday);
#
#--------------------------------------------------------------------
# Set pattern to search on:
#
#   <A ...><IMG ... EXPIRES=yyyy-mm-dd ...></A>
# or
#   <IMG ... EXPIRES=yyyy-mm-dd ...>
#
# $1 becomes an ISO date on match
#
# The date is captured inside a lookahead so that it lands in $1 no
# matter which of the two alternatives ends up consuming the text.  An
# anchor is only swallowed when it contains nothing but the image; an
# anchor that wraps other content as well fails the first alternative,
# and the image alone is then matched by the second one.
#
# Matching is case-insensitive, and the quotes around the date are
# optional.
#
$imgpattern = qr{
    (?=                                   # peek ahead without consuming:
        (?: <A\b [^>]* > \s* )?           #   optional opening anchor
        <IMG\b [^>]*? \s EXPIRES \s*=\s*  #   IMG tag up to its EXPIRES=
        ["']? (\d{4}-\d{2}-\d{2}) \b      #   $1 = ISO date
    )
    (?:
        <A\b [^>]* > \s* <IMG\b [^>]* > \s* </A\s*>   # anchor around image only
      |
        <IMG\b [^>]* >                                # bare image
    )
}xi;
#
#--------------------------------------------------------------------
# Loop over files, processing each.
#
# strip_expired_tags LINE, PATTERN, TODAY
#
# Remove expired tags from one line of text.
#
#   LINE     the text to scan
#   PATTERN  regex matching an expiring tag; on a match $1 must hold the
#            tag's ISO expiry date
#   TODAY    today's date as an ISO string
#
# Returns a two-element list: the rewritten line, and the number of tags
# that were removed from it (0 means the line is returned unchanged).
sub strip_expired_tags {
    my ($line, $pattern, $today) = @_;
    my $kept    = "";       # text already scanned and retained
    my $rest    = $line;    # text still to be scanned
    my $removed = 0;

    while ($rest =~ /$pattern/) {
        # Copy the match variables straight away: the small matches
        # further down would otherwise overwrite them.
        my ($before, $tag, $after, $expires) = ($`, $&, $', $1);

        # An empty match would never shorten $rest; stop rather than spin.
        last if $tag eq "";

        $kept .= $before;
        if (defined($expires) && $expires le $today) {
            # Expired: drop the tag.  ISO dates compare correctly as
            # strings, so a tag expires on its date, not the day after.
            $removed++;
            # A tag that sat between two spaces would leave a doubled
            # space behind; close it up.
            if ($kept =~ / \z/ && $after =~ /\A /) {
                chop $kept;
            }
        }
        else {
            # Not yet expired, or no date was captured: when in doubt,
            # leave the tag where it is.
            $kept .= $tag;
        }
        $rest = $after;
    }
    return ($kept . $rest, $removed);
}

# Each line read from <> names one HTML file to process.
while (my $listed = <>) {
    # Trim the name ourselves: three-argument open() takes it literally,
    # newline and all.
    (my $filename = $listed) =~ s/^\s+|\s+$//g;
    next if $filename eq "";

    my $in;
    if (!open($in, '<', $filename)) {
        warn "Can't open $filename, skipping: $!\n";
        next;
    }

    my $modifiedFile = "";      # text of the file after tag removal
    my $removedTotal = 0;       # stays 0 if the file needs no rewrite
    while (my $line = <$in>) {
        my ($rewritten, $removed) =
            strip_expired_tags($line, $imgpattern, $curdate);
        $removedTotal += $removed;

        # Do not preserve a line that removal has made blank.  Leftover
        # indentation or a bare CR counts as blank; lines that were
        # blank to begin with are untouched and always kept.
        next if $removed && $rewritten !~ /\S/;

        $modifiedFile .= $rewritten;
    }
    close($in);

    # Only rewrite the file if something was actually removed.
    next unless $removedTotal;

    my $out;
    if (!open($out, '>', $filename)) {
        warn "Can't rewrite $filename: $!\n";
        next;
    }
    # Buffered write errors may only surface at close, so check both.
    my $ok = print {$out} $modifiedFile;
    $ok = close($out) && $ok;
    if ($ok) {
        print "Expired tags from $filename\n";
    }
    else {
        warn "Error writing $filename: $!\n";
    }
} # all files