}
}
# Main loop: work through the queue of URLs front to back.  The helpers
# called below may append to or reorder @urlstoget while we iterate.
while (@urlstoget) {
    my $entry = shift @urlstoget ;

    # A URL that will not be parsed for links -- not HTML, not local, marked
    # dontfollow, or beyond $max_depth when a limit is set -- is only
    # verified.  &verify_url() may set ishtml to true, so the parse test
    # below is evaluated afresh instead of being an "else" branch.
    if (    !$entry->{'ishtml'}
         || !$entry->{'islocal'}
         ||  $entry->{'dontfollow'}
         || ( length($max_depth) && $entry->{'depth'} > $max_depth ) )
    {
        &verify_url($entry) ;
    }

    # Local HTML pages that may be followed and are within the depth limit
    # (if any) are loaded and scanned for further links.
    if (    $entry->{'ishtml'}
         && $entry->{'islocal'}
         && !$entry->{'dontfollow'}
         && ( !length($max_depth) || $entry->{'depth'} <= $max_depth ) )
    {
        my ($page) = &load_url($entry) ;    # may set ishtml=false

        if ( $entry->{'ishtml'} ) {
            # 11-30-99 JSM: fixed to handle rel URLs in redirected pages
            # correctly -- prefer the recorded location over the original
            # URL as the base for resolving links.
            my $base = $entry->{'location'} || $entry->{'URL'} ;

            # big, calls &add_url()
            &extract_urls($page, $base, $entry->{'URL'}, $entry->{'depth'} + 1) ;
        }
    }

    # Some error responses may go away on another attempt: 408, 503, 504,
    # and the homegrown codes 600, 601, 602, and 603.  Requeue such a URL
    # at the back of the list until it has used up $MAX_ATTEMPTS tries.
    push(@urlstoget, $entry)
        if $entry->{'status'} =~ /^(408|503|504|600|601|602|603)\b/
           && $entry->{'numtries'} < $MAX_ATTEMPTS ;
}

# Queue is empty: produce the report and finish.
&make_report() ;
exit ;
#----- Process command-line options -----------------------------------
# Process any command-line options.
sub getopts {
my($opt, $param) ;
while ($ARGV[0]=~ /^-/) {
$opt= shift(@ARGV) ;
($opt, $param)= $opt=~ /^-(.)(.*)/ ;
# Turn on verbose reporting
if ($opt eq 'v') {
$verbose_report= ($param ne '-') ;
# User-specified patterns to include ('' to clear list)
} elsif ($opt eq 'I') {
$param= shift(@ARGV) unless length($param) ;
if (length($param)) { push(@INCLUDE_PATTERNS, $param) }
else { @INCLUDE_PATTERNS= () }
# User-specified patterns to exclude ('' to clear list)
} elsif ($opt eq 'X') {
$param= shift(@ARGV) unless length($param) ;
if (length($param)) { push(@EXCLUDE_PATTERNS, $param) }
else { @EXCLUDE_PATTERNS= () }
# User-specified response codes to include ('' to clear list)
} elsif ($opt eq 'i') {
$param= shift(@ARGV) unless length($param) ;
if (length($param)) { push(@INCLUDE_STATUS, $param) }
else { @INCLUDE_STATUS= () }
# User-specified response codes to ignore ('' to clear list)
} elsif ($opt eq 'x') {
$param= shift(@ARGV) unless length($param) ;
if (length($param)) { push(@EXCLUDE_STATUS, $param) }
else { @EXCLUDE_STATUS= () }
# Maximum traversal depth
} elsif ($opt eq 'd') {
$param= shift(@ARGV) unless length($param) ;
$max_depth= $param ;
# Make it a "file check"-- only read local files, do not use HTTP
} elsif ($opt eq 'f') {
$file_check= ($param ne '-') ;
# Use HTTP for all URL's, even local files
} elsif ($opt eq 'h') {
$full_http_check= ($param ne '-') ;
# Read configuration parameters from srm.conf-like file
} elsif ($opt eq 'c') {
$param= shift(@ARGV) unless length($param) ;
&read_srm_conf($param) ;
# Print current configuration parameters
} elsif ($opt eq 'q') {
&print_config ;
exit ; # jsm-- should we exit?
# Allow certain parameters to be defined via the command line
} elsif ($opt eq 'D') {
$param= shift(@ARGV) unless length($param) ;
$debug=1, unshift(@ARGV,$param), next if $param=~ /^-/ ;
my($name,$value)= split(/=/, $param, 2) ;
=3= |