PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Code_Examples|Perl|Proxy|cl-1.0.1.pl =

page 3 of 22



    }
}


# Drain the work queue front to back.  The queue is live: while a page is
#   being processed, @urlstoget may gain new entries or be rearranged.
while (@urlstoget) {
    my $entry= shift @urlstoget ;

    # A URL that we will not descend into-- not HTML, not local, marked
    #   dontfollow, or deeper than $max_depth when a limit is set-- only
    #   gets verified.
    if ( !(     $entry->{'ishtml'}
            and $entry->{'islocal'}
            and !$entry->{'dontfollow'}
            and !( length($max_depth) and $entry->{'depth'} > $max_depth ) ) ) {
        &verify_url($entry) ;    # may set ishtml=true
    }

    # Re-test the flags rather than using an "else": the verification
    #   above may just have turned ishtml on.
    if (     $entry->{'ishtml'}
         and $entry->{'islocal'}
         and !$entry->{'dontfollow'}
         and ( !length($max_depth) or $entry->{'depth'} <= $max_depth ) ) {

        # Keep the list-context call; loading may set ishtml=false.
        my($page)= &load_url($entry) ;

        # 11-30-99 JSM: fixed to handle rel URLs in redirected pages correctly
        # Relative links resolve against the 'location' value when one is
        #   set, otherwise against the URL itself.
        my $base= $entry->{'location'} || $entry->{'URL'} ;

        if ( $entry->{'ishtml'} ) {
            # big, calls &add_url()
            &extract_urls($page, $base, $entry->{'URL'}, $entry->{'depth'}+1) ;
        }
    }

    # Some error responses may be corrected by another attempt: 408, 503,
    #   504, and the homegrown codes 600, 601, 602, and 603.  Such a URL
    #   goes to the back of the queue until it has used up $MAX_ATTEMPTS.
    my $retryable= $entry->{'status'}=~ /^(408|503|504|600|601|602|603)\b/ ;
    push(@urlstoget, $entry)
        if $retryable and $entry->{'numtries'} < $MAX_ATTEMPTS ;

}

# Queue exhausted: write the report and finish.
&make_report() ;

exit ;



#----- Process command-line options -----------------------------------

# Process any command-line options.
sub getopts {
    my($opt, $param) ;
    while ($ARGV[0]=~ /^-/) {
        $opt= shift(@ARGV) ;
        ($opt, $param)= $opt=~ /^-(.)(.*)/ ;

        # Turn on verbose reporting
        if ($opt eq 'v') {
            $verbose_report= ($param ne '-') ;

        # User-specified patterns to include ('' to clear list)
        } elsif ($opt eq 'I') {
            $param= shift(@ARGV) unless length($param) ;
            if (length($param)) { push(@INCLUDE_PATTERNS, $param) }
            else { @INCLUDE_PATTERNS= () }

        # User-specified patterns to exclude ('' to clear list)
        } elsif ($opt eq 'X') {
            $param= shift(@ARGV) unless length($param) ;
            if (length($param)) { push(@EXCLUDE_PATTERNS, $param) }
            else { @EXCLUDE_PATTERNS= () }

        # User-specified response codes to include ('' to clear list)
        } elsif ($opt eq 'i') {
            $param= shift(@ARGV) unless length($param) ;
            if (length($param)) { push(@INCLUDE_STATUS, $param) }
            else { @INCLUDE_STATUS= () }

        # User-specified response codes to ignore ('' to clear list)
        } elsif ($opt eq 'x') {
            $param= shift(@ARGV) unless length($param) ;
            if (length($param)) { push(@EXCLUDE_STATUS, $param) }
            else { @EXCLUDE_STATUS= () }

        # Maximum traversal depth
        } elsif ($opt eq 'd') {
            $param= shift(@ARGV) unless length($param) ;
            $max_depth= $param ;

        # Make it a "file check"-- only read local files, do not use HTTP
        } elsif ($opt eq 'f') {
            $file_check= ($param ne '-') ;

        # Use HTTP for all URL's, even local files
        } elsif ($opt eq 'h') {
            $full_http_check= ($param ne '-') ;

        # Read configuration parameters from srm.conf-like file
        } elsif ($opt eq 'c') {
            $param= shift(@ARGV) unless length($param) ;
            &read_srm_conf($param) ;
            
        # Print current configuration parameters
        } elsif ($opt eq 'q') {
            &print_config ;
            exit ;   # jsm-- should we exit?

        # Allow certain parameters to be defined via the command line
        } elsif ($opt eq 'D') {
            $param= shift(@ARGV) unless length($param) ;
            $debug=1, unshift(@ARGV,$param), next if $param=~ /^-/ ;
            my($name,$value)= split(/=/, $param, 2) ;
=3=

1|2| < PREV = PAGE 3 = NEXT > |4|5|6|7|8|9|10|11|12.22

UP TO ROOT | UP TO DIR | TO FIRST PAGE

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.0674131 wallclock secs ( 0.00 usr + 0.01 sys = 0.01 CPU)