PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Proxy_Docs|_Perl_code|cl-1.0.1.pl =

page 7 of 22



}
   

# Normalize a URL so that two URLs naming the same resource end up as the
#   same string; this avoids retrieving one resource more than once.
# The only normalization performed here is dropping a trailing "#fragment".
# The host name is deliberately NOT lowercased here:  this routine is only
#   called from &add_url (see the note there), the host name is lowercased
#   elsewhere, and repeating that work here would be inefficient.
#
# Takes the URL as a string; returns the normalized copy.  The caller's
#   variable is not modified.
sub canonicalize {
    my($canonical)= @_ ;

    # Cut from the first "#" to the end of the line.
    $canonical=~ s/#.*// ;

    return $canonical ;
}


#----- File reading/downloading routines (includes networking) --------

# Verify that a URL exists, and set $url->{'status'} accordingly.  Do
#   this either by checking the local filesystem or by using the HTTP HEAD
#   method for remote sites or CGI scripts.
# $url->{'ishtml'} is meant to be set if discovered from Content-Type:
#   (that happens outside this routine, presumably in load_url_using_HTTP --
#   not visible here).
# This does not support various Redirect directives in srm.conf.
#
# Parameter:  $url -- hash reference; this routine reads its 'URL',
#   'islocal', 'iscgi', 'dontfollow', and 'filename' fields and writes
#   'status'.
# Also reads the globals $debug and $full_http_check.
# Returns the final $url->{'status'} string.  (Earlier versions had no
#   defined return value; the result callers should rely on is the
#   'status' field itself.)
#
# How the check is chosen:
#   remote URL, or $full_http_check set  -> HTTP HEAD
#   local, not CGI                       -> file-existence test
#   local CGI, dontfollow not set        -> HTTP HEAD
#   local CGI, dontfollow set            -> existence + executable test
#
# NOTE: In some situations, specifically when checking a CGI script
#   named in a <form action> (thus implying that dontfollow is set),
#   and using HTTP to check the URL (because the script is remote or
#   $full_http_check is set), the HTTP response code may not be
#   accurate.  This is because there is no form data sent with the
#   request, as there normally would be.  In these cases, a cautionary
#   note is appended to $url->{'status'}.
# Additionally, whenever HEAD is used and leaves $url->{'status'} empty,
#   the status is changed to an explanatory note (maybe we should do that
#   in load_url() too?).
#
# Historical note:  an older version of this routine checked local non-CGI
#   files on the filesystem (when $full_http_check was not set) and used
#   HEAD for everything else.
sub verify_url {
    my($url)= @_ ;

    print STDERR "verifying $url->{'URL'}\n" if $debug ;

    # Do a HEAD request and make sure some status text results.  Both HTTP
    #   paths below go through this, so an empty status is never left
    #   behind by either of them.
    my $head_check= sub {
        &load_url_using_HTTP($url, 'HEAD') ;
        $url->{'status'}= '[no status returned]'
            unless defined($url->{'status'}) and length($url->{'status'}) ;
    } ;

    # Use HEAD if file is remote, or if $full_http_check is set.
    if (!$url->{'islocal'} or $full_http_check) {
        $head_check->() ;
        # dontfollow here means no form data was sent; see NOTE above.
        $url->{'status'}.= ' (NOTE: Form was not submitted normally)'
            if $url->{'dontfollow'} ;

    # URL is local:  If it's not CGI, do a normal local file check
    } elsif (!$url->{'iscgi'}) {
        $url->{'status'}= (-e $url->{'filename'})
            ? "200 Local File Exists"  : "404 File Not Found" ;

    # URL is local CGI:  Use HEAD unless dontfollow is set
    } elsif (!$url->{'dontfollow'}) {
        $head_check->() ;

    # Else it's a local CGI with dontfollow set:  Check for executable file
    } else {
        $url->{'status'}=
             (! -e $url->{'filename'})  ? "404 File Not Found"
           : (! -x $url->{'filename'})  ? "403 Local File Is Not Executable"
           :                              "200 Local Executable File Exists" ;
    }

    return $url->{'status'} ;
}



# Load entire file/resource and return its contents, setting $url->{'status'}
#    accordingly.  Do this either by reading from the local filesystem or by
#    using the HTTP GET method for remote sites or CGI scripts.
# Set $url->{'ishtml'} accordingly if discovered from Content-Type:.
# This does not support various Redirect directives in srm.conf.
sub load_url {
    my($url)= @_ ;
    my($HTML) ;

    print STDERR "loading $url->{'URL'}\n" if $debug ;

    # If is a local non-CGI file, read it directly from the filesystem
=7=

1|2|3|4|5|6| < PREV = PAGE 7 = NEXT > |8|9|10|11|12|13|14|15|16.22

UP TO ROOT | UP TO DIR | TO FIRST PAGE

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.116894 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)