PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Proxy_Docs|_Perl_code|cl-1.0.1.pl =

page 13 of 22





# Returns a canonicalized absolute URL, given a relative URL and a base URL.
# The full procedure is described in the URI draft, section 5.2.
#
# Parameters:
#   $relurl               URL to resolve; may be relative or already absolute.
#   $baseurl              URL that $relurl is resolved against.
#   $return_rel_fragment  If true, a reference to "the current document"
#                         (empty path and no scheme, site, or query, e.g.
#                         "#fragment") is returned unchanged instead of
#                         being made absolute.
# Returns:  the string built by &unparse_url() from the resolved scheme,
#   site, path, query, and fragment; or $relurl itself in the
#   $return_rel_fragment case described above.
#
# Note that a relative URI of "#fragment" should be resolved to "the current
#   document", not to an absolute URL.  This presents a quandary for this
#   routine:  should it always return an absolute URL, thus violating the
#   spec, or should it not always return an absolute URL, thus requiring any
#   caller to check for this special case?  This routine leaves that up to
#   the caller, with $return_rel_fragment-- if set, stick to the spec;
#   otherwise, always return an absolute URL.  See section G.4 of the draft.
# Note that the pathname reduction in steps 6.c-f messes up any PATH_INFO
#   that has ./ or ../ in it, which may be a bug in the spec.
sub absolute_url {
    my($relurl, $baseurl, $return_rel_fragment)= @_ ;
    my(@relurl, @baseurl) ;

    # parse_url() returns scheme, site, path, query, fragment
    @relurl= &parse_url($relurl) ;      # Step 1
    @baseurl= &parse_url($baseurl) ;

    COMBINE: {

        # Step 2-- empty path and no scheme, site, or query means this is a
        #   reference to the current document (possibly with a fragment).
        if (  $relurl[2] eq '' &&
              !defined($relurl[0]) &&
              !defined($relurl[1]) &&
              !defined($relurl[3]) ) {
            # Stick to the spec if the caller asked for that.
            return $relurl if $return_rel_fragment ;
            # Otherwise take everything but the fragment from the base URL.
            @relurl[0..3]= @baseurl[0..3] ;
            last COMBINE ;
        }

        last COMBINE if defined($relurl[0]) ;    # Step 3: already absolute
        $relurl[0]= $baseurl[0] ;

        last COMBINE if defined($relurl[1]) ;    # Step 4: has its own site
        $relurl[1]= $baseurl[1] ;

        last COMBINE if $relurl[2]=~ m#^/# ;     # Step 5: absolute path

        # Step 6-- resolve relative path

        # Step 6.a-- keep the base path up to and including its last "/".
        my($path)= $baseurl[2]=~ m#^(.*/)# ;

        # A base path containing no "/" (e.g. base "http://host") gives no
        #   match, leaving $path undefined.  Merge the way RFC 3986 section
        #   5.2.3 does:  use "/" when the base has a site, so the relative
        #   path doesn't lose its leading slash; otherwise use the empty
        #   string.
        unless (defined($path)) {
            $path= defined($baseurl[1])  ? '/'  : '' ;
        }

        $relurl[2]= $path . $relurl[2] ;         # Step 6.b

    } # COMBINE

    # Put the remaining steps outside of the block to canonicalize the path.
    # Arguably, this is not allowed.  To avoid such arguments at the expense of
    #   path canonicalization, put steps 6.c-f back in the COMBINE block.

    1 while $relurl[2]=~ s#(^|/)\./#$1# ;    # Step 6.c: drop "./" segments
    $relurl[2]=~ s#(^|/)\.$#$1# ;            # Step 6.d: drop a final "."

    # Step 6.e-- remove each "<segment>/../" where <segment> is not "..".
    # One pass can expose new removable pairs, so repeat.  A pass that only
    #   matches "../../" substitutes it with itself and so still counts as a
    #   successful substitution; stop once the path no longer changes.
    # $oldpath starts as the current path so the first comparison is never
    #   made against an undefined value.
    my($oldpath)= $relurl[2] ;
    while ($relurl[2]=~ s#(([^/]+)/\.\./)# ($2 eq '..')  ? $1  : '' #ge) {
        last if ($relurl[2] eq $oldpath) ;
        $oldpath= $relurl[2] ;
    }

    # Step 6.f-- same as 6.e, for a final "<segment>/.." with no trailing "/".
    $relurl[2]=~ s#(([^/]+)/\.\.$)# ($2 eq '..')  ? $1  : '' #ge ;

    # Step 6.g: allow leading ".." segments to remain in path
    # Step 6.h: relurl[2] is already the buffer string

    # To canonicalize further, lowercase the hostname (is this valid for all
    #   schemes?)
    $relurl[1]= lc($relurl[1]) if defined($relurl[1]) ;

    return &unparse_url(@relurl) ;                  # Step 7
}



# Convert a local URL into a canonicalized absolute path, or undef if
#   not on this host or other error.
# Result should only be used as filename.
# Supports UserDir (e.g. public_html) for "/~username/path/file" URLs.
# Supports Alias, AliasMatch, ScriptAlias, and ScriptAliasMatch from srm.conf
#   (but note use of Perl regexes instead of standard regexes).
# Inserts index.html, etc. (from @DIRECTORY_INDEX) if result is a directory,
#   but just return directory name (ending in '/') if none of those exists.
# Removes PATH_INFO, if any, from filename.
# Directory names are always returned with trailing slash (which would not
#   be appropriate if PATH_INFO was to be retained).
# While this routine makes some tests (e.g. if the file is a directory),
#   it does not verify that file at the resulting $filename exists.
# Note that not all URLs point to files, so this routine is not always
#   appropriate.  In this program, the result from this routine is only
#   used when we know the URL is not a CGI script (and is therefore a file),
#   except in &is_cgi() itself, which tests if a file is a CGI script.
#   If it weren't for &is_cgi(), we could ignore cases when the URL isn't
#   a file.
# 12-1-99 JSM:  Changed to also return "redirected" location, in case URL
#   is a directory but not ending in a slash, so relative URLs will resolve
#   correctly against the redirected URL.
sub url_to_filename {
=13=

1.7|8|9|10|11|12| < PREV = PAGE 13 = NEXT > |14|15|16|17|18|19.22

UP TO ROOT | UP TO DIR | TO FIRST PAGE

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.0124979 wallclock secs ( 0.01 usr + 0.00 sys = 0.01 CPU)