# Returns a canonicalized absolute URL, given a relative URL and a base URL.
# The full procedure is described in the URI draft, section 5.2.
#
# Arguments:
#   $relurl              -- the (possibly relative) URL to resolve
#   $baseurl             -- the URL that $relurl is resolved against
#   $return_rel_fragment -- if true, a reference to "the current document"
#                           (empty path and no scheme, site, or query) is
#                           returned unchanged instead of being resolved
# Returns the resolved URL as rebuilt by &unparse_url(), or $relurl itself
# in the $return_rel_fragment case.
#
# Note that a relative URI of "#fragment" should be resolved to "the current
# document", not to an absolute URL.  This presents a quandary for this
# routine: should it always return an absolute URL, thus violating the
# spec, or should it not always return an absolute URL, thus requiring any
# caller to check for this special case?  This routine leaves that up to
# the caller, with $return_rel_fragment-- if set, stick to the spec;
# otherwise, always return an absolute URL.  See section G.4 of the draft.
# Note that the pathname reduction in steps 6.c-f messes up any PATH_INFO
# that has ./ or ../ in it, which may be a bug in the spec.
sub absolute_url {
    my($relurl, $baseurl, $return_rel_fragment)= @_ ;
    my(@relurl, @baseurl) ;

    # parse_url() returns scheme, site, path, query, fragment
    @relurl=  &parse_url($relurl) ;                  # Step 1
    @baseurl= &parse_url($baseurl) ;

    COMBINE: {

        # Step 2: empty path with no scheme, site, or query means a
        # reference to the current document.
        # See note above about $return_rel_fragment.
        if (    $relurl[2] eq ''
             && !defined($relurl[0])
             && !defined($relurl[1])
             && !defined($relurl[3]) ) {
            @relurl[0..3]= @baseurl[0..3] ;
            return $relurl if $return_rel_fragment ; # see note above
            last COMBINE ;
        }

        last COMBINE if defined($relurl[0]) ;        # Step 3
        $relurl[0]= $baseurl[0] ;

        last COMBINE if defined($relurl[1]) ;        # Step 4
        $relurl[1]= $baseurl[1] ;

        last COMBINE if $relurl[2]=~ m#^/# ;         # Step 5

        # Step 6-- resolve relative path
        # Step 6.a: keep the base path up to and including its last "/".
        my($basedir)= $baseurl[2]=~ m#^(.*/)# ;
        unless (defined($basedir)) {
            # The base path has no "/" at all (e.g. "http://host").  If the
            # base names a site, the merged path must still start with "/",
            # or the result would run the site and path together; otherwise
            # there is simply no directory part to prepend.
            $basedir= (defined($baseurl[1]) && $baseurl[1] ne '')  ? '/'  : '' ;
        }
        $relurl[2]= $basedir . $relurl[2] ;          # Step 6.b

    } # COMBINE

    # Put the remaining steps outside of the block to canonicalize the path.
    # Arguably, this is not allowed.  To avoid such arguments at the expense of
    # path canonicalization, put steps 6.c-f back in the COMBINE block.

    1 while $relurl[2]=~ s#(^|/)\./#$1# ;            # Step 6.c
    $relurl[2]=~ s#(^|/)\.$#$1# ;                    # Step 6.d

    # Step 6.e: remove "<segment>/../" pairs, where <segment> is not "..".
    # A pass can "succeed" while only re-inserting "../../" runs unchanged,
    # so repeat until a pass leaves the path unchanged rather than until
    # the substitution fails.
    my($oldpath) ;
    do {
        $oldpath= $relurl[2] ;
        $relurl[2]=~ s#(([^/]+)/\.\./)# ($2 eq '..') ? $1 : '' #ge ;
    } while ($relurl[2] ne $oldpath) ;

    # Step 6.f
    $relurl[2]=~ s#(([^/]+)/\.\.$)# ($2 eq '..') ? $1 : '' #ge ;

    # Step 6.g: allow leading ".." segments to remain in path
    # Step 6.h: relurl[2] is already the buffer string

    # To canonicalize further, lowercase the hostname (is this valid for all
    # schemes?)
    # Only the text after the last "@" is lowercased, so that any
    # "user:password@" prefix in the site keeps its case; with no "@" the
    # whole site is lowercased.
    if (defined($relurl[1])) {
        my($hoststart)= rindex($relurl[1], '@') + 1 ;
        substr($relurl[1], $hoststart)= lc(substr($relurl[1], $hoststart)) ;
    }

    return &unparse_url(@relurl) ;                   # Step 7
}
# Convert a local URL into a canonicalized absolute path, or undef if
# not on this host or other error.
# Result should only be used as filename.
# Supports UserDir (e.g. public_html) for "/~username/path/file" URLs.
# Supports Alias, AliasMatch, ScriptAlias, and ScriptAliasMatch from srm.conf
# (but note use of Perl regexes instead of standard regexes).
# Inserts index.html, etc. (from @DIRECTORY_INDEX) if result is a directory,
# but just returns directory name (ending in '/') if none of those exists.
# Removes PATH_INFO, if any, from filename.
# Directory names are always returned with trailing slash (which would not
# be appropriate if PATH_INFO was to be retained).
# While this routine makes some tests (e.g. if the file is a directory),
# it does not verify that file at the resulting $filename exists.
# Note that not all URLs point to files, so this routine is not always
# appropriate. In this program, the result from this routine is only
# used when we know the URL is not a CGI script (and is therefore a file),
# except in &is_cgi() itself, which tests if a file is a CGI script.
# If it weren't for &is_cgi(), we could ignore cases when the URL isn't
# a file.
# 12-1-99 JSM: Changed to also return "redirected" location, in case URL
# is a directory but not ending in a slash, so relative URLs will resolve
# correctly against the redirected URL.
sub url_to_filename {
=13= |