if ($url->{'islocal'} and !$url->{'iscgi'} and !$full_http_check) {
my($iscgi) ;
($HTML, $url->{'ssierrs'}, $iscgi)=
&read_expanded_file($url->{'filename'}, $url->{'URL'}) ;
$url->{'status'}=
!defined($HTML)
? sprintf("450 Can't read file: %s (%s)", $!, $!+0)
: @{$url->{'ssierrs'}}
? sprintf("451 SSI Error(s) (%s total)",
scalar @{$url->{'ssierrs'}})
: "200 Local File Read OK" ;
# $url->{'iscgi'} may be set if an SHTML file included CGI calls.
# Don't set it if we're doing a file check, in which case we'll
# keep whatever $HTML we could get.
$url->{'iscgi'}= $iscgi unless $file_check ;
}
# Otherwise (or if rereckoned), download the resource from its HTTP server
if (!$url->{'islocal'} or $url->{'iscgi'} or $full_http_check) {
(undef, undef, $HTML)= &load_url_using_HTTP($url, 'GET') ;
}
# Note that this will be set even when URL is to be reloaded, like
# for a 601 (timeout) response.
$url->{'hasbeenloaded'}= 1 ;
return $HTML ;
}
# Read a local file and return its contents. If a file is SSI (aka SHTML),
# expand any SSI <!--#include--> directives as needed, recursively
# including nested files.
# This is used for all local reads, SHTML or not, but the vast bulk of this
# routine is for SHTML files.
#
# If file is SHTML, this routine also returns a structure of error data,
# and a boolean saying if this file needs to be downloaded via HTTP
# for a complete check (e.g. includes CGI calls).
#
# $fname must be a canonicalized absolute path, but the $URL parameter is optional.
# %$parents contains all "include"-ancestors of the file, to prevent loops.
# If omitted, assumes no ancestors (and a fresh hash is started).
#
# This routine seems much bigger and more complex than it needs to be.
# It could be one third the size and much simpler if we didn't have to
# worry about full error reporting on nested includes.
#
# Note: This routine was made to mimic what Apache would return to a client.
# However, the result differs from Apache's in two slight ways, both
# involving nested SSI within <!--#include file="..." -->, and both
# apparent bugs in Apache 1.1 (may be fixed in later versions):
#
# 1) If a <file="..."> value contains no "/" (i.e. in current directory),
# then Apache always parses the included file as SHTML, regardless of
# extension. This routine checks @SHTML_EXTENSIONS for all included
# files.
# 2) If a <file="..."> value containing a "/" loads an SHTML file
# containing a <virtual="..."> tag with a relative path, the directive
# fails in Apache. This routine tries to guess the correct path/URL.
#
#
# Notes on this routine, and SHTML files in general:
#
# At first thought, it seems like we could load each included file
# only once, instead of once for every file that includes it.
# However, because relative URLs are resolved
# relative to the top-level including file, the top-level file will
# need to be expanded every time. (It's legal (though of questionable
# wisdom) to include a file from e.g. both /a/index.shtml and
# /b/index.shtml, so links from the included file point to different
# URLs.)
#
# Note that while URLs in included files (e.g. <a href="...">) are
# resolved relative to the top-level including file, nested include tags
# are resolved relative to the direct includer.
#
# We could possibly be more efficient in time (but costly in memory)
# by storing the expanded contents and $errlist of each included file,
# since those will be constant (except $errlist's include-loop
# reporting might vary somewhat). There are probably other ways to
# eke out savings of time and memory, at the cost of complexity.
#
# The main loop here is inside of an s/// statement. Unusual, but it's an
# appropriate way to handle the recursion. Recursion is needed, since each
# included file may or may not be SHTML.
#
# $iscgi is set if a file includes "<!--#exec", or if it contains an
# <!--#include virtual="..." --> tag that points to a CGI file, or if
# any of its include-children sets $iscgi.
#
#
# Notes to help clarify data structures, if (God forbid) you have to modify
# this routine:
#
# Each error is a list of files in an "include chain", and $errlist is a
# list of errors. $errlist is associated with the current $HTML. Each
# error in $errlist is associated with some tag in $HTML, as iterated in
# the s/// loop. When this routine returns ($HTML, $errlist), the
=8= |