package HTML::TreeBuilder;
use strict;
use integer; # vroom vroom!
use Carp ();
use vars qw(@ISA $VERSION $DEBUG);
$VERSION = '3.23';
#---------------------------------------------------------------------------
# Make a 'DEBUG' constant...
BEGIN {

    # Define DEBUG() exactly once, at compile time, from the package
    # variable $DEBUG (i.e. $HTML::TreeBuilder::DEBUG).
    #
    # We used to have things like
    #   print $indent, "lalala" if $Debug;
    # But that meant evaluating $Debug's value an awful lot of times.
    # If we make that depend on a constant instead, like so:
    #   sub DEBUG () { 1 }   # or whatever value.
    #   ...
    #   print $indent, "lalala" if DEBUG;
    # then at compile-time (thru the miracle of constant folding) that
    # statement turns into:
    #   print $indent, "lalala";
    # or, if DEBUG is a constant with a false value, then that print
    # statement is simply optimized away, and doesn't appear in the target
    # code at all.
    # If you don't believe me, run:
    #   perl -MO=Deparse,-uHTML::TreeBuilder -e 'BEGIN { \
    #     $HTML::TreeBuilder::DEBUG = 4} use HTML::TreeBuilder'
    # and see for yourself (substituting whatever value you want for $DEBUG
    # there).

    if ( defined &DEBUG ) {

        # Already been defined!  Do nothing.
    }
    elsif ( $] < 5.00404 ) {

        # Grudgingly accommodate ancient (pre-constant) versions with a
        # plain sub that reads the variable on every call.  It has to name
        # $DEBUG, the variable this package declares: this string is
        # compiled under the file's "use strict", so an undeclared name
        # would make the eval fail quietly and leave DEBUG undefined.
        eval 'sub DEBUG { $DEBUG } ';
    }
    elsif ( !$DEBUG ) {
        eval 'sub DEBUG () {0}';    # Make it a constant.
    }
    elsif ( $DEBUG =~ m<^\d+$>s ) {

        # The value is all digits, so it is safe to paste into the code.
        eval 'sub DEBUG () { ' . $DEBUG . ' }';    # Make THAT a constant.
    }
    else {

        # Something true but non-numeric: complain, naming the variable
        # that was actually examined, then fall back to reading it live.
        warn "Non-numeric value \"$DEBUG\" in \$HTML::TreeBuilder::DEBUG";
        eval 'sub DEBUG () { $DEBUG }';            # I guess.
    }
}
#---------------------------------------------------------------------------
use HTML::Entities ();
use HTML::Tagset 3.02 ();
use HTML::Element ();
use HTML::Parser ();
@ISA = qw(HTML::Element HTML::Parser);
# This looks schizoid, I know.
# It's not that we ARE an element AND a parser.
# We ARE an element, but one that knows how to handle signals
# (method calls) from Parser in order to elaborate its subtree.
# Legacy aliases: each name listed here is made available in this package
# as an alias of the same-named variable in HTML::Tagset, so both names
# refer to one and the same hash (or array).
{
    no strict 'refs';    # the glob names are built from strings below

    foreach my $set_name (
        qw(
        isKnown
        canTighten
        isHeadElement
        isBodyElement
        isPhraseMarkup
        isHeadOrBodyElement
        isList
        isTableElement
        isFormElement
        )
        )
    {
        *{"HTML::TreeBuilder::$set_name"} = \%{"HTML::Tagset::$set_name"};
    }

    # The one alias that is an array rather than a hash.
    *{"HTML::TreeBuilder::p_closure_barriers"}
        = \@{"HTML::Tagset::p_closure_barriers"};
}
#==========================================================================
# Two little shortcut constructors:
# Shortcut constructor: make a new object of the given class and parse one
# file into it.
#
#   my $tree = HTML::TreeBuilder->new_from_file($file);   # or from a FH
#
# Takes exactly one argument, which is handed unchanged to parse_file.
# Croaks if given the wrong number of arguments, if called on an object
# rather than on a class name, or if parse_file returns undef.
# Returns the new object.
sub new_from_file {
    my $class = shift;
    Carp::croak("new_from_file takes only one argument")
        unless @_ == 1;
    Carp::croak("new_from_file is a class method only")
        if ref $class;

    my $new = $class->new();

    # HTML::Parser documents parse_file as returning undef (with $! set)
    # when the file cannot be opened.  That result used to be ignored, so
    # a bad filename quietly produced an empty tree; report it instead.
    defined $new->parse_file( $_[0] )
        or Carp::croak("unable to parse file: $!");

    return $new;
}
# Shortcut constructor: make a new object of the given class and feed it
# content from any number of scalars.
#
#   my $tree = HTML::TreeBuilder->new_from_content($text, \$more_text, ...);
#
# Each argument is passed to parse() in order; an argument that is a
# reference to a scalar is dereferenced first.  Feeding stops early once
# the object's '_stunted' entry is true.  eof() is always called before
# the object is returned.  Croaks if called on an object rather than on a
# class name.
sub new_from_content {
    my $class = shift;
    ref($class)
        and Carp::croak("new_from_content is a class method only");

    my $tree = $class->new();

  CHUNK:
    for my $chunk (@_) {
        $tree->parse( ref($chunk) eq 'SCALAR' ? $$chunk : $chunk );

        # No point feeding in more once the object says it is stunted.
        last CHUNK if $tree->{'_stunted'};
    }

    $tree->eof();
    return $tree;
}
=1= |