# Accessor-style method for the '_warn' attribute: hands the '_warn' key and
# any remaining arguments to this object's _elem method and returns whatever
# that call returns. warning() consults $self->{'_warn'} to decide whether
# to emit diagnostics.
# NOTE(review): _elem is presumably a generic get/set helper -- confirm
# against its definition.
sub warn {
    my $self = shift;
    return $self->_elem('_warn', @_);
}
#==========================================================================
# Emit a parse diagnostic via Perl's built-in warn, but only when the
# object's '_warn' flag is true.
#   $self    - the object whose '_warn' entry gates the output
#   $message - text appended after the "HTML::Parse: " prefix
# CORE::warn is named explicitly because this package defines its own
# warn() method.
sub warning {
    my ($self, $message) = @_;
    # The prefix should maybe say HTML::TreeBuilder instead.
    $self->{'_warn'} and CORE::warn("HTML::Parse: $message\n");
}
#==========================================================================
{
# To avoid having to rebuild these lists constantly...
my $_Closed_by_structurals = [qw(p h1 h2 h3 h4 h5 h6 pre textarea)];
my $indent;
sub start {
return if $_[0]{'_stunted'};
# Accept a signal from HTML::Parser for start-tags.
my($self, $tag, $attr) = @_;
# Parser passes more, actually:
# $self->start($tag, $attr, $attrseq, $origtext)
# But we can merrily ignore $attrseq and $origtext.
if($tag eq 'x-html') {
print "Ignoring open-x-html tag.\n" if DEBUG;
# inserted by some lame code-generators.
return; # bypass tweaking.
}
$tag =~ s{/$}{}s; # So <b/> turns into <b>. Silently forgive.
unless($tag =~ m/^[-_a-zA-Z0-9:%]+$/s) {
DEBUG and print "Start-tag name $tag is no good. Skipping.\n";
return;
# This avoids having Element's new() throw an exception.
}
my $ptag = (
my $pos = $self->{'_pos'} || $self
)->{'_tag'};
my $already_inserted;
#my($indent);
if(DEBUG) {
# optimization -- don't figure out indenting unless we're in debug mode
my @lineage = $pos->lineage;
$indent = ' ' x (1 + @lineage);
print
$indent, "Proposing a new \U$tag\E under ",
join('/', map $_->{'_tag'}, reverse($pos, @lineage)) || 'Root',
".\n";
#} else {
# $indent = ' ';
}
#print $indent, "POS: $pos ($ptag)\n" if DEBUG > 2;
# $attr = {%$attr};
foreach my $k (keys %$attr) {
# Make sure some stooge doesn't have "<span _content='pie'>".
# That happens every few million Web pages.
$attr->{' ' . $k} = delete $attr->{$k}
if length $k and substr($k,0,1) eq '_';
# Looks bad, but is fine for round-tripping.
}
my $e =
($self->{'_element_class'} || 'HTML::Element')->new($tag, %$attr);
# Make a new element object.
# (Only rarely do we end up just throwing it away later in this call.)
# Some prep -- custom messiness for those damned tables, and strict P's.
if($self->{'_implicit_tags'}) { # wallawallawalla!
unless($HTML::TreeBuilder::isTableElement{$tag}) {
if ($ptag eq 'table') {
print $indent,
" * Phrasal \U$tag\E right under TABLE makes implicit TR and TD\n"
if DEBUG > 1;
$self->insert_element('tr', 1);
$pos = $self->insert_element('td', 1); # yes, needs updating
} elsif ($ptag eq 'tr') {
print $indent,
" * Phrasal \U$tag\E right under TR makes an implicit TD\n"
if DEBUG > 1;
$pos = $self->insert_element('td', 1); # yes, needs updating
}
$ptag = $pos->{'_tag'}; # yes, needs updating
}
# end of table-implication block.
# Now maybe do a little dance to enforce P-strictness.
# This seems like it should be integrated with the big
# "ALL HOPE..." block, further below, but that doesn't
# seem feasible.
if(
=3= |