$p = $p->{'_parent'};
}
return unless defined $p; # We went off the top of the tree.
# Otherwise specified element was found; set pos to its parent.
push @to_close, $p;
$self->{'_pos'} = $p->{'_parent'};
}
$self->{'_pos'} = undef if $self eq ($self->{'_pos'} || '');
print $indent, "(Pos now points to ",
$self->{'_pos'} ? $self->{'_pos'}{'_tag'} : '???', ".)\n"
if DEBUG > 1;
### EXPENSIVE, because it has to check that the element is not under a
### pre or a CDATA-parent. That's one more method call per end()!
### Might as well just do this at the end of the tree-parse, I guess,
### at which point we'd be parsing top-down, and just not traversing
### under pre's or CDATA-parents.
##
## Take this opportunity to nix any terminal whitespace nodes.
## TODO: consider whether this (plus the logic in start(), above)
## would ever leave any WS nodes in the tree.
## If not, then there's no reason to have eof() call
## delete_ignorable_whitespace on the tree, is there?
##
#if(@to_close and $self->{'_tighten'} and !$self->{'_ignore_text'} and
# ! $to_close[-1]->is_inside('pre', keys %HTML::Tagset::isCDATA_Parent)
#) { # if tightenable
# my($children, $e_tag);
# foreach my $e (reverse @to_close) { # going top-down
# last if 'pre' eq ($e_tag = $e->{'_tag'}) or
# $HTML::Tagset::isCDATA_Parent{$e_tag};
#
# if(
# $children = $e->{'_content'}
# and @$children # has children
# and !ref($children->[-1])
# and $children->[-1] =~ m<^\s+$>s # last node is all-WS
# and
# (
# # has a tightenable parent:
# $HTML::TreeBuilder::canTighten{ $e_tag }
# or
# ( # has a tightenable left sibling:
# @$children > 1 and
# ref($children->[-2])
# and $HTML::TreeBuilder::canTighten{ $children->[-2]{'_tag'} }
# )
# )
# ) {
# pop @$children;
# #print $indent, "Popping a terminal WS node from ", $e->{'_tag'},
# # " (", $e->address, ") while exiting.\n" if DEBUG;
# }
# }
#}
foreach my $e (@to_close) {
# Call the applicable callback, if any
$ptag = $e->{'_tag'};
&{ $self->{"_tweak_$ptag"}
|| $self->{'_tweak_*'}
|| next
}(map $_, $e, $ptag, $self);
print $indent, "Back from tweaking.\n" if DEBUG;
last if $self->{'_stunted'}; # in case one of the handlers called stunt
}
return @to_close;
}
}
#==========================================================================
{
my($indent, $nugget);
sub text {
return if $_[0]{'_stunted'};
# Accept a "here's a text token" signal from HTML::Parser.
my($self, $text, $is_cdata) = @_;
# HTML::Parser versions after 3.0 may pass a third argument flagging
# the text as CDATA (in which case entities are not decoded below).
# Thanks to Gisle Aas for pointing this out.
return unless length $text; # I guess that's always right
my $ignore_text = $self->{'_ignore_text'};
my $no_space_compacting = $self->{'_no_space_compacting'};
my $pos = $self->{'_pos'} || $self;
HTML::Entities::decode($text)
unless $ignore_text || $is_cdata
|| $HTML::Tagset::isCDATA_Parent{$pos->{'_tag'}};
#my($indent, $nugget);
if(DEBUG) {
# optimization -- don't figure out depth unless we're in debug mode
my @lineage_tags = $pos->lineage_tag_names;
$indent = ' ' x (1 + @lineage_tags);
=10= |