PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Code_Examples|Perl|site_perl|HTML|TreeBuilder.pm =

page 3 of 19



# Accessor for the '_warn' attribute.  All the work is delegated to the
# _elem() method (defined elsewhere -- presumably a generic get/set helper;
# verify against its definition), which receives the attribute name
# followed by whatever arguments the caller supplied.  Whatever _elem()
# returns is handed straight back to the caller.
sub warn {
    my $self = shift;
    return $self->_elem('_warn', @_);
}


#==========================================================================

# Emit a diagnostic through Perl's built-in warn(), but only when this
# object's '_warn' attribute is true.
#
#   $self->warning($message);
#
# The text is prefixed with "HTML::Parse: " and terminated with a newline,
# so Perl does not append file/line information.  CORE::warn is named
# explicitly because this file also defines its own warn() sub.
#
# Returns the (false) value of the '_warn' attribute when warnings are
# disabled, otherwise whatever CORE::warn returns.
sub warning {
    my ($self, $message) = @_;

    my $enabled = $self->{'_warn'};
    return $enabled unless $enabled;

    # The prefix should maybe say HTML::TreeBuilder instead.
    return CORE::warn("HTML::Parse: $message\n");
}

#==========================================================================

{
  # To avoid having to rebuild these lists constantly...
  my $_Closed_by_structurals = [qw(p h1 h2 h3 h4 h5 h6 pre textarea)];
  my $indent;

  sub start {
    return if $_[0]{'_stunted'};
    
    # Accept a signal from HTML::Parser for start-tags.
    my($self, $tag, $attr) = @_;
    # Parser passes more, actually:
    #   $self->start($tag, $attr, $attrseq, $origtext)
    # But we can merrily ignore $attrseq and $origtext.

    if($tag eq 'x-html') {
      print "Ignoring open-x-html tag.\n" if DEBUG;
      # inserted by some lame code-generators.
      return;    # bypass tweaking.
    }
   
    $tag =~ s{/$}{}s;  # So <b/> turns into <b>.  Silently forgive.
    
    unless($tag =~ m/^[-_a-zA-Z0-9:%]+$/s) {
      DEBUG and print "Start-tag name $tag is no good.  Skipping.\n";
      return;
      # This avoids having Element's new() throw an exception.
    }

    my $ptag = (
                my $pos  = $self->{'_pos'} || $self
               )->{'_tag'};
    my $already_inserted;
    #my($indent);
    if(DEBUG) {
      # optimization -- don't figure out indenting unless we're in debug mode
      my @lineage = $pos->lineage;
      $indent = '  ' x (1 + @lineage);
      print
        $indent, "Proposing a new \U$tag\E under ",
        join('/', map $_->{'_tag'}, reverse($pos, @lineage)) || 'Root',
        ".\n";
    #} else {
    #  $indent = ' ';
    }
    
    #print $indent, "POS: $pos ($ptag)\n" if DEBUG > 2;
    # $attr = {%$attr};

    foreach my $k (keys %$attr) {
      # Make sure some stooge doesn't have "<span _content='pie'>".
      # That happens every few million Web pages.
      $attr->{' ' . $k} = delete $attr->{$k}
       if length $k and substr($k,0,1) eq '_';
      # Looks bad, but is fine for round-tripping.
    }
    
    my $e =
     ($self->{'_element_class'} || 'HTML::Element')->new($tag, %$attr);
     # Make a new element object.
     # (Only rarely do we end up just throwing it away later in this call.)
     
    # Some prep -- custom messiness for those damned tables, and strict P's.
    if($self->{'_implicit_tags'}) {  # wallawallawalla!
      
      unless($HTML::TreeBuilder::isTableElement{$tag}) {
        if ($ptag eq 'table') {
          print $indent,
            " * Phrasal \U$tag\E right under TABLE makes implicit TR and TD\n"
           if DEBUG > 1;
          $self->insert_element('tr', 1);
          $pos = $self->insert_element('td', 1); # yes, needs updating
        } elsif ($ptag eq 'tr') {
          print $indent,
            " * Phrasal \U$tag\E right under TR makes an implicit TD\n"
           if DEBUG > 1;
          $pos = $self->insert_element('td', 1); # yes, needs updating
        }
        $ptag = $pos->{'_tag'}; # yes, needs updating
      }
       # end of table-implication block.
      
      
      # Now maybe do a little dance to enforce P-strictness.
      # This seems like it should be integrated with the big
      # "ALL HOPE..." block, further below, but that doesn't
      # seem feasible.
      if(
=3=

1|2| < PREV = PAGE 3 = NEXT > |4|5|6|7|8|9|10|11|12.19

UP TO ROOT | UP TO DIR | TO FIRST PAGE

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.00574613 wallclock secs ( 0.01 usr + 0.00 sys = 0.01 CPU)