PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Code_Examples|Perl|site_perl|HTML|TreeBuilder.pm =

page 1 of 19



package HTML::TreeBuilder;

use strict;
use integer; # vroom vroom!
use Carp ();
use vars qw(@ISA $VERSION $DEBUG);
$VERSION = '3.23';

#---------------------------------------------------------------------------
# Make a 'DEBUG' constant...

BEGIN {
  # Define DEBUG() for this package at compile time, based on whatever
  # value $DEBUG holds by the time this block runs.
  #
  # We used to have things like
  #  print $indent, "lalala" if $Debug;
  # But there were an awful lot of having to evaluate $Debug's value.
  # If we make that depend on a constant, like so:
  #   sub DEBUG () { 1 } # or whatever value.
  #   ...
  #   print $indent, "lalala" if DEBUG;
  # Which at compile-time (thru the miracle of constant folding) turns into:
  #   print $indent, "lalala";
  # or, if DEBUG is a constant with a false value, then that print statement
  # is simply optimized away, and doesn't appear in the target code at all.
  # If you don't believe me, run:
  #    perl -MO=Deparse,-uHTML::TreeBuilder -e 'BEGIN { \
  #      $HTML::TreeBuilder::DEBUG = 4}  use HTML::TreeBuilder'
  # and see for yourself (substituting whatever value you want for $DEBUG
  # there).

  if(defined &DEBUG) {
    # Already been defined!  Do nothing.
  } elsif($] < 5.00404) {
    # Grudgingly accommodate ancient (pre-constant) versions.
    # No prototype here, so this is an ordinary sub that reads the package
    # variable on every call.  It has to name $DEBUG (the variable declared
    # via 'use vars'); any other spelling fails to compile under 'use
    # strict' inside the eval and would leave DEBUG undefined.
    eval 'sub DEBUG { $DEBUG } ';
  } elsif(!$DEBUG) {
    eval 'sub DEBUG () {0}';  # Make it a constant.
  } elsif($DEBUG =~ m<^\d+$>s) {
    # The digits are spliced into the source text, so the sub body is a
    # literal number.
    eval 'sub DEBUG () { ' . $DEBUG . ' }';  # Make THAT a constant.
  } else { # WTF?
    # Name the variable that was actually inspected: this package's $DEBUG.
    warn "Non-numeric value \"$DEBUG\" in \$HTML::TreeBuilder::DEBUG";
    eval 'sub DEBUG () { $DEBUG }'; # I guess.
  }
}

#---------------------------------------------------------------------------

use HTML::Entities ();
use HTML::Tagset 3.02 ();

use HTML::Element ();
use HTML::Parser ();
# Inheritance.  Listing both parents may look odd: a TreeBuilder is not
# "an element AND a parser".  It IS an element -- one that also knows how
# to handle the signals (method calls) coming from Parser, and uses them
# to elaborate its own subtree.
@ISA = ('HTML::Element', 'HTML::Parser');

# Legacy aliases: each name below in HTML::TreeBuilder is made to refer to
# the very same variable as the like-named one in HTML::Tagset.
{
  no strict 'refs';  # the glob and hash names are built from strings

  foreach my $legacy_hash (qw(
    isKnown
    canTighten
    isHeadElement
    isBodyElement
    isPhraseMarkup
    isHeadOrBodyElement
    isList
    isTableElement
    isFormElement
  )) {
    *{"HTML::TreeBuilder::$legacy_hash"} = \%{"HTML::Tagset::$legacy_hash"};
  }
}

# The one legacy alias that is an array rather than a hash:
*HTML::TreeBuilder::p_closure_barriers = \@HTML::Tagset::p_closure_barriers;

#==========================================================================
# Two little shortcut constructors:

sub new_from_file { # or from a FH
  # Class-method constructor: builds a new object via $class->new(), hands
  # the single argument (a filename or a filehandle) to its parse_file
  # method, and returns the object.  The result of parse_file is not
  # examined here.
  my($class, @sources) = @_;

  # Argument count is checked first, then that we were called on a class.
  if(@sources != 1) {
    Carp::croak("new_from_file takes only one argument");
  }
  if(ref $class) {
    Carp::croak("new_from_file is a class method only");
  }

  my $tree = $class->new();
  $tree->parse_file($sources[0]);
  return $tree;
}

sub new_from_content { # from any number of scalars
  # Class-method constructor: builds a new object via $class->new(), feeds
  # it each remaining argument in order through parse(), then calls eof()
  # and returns the object.  An argument that is a reference to a scalar
  # is dereferenced first; anything else is passed along as-is.
  my $class = shift;
  if(ref $class) {
    Carp::croak("new_from_content is a class method only");
  }

  my $tree = $class->new();

  # Walk @_ directly rather than copying it into a new array.
  CHUNK: for my $piece (@_) {
    $tree->parse( ref($piece) eq 'SCALAR' ? $$piece : $piece );

    # Once the object has its '_stunted' flag set, skip the rest.
    last CHUNK if $tree->{'_stunted'};
  }

  $tree->eof();
  return $tree;
}

=1=

= PAGE 1 = NEXT > |2|3|4|5|6|7|8|9|10.19

UP TO ROOT | UP TO DIR

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.00605392 wallclock secs ( 0.00 usr + 0.01 sys = 0.01 CPU)