[Templates-cvs] cvs commit: TT3/lib/Template/TT3 Scanner.pm

cvs@template-toolkit.org cvs@template-toolkit.org
Wed, 10 Dec 2003 14:16:03 +0000


cvs         03/12/10 14:16:02

  Added:       lib/Template/TT3 Scanner.pm
  Log:
  added Scanner
  
  Revision  Changes    Path
  1.1                  TT3/lib/Template/TT3/Scanner.pm
  
  Index: Scanner.pm
  ===================================================================
  #========================================================================
  #
  # Template::TT3::Scanner
  #
  # DESCRIPTION
  #   The scanner sits in front of the parser.  It scans the source text
  #   of a template to identify plain text blocks and embedded directives
  #   in various different forms (e.g. [% blah %], $blah, ${blah}, etc.)
  # 
  # AUTHOR
  #   Andy Wardley <abw@wardley.org>
  #
  # COPYRIGHT
  #   Copyright (C) 1996-2003 Andy Wardley.  All Rights Reserved.
  #   Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
  #
  #   This module is free software; you can redistribute it and/or
  #   modify it under the same terms as Perl itself.
  #
  # REVISION
  #   $Id: Scanner.pm,v 1.1 2003/12/10 14:16:01 abw Exp $
  #
  #========================================================================
  
  package Template::TT3::Scanner;
  
  use strict;
  use warnings;
  use Template::TT3::Base;
  use vars qw( $VERSION $DEBUG $ERROR $WARNING $TAGS );
  use base qw( Template::TT3::Base );
  
  $VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
  $DEBUG   = 0 unless defined $DEBUG;
  $ERROR   = '';
  $TAGS    = [ ];
  
  
  #------------------------------------------------------------------------
  # init(\%config)
  #
  # Initialiser method called by base class new() method.
  #------------------------------------------------------------------------
  
  sub init {
      my ($self, $config) = @_;
      my $pkgtags = $self->pkgtags();
      my $cfgtags = $config->{ tags } || $config->{ TAGS } || [ ];
      $cfgtags = [ $cfgtags ] unless UNIVERSAL::isa($cfgtags, 'ARRAY');
   
      $self->tags([ @$pkgtags, @$cfgtags ]) || return;
  
      return $self;
  }
  
  
  #------------------------------------------------------------------------
  # pkgtags()
  #
  # Return the $TAGS list reference in the subclass package if defined, or
  # in the base class package if not.
  #------------------------------------------------------------------------
  
  sub pkgtags {
      my $self  = shift;
      my $class = ref $self || $self;
      no strict 'refs';
      return ${"$class\::TAGS"} || $TAGS;
  }
  
  
  #------------------------------------------------------------------------
  # tags()         # return existing list of tags
  # tags(\@tags)   # install new list of tags
  #
  # Method to get/set the current set of tags which the scanner must 
  # scan for.
  #------------------------------------------------------------------------
  
  sub tags {
      my $self = shift;
  
      # return existing tag set if called without args
      return $self->{ tags } unless @_;
  
      # install new tag set from args
      my $tags = ref $_[0] eq 'ARRAY' ? shift : [ @_ ];
      $self->{ tags } = $tags;
  
      my $tagmap  = $self->{ tagmap  } = { };
      my $regexen = $self->{ regexen } = [ ];
      my $regtags = $self->{ regtags } = [ ];
      my @build;
  
      # fetch the start token for each tag in order to build a master
      # regex for matching all start tags;  if a start token is a regex
      # then we leave it alone; if it's a string then we pass it through
      # quotemeta to escape any regex metacharacter that would otherwise
      # mess things up;  while we're doing it, we build a lookup table to
      # map start tags to tag objects;  with regexes, we can't do a direct
      # object so instead we keep a list of them for matching later.
      
      foreach my $tag (@$tags) {
          my $start = $tag->start();
          if (ref $start eq 'Regexp') {
              push(@$regexen, $start);
              push(@$regtags, $tag);
              push(@build, $start);
          }
          elsif (defined $start) {
              $tagmap->{ $start } = $tag;
              $start = quotemeta($start);
              push(@build, $start);
          }
          else {
              return $self->error('no start token defined for tag');
          }
      }
  
      # build the regex to match all tag start tokens
      if (@build) {
          # we have to ensure that longer start tags come before shorter ones
          # so that two different tags, say "<FOO" and "<FOOBAR", will both match,
          # e.g. qr/<FOOBAR|<FOO/ .   Some start tags may already be regexen which 
          # gives them longer string representations (e.g. "(?-xism:<FOO)" vs 
          # "<FOOBAR") so we map them all to regex before sorting by length and
          # bulding a composite regex to match any of the start tags
          my $regex = join( '|',
                            reverse 
                            sort { length $a <=> length $b }  
                            map { ref($_) ? $_ : qr/$_/ } 
                            @build );
          $self->{ regex } = qr/ \G (.*?) ($regex) /sx;
          $self->debug("tags regex: $regex\n") if $DEBUG;
      }
      else {
          $self->{ regex } = undef;
          $self->debug("no regex\n") if $DEBUG;
      }
  
      $self->{ reset } = 1;
      return $tags;
  }
  
  
  #------------------------------------------------------------------------
  # scan($text, $document)
  #
  # Scan $text to identify plain text and embedded directives that match any
  # of the tags defined for the scanner.  Scanner events are raised as 
  # method calles against the $document object, either directly in the case
  # text or indirectly through delegation to a tag pbject. 
  #------------------------------------------------------------------------
  
  sub scan {
      my ($self, $text, $document) = @_;
      my $textref = ref $text ? $text : \$text;
  
      $self->debug("scan()\n") if $DEBUG;
      $self->{ reset } = 0;
  
      SCAN_FOR_TAGS: {
          # fetch the tagset information prepared by the tags() method
          my ($tagmap, $regex, $regexen) = @$self{ qw( tagmap regex regexen  ) };
          if ($regex) {
              $self->debug("scanning with regex: $regex\n") if $DEBUG;
          }
          else {
              $self->debug("no regex, nothing to do\n") if $DEBUG;
              last SCAN_FOR_TAGS;
          }
  
          # scan source text to identify text and tags
          while ($$textref =~ /$regex/cg) {
              my ($pretext, $dirtok) = ($1, $2);
  
              # notify template document of any text preceeding a tag
              $document->text(\$pretext) 
                  || return $self->error($document->error())
                      if defined $pretext && length $pretext;
  
              # look for the tag object corresponding to this start token
              my $tag = $tagmap->{ $dirtok };
  
              # if there isn't an entry in the tag lookup table then we 
              # look through each regex to find out which one matches the
              # start token, and use the tag object corresponding to it
              unless (defined $tag) {
                  my $regexen = $self->{ regexen };
                  for my $n (0..$#$regexen) {
                      if ($dirtok =~ $regexen->[$n]) {
                          $self->debug("matched $dirtok against $regexen->[$n]\n") 
                              if $DEBUG;
                          $tag = $self->{ regtags }->[$n]
                              || return $self->error("no tag for regex $regexen->[$n]");
                          # cached tag found for the start token in tagmap
                          $tagmap->{ $dirtok } = $tag;
                          last;
                      }
                  }
                  return $self->error("no tag defined for '$dirtok'")
                      unless defined $tag;
              }
  
              # call tag object to scan the tag content
              $tag->scan($dirtok, $textref, $document)
                  || return $self->error($tag->error());
  
              # check to see if tags have changed
              if ($self->{ reset }) {
                  $self->debug("scanner reset\n") if $DEBUG;
                  redo SCAN_FOR_TAGS;
              }
          }
      }
  
      if ($$textref =~ / \G (.*) /sx) {
          my $text = $1;
          $document->text(\$text) 
              || return $self->error($document->error());
      }
  
      return 1;
  }
  
  
  
  1;
  __END__
  
  =head1 NAME
  
  Template::TT3::Scanner - template scanner
  
  =head1 SYNOPSIS
  
      package Template::TT3::Scanner;
  
      # TODO
  
  =head1 DESCRIPTION
  
  # TODO
  
  =head1 METHODS
  
  =head2 new()
  
  # TODO
  
  =head1 AUTHOR
  
  Andy Wardley  E<lt>abw@wardley.orgE<gt>
  
  =head1 VERSION
  
  $Revision: 1.1 $
  
  =head1 COPYRIGHT
  
    Copyright (C) 1996-2003 Andy Wardley.  All Rights Reserved.
    Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
  
  This module is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.
  
  =cut
  
  # Local Variables:
  # mode: perl
  # perl-indent-level: 4
  # indent-tabs-mode: nil
  # End:
  #
  # vim: expandtab shiftwidth=4: