[Templates-cvs] cvs commit: TT3/lib/Template/TT3 Tag.pm

cvs@template-toolkit.org cvs@template-toolkit.org
Tue, 16 Dec 2003 12:41:15 +0000


cvs         03/12/16 12:41:15

  Modified:    lib/Template/TT3 Tag.pm
  Log:
  * removed action() and associated behaviour
  * changed start_match() and end_match() to start_token() and end_token()
  * changed scan() and parse() to return $document (or alternate) to indicate
    success
  * added documentation
  
  Revision  Changes    Path
  1.4       +367 -64   TT3/lib/Template/TT3/Tag.pm
  
  Index: Tag.pm
  ===================================================================
  RCS file: /template-toolkit/TT3/lib/Template/TT3/Tag.pm,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Tag.pm	2003/12/15 15:32:42	1.3
  +++ Tag.pm	2003/12/16 12:41:14	1.4
  @@ -17,7 +17,7 @@
   #   modify it under the same terms as Perl itself.
   #
   # REVISION
  -#   $Id: Tag.pm,v 1.3 2003/12/15 15:32:42 abw Exp $
  +#   $Id: Tag.pm,v 1.4 2003/12/16 12:41:14 abw Exp $
   #
   #========================================================================
   
  @@ -29,7 +29,7 @@
   use vars qw( $VERSION $DEBUG $ERROR $WARNING $TAG );
   use base qw( Template::TT3::Base );
   
  -$VERSION = sprintf("%d.%02d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/);
  +$VERSION = sprintf("%d.%02d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/);
   $DEBUG   = 0 unless defined $DEBUG;
   $ERROR   = '';
   $TAG     = {
  @@ -70,57 +70,26 @@
   
   
   #------------------------------------------------------------------------
  -# scan($textref, $document, $start, $end)
  +# scan($content, $document, $start, $end)
   #------------------------------------------------------------------------
   
   sub scan {
  -    my ($self, $textref, $document, $start, $end) = @_;
  -    @$self{ qw( start_match end_match ) } = ($start, $end);
  -    $self->parse($textref, $document);
  +    my ($self, $content, $document, $start, $end) = @_;
  +    @$self{ qw( start_token end_token ) } = ($start, $end);
  +    return $self->parse($content, $document);
   }
   
   
   #------------------------------------------------------------------------
  -# parse($textref, $document)
  +# parse($content, $document)
   #
   # Method to parse the tag content, usually redefined by subclasses to do
  -# something useful.  Otherwise, the action() method is called to activate
  -# any other handler for the tag.
  -#
  -# TODO: perhaps the default should be to call $document->body()
  +# something useful.
   #------------------------------------------------------------------------
   
   sub parse {
  -    my ($self, $textref, $document) = @_;
  -    my $action = $self->{ action };
  -    return $self->error('no action') unless defined $action;
  -    my $result;
  -
  -    $self->debug("parse('$$textref', '$document->{ name }')\n") if $DEBUG;
  -
  -    if (ref $action eq 'CODE') {
  -        # subroutine reference
  -        eval {
  -            $result = &$action($textref, $document);
  -        };
  -        $result = $self->error($@) if $@;
  -    }
  -    elsif ($action eq '1' || $action eq '0') {
  -        # simple return code
  -        $result = $action;
  -    }
  -    elsif (! ref $action) {
  -        # TODO: not sure about this
  -
  -        # method name to call against document
  -        $result = $document->$action($textref)
  -            || $self->error($document->error());
  -    }
  -    else {
  -        $result = $self->error('unknown action type: ', ref $action);
  -    }
  -
  -    return $result;
  +    my ($self, $content, $document) = @_;
  +    return $document;
   }
   
   
  @@ -128,7 +97,6 @@
   # start() / start($token)
   # end() / end($token)
   # name() / name($name)
  -# action() / action($name)
   #
   # Accessor methods to get/set the start tag, end tag and optional tag 
   # name and action attributes.
  @@ -153,32 +121,23 @@
       return @_ ? ($self->{ name } = shift) : $self->{ name };
   }
   
  -sub action {
  -    my $self = shift;
  -    return @_ ? ($self->{ action } = shift) : $self->{ action };
  -}
  -
   
   #------------------------------------------------------------------------
  -# start_match()
  -# end_match()
  +# start_token()
  +# end_token()
   # 
  -# Accessor methods to return the actual text matched for the start and 
  -# end tags respectively.  If the start and end tags are strings then
  -# these will contain the same values.  If the start and end tags are 
  -# regexen, then the start_match and end_match may vary.  These values
  -# can only be guaranteed to be correct in the context of a call to the 
  -# scan() method.
  +# Accessor methods to return the actual tokens matched for the start and 
  +# end tags respectively, as set by the scan() method.
   #------------------------------------------------------------------------
   
  -sub start_match {
  +sub start_token {
       my $self = shift;
  -    return @_ ? ($self->{ start_match } = shift) : $self->{ start_match };
  +    return @_ ? ($self->{ start_token } = shift) : $self->{ start_token };
   }
   
   sub end_match {
       my $self = shift;
  -    return @_ ? ($self->{ end_match } = shift) : $self->{ end_match };
  +    return @_ ? ($self->{ end_token } = shift) : $self->{ end_token };
   }
   
   
  @@ -212,27 +171,363 @@
   
   =head1 SYNOPSIS
   
  -    package Template::TT3::Tag;
  -
  -    # TODO
  +    package My::Custom::Tag;
  +    use base qw( Template::TT3::Tag );
  +    use vars qw( $DEBUG $TAG );
  +
  +    $DEBUG = 0 unless defined $DEBUG;
  +    $TAG   = {
  +        start => '[%',
  +        end   => '%]',
  +        name  => 'mytag',
  +    };
  +
  +    sub parse {
  +        my ($self, $content, $document) = @_;
  +
  +        # NOTE: the API for the document class isn't fixed yet
  +        # so this is all tentative
  +
  +        if ($$content =~ /^INCLUDE (\w+)$/) {
  +            # simple directive            
  +            return $document->add_item( include => $1 );
  +        }
  +        elsif ($$content =~ /^IF (.*)$/) {
  +            # start of block directive            
  +            return $document->begin_item( if => $1 );
  +        }
  +        elsif ($$content eq 'END') {
  +            # end of block directive
  +            return $document->end_item();
  +        }
  +        .
  +        . # etc...
  +        .
  +        else {
  +            return $self->error( "invalid directive: ",
  +                                 $self->start_token(),
  +                                 $$content,
  +                                 $self->end_token() );
  +        }
  +    }
   
   =head1 DESCRIPTION
   
  -# TODO
  +This module implements a base class object for representing embedded
  +tags in a template document.  
   
  +Tags are identified in the source of a template by a literal string or
  +regular expression which marks its C<start>.  The default start token
  +is of course C<[%>.  A tag may also define a literal string or regular
  +expression which marks its C<end>.  The default end token is C<%]>.
  +
  +    use Template::TT3::Tag;
  +
  +    my $tag = Template::TT3::Tag->new();
  +    print $tag->start();        # [%
  +    print $tag->end();          # %]
  +
  +The C<start> and C<end> tokens can be provided as named parameters
  +to the new() constructor method.
  +
  +    my $tag = Template::TT3::Tag->new(
  +        start => '<*',
  +        end   => '*>',
  +    );
  +    print $tag->start();        # <*
  +    print $tag->end();          # *>
  +
  +Here's another example showing how regular expressions can be used
  +to define the C<start> and C<end> of the tag.
  +
  +    my $tag = Template::TT3::Tag->new(
  +        start => qr/ < (?i:tt) [23]? : /x,
  +        end   => qr/ \/? > /x,
  +    );
  +    print $tag->start();        # (?x-ism: < (?i:tt) [23]? : )
  +    print $tag->end();          # (?x-ism: /? > )
  +
  +Here we define the C<start> token as a regular expression which
  +matches a left angle bracket, followed by C<TT> in any case (thanks
  +to the C<(?i: ... )> construct around it), then an optional C<2> or
  +C<3>, and finally a colon.  The C<end> token permits an optional slash
  +character C</> followed by a mandatory right angle bracket.  Here are
  +some examples of embedded tags that will be matched by this
  +configuration.  The tag content is shown as the simple string C<foo>
  +but could contain any text not matching the C<end> regular expression.
  +
  +    <tt:foo>
  +    <TT:foo>
  +    <tt:foo/>
  +    <TT:foo/>
  +    <tt2:foo>
  +    <TT2:foo/>
  +    <tt3:foo>
  +    <TT3:foo/>
  +
  +You can also subclass the Template::TT3::Tag module to create
  +your own custom tags.  The default C<start>, C<end> and a simple
  +C<name> to identify your tag style can be defined in a hash array
  +referenced by the C<$TAG> package variable.
  +
  +    package My::Custom::Tag;
  +    use base qw( Template::TT3::Tag );
  +    use vars qw( $DEBUG $TAG );
  +
  +    $TAG   = {
  +        start => '<%',
  +        end   => '%>',
  +        name  => 'mytag',
  +    };
  +
  +    package main;
  +    my $tag = My::Custom::Tag->new();
  +    print $tag->start();        # <%
  +    print $tag->end();          # %>
  +
  +The C<$TAG> package variable defines the default start and end tokens, 
  +but you can still provide alternate values as configuration options.
  +
  +    my $tag = My::Custom::Tag->new( start => '[%', 
  +                                    end   => '%]' );
  +    print $tag->start();        # [%
  +    print $tag->end();          # %]
  +
  +=head2 Closed Tags
  +
  +The job of matching tags in the source of a template document is 
  +handled by the Template::TT3::Scanner module.  This accepts a list
  +of any number of Template::TT3::Tag objects, or subclasses thereof,
  +and constructs a single regular expression that matches any of the 
  +C<start> markers for the tags.
  +
  +    my $tag1 = Template::TT3::Tag->new( 
  +        start => '[%', 
  +        end   => '%]',
  +        name  => 'tt3tag',
  +    );
  +    my $tag2 = Template::TT3::Tag->new( 
  +        start => qr/(?m:^)=/, 
  +        end   => qr/(?m:$)/,
  +        name  => 'podcmd',
  +    );
  +
  +    my $scanner = Template::TT3::Scanner->new(
  +        tags => [ $tag1, $tag2 ],
  +    );
  +
  +    $scanner->scan($text, $document)
  +        || die $scanner->error();
  +
  +Tags that define both a C<start> and C<end> are known as I<closed
  +tags>.  When the scanner identifies a closed tag it scans ahead to the
  +C<end> marker to identify the tag content in advance.  It then passes
  +a reference to a string containing the content to the C<scan()> method
  +of the tag object.  The scanner also passes the current document
  +object and the start and end tokens that it matched.  The internal
  +C<start> and C<end> values for a tag may be regular expressions, but
  +the scanner always passes the actual strings that matched.  The base
  +class scan() method stores the matched start and end tokens internally 
  +as the C<start_token> and C<end_token> items and then calls the C<parse()>
  +method.
  +
  +    sub scan {
  +        my ($self, $content, $document, $start, $end) = @_;
  +        @$self{ qw( start_token end_token ) } = ($start, $end);
  +        return $self->parse($content, $document);
  +    }
  +
  +You can subclass either the C<scan()> or C<parse()> methods as you see
  +fit.  The scan() method is generally used to perform any simple
  +pre-processing that is independent of the tag content.  The parse()
  +method is typically used to implement more complex parser logic
  +related to the tag content.  For example, the
  +Template::TT3::Tag::Directive module uses the scan() method to look
  +for the comment, pre-chomp and post-chomp flags, C<#>, C<-> and C<+>,
  +but leaves the parse() method to parse the directives contained 
  +therein.  This separation makes it possible to subclass the module
  +and reimplement one, the other, or both methods as required.
  +
  +    [%- INCLUDE header title="Hello World" +%]
  +        ^-------- $tag->parse() ---------^
  +      ^---------- $tag->scan() ------------^
  +    ^------------ $scanner->scan() ----------^
  +
  +=head2 Open Tags
  +
  +It is also possible to define a tag which has no C<end> marker defined.
  +This is known as an I<open tag>.  The most commonly encountered open
  +tag is an interpolated variable:
  +
  +    blah blah $foo blah
  +              ^
  +
  +The C<$> character marks the start of the tag, but there is nothing
  +to explicitly mark the end of the tag.  It might be sufficient for
  +certain purposes to use whitespace to indicate the end of the tag.
  +However, this would preclude the use of variables that included
  +arguments containing spaces:
  +
  +    blah blah $bar.join(', ') blah
  +                          ^
  +
  +In the case of open tags the scanner does not scan forwards to the end
  +of the tag but instead passes the complete text buffer to the tag
  +scan() method for further processing.  The current regex position 
  +marks the first character in the string immediately after the start
  +token in question.  The scan() or parse() methods should parse as much
  +of the string as required from the current position (C<\G>) onwards.
  +The C</cg> flags are required on the regular expression to enable 
  +global matching (C</g>) and to prevent Perl from automatically 
  +resetting the regex position on a failed match (C</c>).  The C</x>
  +flag is also useful for improving the legibility of regular 
  +expressions by allowing whitespace and comments to be included 
  +(and ignored).
  +
  +    sub parse {
  +        my ($self, $content, $document) = @_;
  +        my ($var, $args);
  +
  +        if ($$content =~ / \G ( \w+ ) /cgx) {
  +            # found an identifier, now look for args
  +            $var = $1;
  +
  +            if ($$content =~ / \G \( /cgx) {
  +                $args = $self->parse_args($content) || return;
  +            }
  +            else {
  +                $args = 0;
  +            }
  +
  +            return $document->add( var => [ $var, $args ] );
  +        }
  +        else {
  +            return $self->error("no variable after $self->{start_token}")
  +        }
  +    }
  +            
   =head1 METHODS
   
   =head2 new()
  +
  +Constructor method used to instantiate a new tag object.  Accepts
  +a list or reference to a hash array of named parameters.
  +
  +    use Template::TT3::Tag;
  +
  +    my $tag = Template::TT3::Tag->new({
  +        start => '<%',
  +        end   => '%>',
  +        name  => 'mytag',
  +    });
  +
  +=head2 scan($content, $document, $start, $end)
  +
  +Method called by the scanner when a tag has been identified in the
  +template source text.  The first argument, C<$content>, contains a
  +reference to a text string which contains the content of the tag.  The
  +second argument, C<$document>, is a reference to a document handler
  +object which constructs the content of the template from messages sent
  +to it by the scanner and tags.  The third argument is a string
  +indicating the start token that was matched.  The fourth argument is a
  +string indicating the end token matched, or undef if the tag does not
  +define an end marker (i.e. is an open tag).
  +
  +In the case of a closed tag, the C<$content> variable will reference
  +a string containing the text extracted from the source document between
  +the start and end markers.  In the case of an open tag, it will contain
  +a reference to a string containing the complete template source with 
  +the regular expression pointer set to the current location.
  +
  +The base class scan() method sets the internal C<start_token> and
  +C<end_token> items to indicate the start and end tokens matched.  These
  +are not to be confused with the C<start> and C<end> items which may
  +contain the same values or might instead be expressed as regular
  +expressions.  It then delegates to the parse() method.
  +
  +The scan() method should return a reference to the original
  +C<$document> handler, or a reference to a new one which will become
  +the target of subsequent scanner events.  In the general case, the 
  +scan() method simply returns the value of the parse() method.
  +
  +=head2 parse($content, $document)
  +
  +Method called by the base class scan() method to parse the contents
  +of the tag.  The arguments passed are the first two arguments passed to 
  +the scan() method.
   
  -# TODO
  +The parse() method should return a reference to the C<$document> handler
  +or an alternate one, as per scan().
   
  +=head2 name()
  +
  +Accessor method used to get/set the tag name.
  +
  +    $tag->name('foo');
  +    print $tag->name();     # foo
  +
  +=head2 start()
  +
  +Accessor method used to get/set the literal string or regular expression
  +used to mark the start of the tag.
  +
  +    $tag->start('<*');
  +    print $tag->start();     # <*
  +
  +Literal strings may safely contain regular expression metacharacters
  +(e.g. C<*> in the example above).  Regular expressions should be specified
  +using the C<qr/ ... /> construct to pre-compile them into references.
  +In this case any metacharacters should of course be escaped.
  +
  +    $tag->start(qr/<\*/);
  +    print $tag->start();     # (?-xism:<\*)
  +
  +=head2 end()
  +
  +Accessor method used to get/set the literal string or regular expression
  +used to mark the end of the tag.  An argument passed to set a new 
  +value should be a literal string or regular expression as per start().
  +
  +    $tag->end('%>');
  +    print $tag->end();     # %>
  +
  +=head2 start_token()
  +
  +This method returns the actual start token matched for the most
  +recent call to the scan() method.  If you subclass the scan() method
  +then be sure to save the start and end tokens internally if you want
  +to reference them later.
  +
  +    sub scan {
  +        my ($self, $content, $document, $start, $end) = @_;
  +        @$self{ qw( start_token end_token ) } = ($start, $end);
  +
  +        # more processing....
  +
  +        return $document;
  +    }
  +
  +=head2 end_token()
  +
  +This method returns the actual end token matched for the most recent
  +call to the scan() method, as per start_token().  In the case of an open
  +tag that has no C<end> defined, the end_token() method will return undef.
  +
  +=head2 is_open()
  +
  +Returns true if the tag is an open tag or false if not.
  +
  +=head2 is_closed()
  +
  +Returns true if the tag is a closed tag or false if not.
  +
   =head1 AUTHOR
   
   Andy Wardley  E<lt>abw@wardley.orgE<gt>
   
   =head1 VERSION
   
  -$Revision: 1.3 $
  +$Revision: 1.4 $
   
   =head1 COPYRIGHT
   
  @@ -241,6 +536,14 @@
   
   This module is free software; you can redistribute it and/or
   modify it under the same terms as Perl itself.
  +
  +=head1 SEE ALSO
  +
  +For examples of tag subclasses that perform more specific processing,
  +see L<Template::TT3::Tag::Comment>, L<Template::TT3::Tag::Escape>,
  +L<Template::TT3::Tag::Variable>, and L<Template::TT3::Tag::Directive>.
  +For more information about the scanner and document classes, see
  +L<Template::TT3::Scanner> and L<Template::TT3::Document> respectively.
   
   =cut