[Templates-cvs] cvs commit: TT3/lib/Template Parser.pm
cvs@template-toolkit.org
cvs@template-toolkit.org
Thu, 02 Dec 2004 16:46:35 +0000
cvs 04/12/02 16:46:34
Modified: lib/Template Parser.pm
Log:
* more docs, minor fixes and enhancements
Revision Changes Path
1.17 +212 -128 TT3/lib/Template/Parser.pm
Index: Parser.pm
===================================================================
RCS file: /template-toolkit/TT3/lib/Template/Parser.pm,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- Parser.pm 2004/12/02 14:55:15 1.16
+++ Parser.pm 2004/12/02 16:46:34 1.17
@@ -18,7 +18,7 @@
# modify it under the same terms as Perl itself.
#
# REVISION
-# $Id: Parser.pm,v 1.16 2004/12/02 14:55:15 abw Exp $
+# $Id: Parser.pm,v 1.17 2004/12/02 16:46:34 abw Exp $
#
#========================================================================
@@ -29,7 +29,7 @@
use Template::Base;
use base qw( Template::Base );
-our $VERSION = sprintf("%d.%02d", q$Revision: 1.16 $ =~ /(\d+)\.(\d+)/);
+our $VERSION = sprintf("%d.%02d", q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/);
our $DEBUG = 0 unless defined $DEBUG;
our $ERROR = '';
our $THROW = 'parser';
@@ -65,6 +65,7 @@
our $COMPOPS = qr/ [=!<>]= | [<>] /x; # | eq | ne | [lg][et]
our $BOOLOPS = qr/ &&? | \|\| | or | and /x;
our $BINARY = qr/ $MATHOPS | $COMPOPS | $BOOLOPS /ox;
+our $TERTIARY = [ qr/\?/, qr/\:/ ];
# regexen to match decimal and hexadecimal numbers, integers and
# floats, with optionals signs and exponents
@@ -153,11 +154,11 @@
my $comment = qr/ \# $eol /sx;
my $wspace = qr/ \s* (?:$comment\s*)* /sx;
- # TODO: I don't think these need to be config options.
- my $assign = $config->{ assign } || $self->pkgvar( ASSIGN => $ASSIGN );
- my $range = $config->{ range } || $self->pkgvar( RANGE => $RANGE );
- my $unary = $config->{ unary } || $self->pkgvar( UNARY => $UNARY );
- my $binary = $config->{ binary } || $self->pkgvar( BINARY => $BINARY );
+ my $assign = $config->{ op_assign } || $self->pkgvar( ASSIGN => $ASSIGN );
+ my $range = $config->{ op_range } || $self->pkgvar( RANGE => $RANGE );
+ my $unary = $config->{ op_unary } || $self->pkgvar( UNARY => $UNARY );
+ my $binary = $config->{ op_binary } || $self->pkgvar( BINARY => $BINARY );
+ my $tertiary = $config->{ op_tertiary } || $self->pkgvar( TERTIARY => $TERTIARY );
# construct regexen to match ignorable whitespace and comments,
# and various other punctuation tokens, including commas,
@@ -168,13 +169,13 @@
$self->{ eol } = qr/ \G $eol /x;
$self->{ wspace } = qr/ \G $wspace /x;
$self->{ comma } = qr/ \G (?:$wspace,)? $wspace /x;
- $self->{ question } = qr/ \G $wspace \? $wspace /x;
- $self->{ colon } = qr/ \G $wspace : $wspace /x;
$self->{ semicolon } = qr/ \G $wspace ; $wspace /x;
$self->{ assign } = qr/ \G $wspace ($assign) $wspace /x;
$self->{ range } = qr/ \G $wspace ($range) $wspace /x;
$self->{ unary } = qr/ \G $wspace ($unary) $wspace /x;
$self->{ binary } = qr/ \G $wspace ($binary) $wspace /x;
+ $self->{ tertiary } = qr/ \G $wspace ($tertiary->[0]) $wspace /x;
+ $self->{ otherwise } = qr/ \G $wspace ($tertiary->[1]) $wspace /x;
$self->{ tag_end } = qr/ (?= \G $wspace $tag_end ) /sx
if $tag_end;
@@ -192,21 +193,19 @@
#------------------------------------------------------------------------
# parse_expression($textref, \%options)
#
-# Implements a left-recursive parse of an expression.
+# Implements a left-recursive parse of an expression.
#
-# foo && bar && baz && qux
-# foo && ( bar && ( baz && qux ) ) # old (incorrect) way
-# ( ( foo && bar ) && baz ) && qux # new (correct) way
-#
-# expression: UNARYOP expression
-# | term BINOP expression
-# | expression ? expression : expression
+# expr: term
+# | unary expr
+# | term binary expr
+# | expr ? expr : expr
+#
#------------------------------------------------------------------------
sub parse_expression {
my ($self, $textref, $options) = @_;
my ($expr, $term, $error, $unop, $binop, $pos);
- my @tokens = ();
+ my @terms = ();
my $stop;
my $tag_end = $self->{ tag_end };
@@ -219,33 +218,35 @@
# check for end of tag
if ($tag_end && ($$textref =~ /$tag_end/cg)) {
- $self->debug("matched tag end before unary operator\n") if $DEBUG;
return $self->error("unexpected end of directive tag after '$binop'")
- if @tokens;
+ if @terms;
$stop = 1;
last CHUNK;
}
- else {
- $self->debug("end_regex ($tag_end) not found before unaryop: '", $self->next_token($textref), "'\n") if $DEBUG && $tag_end;
-
- }
if ($$textref =~ /$self->{ unary }/cg ) {
+ # got a unary operator which should be followed by a term
+ # or another unary operator, so we call ourselves and ask
+ # for the first term only
$unop = $1;
- $self->debug("unary operator: $unop\n") if $DEBUG;
+ $self->debug("expr unary operator: $unop\n") if $DEBUG;
+
if ($term = $self->parse_expression($textref, { first_term => 1 })) {
$term = [ unary => $unop, $term ];
}
else {
- return $self->unexpected( $textref,
- "after '$unop' where expression expected" );
+ return $self->unexpected($textref,
+ "after '$unop' where expression expected");
}
}
elsif ($term = $self->parse_term($textref)) {
$self->debug("expr term: @$term\n") if $DEBUG;
}
else {
- if (@tokens) {
+ if (@terms) {
+ # we haven't found a term, but we've already got some
+ # terms pushed onto @terms, so this must be coming after
+ # a binary operator, which means it's a syntax error
return $self->unexpected($textref,
"after '$binop' where expression expected");
}
@@ -255,37 +256,34 @@
}
}
- # return the first token if relevant option is set
+ # return the first token if relevant option is set
+ # or push it onto the list of terms
return $term if $options->{ first_term };
-
- # we got a term, munch munch
- push(@tokens, $term);
+ push(@terms, $term);
- # check for end_regex
+ # check for end_regex before we go looking for binary operator
if ($tag_end && ($$textref =~ /$tag_end/cgx)) {
$self->debug("matched end_regex before binaryop\n") if $DEBUG;
$stop = 1;
last CHUNK;
}
- else {
- $self->debug("end regex not found before binary op [",
- $self->next_token($textref), "]\n") if $DEBUG;
- }
-
- # is there a binary operator indicating more to come?
+
+ # now look for a binary operator indicating there's more to come
if ($$textref =~ /$self->{ binary }/cgx) {
$binop = $1;
- $self->debug("expr binop: $binop\n") if $DEBUG;
- push(@tokens, $binop);
+ $self->debug("expr binary operator: $binop\n") if $DEBUG;
+ push(@terms, $binop);
redo CHUNK;
}
}
- if (@tokens > 1) {
- $expr = [ binary => \@tokens ];
- }
- elsif (@tokens) {
- $expr = shift @tokens;
+ if (@terms > 1) {
+ # multiple terms is a sequence of binary operators and terms
+ $expr = [ binary => \@terms ];
+ }
+ elsif (@terms) {
+ # single term
+ $expr = shift @terms;
}
else {
return $self->decline('not an expression');
@@ -293,25 +291,28 @@
return $expr if $stop;
- if ($$textref =~ /$self->{ question }/cg) {
+ # we've got an expression, but it could be the start of a tertiary
+ # expression, with two more expressions to come
+ if ($$textref =~ /$self->{ tertiary }/cg) {
+ my $op = $1;
my ($true, $false);
+ $self->debug("expr tertiary operator: $op\n") if $DEBUG;
$true = $self->parse_expression($textref)
|| return $self->unexpected($textref,
- "after '?' where expression expected");
-
- $self->debug("expr test [$expr]\n") if $DEBUG;
+ "after '$op' where expression expected");
- return $self->missing("':' after expression following '?'")
- unless $$textref =~ /$self->{ colon }/cg;
-
- $self->debug("expr true [$true]\n") if $DEBUG;
-
+ # NOTE: tertiary operator could be redefined to something other
+ # than ':' so this error message could be wrong, but we can't
+ # print out the value we're looking for because it's a regex
+ return $self->missing($textref, "':' after expression following '$op'")
+ unless $$textref =~ /$self->{ otherwise }/cg;
+
+ $op = $1;
+
$false = $self->parse_expression($textref)
|| return $self->unexpected($textref,
- "after ':' where expression expected");
-
- $self->debug("expr false [$false]\n") if $DEBUG;
+ "after '$op' where expression expected");
$expr = [ tertiary => $expr, $true, $false ];
}
@@ -324,6 +325,8 @@
#------------------------------------------------------------------------
# parse_term($textref)
#
+# Parse a single term.
+#
# term: number # 3.14159
# | squote # 'blah blah'
# | dquote # "blah $var blah"
@@ -376,7 +379,7 @@
}
elsif ($$textref =~ /$PAREN/cog) {
$term = $self->parse_parens($textref) || return;
- return $self->unexpected( $textref, 'in parentheses ( ... )' )
+ return $self->unexpected($textref, 'in parentheses ( ... )')
unless $$textref =~ /$ENDPAREN/cog;
$term = [ parens => $term ];
}
@@ -403,6 +406,12 @@
}
+
+#========================================================================
+# terminals and other similar simple productions
+#========================================================================
+
+
#------------------------------------------------------------------------
# parse_ident($text)
#
@@ -527,14 +536,19 @@
# embedded ${ variable }
my $text = $2;
$token = $self->parse_variable(\$text)
- || return $self->missing( $textref, "variable in '\${ }'" );
+ || return $self->missing($textref, "variable in '\${ }'");
+ if ($text =~ / \G \s* (.+) /cgsx) {
+ $text = $1;
+ $text =~ s/\n/\\n/g;
+ return $self->error("unexpected text after embedded variable: $text");
+ }
push(@tokens, $token);
}
elsif (defined $3) {
# $variable reference
my $text = $3;
$token = $self->parse_variable(\$text)
- || return $self->missing( $textref, 'variable after \$' );
+ || return $self->missing($textref, 'variable after \$');
push(@tokens, $token);
}
else {
@@ -547,6 +561,12 @@
+
+#========================================================================
+# variables
+#========================================================================
+
+
#------------------------------------------------------------------------
# parse_variable($textref)
#
@@ -582,7 +602,7 @@
$args = $self->parse_args($textref) || return $args;
# moved from parse_args()
- return $self->missing( $textref, "')' at end of argument list" )
+ return $self->missing($textref, "')' at end of argument list")
unless ($$textref =~ /$ENDPAREN/cog);
}
}
@@ -597,7 +617,7 @@
if ( $$textref =~ /$DOTOP/cog ) {
# TODO: not sure why this is set to $term which is subsequently ignored...
$self->parse_varnodes($textref, $terms)
- || return $self->missing( $textref, "item after '.'" );
+ || return $self->missing($textref, "item after '.'");
}
return [ variable => $terms ];
@@ -621,7 +641,8 @@
do {
($node = $self->parse_varnode($textref))
|| return @$nodes
- ? $self->missing( $textref, "item after '.'" ) : $node;
+ ? $self->missing($textref, "item after '.'")
+ : $node;
push(@$nodes, $node);
}
while ($$textref =~ /$DOTOP/cog);
@@ -684,11 +705,11 @@
elsif ($$textref =~ /$EMBED/cog) {
my $text = $1;
($term = $self->parse_variable(\$text))
- || return $self->missing( $textref, "variable in '\${ }'" );
+ || return $self->missing($textref, "variable in '\${ }'");
}
elsif ($$textref =~ /$INTERP/cog) {
($term = $self->parse_variable($textref))
- || return $self->missing( $textref, "variable after '\$'");
+ || return $self->missing($textref, "variable after '\$'");
}
else {
return $self->decline('not a variable node');
@@ -698,7 +719,7 @@
$args = $self->parse_args($textref) || return;
# moved from parse_args
- return $self->missing( $textref, "')' at end of argument list" )
+ return $self->missing($textref, "')' at end of argument list")
unless ($$textref =~ /$ENDPAREN/cog);
}
@@ -706,30 +727,10 @@
}
-#------------------------------------------------------------------------
-# parse_qwlist($textref, $left)
-#
-# Parses a quoted word list, e.g. qw[ ] qw( foo bar baz ) qw< x y z >.
-#------------------------------------------------------------------------
-
-sub parse_qwlist {
- my ($self, $textref, $left) = @_;
-
- $self->debug("parse_qwlist(", $self->next_chunk($textref), ")\n")
- if $DEBUG;
-
- # look up regex to match corresponding right bracket
- my $regex = $RBRACKET->{ $left }
- || return $self->error("no right bracket defined to match $1\n");
- my $right = $BRACKETS->{ $left };
-
- # match text up to right bracket
- return $self->missing("$right after 'qw$left'")
- unless $$textref =~ /$regex/gc;
-
- return [ qwlist => $left, $1, $right ];
-}
+#========================================================================
+# lists, quoted lists, hash arrays, arguments and parameters lists
+#========================================================================
#------------------------------------------------------------------------
@@ -757,8 +758,8 @@
push(@items, [ range => $item, $end ]);
}
else {
- return $self->unexpected( $textref,
- "after '$range' where expression expected" );
+ return $self->unexpected($textref,
+ "after '$range' where expression expected");
}
}
else {
@@ -775,6 +776,32 @@
#------------------------------------------------------------------------
+# parse_qwlist($textref, $left)
+#
+# Parses a quoted word list, e.g. qw[ ] qw( foo bar baz ) qw< x y z >.
+#
+# $textref is a reference to the text being parsed and $left is the
+# opening bracket character.  The caller has presumably already consumed
+# the leading 'qw' and $left from the text (TODO: confirm against caller).
+#
+# On success returns [ qwlist => $left, $content, $right ], where $content
+# is whatever the right-bracket regex looked up in $RBRACKET captured as
+# $1 and $right is the matching bracket looked up in $BRACKETS.  If no
+# regex is defined for $left the result of $self->error() is returned;
+# if the closing bracket cannot be matched the result of $self->missing()
+# is returned.
+#------------------------------------------------------------------------
+
+sub parse_qwlist {
+    my ($self, $textref, $left) = @_;
+
+    $self->debug("parse_qwlist(", $self->next_chunk($textref), ")\n")
+        if $DEBUG;
+
+    # look up regex to match corresponding right bracket; report $left
+    # rather than $1, because no match has been performed in this sub yet
+    # and $1 would only reflect whatever the caller last captured
+    my $regex = $RBRACKET->{ $left }
+        || return $self->error("no right bracket defined to match $left\n");
+
+    my $right = $BRACKETS->{ $left };
+
+    # match text up to right bracket; /gc continues from the current
+    # pos() of the text and leaves it in place if the match fails
+    return $self->missing($textref, "$right after 'qw$left'")
+        unless $$textref =~ /$regex/gc;
+
+    # $1 is now the capture from the successful $regex match above
+    return [ qwlist => $left, $1, $right ];
+}
+
+
+#------------------------------------------------------------------------
# parse_hash($textref)
#
# Parses the contents of an anonymous hash definition. Skips leading
@@ -793,8 +820,8 @@
while ($key = $self->parse_key($textref)) {
$value = $self->parse_assign_expr($textref)
- || return $self->missing( $textref,
- "assignment after hash key $self->{ key }" );
+ || return $self->missing($textref,
+ "assignment after hash key $self->{ key }");
push(@hash, [ $key, $value ]);
# skip comma and/or whitespace
@@ -926,8 +953,8 @@
while ($key = $self->parse_key($textref)) {
$value = $self->parse_assign_expr($textref)
- || return $self->missing( $textref,
- "assignment after parameter $self->{ key }" );
+ || return $self->missing($textref,
+ "assignment after parameter $self->{ key }");
push(@$params, [ tuple => $key, $value ]);
# skip comma and/or whitespace
@@ -991,8 +1018,8 @@
$self->debug(" - assign ($op)\n") if $DEBUG;
$value = $self->parse_expression($textref)
- || return $self->unexpected( $textref,
- "after '$op' where expression expected" );
+ || return $self->unexpected($textref,
+ "after '$op' where expression expected");
return [ assign => $var, $value ];
}
}
@@ -1025,7 +1052,7 @@
if ($$textref =~ /$self->{ assign }/cg) {
$op = $1;
return $self->parse_expression($textref)
- || $self->missing( $textref, "expression after '$op'" );
+ || $self->missing($textref, "expression after '$op'");
}
else {
return $self->decline('not an assignment');
@@ -1051,7 +1078,7 @@
$ident = $self->parse_ident($textref) || return;
$expr = $self->parse_assign_expr($textref)
- || return $self->missing( $textref, "assignment after identifier '$ident'" );
+ || return $self->missing($textref, "assignment after identifier '$ident'");
return [ $ident, $expr ];
}
@@ -1105,11 +1132,10 @@
return $self->error('unexpected end of text in argument list');
}
elsif ($tag_end && ($$textref =~ /$tag_end/cg)) {
- return $self->missing( $textref, "')' at end of argument list" );
+ return $self->missing($textref, "')' at end of argument list");
}
else {
- return $self->unexpected( $textref,
- 'in argument list' );
+ return $self->unexpected($textref, 'in argument list');
}
}
@@ -1204,7 +1230,7 @@
elsif ($$textref =~ /$INTERP/cog) {
$self->debug("parsing filename variable\n") if $DEBUG;
$term = $self->parse_variable($textref)
- || return $self->missing( $textref, "variable after '\$'");
+ || return $self->missing($textref, "variable after '\$'");
}
else {
return $self->decline('not a template name');
@@ -1393,9 +1419,6 @@
__END__
-# TODO: these docs are incomplete and incorrect...
-# ...but I'm working on it as of 2nd Dec
-
=head1 NAME
Template::Parser - parser for core language elements
@@ -1413,6 +1436,9 @@
=head1 DESCRIPTION
+NOTE: this documentation is incomplete. Items left TODO are marked
+(or are missing altogether). Patches welcome.
+
The Template::Parser module implements a recursive descent parser for
parsing the basic constructs of the Template Toolkit language. These
include things like expressions (parse_expression()), basic terms
@@ -1637,10 +1663,16 @@
=head2 new()
-# TODO: constructor method inherited from Template::Base.
+A constructor method inherited from Template::Base which creates a new
+Template::Parser object.
-The following configuration options may be provided:
+ use Template::Parser;
+ my $parser = Template::Parser->new();
+
+The following configuration options may be provided as a list of
+named parameters or as a reference to a hash array.
+
=head3 tag_end
A string or regular expression indicating the token used to mark the
@@ -1649,19 +1681,22 @@
my $parser = Template::Parser->new( tag_end => qr/%]/ );
-TODO: more on this.
-
=head3 directives
-TODO: a reference to a hash of which the keys are reserved directive
-keywords (it doesn't matter what the values are as long as they contain
-a true value, but typically they are Template::Directive objects or
-class names). The parser uses these to identify reserved words that
-typically indicate the end of one directive and the start of the next.
-
-TODO: for example, the FOREACH keyword indicates the end of the
-parameter list for a preceeding INCLUDE directive, and the start
-of the FOREACH directive, here being used in side-effect notation.
+This option can be used to provide a reference to a hash array of
+directives. The keys in the hash array are reserved directive
+keywords which the parser should detect in certain places. It doesn't
+really matter what the values are set to, as long as they contain a
+true value. However, they typically reference Template::Directive
+objects or class names as returned by the Template::Directives
+directives() method.
+
+The parser uses this to identify reserved words that indicate the end
+of one directive and the start of the next. The following example shows
+the FOREACH directive being used in side-effect notation with an INCLUDE
+directive. Here the FOREACH keyword indicates the end of the INCLUDE
+parameter list (and hence the end of the INCLUDE directive) and the start
+of the following FOREACH directive.
[% INCLUDE header
title = 'Hello World'
@@ -1669,13 +1704,21 @@
FOREACH x IN y
%]
-Here the parse_params() method is responsible for parsing the
-parameter list following the C<header> template name in the INCLUDE
-directive. It continues through C<title>, and C<author> but then
-recognises C<FOREACH> as a reserved keyword, so stops what it is doing
-and returns a list of the two parameters found. The C<FOREACH>
-keyword is then left at the global regex position ready for dispatching
-the Template::Directive::Foreach module to parse it.
+The parse_params() method is responsible for parsing the parameter
+list following the C<header> template name in the INCLUDE directive.
+It continues through C<title>, and C<author> but then recognises
+C<FOREACH> as a reserved keyword, stops at that point and returns a
+list of the two parameters found. The current regex match position
+remains set at the start of the C<FOREACH> keyword, ready to be
+detected and dispatched to the Template::Directive::Foreach module for
+handling.
+
+The parser doesn't directly handle the parsing of complete directives.
+It uses directive keywords as "stop words" to indicate when a
+parameter list ends, but leaves it up to the Template::Tag::Directive
+parse_directive() method to recognise the keywords and dispatch them
+accordingly. The directive object then calls back to the parser to
+parse the core language elements.
=head2 parse_expression(\$text)
@@ -2179,24 +2222,65 @@
=head2 parse_args(\$text)
+TODO: parse a list of arguments, as enclosed by parens. Only parses the
+contents, not the parens themselves.
+
+ foo(a, b, c)
+ ^^^^^^^
+
=head2 parse_params(\$text)
+TODO: Parse a list of named parameters such as those in an INCLUDE directive
+
+ INCLUDE header
+ x = 10, y = 20
+ ^^^^^^^^^^^^^^
+
=head2 parse_parens(\$text)
+TODO: Parse an assignment or expression enclosed in parentheses.
+
=head2 parse_assign(\$text)
+TODO: parse an assignment of variable to expression, such as in the SET
+directive.
+
+ foo = bar
+ wiz.waz = a < b ? c : d
+
=head2 parse_assign_expr(\$text)
+TODO: parse an assignment to an expression
+
+ = 10
+ = a * b
+
=head2 parse_ident_assign_expr(\$text)
+TODO: parse an assignment to a simple identifier, as used by MY, CONSTANT.
+No dotops allowed on LHS.
+
+ a = 10
+ b = 20
+
=head2 parse_ident_args(\$text)
+TODO: parse an identifier followed by an optional argument list
+
+ foo(10, 20)
+
=head2 parse_whitespace(\$text)
+TODO
+
=head2 parse_semicolon(\$text)
+TODO
+
=head2 parse_comma(\$text)
+TODO
+
=head2 decline($message)
This method is called by Template::Parser methods to indicate that
@@ -2349,7 +2433,7 @@
=head1 VERSION
-$Revision: 1.16 $
+$Revision: 1.17 $
=head1 COPYRIGHT