[Templates-cvs] cvs commit: TT3/lib/Template Parser.pm
cvs@template-toolkit.org
cvs@template-toolkit.org
Thu, 02 Dec 2004 14:55:15 +0000
cvs 04/12/02 14:55:15
Modified: lib/Template Parser.pm
Log:
* added documentation for most methods
Revision Changes Path
1.16 +970 -99 TT3/lib/Template/Parser.pm
Index: Parser.pm
===================================================================
RCS file: /template-toolkit/TT3/lib/Template/Parser.pm,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- Parser.pm 2004/12/01 18:28:39 1.15
+++ Parser.pm 2004/12/02 14:55:15 1.16
@@ -18,7 +18,7 @@
# modify it under the same terms as Perl itself.
#
# REVISION
-# $Id: Parser.pm,v 1.15 2004/12/01 18:28:39 abw Exp $
+# $Id: Parser.pm,v 1.16 2004/12/02 14:55:15 abw Exp $
#
#========================================================================
@@ -29,7 +29,7 @@
use Template::Base;
use base qw( Template::Base );
-our $VERSION = sprintf("%d.%02d", q$Revision: 1.15 $ =~ /(\d+)\.(\d+)/);
+our $VERSION = sprintf("%d.%02d", q$Revision: 1.16 $ =~ /(\d+)\.(\d+)/);
our $DEBUG = 0 unless defined $DEBUG;
our $ERROR = '';
our $THROW = 'parser';
@@ -135,7 +135,7 @@
# construct regex to match everything up to the end of the current
# line or the end of tag token, whichever comes first
$eol = qr/
- .*? # capture everything non-greedily
+ [^\n]*? # capture everything on this line non-greedily
(?: \n # either match and consumer a newline character
| $ # or look ahead for the end of the text or the
| (?= # end-of-tag marker
@@ -152,6 +152,8 @@
my $comment = qr/ \# $eol /sx;
my $wspace = qr/ \s* (?:$comment\s*)* /sx;
+
+ # TODO: I don't think these need to be config options.
my $assign = $config->{ assign } || $self->pkgvar( ASSIGN => $ASSIGN );
my $range = $config->{ range } || $self->pkgvar( RANGE => $RANGE );
my $unary = $config->{ unary } || $self->pkgvar( UNARY => $UNARY );
@@ -848,56 +850,6 @@
#------------------------------------------------------------------------
-# parse_assign_expr($textref)
-#
-# Parses an assignment to an expression, e.g. "= 10", "=> x", " => y || z".
-#------------------------------------------------------------------------
-
-sub parse_assign_expr {
- my ($self, $textref) = @_;
- my ($op, $expr);
-
- $self->debug("parse_assign_expr(",
- $self->next_chunk($textref), ")") if $DEBUG;
-
- # skip any leading whitespace, comments, etc.
- $$textref =~ /$self->{ wspace }/cg;
-
- if ($$textref =~ /$self->{ assign }/cg) {
- $op = $1;
- return $self->parse_expression($textref)
- || $self->missing( $textref, "expression after '$op'" );
- }
- else {
- return $self->decline('not an assignment');
- }
-}
-
-
-#------------------------------------------------------------------------
-# parse_ident_assign_expr($textref)
-#
-# Parses an assignment of a simple identifier to an expression, e.g.
-# "x = 10", "y => x", "z = x || y".
-#------------------------------------------------------------------------
-
-sub parse_ident_assign_expr {
- my ($self, $textref) = @_;
- my ($ident, $expr);
-
- $self->debug("parse_ident_assign_expr(",
- $self->next_chunk($textref), ")") if $DEBUG;
-
- $$textref =~ /$self->{ wspace }/cgx;
-
- $ident = $self->parse_ident($textref) || return;
- $expr = $self->parse_assign_expr($textref)
- || return $self->missing( $textref, "assignment after identifier '$ident'" );
- return [ $ident, $expr ];
-}
-
-
-#------------------------------------------------------------------------
# parse_args($textref)
#
# Parse the contents of a parenthesised argument list.
@@ -1054,9 +1006,57 @@
}
+#------------------------------------------------------------------------
+# parse_assign_expr($textref)
+#
+# Parses an assignment to an expression, e.g. "= 10", "=> x", " => y || z".
+#------------------------------------------------------------------------
+
+sub parse_assign_expr {
+ my ($self, $textref) = @_;
+ my ($op, $expr);
+
+ $self->debug("parse_assign_expr(",
+ $self->next_chunk($textref), ")") if $DEBUG;
+
+ # skip any leading whitespace, comments, etc.
+ $$textref =~ /$self->{ wspace }/cg;
+
+ if ($$textref =~ /$self->{ assign }/cg) {
+ $op = $1;
+ return $self->parse_expression($textref)
+ || $self->missing( $textref, "expression after '$op'" );
+ }
+ else {
+ return $self->decline('not an assignment');
+ }
+}
#------------------------------------------------------------------------
+# parse_ident_assign_expr($textref)
+#
+# Parses an assignment of a simple identifier to an expression, e.g.
+# "x = 10", "y => x", "z = x || y".
+#------------------------------------------------------------------------
+
+sub parse_ident_assign_expr {
+ my ($self, $textref) = @_;
+ my ($ident, $expr);
+
+ $self->debug("parse_ident_assign_expr(",
+ $self->next_chunk($textref), ")") if $DEBUG;
+
+ $$textref =~ /$self->{ wspace }/cgx;
+
+ $ident = $self->parse_ident($textref) || return;
+ $expr = $self->parse_assign_expr($textref)
+ || return $self->missing( $textref, "assignment after identifier '$ident'" );
+ return [ $ident, $expr ];
+}
+
+
+#------------------------------------------------------------------------
# parse_ident_args($text)
#
# Parser a simple identifier (e.g. variable name) optionally followed by
@@ -1128,6 +1128,7 @@
+
#------------------------------------------------------------------------
# parse_filename($textref)
#
@@ -1376,6 +1377,12 @@
}
+#------------------------------------------------------------------------
+# next_chunk(\$text)
+#
+# Returns the next text, truncated to a maximum length of 16 characters.
+#------------------------------------------------------------------------
+
sub next_chunk {
my ($self, $textref) = @_;
return $self->dump_text($self->next_text($textref), 16);
@@ -1386,91 +1393,955 @@
__END__
-# TODO: these docs are incomplete and incorrect
+# TODO: these docs are incomplete and incorrect...
+# ...but I'm working on it as of 2nd Dec
=head1 NAME
-Template::Parser - parser basic language elements
+Template::Parser - parser for core language elements
=head1 SYNOPSIS
use Template::Parser;
+
+ my $parser = Template::Parser->new();
- # TODO
+ my $text = 'a + b > c ? d : e';
+ my $expr = $parser->parse_expression(\$text)
+ || die $parser->error();
+
=head1 DESCRIPTION
+
+The Template::Parser module implements a recursive descent parser for
+parsing the basic constructs of the Template Toolkit language. These
+include things like expressions (parse_expression()), basic terms
+(parse_term()), variables (parse_variable()), parameter lists
+(parse_params()), hash and list definitions (parse_hash() and
+parse_list()), and so on.
+
+The Template::Parser is typically used by Template::Directive modules
+for parsing the basic language elements that comprise the more complex
+directives.
+
+The Template::Directive::Include module, for example, implements a
+parse() method which is called when the INCLUDE directive keyword is
+identified in a template tag. It expects to find a template name
+immediately after the keyword (disregarding whitespace, comments,
+etc.), followed by one or more parameters defining local variable
+values.
+
+ [% INCLUDE header
+ title = 'Hello World'
+ %]
+
+Here's the Template::Directive::Include parse() method in its
+entirety. A reference to the source text is passed as the first
+argument (after the implicit $self class name or object reference),
+followed by a reference to a Template::Handler object, against which
+we make calls to register the parsed directive (the expr() method).
+The third argument is a hash array containing various items relating
+to the current tag matched, and includes a reference to a
+Template::Parser object as the C<parser> item.
+
+ sub parse {
+ my ($self, $textref, $handler, $match) = @_;
+
+ my $parser = $match->{ parser }
+ || return $self->error('no parser defined');
+
+ my $name = $parser->parse_name($textref)
+ || return $self->error('missing template name in ',
+ $self->keyword($match), ' directive');
+
+ my $args = $parser->parse_params($textref)
+ || return $self->error('missing parameters in ',
+ $self->keyword($match), ' directive');
+
+ return $handler->expr([ include => $name, $args ])
+ || $self->error($handler->error());
+ }
+
+The parse() method makes two calls to the parser object, the first
+to parse the template name (parse_name()) and the second to parse a
+list of zero or more parameters (parse_params()).
+
+=head2 Parsing Text
+
+TODO: The methods all expect to be passed a reference to a text string. We
+pass references around because they're much faster and avoid the constant
+copying of strings.
+
+ sub parse_thingy {
+ my ($self, $textref) = @_;
+
+ # ...
+ }
+
+TODO: Passing around a reference to a single string allows the methods
+to use the global matching position provided by Perl's regular
+expression engine to keep track of the current parsing position in the
+string.
+
+TODO: must use \G to search from current position, /g flag to enable
+global matching and also /c flag to not reset position if it fails.
+This is required for backtracking. The /x flag is also used in this
+example to allow us to embed whitespace in the regex to make it easier
+to read.
+
+ if ($$textref =~ / \G ... /cgx) {
+ ...
+ }
+
+TODO: methods must also make sure they save and reset the regex position
+if they consume any text but then later decide to backtrack and decline.
+For example, parse_assign() looks for a variable, an assignment, then an
+expression.
+
+ sub parse_assign {
+ my ($self, $textref) = @_;
+
+ # save string position in case we need to backtrack
+ my $pos = pos $$textref;
+
+ # look for a variable
+ if ($var = $self->parse_variable($textref)) {
+
+ # then look for a '=' or '=>'
+ if ($$textref =~ /$self->{ assign }/cg) {
+ $op = $1;
+
+ # now get the expression on the RHS of the '=' or '=>'
+ $value = $self->parse_expression($textref)
+ || return $self->unexpected($textref, "after '$op'");
+
+ # success!
+ return [ assign => $var, $value ];
+ }
+ }
+
+ # rewind string position to start of variable
+ pos $$textref = $pos;
+
+ return $self->decline('not an assignment');
+ }
+
+
+TODO: It first saves position in $pos, then calls parse_variable()
+which (assuming it succeeds) advances the global match position. Then
+we look for a '=' (or '=>'). If we find it then all is well and good
+and we can go on to look for the following expression. But if not, we
+need to rewind back to the position before we consumed the first
+variable before we decline.
+
+TODO: This is an important requirement for TT implicit GET and SET
+directives. A tag can contain the following:
+
+ [% foo = 10 %]
+ [% foo %]
+
+TODO: We first call parse_assign() (which will match the first
+example) and if that declines, we then call parse_expression()
+(which will match the second). The parse_assign() method will
+happily consume the variable 'foo' in the second directive,
+but when it doesn't find a '=' following it, as in the first
+directive, it realises that it's not an assignment after all
+and rewinds back to the start of 'foo' to let parse_expression()
+have a go. This will correctly identify foo as a variable and
+all will be good.
+
+=head2 Structured Expressions
+
+The parser methods return a reference to a list containing a
+structured expression.
+
+TODO: A structured expression is...
+
+TODO: examples:
+
+ 3.14 [ number => 3.14 ]
+ header [ ident => 'header' ]
+ 'header' [ squote => 'header' ]
+ "header" [ dquote => ['header'] ]
+
+TODO: First item is type, followed by any further arguments relevant to that
+type...just like Lisp. Explain above example.
+
+TODO: structured expressions can be nested indefinitely. For example, double
+quoted string can contain variables. Argument to dquote sexpr is a list of
+items in the string, including literal text and variables. A variable looks
+like this:
+
+ [ variable => [@nodes] ]
+
+TODO: Where each node in @nodes is part of the variable delimited by
+the dot operator. So there are three nodes in "foo.bar(20).baz(30,
+40)" and each is a reference to a list containing the name and
+argument list, or 0 if there are no arguments.
+
+ foo [ [ident => 'foo'], 0 ]
+ bar(20) [ [ident => 'bar'], [ [number => 20] ] ]
+ baz(30, 40) [ [ident => 'baz'], [ [number => 30], [number=>40] ] ]
+
+=head2 Parser Errors
+
+If the parser encounters a syntactical error in any of the constructs
+it is parsing, then it will immediately throw a parser exception (a
+Template::Exception object) using Perl's die().
+
+For example, the parse_expression() method expects a term or further
+expression to follow after any binary operator (e.g. C<+>). If it
+doesn't find one then a syntax error has occurred which is reported
+immediately.
-# TODO
+ [% a + %]
+This example would result in an exception being thrown with the message:
+
+ unexpected end of directive tag after '+'
+
+See the description of the error() method below for further
+details.
+
+=head2 Parser Declines
+
+The parser may also decline to parse a particular language element.
+This happens when it is expecting to find one thing but instead finds
+something else. As long as this doesn't constitute a syntax error,
+the parser will return undef, via a call to its own decline() method.
+The global match position, if modified by the method, will be returned
+to its original position, effectively backtracking a match to allow
+the caller to go on to try something else.
+
+This is typically used as shown in the following example:
+
+ # we're expecting a thingy, doodah or bifta
+ if ($expr = $parser->parse_thingy(\$text)) {
+ print "got a thingy!\n";
+ }
+ elsif ($expr = $parser->parse_doodah(\$text)) {
+ print "got a doodah!\n";
+ }
+ elsif ($expr = $parser->parse_bifta(\$text)) {
+ print "got a bifta!\n";
+ }
+ else {
+ die "not a thingy, doodah or bifta";
+ }
+
+At any point in the above example, a syntax error in parse_thingy(),
+parse_doodah() or parse_bifta() will be thrown immediately via die().
+
+
=head1 METHODS
=head2 new()
+
+# TODO: constructor method inherited from Template::Base.
+
+The following configuration options may be provided:
+
+=head3 tag_end
+
+A string or regular expression indicating the token used to mark the
+end of embedded tags within a template. This allows the parser to
+identify when the current tag ends.
+
+ my $parser = Template::Parser->new( tag_end => qr/%]/ );
+
+TODO: more on this.
+
+=head3 directives
+
+TODO: a reference to a hash of which the keys are reserved directive
+keywords (it doesn't matter what the values are as long as they contain
+a true value, but typically they are Template::Directive objects or
+class names). The parser uses these to identify reserved words that
+typically indicate the end of one directive and the start of the next.
+
+TODO: for example, the FOREACH keyword indicates the end of the
+parameter list for a preceding INCLUDE directive, and the start
+of the FOREACH directive, here being used in side-effect notation.
+
+ [% INCLUDE header
+ title = 'Hello World'
+ author = 'Arthur Dent'
+ FOREACH x IN y
+ %]
+
+Here the parse_params() method is responsible for parsing the
+parameter list following the C<header> template name in the INCLUDE
+directive. It continues through C<title>, and C<author> but then
+recognises C<FOREACH> as a reserved keyword, so stops what it is doing
+and returns a list of the two parameters found. The C<FOREACH>
+keyword is then left at the global regex position ready for dispatching
+the Template::Directive::Foreach module to parse it.
+
+=head2 parse_expression(\$text)
+
+This method parses a Template Toolkit expression. An expression is
+comprised of one or more terms (or sub-expressions) combined with
+unary, binary or tertiary operators.
+
+For example:
+
+ items.size && ! no_items ? items.size + 1 : "no items"
+
+This expression shows examples of the unary operator C<!>, the binary
+operators C<&&> and C<+>, and the tertiary operator(s) C<?:>
+
+The unary operators are C<!> (not), C<+> (positive) and C<->
+(negative), as shown in the following examples:
+
+ !foo
+ +foo
+ -foo
+
+The binary operators fall into three groups: mathematical, comparison
+and logical (boolean). The mathematical operators are the usual C<+>,
+C<->, C<*> and C</> for addition, subtraction, multiplication and
+division, and C<%> for returning the modulus (remainder) after
+division.
+
+ 20 + 10 # 30
+ 20 - 10 # 10
+ 20 * 10 # 200
+ 20 / 10 # 2
+ 20 % 10 # 0 (20 / 10 = 2 with a remainder of 0)
+
+The comparison operators are C<==> (equal to), C<!=> (not equal to),
+C<E<lt>> (less than), C<E<gt>> (greater than), C<E<lt>=> (less than or
+equal to), and C<E<gt>=> (greater than or equal to). They return a
+true (1) or false (0) result.
+
+ 20 == 10 # 0
+ 20 != 10 # 1
+ 20 < 10 # 0
+ 20 > 10 # 1
+ 20 <= 10 # 0
+ 20 >= 10 # 1
+
+The logical operators are C<&&> (and) and C<||> (or). They can also
+be written out as C<and> and C<or>, respectively.
+
+ a && b
+ a and b
+ a || b
+ a or b
+
+The comparison and logical operators are most commonly used in IF directives.
+For example:
+
+ [% IF user.registered && user.age >= 18 %]
+ Welcome to the "Truth About Santa" web site!
+ [% ELSE %]
+ Sorry, this site is for grown-ups only!
+ [% END %]
+
+The tertiary C<?:> operator provides a more succinct form of this construct.
+
+ [% expr ? then : else %]
+
+If C<expr> is true the C<then> expression is evaluated, otherwise the
+C<else> expression is evaluated.
+
+ [% user.registered && user.age >= 18
+ ? 'Welcome to the "Truth About Santa" web site'
+ : 'Sorry, this site is for grown-ups only!'
+ %]
+
+If the user is registered and they are at least 18 years of age, then
+a welcoming message is displayed. Otherwise we send them away.
+
+Whitespace (including newlines) and comments within expressions are
+ignored, as this rather contrived example demonstrates.
-# TODO
+ [% user.registered # they must be registered so that
+ && # we know their age, and then we
+ user.age # check that they are at least
+ >= # 18 years old, because this
+ 18 # content is for grown-ups only
+ ? 'Welcome to the "Truth About Santa" web site'
+ : 'Sorry, this site is for grown-ups only!'
+ %]
-=head2 missing(\$text, $value, $message)
+Expressions are parsed left-to-right and rely on the underlying precedence
+of Perl's operators. For example, Perl's C<*> operator has a higher
+precedence than C<+>, so the following expression gives the answer 55
+(5 + 50) rather than 100 (10 * 10).
-Error checking and reporting method which raises an error based on the
-value of the second argument, $value. It is called by various parsing
-method as shown in the following example:
+ 5 + 5 * 10
- if ($$textref =~ / \G \$ /cg) {
- $self->debug("parsing variable\n") if $DEBUG;
- $value = $self->parse_variable($textref)
- || return $self->missing( $textref, $value,
- "missing variable after '\$'");
+In effect, it is evaluated as if written:
+
+ 5 + (5 * 10)
+
+You can enclose an expression or part of it in parentheses to indicate
+which part(s) should be evaluated first.
+
+ (5 + 5) * 10
+
+In this example, the result of evaluating the expression is 100.
+
+The value returned from parse_expression() is a reference to an array
+containing a structured expression. In its simplest form, this will
+be a single term returned by the parse_term() method. For example, an
+expression containing the number 10 as a single term will generate the
+following structured expression.
+
+ [ number => 10 ]
+
+Unary operators generate a structured expression of the following form.
+
+ [ unary => $op, $expr ]
+
+Here C<$op> denotes an operator, e.g. C<->, and C<$expr> an expression,
+typically a single term. For example, the expression "-10" yields:
+
+ [ unary => '-', [number=>10] ]
+
+Binary operators generate a structured expression containing a sequence
+of one or more expressions interspersed with operators.
+
+ [ binary => $expr, $op, $expr, $op, $expr, ... ]
+
+For example, "5 + 5 * 10" yields:
+
+ [ binary => [number =>5], '+', [number=>5], '*', [number=>10] ]
+
+The tertiary operator generates a structured expression containing
+three expressions. The first is the expression for evaluation, the
+second is for a true result, and the third for false.
+
+ [ tertiary => $expr, $expr, $expr ]
+
+For example, "10 ? 20 : 30" yields:
+
+ [ tertiary => [number=>10], [number=>20], [number=>30] ]
+
+=head2 parse_term(\$text)
+
+This method parses a single term. A term can be a number, string,
+list, quoted list, hash, variable or a parenthesised assignment or
+sub-expression.
+
+Numbers include integers, real numbers, hexadecimal numbers, and those
+expressed in exponential notation. Positive and negative numbers are
+both recognised.
+
+ -10 # negative number
+ 10 # integer
+ +10 # explicitly positive number
+ 3.14 # real number
+ 0xff # hex number
+ 1.23e4 # exponential notation
+
+Strings can be single or double quoted. Double quoted strings
+may have variables embedded within them as C<$var> or C<${var}>.
+
+ 'hello world'
+ "hello $world"
+ "hello ${world}"
+
+Lists are constructed using square brackets and may contain any
+number of expressions separated by whitespace, a comma or both.
+
+ [10 20 30] # whitespace delimited
+ [10,20,30] # comma delimited
+ [ 10, 20, 30 ] # comma and whitespace
+ [ a ? b : c, d, e ] # first item is an expression
+
+Quoted lists are provided as a convenience for defining a list of
+quoted strings. It works just like Perl's C<qw()> syntax, and allows
+you to use any combination of parentheses. Items are split on
+whitespace, with any leading and trailing whitespace being ignored.
+
+ qw[foo bar baz]
+ qw( foo bar baz )
+ qw{ foo bar baz }
+ qw< foo
+ bar
+ baz
+ >
+
+The above examples are all equivalent to:
+
+ ['foo', 'bar', 'baz']
+
+Hash arrays are constructed using curly braces and include a sequence
+of zero or more key/value pairs. The keys should be simple identifiers
+or quoted strings. The values are expressions of arbitrary complexity.
+
+ { a => 10,
+ b => 20,
+ c => 30 + 40,
}
+
+The key and value in each pair are separated by C<=> or C<=E<gt>>, with
+any amount of intervening whitespace. Pairs are separated from one
+another by whitespace, an optional comma, or both.
+
+Variables consist of one or more identifiers (or certain other
+symbols, e.g. interpolated variable names) separated by dot operators.
+Each node (separated by a dot) may contain a parenthesised list of
+named parameters or arguments, each of which can be an expression of
+arbitrary complexity.
+
+ foo
+ foo.bar
+ foo.bar.baz
+ foo(10).bar(20, 30)
+ foo(a || b).bar(x ? y : z)
+ foo(a=10 b=20).bar( x => 30, y => 40 )
+
+In addition to variables with dot operators, the parse_term() method
+also recognises literal values followed by dot operators.
+
+ # single quoted string with length vmethod
+ 'some text'.length
+
+ # anonymous hash with interpolated index key
+ { E_NO_PONY => 'No Pony!',
+ E_NO_BUFFY => 'No Buffy!'
+ }.$errno
+
+These yield a structured expression as for normal variables, but with
+the name of the first variable node set to:
+
+ [ root => $value ]
+
+For example:
+
+ 'some text'.length
+
+Generates the following:
+
+ [ variable => [ [ # first node: root squote node, no args
+ [ root => [squote=>'some text'] ], 0,
+ ],
+ [ # second node: name 'length', no args
+ [ ident => 'length' ], 0,
+ ]
+ ]
+ ]
+
+Finally, a term can also be comprised of a parenthesised assignment
+(which returns the assigned value) or a sub-expression.
+
+ (x + y) * z
+ x = (y = 10)
+
+A parenthesised expression yields a structured expression of the form:
+
+ [ parens => $expr ]
+
+=head2 parse_ident(\$text)
+
+Parses a simple, unquoted identifier comprised of one or more word
+characters (alphanumerics and underscore).
+
+ foo
+ foo_bar
+
+If the identifier matches a directive keyword defined by the C<directives>
+configuration option, then the method will decline.
+
+Returns a structured expression of the form:
+
+ [ ident => $identifier ]
+
+For example:
+
+ [ ident => 'foo' ]
+
+=head2 parse_number(\$text)
+
+Parses a number, handling integers, real numbers, hexadecimal numbers,
+and those expressed in exponential notation. Positive and negative
+numbers are both recognised.
+
+ -10 # negative number
+ 10 # integer
+ +10 # explicitly positive number
+ 3.14 # real number
+ 0xff # hex number
+ 1.23e4 # exponential notation
+
+Numbers yield a structured expression of the general form:
+
+ [ number => $number ]
+
+For example:
+
+ [ number => -10 ]
+ [ number => 10 ]
+ [ number => +10 ]
+ [ number => 3.14 ]
+ [ number => 0xff ]
+ [ number => 1.23e4 ]
+
+=head2 parse_squote(\$text)
+
+Parses a single quoted string:
+
+ 'hello world'
+
+A structured expression is returned of the form:
+
+ [ squote => $text ]
+
+For example:
+
+ [ squote => 'hello world' ]
+
+=head2 parse_dquote(\$text)
+
+Parses a double quoted string that may contain variables embedded
+within it as C<$var> or C<${var}>.
+
+ "hello $world"
+ "hello ${world}"
+
+The expression generated by a double quoted string contains a list of
+items within the string.
+
+ [ dquote => \@items ]
+
+Each item is either a plain text sequence or a reference to an array
+containing a structured expression for a variable.
+
+ [ dquote => [
+ 'hello',
+ [ variable => [[[ident=>'world'], 0]] ]
+ ]
+ ]
+
+=head2 parse_string(\$text)
+
+Parses the contents of a double quoted string. This does most of the
+dirty work for parse_dquote().
+
+=head2 parse_name(\$text)
+
+TODO: this method needs work. should handle name+name+name and other
+stuff.
+
+Parses a template name, or one of the valid tokens that can be substituted
+for it (e.g. a variable).
+
+ header
+ index.html
+ 'index.html'
+ "index.html"
+ "my$file"
+ $filename
+
+TODO: more docs on this
+
+=head2 parse_variable(\$text)
+
+TODO: method for parsing a variable. hands over to parse_varnodes.
+
+=head2 parse_varnodes(\$text)
+
+TODO: method for parsing a sequence of variable nodes delimited by dots.
+
+=head2 parse_varnode(\$text)
+
+TODO: method for parsing a single variable node and any arguments
+
+=head2 parse_list(\$text)
+
+Parses the contents of an anonymous list definition. Lists are
+constructed using square brackets and may contain any number of
+expressions separated by whitespace, a comma or both.
+
+For example:
-If C<$value> is undefined then it indicates that a fatal error
-occurred in the parse_variable() method, and one which has already
-been reported via a previous call to $self->error(). In this case,
-the missing() method does nothing and simply returns undef.
+ [10 20 30] # whitespace delimited
+ [10,20,30] # comma delimited
+ [ 10, 20, 30 ] # comma and whitespace
+ [ a ? b : c, d, e ] # first item is an expression
-If $value is 0 then it indicates that the parse_variable() method
-declined to parse a variable (i.e. there wasn't one in the input
-stream). For example, if <$$textref> contains C<$@> then the error
-reported would be:
+Lists can also contain embedded comments.
- missing variable after '$' (got '@')
+ [ 10 # the first number
+ 20 # the second number
+ ]
+Comments begin with a C<#> and continue to the end of the line. You'll
+need to make sure that the closing C<]> goes on a separate line. The
+parser won't recognise it if it comes at the end of a comment line.
+
+ # this doesn't work!
+ [ 10 # a comment ]
+
+The structured expression generated for a list contains a reference to
+a list of the items defined within it.
+
+ [ list => \@items ]
+
+For example:
+
+ [ list => [ [number=>10], [number=>20], [number=>30] ] ]
+
+=head2 parse_qwlist(\$text)
+
+This method parses a quoted list. These are provided as a convenience
+for defining a list of quoted strings. It works just like Perl's
+C<qw()> syntax, and allows you to use any pair of parentheses: square,
+round, curly or angle brackets.
+
+ qw[foo bar baz]
+ qw(foo bar baz)
+ qw{foo bar baz}
+ qw<foo bar baz>
+
+Items are split on whitespace, with any leading and trailing whitespace
+being ignored.
+
+ qw[foo bar baz]
+ qw[ foo bar baz ]
+ qw[ foo
+ bar
+ baz
+ ]
+
+All of the above examples result in an expression equivalent to:
+
+ ['foo', 'bar', 'baz']
+
+The structured expression returned for a quoted list is of the form:
+
+ [ qwlist => $left, \@list, $right ]
+
+The C<$left> and C<$right> items provide the parentheses used to quote
+the list. Sitting in between is a reference to a list containing the
+items split from the list.
+
+For example:
+
+ [ qwlist => '[', ['foo', 'bar', 'baz'], ']' ]
+
+=head2 parse_hash(\$text)
+
+This method parses the contents of an anonymous hash array definition.
+
+Hash arrays are constructed using curly braces and include a sequence
+of zero or more key/value pairs. The keys should be simple identifiers
+or quoted strings. The values are expressions of arbitrary complexity.
+
+ { a => 10,
+ b => 20,
+ c => 30 + 40,
+ }
+
+The key and value in each pair are separated by C<=> or C<=E<gt>>, with
+any amount of intervening whitespace. Pairs are separated from one
+another by whitespace, an optional comma, or both.
+
+ {a=10 b=20}
+ {a=10,b=20}
+ {a=10, b=20}
+ { a = 10, b = 20 }
+ { a => 10, b => 20 }
+
+Hash arrays can also contain embedded comments, as per list definitions.
+
+ { # let's define a hash
+ a # this is the first key
+ => # and we set it to
+ 10 # ten
+ b=20 # ...etc...
+ }
+
+We don't recommend adding this kind of verbose running commentary to
+your hash definitions, but it does at least illustrate the point that
+comments are allowed anywhere within a hash definition.
+
+The structured expression returned is of the form:
+
+ [ hash => \@pairs ]
+
+Each of the items in the C<@pairs> list is a reference to a two element
+list containing structured expressions for a key and corresponding value.
+
+ [ hash => [
+ [ [ident=>'a'], [number=>10] ],
+ [ [ident=>'b'], [number=>20] ],
+ ]
+ ]
+
+=head2 parse_key(\$text)
+
+TODO: method for parsing a hash key. can be an unquoted identifier, a
+single or double quoted string, of variable node and any arguments
+
+=head2 parse_args(\$text)
+
+=head2 parse_params(\$text)
+
+=head2 parse_parens(\$text)
+
+=head2 parse_assign(\$text)
+
+=head2 parse_assign_expr(\$text)
+
+=head2 parse_ident_assign_expr(\$text)
+
+=head2 parse_ident_args(\$text)
+
+=head2 parse_whitespace(\$text)
+
+=head2 parse_semicolon(\$text)
+
+=head2 parse_comma(\$text)
+
+=head2 decline($message)
+
+This method is called by Template::Parser methods to indicate that
+the input text does not match the expected language element. It is
+called with one or more arguments (which are concatenated into a single
+string) which provide an indication of why the parser has declined.
+This message is stored internally for subsequent inspection via the
+error() method. The method returns undef.
+
+Template::Parser methods typically use it as shown in this pseudo-code
+example:
+
+ sub parse_thingy {
+ my ($self, $textref) = @_;
+
+ if ($$textref =~ / \G (some regex) /cgx) {
+ return [ thingy => $1 ];
+ }
+ else {
+ return $self->decline("not a thingy");
+ }
+ }
+
+The method could then be called like so:
+
+ my $thingy = $parser->parse_thingy(\$text)
+ || warn "parser declined: ", $parser->error();
+
+Note that any syntax errors will be thrown as exceptions via die(),
+with the result that the parse_thingy() method will not return. This
+is discussed further in the error() method below.
+
+=head2 error($message)
+
+This method is inherited from the Template::Base base class. When
+called with an argument (or arguments which are concatenated into a
+single string), it throws a parser exception (a Template::Exception
+object) containing the message passed as an argument (or arguments).
+It also stores the error message within the object.
+
+A Template::Parser method might use the method as follows:
+
+ sub parse_thingy {
+ my ($self, $textref) = @_;
+
+ if ($$textref =~ / \G (some regex) /cgx) {
+ return [ thingy => $1 ];
+ }
+ else {
+ return $self->error("syntax error in thingy");
+ }
+ }
+
+In contrast to the decline() method which returns undef, the error()
+method throws the exception using Perl's die() and never returns. To
+catch errors as well as declines, you should enclose the call to the
+parser method in an C<eval> block.
+
+ my $expr = eval {
+ $parser->parse_thingy(\$text)
+ || warn "parser declined: ", $parser->error();
+ };
+ if ($@) {
+ # exception is in $@ or accessible via $parser->error()
+ warn "parser error: $@\n";
+ }
+
+When called without any arguments, the method returns the error message
+most recently set by a call to error() (with arguments, of course) or
+decline().
+
+=head2 missing(\$text, $thing)
+
+This is a wrapper around the error() method which constructs an error
+message of the form "missing $thing (got 'blah')". The first argument
+is a reference to the current input text string. The method uses this
+to determine the token(s) following the current match point that have
+caused the parser to fail. The second argument (or arguments which
+are concatenated into a single string) provides a message indicating
+what is missing.
+
+Here's an example of it in use:
+
+ sub parse_hello {
+ my ($self, $textref) = @_;
+
+ if ($$textref =~ / \G HELLO /cgx) {
+ return [ message => 'Hello World' ];
+ }
+ else {
+ return $self->missing($textref, 'HELLO');
+ }
+ }
+
+When fed with the input string "GOODBYE", the method will throw
+an error with the message:
+
+ missing HELLO (got 'GOODBYE')
+
=head2 unexpected(\$text, $message)
-Error reporting method which generates a formatted error string and
-calls the error() method to report it. It is called by various
-parsing method as shown in the following example:
+Like the missing() method, this provides a wrapper around the error()
+method.
- $$textref =~ / \G foo /gcx
- || return $self->unexpected( $textref,
- " where 'foo' should be" );
+ sub parse_hello {
+ my ($self, $textref) = @_;
+
+ if ($$textref =~ / \G HELLO /cgx) {
+ return [ message => 'Hello World' ];
+ }
+ else {
+ return $self->unexpected($textref, "where 'HELLO' was expected");
+ }
+ }
-If C<$$textref> contains C<bar> instead of C<foo> then the match will
-fail and the method will return undef having set the internal error string
-to:
+When given the same "GOODBYE" input string as in our earlier example,
+the method will throw an error with the message:
- unexpected 'bar' where 'foo' should be
+ unexpected 'GOODBYE' where 'HELLO' was expected
-If C<$$textref> doesn't contain any further text after the current
-regex position then it will set the error to:
+If there is no further text following the current match position, then
+it will generate the following message:
- unexpected end of statement where 'foo' should be
+ unexpected end of statement where 'HELLO' was expected
=head2 next_char(\$text)
-Utility method which returns the character following the current
-regular expression position in the text string passed by reference
-as an argument.
+Returns the character following the current regular expression
+position in the text string passed by reference as an argument.
=head2 next_token(\$text)
-Utility method which returns the next whitespace delimited token
-following the current regular expression position in the text string
-passed by reference as an argument.
+Returns the next whitespace delimited token following the current
+regular expression position in the text string passed by reference as
+an argument.
=head2 next_text(\$text)
-Utility method which returns all of the text following the current
-regular expression position in the text string passed by reference as
-an argument.
+Returns all of the text following the current regular expression
+position in the text string passed by reference as an argument.
+
+=head2 next_chunk(\$text)
+
+Returns the text following the current regular expression position,
+truncated to a maximum length of 16 characters.
=head1 AUTHOR
@@ -1478,7 +2349,7 @@
=head1 VERSION
-$Revision: 1.15 $
+$Revision: 1.16 $
=head1 COPYRIGHT