[Templates-cvs] cvs commit: Template2/lib/Template Document.pm Provider.pm

cvs@template-toolkit.org cvs@template-toolkit.org
Fri, 23 Jul 2004 13:49:53 +0100


cvs         04/07/23 12:49:53

  Modified:    lib/Template Document.pm Provider.pm
  Log:
  * applied Mark's patch for Unicode compatibility
  
  Revision  Changes    Path
  2.73      +27 -6     Template2/lib/Template/Document.pm
  
  Index: Document.pm
  ===================================================================
  RCS file: /template-toolkit/Template2/lib/Template/Document.pm,v
  retrieving revision 2.72
  retrieving revision 2.73
  diff -u -r2.72 -r2.73
  --- Document.pm	2004/01/30 19:32:25	2.72
  +++ Document.pm	2004/07/23 12:49:53	2.73
  @@ -20,7 +20,7 @@
   # 
   #----------------------------------------------------------------------------
   #
  -# $Id: Document.pm,v 2.72 2004/01/30 19:32:25 abw Exp $
  +# $Id: Document.pm,v 2.73 2004/07/23 12:49:53 abw Exp $
   #
   #============================================================================
   
  @@ -29,13 +29,28 @@
   require 5.004;
   
   use strict;
  -use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD );
  +use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD $UNICODE );
   use base qw( Template::Base );
   use Template::Constants;
   
  -$VERSION = sprintf("%d.%02d", q$Revision: 2.72 $ =~ /(\d+)\.(\d+)/);
  +$VERSION = sprintf("%d.%02d", q$Revision: 2.73 $ =~ /(\d+)\.(\d+)/);
   
  +BEGIN {
  +    # UNICODE is supported in versions of Perl from 5.008 onwards
  +    if ($UNICODE = $] > 5.007 ? 1 : 0) {
  +        if ($^V gt v5.8.0) {
  +            # utf8::is_utf8() available from Perl 5.8.1 onwards
  +            *is_utf8 = \&utf8::is_utf8;
  +        }
  +        elsif ($^V eq v5.8.0) {
  +            # use Encode::is_utf8() for Perl 5.8.0
  +            require Encode;
  +            *is_utf8 = \&Encode::is_utf8;
  +        }
  +    }
  +}
   
  +
   #========================================================================
   #                     -----  PUBLIC METHODS -----
   #========================================================================
  @@ -280,12 +295,18 @@
           ($fh, $tmpfile) = File::Temp::tempfile( 
               DIR => File::Basename::dirname($file) 
           );
  -	print $fh $class->as_perl($content) || die $!;
  -	close($fh);
  +        my $perlcode = $class->as_perl($content) || die $!;
  +        
  +        if ($UNICODE && is_utf8($perlcode)) {
  +            $perlcode = "use utf8;\n\n$perlcode";
  +            binmode $fh, ":utf8";
  +        }
  +        print $fh $perlcode;
  +        close($fh);
       };
       return $class->error($@) if $@;
       return rename($tmpfile, $file)
  -	|| $class->error($!);
  +        || $class->error($!);
   }
   
   
  
  
  
  2.81      +67 -3     Template2/lib/Template/Provider.pm
  
  Index: Provider.pm
  ===================================================================
  RCS file: /template-toolkit/Template2/lib/Template/Provider.pm,v
  retrieving revision 2.80
  retrieving revision 2.81
  diff -u -r2.80 -r2.81
  --- Provider.pm	2004/01/30 19:32:28	2.80
  +++ Provider.pm	2004/07/23 12:49:53	2.81
  @@ -27,7 +27,7 @@
   #
   #----------------------------------------------------------------------------
   #
  -# $Id: Provider.pm,v 2.80 2004/01/30 19:32:28 abw Exp $
  +# $Id: Provider.pm,v 2.81 2004/07/23 12:49:53 abw Exp $
   #
   #============================================================================
   
  @@ -36,7 +36,7 @@
   require 5.004;
   
   use strict;
  -use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS );
  +use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS $UNICODE );
   use base qw( Template::Base );
   use Template::Config;
   use Template::Constants;
  @@ -44,7 +44,7 @@
   use File::Basename;
   use File::Spec;
   
  -$VERSION  = sprintf("%d.%02d", q$Revision: 2.80 $ =~ /(\d+)\.(\d+)/);
  +$VERSION  = sprintf("%d.%02d", q$Revision: 2.81 $ =~ /(\d+)\.(\d+)/);
   
   # name of document class
   $DOCUMENT = 'Template::Document' unless defined $DOCUMENT;
  @@ -64,6 +64,27 @@
   
   $DEBUG = 0 unless defined $DEBUG;
   
  +# UNICODE is supported in versions of Perl from 5.007 onwards
  +$UNICODE = $] > 5.007 ? 1 : 0;
  +
  +my $boms = [
  +    'UTF-8'    => "\x{ef}\x{bb}\x{bf}",
  +    'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}",
  +    'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}",
  +    'UTF-16BE' => "\x{fe}\x{ff}",
  +    'UTF-16LE' => "\x{ff}\x{fe}",
  +];
  +
  +# hack so that 'use bytes' will compile on versions of Perl earlier than 
  +# 5.6, even though we never call _decode_unicode() on those systems
  +BEGIN { 
  +    if ($] < 5.006) { 
  +        package bytes; 
  +        $INC{'bytes.pm'} = 1; 
  +    } 
  +}
  +
  +
   #========================================================================
   #                         -- PUBLIC METHODS --
   #========================================================================
  @@ -394,6 +415,10 @@
   #   $self->{ PREFIX       } = $params->{ PREFIX };
       $self->{ PARAMS       } = $params;
   
  +    # look for user-provided UNICODE parameter or use default from package var
  +    $self->{ UNICODE      } = defined $params->{ UNICODE } 
  +                                    ? $params->{ UNICODE } : $UNICODE;
  +
       return $self;
   }
   
  @@ -628,6 +653,7 @@
           elsif (ref $name) {
               # ...or a GLOB or file handle...
               my $text = <$name>;
  +            $text = $self->_decode_unicode($text) if $self->{ UNICODE };
               $data = {
                   name => defined $alias ? $alias : 'input file handle',
                   text => $text,
  @@ -638,6 +664,7 @@
           elsif (-f $name) {
               if (open(FH, $name)) {
                   my $text = <FH>;
  +                $text = $self->_decode_unicode($text) if $self->{ UNICODE };
                   $data = {
                       name => $alias,
                       path => $name,
  @@ -967,7 +994,44 @@
       }
   }
   
  +
  +#------------------------------------------------------------------------
  +# _decode_unicode
  +#
  +# Decodes encoded unicode text that starts with a BOM and
  +# turns it into perl's internal representation
  +#------------------------------------------------------------------------
  +
  +
  +sub _decode_unicode
  +{
  +    use bytes;
  +
  +    my $self   = shift;
  +    my $string = shift;
  +
  +    # try all the BOMs in order looking for one (order is important
  +    # 32bit BOMs look like 16bit BOMs)
  +    my $count = 0;
  +    while ($count < @{ $boms }) {
  +        my $enc = $boms->[$count++];
  +        my $bom = $boms->[$count++];
  +        
  +        # does the string start with the bom?
  +        if ($bom eq substr($string, 0, length($bom))) {
  +            # decode it and hand it back
  +            require Encode;
  +            return Encode::decode($enc, substr($string, length($bom)), 1);
  +        }
  +    }
  +
  +    # no boms matched so it must be a non unicode string which we return as is
  +    return $string;
  +}
  +
  +
   1;
  +
   
   __END__