[Templates-cvs] cvs commit: Template2/lib/Template Document.pm Provider.pm
cvs@template-toolkit.org
cvs@template-toolkit.org
Fri, 23 Jul 2004 13:49:53 +0100
cvs 04/07/23 12:49:53
Modified: lib/Template Document.pm Provider.pm
Log:
* applied Mark's patch for Unicode compatibility
Revision Changes Path
2.73 +27 -6 Template2/lib/Template/Document.pm
Index: Document.pm
===================================================================
RCS file: /template-toolkit/Template2/lib/Template/Document.pm,v
retrieving revision 2.72
retrieving revision 2.73
diff -u -r2.72 -r2.73
--- Document.pm 2004/01/30 19:32:25 2.72
+++ Document.pm 2004/07/23 12:49:53 2.73
@@ -20,7 +20,7 @@
#
#----------------------------------------------------------------------------
#
-# $Id: Document.pm,v 2.72 2004/01/30 19:32:25 abw Exp $
+# $Id: Document.pm,v 2.73 2004/07/23 12:49:53 abw Exp $
#
#============================================================================
@@ -29,13 +29,28 @@
require 5.004;
use strict;
-use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD );
+use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD $UNICODE );
use base qw( Template::Base );
use Template::Constants;
-$VERSION = sprintf("%d.%02d", q$Revision: 2.72 $ =~ /(\d+)\.(\d+)/);
+$VERSION = sprintf("%d.%02d", q$Revision: 2.73 $ =~ /(\d+)\.(\d+)/);
+BEGIN {
+ # UNICODE is supported in versions of Perl from 5.008 onwards
+ if ($UNICODE = $] > 5.007 ? 1 : 0) {
+ if ($^V gt v5.8.0) {
+ # utf8::is_utf8() available from Perl 5.8.1 onwards
+ *is_utf8 = \&utf8::is_utf8;
+ }
+ elsif ($^V eq v5.8.0) {
+ # use Encode::is_utf8() for Perl 5.8.0
+ require Encode;
+ *is_utf8 = \&Encode::is_utf8;
+ }
+ }
+}
+
#========================================================================
# ----- PUBLIC METHODS -----
#========================================================================
@@ -280,12 +295,18 @@
($fh, $tmpfile) = File::Temp::tempfile(
DIR => File::Basename::dirname($file)
);
- print $fh $class->as_perl($content) || die $!;
- close($fh);
+ my $perlcode = $class->as_perl($content) || die $!;
+
+ if ($UNICODE && is_utf8($perlcode)) {
+ $perlcode = "use utf8;\n\n$perlcode";
+ binmode $fh, ":utf8";
+ }
+ print $fh $perlcode;
+ close($fh);
};
return $class->error($@) if $@;
return rename($tmpfile, $file)
- || $class->error($!);
+ || $class->error($!);
}
2.81 +67 -3 Template2/lib/Template/Provider.pm
Index: Provider.pm
===================================================================
RCS file: /template-toolkit/Template2/lib/Template/Provider.pm,v
retrieving revision 2.80
retrieving revision 2.81
diff -u -r2.80 -r2.81
--- Provider.pm 2004/01/30 19:32:28 2.80
+++ Provider.pm 2004/07/23 12:49:53 2.81
@@ -27,7 +27,7 @@
#
#----------------------------------------------------------------------------
#
-# $Id: Provider.pm,v 2.80 2004/01/30 19:32:28 abw Exp $
+# $Id: Provider.pm,v 2.81 2004/07/23 12:49:53 abw Exp $
#
#============================================================================
@@ -36,7 +36,7 @@
require 5.004;
use strict;
-use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS );
+use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS $UNICODE );
use base qw( Template::Base );
use Template::Config;
use Template::Constants;
@@ -44,7 +44,7 @@
use File::Basename;
use File::Spec;
-$VERSION = sprintf("%d.%02d", q$Revision: 2.80 $ =~ /(\d+)\.(\d+)/);
+$VERSION = sprintf("%d.%02d", q$Revision: 2.81 $ =~ /(\d+)\.(\d+)/);
# name of document class
$DOCUMENT = 'Template::Document' unless defined $DOCUMENT;
@@ -64,6 +64,27 @@
$DEBUG = 0 unless defined $DEBUG;
+# UNICODE is supported in versions of Perl from 5.007 onwards
+$UNICODE = $] > 5.007 ? 1 : 0;
+
+my $boms = [
+ 'UTF-8' => "\x{ef}\x{bb}\x{bf}",
+ 'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}",
+ 'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}",
+ 'UTF-16BE' => "\x{fe}\x{ff}",
+ 'UTF-16LE' => "\x{ff}\x{fe}",
+];
+
+# hack so that 'use bytes' will compile on versions of Perl earlier than
+# 5.6, even though we never call _decode_unicode() on those systems
+BEGIN {
+ if ($] < 5.006) {
+ package bytes;
+ $INC{'bytes.pm'} = 1;
+ }
+}
+
+
#========================================================================
# -- PUBLIC METHODS --
#========================================================================
@@ -394,6 +415,10 @@
# $self->{ PREFIX } = $params->{ PREFIX };
$self->{ PARAMS } = $params;
+ # look for user-provided UNICODE parameter or use default from package var
+ $self->{ UNICODE } = defined $params->{ UNICODE }
+ ? $params->{ UNICODE } : $UNICODE;
+
return $self;
}
@@ -628,6 +653,7 @@
elsif (ref $name) {
# ...or a GLOB or file handle...
my $text = <$name>;
+ $text = $self->_decode_unicode($text) if $self->{ UNICODE };
$data = {
name => defined $alias ? $alias : 'input file handle',
text => $text,
@@ -638,6 +664,7 @@
elsif (-f $name) {
if (open(FH, $name)) {
my $text = <FH>;
+ $text = $self->_decode_unicode($text) if $self->{ UNICODE };
$data = {
name => $alias,
path => $name,
@@ -967,7 +994,44 @@
}
}
+
+#------------------------------------------------------------------------
+# _decode_unicode
+#
+# Decodes encoded unicode text that starts with a BOM and
+# turns it into perl's internal representation
+#------------------------------------------------------------------------
+
+
+sub _decode_unicode
+{
+ use bytes;
+
+ my $self = shift;
+ my $string = shift;
+
+ # try all the BOMs in order looking for one (order is important
+ # 32bit BOMs look like 16bit BOMs)
+ my $count = 0;
+ while ($count < @{ $boms }) {
+ my $enc = $boms->[$count++];
+ my $bom = $boms->[$count++];
+
+ # does the string start with the bom?
+ if ($bom eq substr($string, 0, length($bom))) {
+ # decode it and hand it back
+ require Encode;
+ return Encode::decode($enc, substr($string, length($bom)), 1);
+ }
+ }
+
+ # no boms matched so it must be a non unicode string which we return as is
+ return $string;
+}
+
+
1;
+
__END__