diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm
index eaf018ad8..cfadd635e 100644
--- a/lib/Locale/Po4a/Po.pm
+++ b/lib/Locale/Po4a/Po.pm
@@ -225,7 +225,6 @@ sub initialize {
$self->{options}{'package-name'} = "PACKAGE";
$self->{options}{'package-version'} = "VERSION";
$self->{options}{'wrap-po'} = 76;
- $self->{options}{'pot-charset'} = "UTF-8";
$self->{options}{'pot-language'} = "";
foreach my $opt ( keys %$options ) {
@@ -296,13 +295,11 @@ sub initialize {
. "Language: "
. $self->{options}{'pot-language'} . "\n"
. "MIME-Version: 1.0\n"
- . "Content-Type: text/plain; charset="
- . $self->{options}{'pot-charset'} . "\n"
+ . "Content-Type: text/plain; charset=UTF-8\n"
. "Content-Transfer-Encoding: 8bit\n"
);
- $self->{encoder} = find_encoding("UTF-8");
- $self->{footer} = [];
+ $self->{footer} = [];
# To make stats about gettext hits
$self->stats_clear();
@@ -320,6 +317,8 @@ sub read {
my $self = shift;
my $filename = shift
or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) );
+ my $charset = shift // 'UTF-8';
+ warn "Read $filename with encoding: $charset" if $debug{'encoding'};
my $lang = basename($filename);
$lang =~ s/\.po$//;
@@ -336,23 +335,32 @@ sub read {
unless ( $? == 0 );
my $fh;
- my $close_fh = 1;
if ( $filename eq '-' ) {
- $fh = *STDIN;
- $close_fh = 0;
+ $fh = *STDIN;
} else {
- open $fh, "<$filename"
+ open( $fh, "<:raw:encoding($charset)", $filename )
or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
}
## Read paragraphs line-by-line
my $pofile = "";
- my $textline;
- while ( defined( $textline = <$fh> ) ) {
+ while ( defined( my $textline = <$fh> ) ) {
$pofile .= $textline;
}
- if ($close_fh) {
+ # If we did not get the charset right, reload the file with the right one
+ if ( $pofile =~ /charset=(.*?)[\s\\]/ ) {
+ my $detected_charset = $1;
+
+ if ( $detected_charset ne $charset || uc($detected_charset) ne $charset ) {
+ warn "Reloading the PO file, changing the charset from '$charset' to '$detected_charset'"
+ if $debug{'encoding'};
+ $self->read( $filename, $detected_charset );
+ return;
+ }
+ }
+
+ if ( $filename ne '-' ) {
close $fh
or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
}
@@ -508,15 +516,26 @@ sub write {
File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error
if ( length($dir) && !-e $dir );
}
- open $fh, ">$filename"
+ open( $fh, '>:raw:encoding(UTF-8)', $filename )
or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
}
print $fh "" . format_comment( $self->{header_comment}, "" )
if length( $self->{header_comment} );
+ # Force the encoding of PO files in UTF-8 on disk, because msgmerge can get messed up when mixing encodings
+ # See https://savannah.gnu.org/bugs/index.php?65104
+ my $header = $self->{header};
+ $header =~ /charset=([^\s\\]*)/i;
+ my $oldcharset = $1 // '';
+ warn sprintf(
+ dgettext( "po4a", "Force the encoding of %s to UTF-8 (was %s), as mixing encodings may break msgmerge.\n" ),
+ $filename, $oldcharset )
+ if $oldcharset ne 'UTF-8';
+ $header =~ s/charset=[^\s\\]*/charset=UTF-8/i;
+
print $fh "msgid \"\"\n";
- print $fh "msgstr " . quote_text( $self->{header}, $self->{options}{'wrap-po'} ) . "\n\n";
+ print $fh "msgstr " . quote_text( $header, $self->{options}{'wrap-po'} ) . "\n\n";
my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
my $first = 1;
@@ -551,55 +570,37 @@ sub write {
if ( exists $self->{po}{$msgid}{'plural'} ) {
if ( $self->{po}{$msgid}{'plural'} == 0 ) {
- if ( $self->get_charset =~ /^utf-8$/i ) {
- my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} );
- $msgid = Encode::decode_utf8($msgid);
- $output .=
- Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" );
- $buf_msgstr_plural =
- Encode::encode_utf8( "msgstr[0] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" );
- } else {
- $output = "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
- $buf_msgstr_plural =
- "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
- }
+ $output = "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $buf_msgstr_plural =
+ "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
} elsif ( $self->{po}{$msgid}{'plural'} == 1 ) {
# TODO: there may be only one plural form
- if ( $self->get_charset =~ /^utf-8$/i ) {
- my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} );
- $msgid = Encode::decode_utf8($msgid);
- $output =
- Encode::encode_utf8( "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" );
- $output .= $buf_msgstr_plural;
- $output .=
- Encode::encode_utf8( "msgstr[1] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" );
- $buf_msgstr_plural = "";
- } else {
- $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
- $output .= $buf_msgstr_plural;
- $output .=
- "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
- }
+ $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= $buf_msgstr_plural;
+ $output .=
+ "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
} else {
die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) );
}
} else {
- if ( $self->get_charset =~ /^utf-8$/i ) {
- my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} );
- $msgid = Encode::decode_utf8($msgid);
- $output .= Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" );
- $output .= Encode::encode_utf8( "msgstr " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" );
- } else {
- $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
- $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
- }
+ print STDERR "STDERRR:: msgid is ".(Encode::is_utf8($msgid, Encode::FB_CROAK)?"UTF":"raw")."\n";
+ print STDERR "STDERRR:: msgstr is ".(Encode::is_utf8($self->{po}{$msgid}{'msgstr'}, Encode::FB_CROAK)?"UTF":"raw")."\n";
+ print STDERR "STDERRR:: output is ".(Encode::is_utf8($output, Encode::FB_CROAK)?"UTF":"raw")."\n";
+
+ $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
}
+ print STDERR "STDERRR:: ".(Encode::is_utf8($output)?"UTF":"raw")." $output\n";
print $fh $output;
}
print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} };
+ if ($filename ne '-') {
+ print STDERR "STDERR: CONTENT OF $filename\n\n";
+ system("cat $filename");
+ }
# print STDERR "$fh";
# if ($filename ne '-') {
# close $fh
@@ -895,29 +896,6 @@ sub filter {
return $res;
}
-=item to_utf8()
-
-Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not
-specified in the PO file ("CHARSET" value), or if it's already UTF-8 or
-ASCII.
-
-=cut
-
-sub to_utf8 {
- my $this = shift;
- my $charset = $this->get_charset();
-
- unless ( $charset eq "CHARSET"
- or $charset =~ /^ascii$/i
- or $charset =~ /^utf-8$/i )
- {
- foreach my $msgid ( keys %{ $this->{po} } ) {
- Encode::from_to( $this->{po}{$msgid}{'msgstr'}, $charset, "utf-8" );
- }
- $this->set_charset("UTF-8");
- }
-}
-
=back
=head1 Functions to use a message catalog for translations
@@ -1011,13 +989,7 @@ sub gettext {
}
if ( $opt{'wrap'} ) {
- if ( $self->get_charset =~ /^utf-8$/i ) {
- $res = Encode::decode_utf8($res);
- $res = wrap( $res, $opt{'wrapcol'} || 76 );
- $res = Encode::encode_utf8($res);
- } else {
- $res = wrap( $res, $opt{'wrapcol'} || 76 );
- }
+ $res = wrap( $res, $opt{'wrapcol'} || 76 );
}
# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
@@ -1198,12 +1170,6 @@ sub push_raw {
# } FIXME: do that iff the header isn't the default one.
$self->{header} = $msgstr;
$self->{header_comment} = $comment;
- my $charset = $self->get_charset;
- if ( $charset ne "CHARSET" ) {
- $self->{encoder} = find_encoding($charset);
- } else {
- $self->{encoder} = find_encoding("UTF-8");
- }
return;
}
@@ -1429,29 +1395,6 @@ sub get_charset() {
}
}
-=item set_charset($)
-
-This sets the character set of the PO header to the value specified in its
-first argument. If you never call this function (and no file with a specified
-character set is read), the default value is left to "UTF-8". This value
-doesn't change the behavior of this module, it's just used to fill that field
-in the header, and to return it in get_charset().
-
-=cut
-
-sub set_charset() {
- my $self = shift;
-
- my ( $newchar, $oldchar );
- $newchar = shift;
- $oldchar = $self->get_charset();
-
- if ( $newchar ne $oldchar ) {
- $self->{header} =~ s/$oldchar/$newchar/;
- $self->{encoder} = find_encoding($newchar);
- }
-}
-
#----[ helper functions ]---------------------------------------------------
# transforme the string from its PO file representation to the form which
diff --git a/lib/Locale/Po4a/TeX.pm b/lib/Locale/Po4a/TeX.pm
index 23cfcc373..8a97f4f60 100644
--- a/lib/Locale/Po4a/TeX.pm
+++ b/lib/Locale/Po4a/TeX.pm
@@ -1010,20 +1010,6 @@ sub read_file {
if ( length($textline) ) {
my @entry = ( $textline, $ref );
push @entries, @entry;
-
- # Detect if this file has non-ascii characters
- if ( $self->{TT}{ascii_input} ) {
-
- my $decoder = guess_encoding($textline);
- if ( !ref($decoder) or $decoder !~ /Encode::XS=/ ) {
-
- # We have detected a non-ascii line
- $self->{TT}{ascii_input} = 0;
-
- # Save the reference for future error message
- $self->{TT}{non_ascii_ref} ||= $ref;
- }
- }
}
}
close $in
diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm
index 991ee8993..51985e4aa 100644
--- a/lib/Locale/Po4a/TransTractor.pm
+++ b/lib/Locale/Po4a/TransTractor.pm
@@ -25,10 +25,6 @@ use Locale::Po4a::Po;
use Locale::Po4a::Common;
use File::Path; # mkdir before write
-
-use Encode;
-use Encode::Guess;
-
use File::Spec;
=encoding UTF-8
@@ -216,8 +212,7 @@ List of filenames where we should read the input document.
=item file_in_charset ($)
-Charset used in the input document (if it isn't specified, it will try
-to detect it from the input document).
+Charset used in the input document (if it isn't specified, use UTF-8).
=item file_out_name ($)
@@ -225,8 +220,7 @@ Filename where we should write the output document.
=item file_out_charset ($)
-Charset used in the output document (if it isn't specified, it will use
-the PO file charset).
+Charset used in the output document (if it isn't specified, use UTF-8).
=item po_in_name (@)
@@ -287,15 +281,8 @@ sub process {
|| $_ eq 'calldir' );
}
- if ( defined $params{'file_in_charset'} ) {
- $self->detected_charset( $params{'file_in_charset'} );
- } else {
- $params{'file_in_charset'} = '';
- }
- $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // '';
- if ( length( $self->{TT}{'file_out_charset'} ) ) {
- $self->{TT}{'file_out_encoder'} = find_encoding( $self->{TT}{'file_out_charset'} );
- }
+ $self->{TT}{'file_in_charset'} = $params{'file_in_charset'} // 'UTF-8';
+ $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // 'UTF-8';
$self->{TT}{'addendum_charset'} = $params{'addendum_charset'};
our ( $destdir, $srcdir, $calldir ) = ( $params{'destdir'}, $params{'srcdir'}, $params{'calldir'} );
@@ -405,9 +392,6 @@ sub new {
$self->{TT}{debug} = $options{'debug'};
}
- # Input document is in ascii until we prove the opposite (in read())
- $self->{TT}{ascii_input} = 1;
-
return $self;
}
@@ -438,39 +422,28 @@ sub read() {
my $self = shift;
my $filename = shift or confess "Cannot write to a file without filename";
my $refname = shift or confess "Cannot write to a file without refname";
- my $charset = shift;
- confess "read() requires a charset." unless defined $charset;
- my $linenum = 0;
+ my $charset = shift || 'UTF-8';
+ my $linenum = 0;
+
+ use warnings FATAL => 'utf8';
+ use Encode qw(:fallbacks);
+ use PerlIO::encoding;
+ $PerlIO::encoding::fallback = FB_CROAK;
- open INPUT, "<$filename"
+ my $fh;
+ open( $fh, "<:raw:encoding($charset)", $filename )
or croak wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
- while ( defined( my $textline = ) ) {
+
+ while ( defined( my $textline = <$fh> ) ) {
$linenum++;
my $ref = "$refname:$linenum";
$textline =~ s/\r$//;
my @entry = ( $textline, $ref );
push @{ $self->{TT}{doc_in} }, @entry;
-
- # Detect if this file has non-ascii characters
- if ( $self->{TT}{ascii_input} ) {
- my $decoder = guess_encoding($textline);
- if ( ref($decoder) and $decoder =~ /Encode::utf8=/ ) {
-
- # That's fine
-
- } elsif ( !ref($decoder) or $decoder !~ /Encode::XS=/ ) {
-
- # We have detected a non-ascii line
- $self->{TT}{ascii_input} = 0;
-
- # Save the reference for future error message
- $self->{TT}{non_ascii_ref} ||= $ref;
- }
- }
}
- close INPUT
- or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+ close $fh
+ or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
}
=item write($)
@@ -486,8 +459,9 @@ This translated document data are provided by:
sub write {
my $self = shift;
my $filename = shift or confess "Cannot write to a file without filename";
- my $charset = shift;
- confess "Cannot write file '$filename' without a charset" unless defined $charset;
+ my $charset = shift || 'UTF-8';
+
+ use warnings FATAL => 'utf8';
my $fh;
if ( $filename eq '-' ) {
@@ -502,7 +476,7 @@ sub write {
File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error
if ( length($dir) && !-e $dir );
}
- open $fh, ">$filename"
+ open( $fh, ">:raw:encoding($charset)", $filename )
or croak wrap_msg( dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
}
@@ -584,18 +558,16 @@ This function returns a non-null integer on error.
# Internal function to read the header.
sub addendum_parse {
- my ( $filename, $charset ) = ( shift, shift );
+ my $filename = shift;
+ my $charset = shift || 'UTF-8';
my $header;
my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = ( 1, "", "", "", "", "" );
- unless ( open( INS, "<$filename" ) ) {
+ unless ( open( INS, "<:raw:encoding($charset)", $filename ) ) {
warn wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
goto END_PARSE_ADDFILE;
}
- if ( length( $charset // '' ) > 0 ) {
- binmode( INS, ":encoding($charset)" );
- }
unless ( defined( $header = ) && $header ) {
warn wrap_msg( dgettext( "po4a", "Cannot read po4a header from %s." ), $filename );
@@ -933,30 +905,6 @@ sub translate {
# unless $validoption{$_};
# }
- my $in_charset;
- if ( $self->{TT}{ascii_input} ) {
- $in_charset = "UTF-8";
- } else {
- if ( ( $self->{TT}{'file_in_charset'} // '' ) !~ m/ascii/i ) { # // '' to have a default value
- $in_charset = $self->{TT}{'file_in_charset'} // "UTF-8";
- } else {
-
- # The document charset have to be determined *before* we see the first string to recode.
- die wrap_mod(
- "po4a",
- dgettext(
- "po4a",
- "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"
- ),
- $self->{TT}{non_ascii_ref}
- );
- }
- }
-
- if ( $self->{TT}{po_in}->get_charset ne "CHARSET" ) {
- $string = encode_from_to( $string, $self->{TT}{'file_in_encoder'}, $self->{TT}{po_in}{encoder} );
- }
-
if ( defined $options{'wrapcol'} && $options{'wrapcol'} < 0 ) {
# FIXME: should be the parameter given with --width
@@ -968,32 +916,6 @@ sub translate {
'wrapcol' => $options{'wrapcol'}
);
- if ( $self->{TT}{po_in}->get_charset ne "CHARSET" ) {
- my $out_encoder = $self->{TT}{'file_out_encoder'};
- unless ( defined $out_encoder ) {
- $out_encoder = find_encoding( $self->get_out_charset );
- }
- $transstring = encode_from_to( $transstring, $self->{TT}{po_in}{encoder}, $out_encoder );
- }
-
- # If the input document isn't completely in ascii, we should see what to
- # do with the current string
- unless ( $self->{TT}{ascii_input} ) {
- my $out_charset = $self->{TT}{po_out}->get_charset;
-
- # We set the output po charset
- if ( $out_charset eq "CHARSET" || $out_charset eq '' ) {
- $out_charset = "UTF-8";
- $self->{TT}{po_out}->set_charset($out_charset);
- }
- if ( $in_charset ne '' and $in_charset !~ /^$out_charset$/i ) {
- Encode::from_to( $string, $in_charset, $out_charset );
- if ( length( $options{'comment'} ) ) {
- Encode::from_to( $options{'comment'}, $in_charset, $out_charset );
- }
- }
- }
-
# the comments provided by the modules are automatic comments from the PO point of view
$self->{TT}{po_out}->push(
'msgid' => $string,
@@ -1005,11 +927,6 @@ sub translate {
'wrapcol' => $options{'wrapcol'}
);
- # if ($self->{TT}{po_in}->get_charset ne "CHARSET") {
- # Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,
- # $self->get_out_charset);
- # }
-
if ( $options{'wrap'} || 0 ) {
$transstring =~ s/( *)$//s;
my $trailing_spaces = $1 || "";
@@ -1050,32 +967,6 @@ sub debug {
return $_[0]->{TT}{debug};
}
-=item detected_charset($)
-
-This tells TransTractor that a new charset (the first argument) has been
-detected from the input document. It can usually be read from the document
-header. Only the first charset will remain, coming either from the
-process() arguments or detected from the document.
-
-=cut
-
-sub detected_charset {
- my ( $self, $charset ) = ( shift, shift );
- $charset //= "UTF-8";
- unless ( length( $self->{TT}{'file_in_charset'} ) ) {
- $self->{TT}{'file_in_charset'} = $charset;
- croak "Please provide a valid charset to detected_charset()" unless defined $charset;
- $self->{TT}{'file_in_encoder'} = find_encoding($charset);
- }
-
- if ( length $self->{TT}{'file_in_charset'}
- and $self->{TT}{'file_in_charset'} !~ m/ascii/i
- and $self->{TT}{'file_in_charset'} ne "UTF-8" )
- {
- $self->{TT}{ascii_input} = 0;
- }
-}
-
=item get_out_charset()
This function will return the charset that should be used in the output
@@ -1091,86 +982,16 @@ encoding is performed.
sub get_out_charset {
my $self = shift;
- my $charset;
- # Use the value specified at the command line
- if ( length( $self->{TT}{'file_out_charset'} ) ) {
- $charset = $self->{TT}{'file_out_charset'};
- } else {
- if ( $self->{TT}{ascii_input} ) {
- $charset = "UTF-8";
- } else {
- $charset = $self->{TT}{po_in}->get_charset;
- $charset = $self->{TT}{'file_in_charset'}
- if $charset eq "CHARSET"
- and length( $self->{TT}{'file_in_charset'} );
- $charset = "ascii"
- if $charset eq "CHARSET";
- }
- }
- return $charset;
-}
-
-=item recode_skipped_text($)
+ # Prefer the value specified on the command line
+ return $self->{TT}{'file_out_charset'}
+ if length( $self->{TT}{'file_out_charset'} );
-This function returns the recoded text passed as argument, from the input
-document's charset to the output document's one. This isn't needed when
-translating a string (translate() recodes everything itself), but it is when
-you skip a string from the input document and you want the output document to
-be consistent with the global encoding.
+ return $self->{TT}{po_in}->get_charset if $self->{TT}{po_in}->get_charset ne 'CHARSET';
-=cut
-
-sub recode_skipped_text {
- my ( $self, $text ) = ( shift, shift );
- unless ( $self->{TT}{'ascii_input'} ) {
- if ( length( $self->{TT}{'file_in_charset'} ) ) {
- $text = encode_from_to( $text, $self->{TT}{'file_in_encoder'}, find_encoding( $self->get_out_charset ) );
- } else {
- die wrap_mod(
- "po4a",
- dgettext(
- "po4a",
- "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)"
- ),
- $self->{TT}{non_ascii_ref}
- );
- }
- }
- return $text;
-}
-
-# encode_from_to($,$,$)
-#
-# Encode the given text from one encoding to another one.
-# It differs from Encode::from_to because it does not take the name of the
-# encoding in argument, but the encoders (as returned by the
-# Encode::find_encoding() method). Thus it permits to save a bunch
-# of call to find_encoding.
-#
-# If the "from" encoding is undefined, it is considered as UTF-8 (or
-# ascii).
-# If the "to" encoding is undefined, it is considered as UTF-8.
-#
-sub encode_from_to {
- my ( $text, $from, $to ) = ( shift, shift, shift );
-
- if ( not defined $from ) {
-
- # for ascii and UTF-8, no conversion needed to get an utf-8
- # string.
- } else {
- $text = $from->decode( $text, 0 );
- }
-
- if ( not defined $to ) {
-
- # Already in UTF-8, no conversion needed
- } else {
- $text = $to->encode( $text, 0 );
- }
+ return $self->{TT}{'file_in_charset'} if length( $self->{TT}{'file_in_charset'} );
- return $text;
+ return 'UTF-8';
}
# Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case
diff --git a/po4a b/po4a
index c6c1aabbf..a6336cbf2 100755
--- a/po4a
+++ b/po4a
@@ -1595,7 +1595,6 @@ if ($update_pot_file) {
}
}
$pot_options{'pot-language'} = $po4a_opts{"mastlang"};
- $pot_options{'pot-charset'} = $po4a_opts{"mastchar"};
my $potfile = Locale::Po4a::Po->new( \%pot_options );
foreach my $master (
sort {
@@ -1622,9 +1621,13 @@ if ($update_pot_file) {
my @file_in_name;
push @file_in_name, $pot_in;
+ # Do not provide obvious options
+ my $mastchar = $file_opts{"mastchar"};
+ $mastchar = undef if (defined $mastchar && $mastchar eq 'UTF-8');
+
$doc->process(
'file_in_name' => \@file_in_name,
- 'file_in_charset' => $file_opts{"mastchar"},
+ 'file_in_charset' => $mastchar,
'srcdir' => $po4a_opts{"srcdir"},
'destdir' => $po4a_opts{"destdir"},
'calldir' => $po4a_opts{"calldir"}
diff --git a/t/charset.t b/t/charset.t
index 62812a2bd..4bb6c81c9 100644
--- a/t/charset.t
+++ b/t/charset.t
@@ -12,35 +12,35 @@ use Testhelper;
my @tests;
push @tests,
+# {
+# 'doc' => 'master encoding: ascii',
+# 'po4a.conf' => 'charset/input-ascii/po4a.conf',
+# 'closed_path' => 'charset/*/',
+# 'options' => '--keep 0',
+# 'expected_files' => 'ascii.up.po ascii.pot ascii.up.pod ',
+# },
+# {
+# 'doc' => 'master encoding: iso8859',
+# 'po4a.conf' => 'charset/input-iso8859/po4a.conf',
+# 'closed_path' => 'charset/*/',
+# 'options' => '--keep 0',
+# 'expected_files' => 'iso8859.up.po iso8859.pot iso8859.up.pod ',
+# },
{
- 'doc' => 'master encoding: ascii',
- 'po4a.conf' => 'charset/input-ascii/po4a.conf',
- 'closed_path' => 'charset/*/',
- 'options' => '--keep 0',
- 'expected_files' => 'ascii.up.po ascii.pot ascii.up.pod ',
- },
- {
- 'doc' => 'master encoding: iso8859',
- 'po4a.conf' => 'charset/input-iso8859/po4a.conf',
- 'closed_path' => 'charset/*/',
- 'options' => '--keep 0',
- 'expected_files' => 'iso8859.up.po iso8859.pot iso8859.up.pod ',
- },
- {
- 'doc' => 'master encoding: UTF-8 (mandates --master-charset=UTF-8)',
+ 'doc' => 'master encoding: UTF-8 ',
'po4a.conf' => 'charset/input-utf8/po4a.conf',
'closed_path' => 'charset/*/',
'options' => '--keep 0',
'expected_files' => 'utf8.up.po utf8.pot utf8.up.pod ',
},
{
- 'format' => 'asciidoc',
- 'options' => '-M UTF-8',
- 'input' => "charset/asciidoc/CharsetUtf.adoc",
+ 'format' => 'asciidoc',
+ 'input' => "charset/asciidoc/CharsetUtf.adoc",
},
{
- 'format' => 'asciidoc',
- 'input' => "charset/asciidoc/CharsetLatin1.adoc",
+ 'format' => 'asciidoc',
+ 'options' => '-M iso-8859-1',
+ 'input' => "charset/asciidoc/CharsetLatin1.adoc",
},
{
'format' => 'yaml',
diff --git a/t/charset/input-iso8859/po4a.conf b/t/charset/input-iso8859/po4a.conf
index b838bb44e..fe0124df2 100644
--- a/t/charset/input-iso8859/po4a.conf
+++ b/t/charset/input-iso8859/po4a.conf
@@ -1,4 +1,4 @@
[po4a_paths] iso8859.pot up:iso8859.up.po
-[options] opt:"--msgmerge-opt --silent"
+[options] opt:"--msgmerge-opt --silent --master-charset ISO-8859-1"
[type:pod] iso8859.pod up:iso8859.up.pod
diff --git a/t/charset/input-utf8/po4a.conf b/t/charset/input-utf8/po4a.conf
index 2859d2e06..2968bb59c 100644
--- a/t/charset/input-utf8/po4a.conf
+++ b/t/charset/input-utf8/po4a.conf
@@ -1,4 +1,4 @@
[po4a_paths] utf8.pot up:utf8.up.po
-[options] opt:"--msgmerge-opt --silent --master-charset=UTF-8"
+[options] opt:"--msgmerge-opt --silent --master-charset UTF-8"
[type:pod] utf8.pod up:utf8.up.pod