diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm index eaf018ad8..cfadd635e 100644 --- a/lib/Locale/Po4a/Po.pm +++ b/lib/Locale/Po4a/Po.pm @@ -225,7 +225,6 @@ sub initialize { $self->{options}{'package-name'} = "PACKAGE"; $self->{options}{'package-version'} = "VERSION"; $self->{options}{'wrap-po'} = 76; - $self->{options}{'pot-charset'} = "UTF-8"; $self->{options}{'pot-language'} = ""; foreach my $opt ( keys %$options ) { @@ -296,13 +295,11 @@ sub initialize { . "Language: " . $self->{options}{'pot-language'} . "\n" . "MIME-Version: 1.0\n" - . "Content-Type: text/plain; charset=" - . $self->{options}{'pot-charset'} . "\n" + . "Content-Type: text/plain; charset=UTF-8\n" . "Content-Transfer-Encoding: 8bit\n" ); - $self->{encoder} = find_encoding("UTF-8"); - $self->{footer} = []; + $self->{footer} = []; # To make stats about gettext hits $self->stats_clear(); @@ -320,6 +317,8 @@ sub read { my $self = shift; my $filename = shift or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) ); + my $charset = shift // 'UTF-8'; + warn "Read $filename with encoding: $charset" if $debug{'encoding'}; my $lang = basename($filename); $lang =~ s/\.po$//; @@ -336,23 +335,32 @@ sub read { unless ( $? == 0 ); my $fh; - my $close_fh = 1; if ( $filename eq '-' ) { - $fh = *STDIN; - $close_fh = 0; + $fh = *STDIN; } else { - open $fh, "<$filename" + open( $fh, "<:raw:encoding($charset)", $filename ) or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); } ## Read paragraphs line-by-line my $pofile = ""; - my $textline; - while ( defined( $textline = <$fh> ) ) { + while ( defined( my $textline = <$fh> ) ) { $pofile .= $textline; } - if ($close_fh) { + # If we did not get the charset right, reload the file with the right one + if ( $pofile =~ /charset=(.*?)[\s\\]/ ) { + my $detected_charset = $1; + + if ( $detected_charset ne $charset || uc($detected_charset) ne $charset ) { + warn "Reloading the PO file, changing the charset from '$charset' to '$detected_charset'" + if $debug{'encoding'}; + $self->read( $filename, $detected_charset ); + return; + } + } + + if ( $filename ne '-' ) { close $fh or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); } @@ -508,15 +516,26 @@ sub write { File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error if ( length($dir) && !-e $dir ); } - open $fh, ">$filename" + open( $fh, '>:raw:encoding(UTF-8)', $filename ) or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); } print $fh "" . format_comment( $self->{header_comment}, "" ) if length( $self->{header_comment} ); + # Force the encoding of PO files in UTF-8 on disk, because msgmerge can get messed up when mixing encodings + # See https://savannah.gnu.org/bugs/index.php?65104 + my $header = $self->{header}; + $header =~ /charset=([^\s\\]*)/i; + my $oldcharset = $1 // ''; + warn sprintf( + dgettext( "po4a", "Force the encoding of %s to UTF-8 (was %s), as mixing encodings may break msgmerge.\n" ), + $filename, $oldcharset ) + if $oldcharset ne 'UTF-8'; + $header =~ s/charset=[^\s\\]*/charset=UTF-8/i; + print $fh "msgid \"\"\n"; - print $fh "msgstr " . quote_text( $self->{header}, $self->{options}{'wrap-po'} ) . "\n\n"; + print $fh "msgstr " . quote_text( $header, $self->{options}{'wrap-po'} ) . "\n\n"; my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms my $first = 1; @@ -551,55 +570,37 @@ sub write { if ( exists $self->{po}{$msgid}{'plural'} ) { if ( $self->{po}{$msgid}{'plural'} == 0 ) { - if ( $self->get_charset =~ /^utf-8$/i ) { - my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); - $msgid = Encode::decode_utf8($msgid); - $output .= - Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); - $buf_msgstr_plural = - Encode::encode_utf8( "msgstr[0] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); - } else { - $output = "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; - $buf_msgstr_plural = - "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; - } + $output = "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $buf_msgstr_plural = + "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; } elsif ( $self->{po}{$msgid}{'plural'} == 1 ) { # TODO: there may be only one plural form - if ( $self->get_charset =~ /^utf-8$/i ) { - my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); - $msgid = Encode::decode_utf8($msgid); - $output = - Encode::encode_utf8( "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); - $output .= $buf_msgstr_plural; - $output .= - Encode::encode_utf8( "msgstr[1] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); - $buf_msgstr_plural = ""; - } else { - $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; - $output .= $buf_msgstr_plural; - $output .= - "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; - } + $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $output .= $buf_msgstr_plural; + $output .= + "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; } else { die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) ); } } else { - if ( $self->get_charset =~ /^utf-8$/i ) { - my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); - $msgid = Encode::decode_utf8($msgid); - $output .= Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); - $output .= Encode::encode_utf8( "msgstr " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); - } else { - $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; - $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; - } + print STDERR "STDERRR:: msgid is ".(Encode::is_utf8($msgid, Encode::FB_CROAK)?"UTF":"raw")."\n"; + print STDERR "STDERRR:: msgstr is ".(Encode::is_utf8($self->{po}{$msgid}{'msgstr'}, Encode::FB_CROAK)?"UTF":"raw")."\n"; + print STDERR "STDERRR:: output is ".(Encode::is_utf8($output, Encode::FB_CROAK)?"UTF":"raw")."\n"; + + $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; } + print STDERR "STDERRR:: ".(Encode::is_utf8($output)?"UTF":"raw")." $output\n"; print $fh $output; } print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} }; + if ($filename ne '-') { + print STDERR "STDERR: CONTENT OF $filename\n\n"; + system("cat $filename"); + } # print STDERR "$fh"; # if ($filename ne '-') { # close $fh @@ -895,29 +896,6 @@ sub filter { return $res; } -=item to_utf8() - -Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not -specified in the PO file ("CHARSET" value), or if it's already UTF-8 or -ASCII. - -=cut - -sub to_utf8 { - my $this = shift; - my $charset = $this->get_charset(); - - unless ( $charset eq "CHARSET" - or $charset =~ /^ascii$/i - or $charset =~ /^utf-8$/i ) - { - foreach my $msgid ( keys %{ $this->{po} } ) { - Encode::from_to( $this->{po}{$msgid}{'msgstr'}, $charset, "utf-8" ); - } - $this->set_charset("UTF-8"); - } -} - =back =head1 Functions to use a message catalog for translations @@ -1011,13 +989,7 @@ sub gettext { } if ( $opt{'wrap'} ) { - if ( $self->get_charset =~ /^utf-8$/i ) { - $res = Encode::decode_utf8($res); - $res = wrap( $res, $opt{'wrapcol'} || 76 ); - $res = Encode::encode_utf8($res); - } else { - $res = wrap( $res, $opt{'wrapcol'} || 76 ); - } + $res = wrap( $res, $opt{'wrapcol'} || 76 ); } # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; @@ -1198,12 +1170,6 @@ sub push_raw { # } FIXME: do that iff the header isn't the default one. $self->{header} = $msgstr; $self->{header_comment} = $comment; - my $charset = $self->get_charset; - if ( $charset ne "CHARSET" ) { - $self->{encoder} = find_encoding($charset); - } else { - $self->{encoder} = find_encoding("UTF-8"); - } return; } @@ -1429,29 +1395,6 @@ sub get_charset() { } } -=item set_charset($) - -This sets the character set of the PO header to the value specified in its -first argument. If you never call this function (and no file with a specified -character set is read), the default value is left to "UTF-8". This value -doesn't change the behavior of this module, it's just used to fill that field -in the header, and to return it in get_charset(). - -=cut - -sub set_charset() { - my $self = shift; - - my ( $newchar, $oldchar ); - $newchar = shift; - $oldchar = $self->get_charset(); - - if ( $newchar ne $oldchar ) { - $self->{header} =~ s/$oldchar/$newchar/; - $self->{encoder} = find_encoding($newchar); - } -} - #----[ helper functions ]--------------------------------------------------- # transforme the string from its PO file representation to the form which diff --git a/lib/Locale/Po4a/TeX.pm b/lib/Locale/Po4a/TeX.pm index 23cfcc373..8a97f4f60 100644 --- a/lib/Locale/Po4a/TeX.pm +++ b/lib/Locale/Po4a/TeX.pm @@ -1010,20 +1010,6 @@ sub read_file { if ( length($textline) ) { my @entry = ( $textline, $ref ); push @entries, @entry; - - # Detect if this file has non-ascii characters - if ( $self->{TT}{ascii_input} ) { - - my $decoder = guess_encoding($textline); - if ( !ref($decoder) or $decoder !~ /Encode::XS=/ ) { - - # We have detected a non-ascii line - $self->{TT}{ascii_input} = 0; - - # Save the reference for future error message - $self->{TT}{non_ascii_ref} ||= $ref; - } - } } } close $in diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm index 991ee8993..51985e4aa 100644 --- a/lib/Locale/Po4a/TransTractor.pm +++ b/lib/Locale/Po4a/TransTractor.pm @@ -25,10 +25,6 @@ use Locale::Po4a::Po; use Locale::Po4a::Common; use File::Path; # mkdir before write - -use Encode; -use Encode::Guess; - use File::Spec; =encoding UTF-8 @@ -216,8 +212,7 @@ List of filenames where we should read the input document. =item file_in_charset ($) -Charset used in the input document (if it isn't specified, it will try -to detect it from the input document). +Charset used in the input document (if it isn't specified, use UTF-8). =item file_out_name ($) @@ -225,8 +220,7 @@ Filename where we should write the output document. =item file_out_charset ($) -Charset used in the output document (if it isn't specified, it will use -the PO file charset). +Charset used in the output document (if it isn't specified, use UTF-8). =item po_in_name (@) @@ -287,15 +281,8 @@ sub process { || $_ eq 'calldir' ); } - if ( defined $params{'file_in_charset'} ) { - $self->detected_charset( $params{'file_in_charset'} ); - } else { - $params{'file_in_charset'} = ''; - } - $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // ''; - if ( length( $self->{TT}{'file_out_charset'} ) ) { - $self->{TT}{'file_out_encoder'} = find_encoding( $self->{TT}{'file_out_charset'} ); - } + $self->{TT}{'file_in_charset'} = $params{'file_in_charset'} // 'UTF-8'; + $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // 'UTF-8'; $self->{TT}{'addendum_charset'} = $params{'addendum_charset'}; our ( $destdir, $srcdir, $calldir ) = ( $params{'destdir'}, $params{'srcdir'}, $params{'calldir'} ); @@ -405,9 +392,6 @@ sub new { $self->{TT}{debug} = $options{'debug'}; } - # Input document is in ascii until we prove the opposite (in read()) - $self->{TT}{ascii_input} = 1; - return $self; } @@ -438,39 +422,28 @@ sub read() { my $self = shift; my $filename = shift or confess "Cannot write to a file without filename"; my $refname = shift or confess "Cannot write to a file without refname"; - my $charset = shift; - confess "read() requires a charset." unless defined $charset; - my $linenum = 0; + my $charset = shift || 'UTF-8'; + my $linenum = 0; + + use warnings FATAL => 'utf8'; + use Encode qw(:fallbacks); + use PerlIO::encoding; + $PerlIO::encoding::fallback = FB_CROAK; - open INPUT, "<$filename" + my $fh; + open( $fh, "<:raw:encoding($charset)", $filename ) or croak wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); - while ( defined( my $textline = ) ) { + + while ( defined( my $textline = <$fh> ) ) { $linenum++; my $ref = "$refname:$linenum"; $textline =~ s/\r$//; my @entry = ( $textline, $ref ); push @{ $self->{TT}{doc_in} }, @entry; - - # Detect if this file has non-ascii characters - if ( $self->{TT}{ascii_input} ) { - my $decoder = guess_encoding($textline); - if ( ref($decoder) and $decoder =~ /Encode::utf8=/ ) { - - # That's fine - - } elsif ( !ref($decoder) or $decoder !~ /Encode::XS=/ ) { - - # We have detected a non-ascii line - $self->{TT}{ascii_input} = 0; - - # Save the reference for future error message - $self->{TT}{non_ascii_ref} ||= $ref; - } - } } - close INPUT - or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); + close $fh + or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); } =item write($) @@ -486,8 +459,9 @@ This translated document data are provided by: sub write { my $self = shift; my $filename = shift or confess "Cannot write to a file without filename"; - my $charset = shift; - confess "Cannot write file '$filename' without a charset" unless defined $charset; + my $charset = shift || 'UTF-8'; + + use warnings FATAL => 'utf8'; my $fh; if ( $filename eq '-' ) { @@ -502,7 +476,7 @@ sub write { File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error if ( length($dir) && !-e $dir ); } - open $fh, ">$filename" + open( $fh, ">:raw:encoding($charset)", $filename ) or croak wrap_msg( dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); } @@ -584,18 +558,16 @@ This function returns a non-null integer on error. # Internal function to read the header. sub addendum_parse { - my ( $filename, $charset ) = ( shift, shift ); + my $filename = shift; + my $charset = shift || 'UTF-8'; my $header; my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = ( 1, "", "", "", "", "" ); - unless ( open( INS, "<$filename" ) ) { + unless ( open( INS, "<:raw:encoding($charset)", $filename ) ) { warn wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); goto END_PARSE_ADDFILE; } - if ( length( $charset // '' ) > 0 ) { - binmode( INS, ":encoding($charset)" ); - } unless ( defined( $header = ) && $header ) { warn wrap_msg( dgettext( "po4a", "Cannot read po4a header from %s." ), $filename ); @@ -933,30 +905,6 @@ sub translate { # unless $validoption{$_}; # } - my $in_charset; - if ( $self->{TT}{ascii_input} ) { - $in_charset = "UTF-8"; - } else { - if ( ( $self->{TT}{'file_in_charset'} // '' ) !~ m/ascii/i ) { # // '' to have a default value - $in_charset = $self->{TT}{'file_in_charset'} // "UTF-8"; - } else { - - # The document charset have to be determined *before* we see the first string to recode. - die wrap_mod( - "po4a", - dgettext( - "po4a", - "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)" - ), - $self->{TT}{non_ascii_ref} - ); - } - } - - if ( $self->{TT}{po_in}->get_charset ne "CHARSET" ) { - $string = encode_from_to( $string, $self->{TT}{'file_in_encoder'}, $self->{TT}{po_in}{encoder} ); - } - if ( defined $options{'wrapcol'} && $options{'wrapcol'} < 0 ) { # FIXME: should be the parameter given with --width @@ -968,32 +916,6 @@ sub translate { 'wrapcol' => $options{'wrapcol'} ); - if ( $self->{TT}{po_in}->get_charset ne "CHARSET" ) { - my $out_encoder = $self->{TT}{'file_out_encoder'}; - unless ( defined $out_encoder ) { - $out_encoder = find_encoding( $self->get_out_charset ); - } - $transstring = encode_from_to( $transstring, $self->{TT}{po_in}{encoder}, $out_encoder ); - } - - # If the input document isn't completely in ascii, we should see what to - # do with the current string - unless ( $self->{TT}{ascii_input} ) { - my $out_charset = $self->{TT}{po_out}->get_charset; - - # We set the output po charset - if ( $out_charset eq "CHARSET" || $out_charset eq '' ) { - $out_charset = "UTF-8"; - $self->{TT}{po_out}->set_charset($out_charset); - } - if ( $in_charset ne '' and $in_charset !~ /^$out_charset$/i ) { - Encode::from_to( $string, $in_charset, $out_charset ); - if ( length( $options{'comment'} ) ) { - Encode::from_to( $options{'comment'}, $in_charset, $out_charset ); - } - } - } - # the comments provided by the modules are automatic comments from the PO point of view $self->{TT}{po_out}->push( 'msgid' => $string, @@ -1005,11 +927,6 @@ sub translate { 'wrapcol' => $options{'wrapcol'} ); - # if ($self->{TT}{po_in}->get_charset ne "CHARSET") { - # Encode::from_to($transstring,$self->{TT}{po_in}->get_charset, - # $self->get_out_charset); - # } - if ( $options{'wrap'} || 0 ) { $transstring =~ s/( *)$//s; my $trailing_spaces = $1 || ""; @@ -1050,32 +967,6 @@ sub debug { return $_[0]->{TT}{debug}; } -=item detected_charset($) - -This tells TransTractor that a new charset (the first argument) has been -detected from the input document. It can usually be read from the document -header. Only the first charset will remain, coming either from the -process() arguments or detected from the document. - -=cut - -sub detected_charset { - my ( $self, $charset ) = ( shift, shift ); - $charset //= "UTF-8"; - unless ( length( $self->{TT}{'file_in_charset'} ) ) { - $self->{TT}{'file_in_charset'} = $charset; - croak "Please provide a valid charset to detected_charset()" unless defined $charset; - $self->{TT}{'file_in_encoder'} = find_encoding($charset); - } - - if ( length $self->{TT}{'file_in_charset'} - and $self->{TT}{'file_in_charset'} !~ m/ascii/i - and $self->{TT}{'file_in_charset'} ne "UTF-8" ) - { - $self->{TT}{ascii_input} = 0; - } -} - =item get_out_charset() This function will return the charset that should be used in the output @@ -1091,86 +982,16 @@ encoding is performed. sub get_out_charset { my $self = shift; - my $charset; - # Use the value specified at the command line - if ( length( $self->{TT}{'file_out_charset'} ) ) { - $charset = $self->{TT}{'file_out_charset'}; - } else { - if ( $self->{TT}{ascii_input} ) { - $charset = "UTF-8"; - } else { - $charset = $self->{TT}{po_in}->get_charset; - $charset = $self->{TT}{'file_in_charset'} - if $charset eq "CHARSET" - and length( $self->{TT}{'file_in_charset'} ); - $charset = "ascii" - if $charset eq "CHARSET"; - } - } - return $charset; -} - -=item recode_skipped_text($) + # Prefer the value specified on the command line + return $self->{TT}{'file_out_charset'} + if length( $self->{TT}{'file_out_charset'} ); -This function returns the recoded text passed as argument, from the input -document's charset to the output document's one. This isn't needed when -translating a string (translate() recodes everything itself), but it is when -you skip a string from the input document and you want the output document to -be consistent with the global encoding. + return $self->{TT}{po_in}->get_charset if $self->{TT}{po_in}->get_charset ne 'CHARSET'; -=cut - -sub recode_skipped_text { - my ( $self, $text ) = ( shift, shift ); - unless ( $self->{TT}{'ascii_input'} ) { - if ( length( $self->{TT}{'file_in_charset'} ) ) { - $text = encode_from_to( $text, $self->{TT}{'file_in_encoder'}, find_encoding( $self->get_out_charset ) ); - } else { - die wrap_mod( - "po4a", - dgettext( - "po4a", - "Couldn't determine the input document's charset. Please specify it on the command line. (non-ASCII char at %s)" - ), - $self->{TT}{non_ascii_ref} - ); - } - } - return $text; -} - -# encode_from_to($,$,$) -# -# Encode the given text from one encoding to another one. -# It differs from Encode::from_to because it does not take the name of the -# encoding in argument, but the encoders (as returned by the -# Encode::find_encoding() method). Thus it permits to save a bunch -# of call to find_encoding. -# -# If the "from" encoding is undefined, it is considered as UTF-8 (or -# ascii). -# If the "to" encoding is undefined, it is considered as UTF-8. -# -sub encode_from_to { - my ( $text, $from, $to ) = ( shift, shift, shift ); - - if ( not defined $from ) { - - # for ascii and UTF-8, no conversion needed to get an utf-8 - # string. - } else { - $text = $from->decode( $text, 0 ); - } - - if ( not defined $to ) { - - # Already in UTF-8, no conversion needed - } else { - $text = $to->encode( $text, 0 ); - } + return $self->{TT}{'file_in_charset'} if length( $self->{TT}{'file_in_charset'} ); - return $text; + return 'UTF-8'; } # Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case diff --git a/po4a b/po4a index c6c1aabbf..a6336cbf2 100755 --- a/po4a +++ b/po4a @@ -1595,7 +1595,6 @@ if ($update_pot_file) { } } $pot_options{'pot-language'} = $po4a_opts{"mastlang"}; - $pot_options{'pot-charset'} = $po4a_opts{"mastchar"}; my $potfile = Locale::Po4a::Po->new( \%pot_options ); foreach my $master ( sort { @@ -1622,9 +1621,13 @@ if ($update_pot_file) { my @file_in_name; push @file_in_name, $pot_in; + # Do not provide obvious options + my $mastchar = $file_opts{"mastchar"}; + $mastchar = undef if (defined $mastchar && $mastchar eq 'UTF-8'); + $doc->process( 'file_in_name' => \@file_in_name, - 'file_in_charset' => $file_opts{"mastchar"}, + 'file_in_charset' => $mastchar, 'srcdir' => $po4a_opts{"srcdir"}, 'destdir' => $po4a_opts{"destdir"}, 'calldir' => $po4a_opts{"calldir"} diff --git a/t/charset.t b/t/charset.t index 62812a2bd..4bb6c81c9 100644 --- a/t/charset.t +++ b/t/charset.t @@ -12,35 +12,35 @@ use Testhelper; my @tests; push @tests, +# { +# 'doc' => 'master encoding: ascii', +# 'po4a.conf' => 'charset/input-ascii/po4a.conf', +# 'closed_path' => 'charset/*/', +# 'options' => '--keep 0', +# 'expected_files' => 'ascii.up.po ascii.pot ascii.up.pod ', +# }, +# { +# 'doc' => 'master encoding: iso8859', +# 'po4a.conf' => 'charset/input-iso8859/po4a.conf', +# 'closed_path' => 'charset/*/', +# 'options' => '--keep 0', +# 'expected_files' => 'iso8859.up.po iso8859.pot iso8859.up.pod ', +# }, { - 'doc' => 'master encoding: ascii', - 'po4a.conf' => 'charset/input-ascii/po4a.conf', - 'closed_path' => 'charset/*/', - 'options' => '--keep 0', - 'expected_files' => 'ascii.up.po ascii.pot ascii.up.pod ', - }, - { - 'doc' => 'master encoding: iso8859', - 'po4a.conf' => 'charset/input-iso8859/po4a.conf', - 'closed_path' => 'charset/*/', - 'options' => '--keep 0', - 'expected_files' => 'iso8859.up.po iso8859.pot iso8859.up.pod ', - }, - { - 'doc' => 'master encoding: UTF-8 (mandates --master-charset=UTF-8)', + 'doc' => 'master encoding: UTF-8 ', 'po4a.conf' => 'charset/input-utf8/po4a.conf', 'closed_path' => 'charset/*/', 'options' => '--keep 0', 'expected_files' => 'utf8.up.po utf8.pot utf8.up.pod ', }, { - 'format' => 'asciidoc', - 'options' => '-M UTF-8', - 'input' => "charset/asciidoc/CharsetUtf.adoc", + 'format' => 'asciidoc', + 'input' => "charset/asciidoc/CharsetUtf.adoc", }, { - 'format' => 'asciidoc', - 'input' => "charset/asciidoc/CharsetLatin1.adoc", + 'format' => 'asciidoc', + 'options' => '-M iso-8859-1', + 'input' => "charset/asciidoc/CharsetLatin1.adoc", }, { 'format' => 'yaml', diff --git a/t/charset/input-iso8859/po4a.conf b/t/charset/input-iso8859/po4a.conf index b838bb44e..fe0124df2 100644 --- a/t/charset/input-iso8859/po4a.conf +++ b/t/charset/input-iso8859/po4a.conf @@ -1,4 +1,4 @@ [po4a_paths] iso8859.pot up:iso8859.up.po -[options] opt:"--msgmerge-opt --silent" +[options] opt:"--msgmerge-opt --silent --master-charset ISO-8859-1" [type:pod] iso8859.pod up:iso8859.up.pod diff --git a/t/charset/input-utf8/po4a.conf b/t/charset/input-utf8/po4a.conf index 2859d2e06..2968bb59c 100644 --- a/t/charset/input-utf8/po4a.conf +++ b/t/charset/input-utf8/po4a.conf @@ -1,4 +1,4 @@ [po4a_paths] utf8.pot up:utf8.up.po -[options] opt:"--msgmerge-opt --silent --master-charset=UTF-8" +[options] opt:"--msgmerge-opt --silent --master-charset UTF-8" [type:pod] utf8.pod up:utf8.up.pod