Jcode::CP932っぽいもの
Jcode::CP932っぽいもの
試しに作ってみた。基本的にはUnicodeとの変換をEncodeのCP932を使うように変更しただけです*1。
他にJcode-2.0以降に含まれるJcode::_Classic、Jcode::Constants、Jcode::Tr、Jcode::H2Zが必要。
EncodeのFallbackとかどうにかしたほうがいいんだろうなぁ、とか思いつつ。Jcodeのt/*が通るように調整中。
package Jcode::CP932;

# Jcode::CP932 - a Jcode::_Classic subclass whose EUC <-> UCS-2 / UTF-8
# converters are implemented with Encode's 'cp932' encoding.
#
# Each Unicode converter hops through Shift_JIS: the classic euc_sjis /
# sjis_euc routines handle the EUC side, and Encode::from_to() handles the
# cp932 <-> Unicode side.

use 5.008001;
use strict;
use warnings;

our $VERSION = '0.01';

use Carp;
use Encode;
use Exporter;

require Jcode::_Classic;

our ( @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $DEBUG );

@ISA    = qw(Jcode::_Classic Exporter);
@EXPORT = qw(jcode getcode);

# NOTE(review): $RCSID is listed for export but is not declared anywhere in
# this file -- confirm whether it is still wanted.
@EXPORT_OK   = qw($RCSID $VERSION $DEBUG);
%EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
$DEBUG       = 0;

use overload
    q("") => sub { $_[0]->euc },
    q(==) => sub { overload::StrVal( $_[0] ) eq overload::StrVal( $_[1] ) },
    q(.=) => sub { $_[0]->append( $_[1] ) },
    fallback => 1;

# Borrow the plain functions from Jcode::_Classic.  jcode() is defined locally
# below and load_module is stubbed out, so neither is aliased here.
for my $sub (qw/getcode convert/) {
    no strict 'refs';
    *{$sub} = \&{ 'Jcode::_Classic::' . $sub };
}

{
    # Deliberate override: make the classic load_module a no-op.
    no warnings 'redefine';
    *Jcode::_Classic::load_module = sub { };
}

# The JIS / Shift_JIS converters are reused unchanged from Jcode::_Classic.
for my $enc (qw/sjis jis/) {
    no strict 'refs';
    *{ "euc_" . $enc } = \&{ "Jcode::_Classic::" . "euc_" . $enc };
    *{ $enc . "_euc" } = \&{ "Jcode::_Classic::" . $enc . "_euc" };
}

# The Unicode converters go the other way: the CP932-based versions defined
# below are pushed into Jcode::_Classic's namespace.  These assignments may
# replace subs that already exist there, hence no warnings 'redefine'.
for my $enc (qw/ucs2 utf8/) {
    no strict 'refs';
    no warnings 'redefine';
    *{ "Jcode::_Classic::" . "euc_" . $enc } = \&{ "euc_" . $enc };
    *{ "Jcode::_Classic::" . $enc . "_euc" } = \&{ $enc . "_euc" };
}

# jcode(@args) - exported constructor shortcut; returns Jcode::CP932->new(@args).
sub jcode { return Jcode::CP932->new(@_) }

#### Modified: Unicode converters built on Encode's cp932.
#
# All four converters accept either a string or a reference to a string.
# Given a reference, the referent is converted in place; either way the
# converted string is returned.

# euc_ucs2($thingy) - EUC -> UTF-16BE via cp932.
#
# The original converted a private copy only, so a caller passing a reference
# never saw the UTF-16BE result in the referent, unlike ucs2_euc / utf8_euc.
# The result is now written back through the reference.
sub euc_ucs2 {
    my $thingy = shift;
    my $r_str  = ref $thingy ? $thingy : \$thingy;

    # Pass $thingy exactly as received and rely only on the return value.
    $$r_str = euc_sjis($thingy);
    Encode::from_to( $$r_str, 'cp932', 'UTF-16BE' );
    return $$r_str;
}

# ucs2_euc($thingy) - UTF-16BE -> EUC via cp932.
sub ucs2_euc {
    my $thingy = shift;
    my $r_str  = ref $thingy ? $thingy : \$thingy;

    Encode::from_to( $$r_str, 'UTF-16BE', 'cp932' );
    return sjis_euc($r_str);
}

# euc_utf8($thingy) - EUC -> UTF-8 bytes via cp932.
#
# Same write-back fix as euc_ucs2: the referent, when a reference is passed,
# now receives the UTF-8 result.
sub euc_utf8 {
    my $thingy = shift;
    my $r_str  = ref $thingy ? $thingy : \$thingy;

    $$r_str = euc_sjis($thingy);
    Encode::from_to( $$r_str, 'cp932', 'utf8' );
    return $$r_str;
}

# utf8_euc($thingy) - UTF-8 -> EUC via cp932.
sub utf8_euc {
    my $thingy = shift;
    my $r_str  = ref $thingy ? $thingy : \$thingy;

    # from_to() works on octets, so a character string (UTF-8 flag set) is
    # first encoded down to UTF-8 bytes.
    utf8::encode($$r_str) if Encode::is_utf8($$r_str);
    Encode::from_to( $$r_str, 'utf8', 'cp932' );
    return sjis_euc($r_str);
}

1;
*1:Encode::EUCJPMSがあるかどうか判定して、あるならそっちを使うべきな気もするけど。