# document updated 18 years ago, on Jul 1, 2005
use Text::Iconv;
# Pack a number into a 4-byte big-endian string (pack template "N").
# Presumably the argument is a Unicode code point and the result is its
# UTF-32BE encoding -- confirm against callers.
sub utf32_chr {
    my ($code_point) = @_;
    return pack("N", $code_point);
}
# Inverse of utf32_chr: read the first 4 bytes of the argument as an
# unsigned big-endian 32-bit number (unpack template "N").
sub utf32_ord {
    my ($bytes) = @_;
    return unpack("N", $bytes);
}
# Pack a number into a 2-byte big-endian string (pack template "n").
# Presumably the argument is a double-byte JIS-family character code --
# confirm against callers.
sub jis_chr {
    my ($code) = @_;
    return pack("n", $code);
}
# Inverse of jis_chr: read the first 2 bytes of the argument as an
# unsigned big-endian 16-bit number (unpack template "n").
sub jis_ord {
    my ($bytes) = @_;
    return unpack("n", $bytes);
}
# Given a particular character in a given encoding, determine whether it
# maps to any known-good Japanese encoding.
#
# Parameters:
#   $enc  - name of the encoding $char is expressed in (passed to convert())
#   $char - the encoded character to test
#
# Returns 1 as soon as convert() produces a defined result for one of the
# targets tried (UTF-8, SHIFT-JIS, ISO-2022-JP, EUC-JP), and 0 if none does.
#
# A target is skipped when the name of $enc matches that target's family
# pattern.  This matters because convert() hands the string back unchanged
# when source and target names are equal, which would otherwise make the
# check trivially succeed.  All family patterns are matched
# case-insensitively so that e.g. "EUC-JP" and "euc-jp" are treated alike.
sub is_japanese {
    my ($enc, $char) = @_;

    return 1
        if $enc !~ /^utf/i && defined(convert($enc, "UTF-8", $char));
    return 1
        if $enc !~ /jis$/i && defined(convert($enc, "SHIFT-JIS", $char));
    return 1
        if $enc !~ /2022/ && defined(convert($enc, "ISO-2022-JP", $char));
    return 1
        if $enc !~ /^euc/i && defined(convert($enc, "EUC-JP", $char));

    return 0;
}
# Convert a string from one encoding to another.
#
# Parameters:
#   $from - name of the source encoding
#   $to   - name of the target encoding
#   $str  - the string to convert
#
# Returns $str untouched when $from and $to are the same name; otherwise
# returns whatever the Text::Iconv object's convert() method returns.
BEGIN {
    # One Text::Iconv object per (from, to) pair, created on first use and
    # reused for later calls.
    my %iconv_for;

    sub convert {
        my ($from, $to, $str) = @_;

        # Nothing to do when source and target are spelled identically.
        if ($from eq $to) {
            return $str;
        }

        # A NUL byte separates the two names in the cache key.
        my $pair = "$from\x00$to";
        if (!exists $iconv_for{$pair}) {
            $iconv_for{$pair} = Text::Iconv->new($from, $to);
        }

        my $iconv = $iconv_for{$pair};
        return $iconv->convert($str);
    }
}
# An international version of 'length'
#
# wee bit of a hack, may not always work (eg. for the whole cm/mm/km stuff...
# http://isthisthingon.org/unicode/index.phtml?page=3&subpage=3&hilite=339D)
#
# Parameters:
#   $enc - name of the encoding $str is expressed in
#   $str - the encoded string to measure
#
# Converts $str to UTF-32BE, where every character occupies exactly four
# bytes, and returns the byte length divided by 4.  Returns 0 if the
# conversion does not produce a defined result.
#
# The target is "UTF-32BE" rather than plain "UTF-32" because common iconv
# implementations prepend a 4-byte byte-order mark when writing "UTF-32",
# which would inflate the count by one.
sub num_chars {
    my ($enc, $str) = @_;

    my $utf32 = convert($enc, "UTF-32BE", $str);
    return 0 unless defined $utf32;

    return length($utf32) / 4;
}