# paperlined.org
# japan > computing
# document updated 18 years ago, on Jul 1, 2005
use Text::Iconv;


# Pack a numeric code value into four bytes, most significant byte first
# (pack template "N": unsigned 32-bit, big-endian).
sub utf32_chr {
    my ($code_point) = @_;
    return pack('N', $code_point);
}
# Inverse of utf32_chr: read the first four bytes of the argument as an
# unsigned 32-bit big-endian integer (unpack template "N").
sub utf32_ord {
    my ($bytes) = @_;
    return unpack('N', $bytes);
}

# Pack a numeric code value into two bytes, high byte first
# (pack template "n": unsigned 16-bit, big-endian).
sub jis_chr {
    my ($code) = @_;
    return pack('n', $code);
}
# Inverse of jis_chr: read the first two bytes of the argument as an
# unsigned 16-bit big-endian integer (unpack template "n").
sub jis_ord {
    my ($bytes) = @_;
    return unpack('n', $bytes);
}


# Given a particular character in a given encoding, determine whether it
# converts to any of the known-good reference encodings: UTF-8, SHIFT-JIS,
# ISO-2022-JP or EUC-JP.
#
#   $enc  - iconv name of the encoding $char is expressed in
#   $char - the encoded character (a byte string)
#
# Returns 1 as soon as one attempted conversion yields a defined result,
# and 0 if none does.
#
# A reference encoding is skipped when $enc looks like that same encoding.
# convert() returns its input untouched when the source and target names
# are equal, so without the skip the test would succeed for any bytes.
# All letter-based guards are case-insensitive so that "EUC-JP" and
# "euc-jp" are both recognised.
sub is_japanese {
    my ($enc, $char) = @_;

    # [ pattern that identifies the source as this family, target name ]
    my @candidates = (
        [ qr/^utf/i, "UTF-8"       ],
        [ qr/jis$/i, "SHIFT-JIS"   ],
        [ qr/2022/,  "ISO-2022-JP" ],
        [ qr/^euc/i, "EUC-JP"      ],
    );

    for my $candidate (@candidates) {
        my ($same_family, $target) = @$candidate;
        next if $enc =~ $same_family;
        return 1 if defined(convert($enc, $target, $char));
    }

    return 0;
}


# Convert a string from one encoding to another.
#
#   $from - iconv name of the encoding $str is in
#   $to   - iconv name of the encoding to produce
#   $str  - the string to convert
#
# When $from and $to are the same name (exact string comparison) the input
# is returned as-is, without being validated.  Otherwise the result is
# whatever Text::Iconv's convert method returns; callers in this file treat
# an undefined result as a failed conversion.
#
# One Text::Iconv object is created per ($from, $to) pair and then reused.
# The cache lives in a lexical that only this sub can see.
BEGIN {
    my %iconv_for;

    sub convert {
        my ($from, $to, $str) = @_;

        if ($from eq $to) {
            return $str;
        }

        # NUL cannot appear in an encoding name, so it is a safe separator.
        my $pair = "$from\x00$to";
        if (!exists $iconv_for{$pair}) {
            $iconv_for{$pair} = Text::Iconv->new($from, $to);
        }

        return $iconv_for{$pair}->convert($str);
    }
}


# An international version of 'length': count the characters in $str, a
# byte string encoded in $enc.
#
#   $enc - iconv name of the encoding $str is in
#   $str - the encoded string
#
# Returns the number of code points, or 0 when the conversion yields no
# defined result.
#
# The string is converted to UTF-32, where every code point occupies
# exactly four bytes, and the byte length is divided by four.  The target
# is "UTF-32BE" rather than plain "UTF-32": with the unmarked name, common
# iconv implementations prepend a four-byte byte-order mark, which would
# inflate the count by one.
#
# Wee bit of a hack, may not always work: the result counts code points,
# not what a reader perceives as characters.  For example U+339D is a
# single code point that displays as "cm" (likewise the mm/km squares).
#       http://isthisthingon.org/unicode/index.phtml?page=3&subpage=3&hilite=339D
sub num_chars {
    my ($enc, $str) = @_;

    my $utf32 = convert($enc, "UTF-32BE", $str);

    # A failed conversion has always counted as zero characters.
    return 0 unless defined $utf32;

    return length($utf32) / 4;
}