# Listing 3.
# gendb.pl
# Tuomas J. Lukka
# Learning Japanese
# The Perl Journal, Summer 1998
 
# gendb.pl - generate a database file from the 
# kanji dictionaries.
# Copyright (C) 1997,1998 Tuomas J. Lukka. 
# All rights reserved.
#
# Get the files "kanjidic" and "edict" from
# ftp://ftp.monash.edu.au/pub/nihongo

use AnyDBM_File;
use Fcntl;

use strict;
use warnings;

# Build the 'kanji.dbmx' database: every non-comment line of "edict" and
# "kanjidic" is appended to the entry keyed by the line's first
# whitespace-delimited token.
#
# Usage: gendb.pl [directory]
#   directory - where "edict" and "kanjidic" live (defaults to ".").

# Directory holding the dictionary files.
my $dir = defined $ARGV[0] ? $ARGV[0] : ".";

# Interval (in stored entries) at which a progress line is printed,
# to show that we are alive.
my $report = 4000;

# Running count of stored entries; shared by both passes so the second
# pass continues counting where the first one stopped.
my $nent = 0;

# O_TRUNC: any existing database is emptied and rebuilt from scratch.
my %kanji;
tie %kanji, 'AnyDBM_File', 'kanji.dbmx',
    O_CREAT | O_RDWR | O_TRUNC, 0755
    or die "Can't tie kanji.dbmx: $!";

# Pass 1: edict. Lines are stored unmodified.
my $edict_path = "$dir/edict";
open my $edict_fh, '<', $edict_path
    or die "Can't open $edict_path: $!";
while (my $line = <$edict_fh>) {
    next if $line =~ /^#/;
    $line =~ /^(\S+)\s/ or die "Invalid line '$line'";
    my $key = $1;
    $kanji{$key} .= $line;
    print "E: $nent '$key'\n" if ++$nent % $report == 0;
}
close $edict_fh;

# Pass 2: kanjidic. Fields are trimmed before the line is stored.
my $kanjidic_path = "$dir/kanjidic";
open my $kanjidic_fh, '<', $kanjidic_path
    or die "Can't open $kanjidic_path: $!";
while (my $line = <$kanjidic_fh>) {
    next if $line =~ /^#/;

    # Drop every whitespace-preceded field whose first letter is in the
    # list below; G and F are deliberately absent, so those fields stay.
    # The hyphen is placed last in the inner class so it is a literal.
    $line =~ s/\s[UNBSMHQLKIOWYXEPCZ][\w.-]*//g;

    # The key is extracted after trimming, as the stored line is the
    # trimmed one.
    $line =~ /^(\S+)\s/ or die "Invalid line '$line'";
    my $key = $1;
    $kanji{$key} .= $line;
    print "K: $nent '$key'\n" if ++$nent % $report == 0;
}
close $kanjidic_fh;

untie %kanji;