#
# gendb.pl - generate a database file from the
# kanji dictionaries.
# Copyright (C) 1997,1998 Tuomas J. Lukka.
# All rights reserved.
#
# Get the files "kanjidic" and "edict" from
# ftp://ftp.monash.edu.au/pub/nihongo
use AnyDBM_File;
use Fcntl;
use strict;
use warnings;

# Directory holding the "edict" and "kanjidic" source files.  Defaults to
# the current directory; the first command-line argument overrides it.
my $dir = defined $ARGV[0] ? $ARGV[0] : ".";

# Interval (in entries) between progress messages showing we are alive.
my $report = 4000;

# Running count of entries read so far.  It is shared by both input
# files, so the "K:" messages continue counting where "E:" left off.
my $nent = 0;

# Output database.  O_TRUNC empties any existing file, so each run
# rebuilds it from scratch.  A failed tie is fatal: otherwise every
# record would silently land in an ordinary in-memory hash.
my %kanji;
tie %kanji, 'AnyDBM_File', 'kanji.dbmx', O_CREAT | O_RDWR | O_TRUNC, 0755
    or die "Can't tie kanji.dbmx: $!";

# Input files, in processing order.  Each entry is:
#   [ file name, progress-message tag, pattern to strip (or undef) ]
# For kanjidic, every whitespace-separated field starting with one of the
# listed letters is removed before the line is stored (leave G and F).
my @sources = (
    [ 'edict',    'E', undef ],
    [ 'kanjidic', 'K', qr/\s[UNBSMHQLKIOWYXEPCZ][\w.-]*/ ],
);

for my $source (@sources) {
    my ($name, $tag, $strip) = @$source;
    my $path = "$dir/$name";

    open my $dic, '<', $path or die "Can't open $path: $!";
    while (my $line = <$dic>) {
        next if $line =~ /^#/;    # skip comment lines

        # Field stripping happens before key extraction, as the stored
        # record is the stripped line.
        $line =~ s/$strip//g if defined $strip;

        # The key is the first whitespace-delimited field of the line.
        my ($key) = $line =~ /^(\S+)\s/
            or die("Invalid line '$line'");

        # Lines sharing a key are concatenated into a single record.
        $kanji{$key} .= $line;

        print("$tag: $nent '$key'\n") if ++$nent % $report == 0;
    }
    close $dic;
}

untie %kanji;
#