| |
#!/usr/bin/perl
#
# wwwkan1.pl - translate kanji or compounds in Japanese HTML.
# Copyright (C) 1997,1998 Tuomas J. Lukka. All rights reserved.
# Directory to the kanji dictionary database
$libdir = "/my/home/dir/japanese_files/";
# The url of this CGI-script, for mangling the links on the page
$my_url = "http://komodo.media.mit.edu/~tjl/cgi-bin/wwwkan1.cgi";
# Link types to substitute.
# 0 = absolute, 1 = relative.
%links = (a => ['href', 1], img => ['src', 0],
form => ['action', 1], link => ['href', 1],
frame => ['src', 1]);
# ---- main program
use CGI;
use LWP::Simple;
use HTML::Parse;
use URI::URL;
use Fcntl;
use AnyDBM_File;
tie %kanji, AnyDBM_File, "$libdir/kanji.dbmx", O_RDONLY, 0;
$query = new CGI;
print $query->header, "CONVERTED By TJL's kanji explainer on ",
'date', '. Mail comments to lukka@fas.harvard.edu.<P>',
$query->startform(), "<b>Go To:</b> ",
$query->textfield(-name => 'url',
-default => 'http://www.yahoo.co.jp/', -size => 50),
$query->submit('Action','Doit'),
$query->endform, "<HR>\n";
# Get the original document from the net.
$url = $query->param('url');
$doc = get $url;
# Substitute web addresses so that text documents are fetched with
# this script and pictures are fetched directly.
$h = parse_html($doc);
$h->traverse(
sub {
my($e, $start) = @_;
return 1 unless $start;
my $attr = $links{lc $e->tag} or return 1;
my $url = $e->attr($attr->[0]) or return 1;
$e->attr($attr->[0], ($attr->[1] ?
getlink($url) : abslink($url)));
},
1);
$doc = $h->as_HTML;
# Substitute kanji for English
for ( split "\n", $doc ) {
s/((?:[\x80-\xFF][\x40-\xFF])+)/explainstr($1)/ge;
print;
}
exit;
# SUBROUTINES
# Make an absolute URL from a relative URL in the original document
sub abslink {
return (new URI::URL($_[0]))->abs($url)->as_string;
}
# Make a new URL which gets a document through our translation service.
sub getlink {
my $url_to = (new URI::URL($_[0]))->abs($url);
my $proxy_url = new URI::URL($my_url);
$proxy_url->query_form(url => $url_to->as_string);
return $proxy_url->as_string;
}
# Insert explanations into a string of kanji
sub explainstr {
my $str = @_;
my $res = "";
my ($pos, $mlen, $s);
for ( $pos = 0; $pos < length($str); $pos += $mlen ) {
my $expl;
$mlen = 20;
while (!defined($expl = $kanji{$s=(substr(($str),$pos,$mlen))})
and $mlen > 2) {
$mlen -= 2;
}
$res .= $s;
if (defined $expl) {
$res .= " <small><[[[".($expl)."]]]></small> ";
}
}
return $res;
}
|