#!/usr/local/bin/perl
# Look up Japanese postal codes on the Japan Post web search page and print
# one tab-separated line per code: the zip code followed by whatever address
# parts (prefecture, municipality, town area) could be extracted from the
# returned HTML. Zip codes are read one per line from the __DATA__ section.
use strict;
use warnings;
use utf8;
use Encode;
use Encode::CJKConstants;
use LWP::UserAgent;
use Data::Dumper;

main();

# Fetch the lookup page for every zip code in __DATA__ and print the result.
# Dies with the URL and HTTP status line if a request is not successful.
sub main {
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);

    while (my $zipcode = <DATA>) {
        chomp $zipcode;
        next if $zipcode !~ /\S/;    # ignore blank lines in the data section

        my $url = 'http://www.post.japanpost.jp/cgi-zip/zipcode.php'
                . "?zip=$zipcode";
        my $res = $ua->get($url);
        die "$url: " . $res->status_line unless $res->is_success;

        my $conved = extract_addr($zipcode, $res->decoded_content);
        print encode('UTF-8', join("\t", $zipcode, @$conved)), "\n";
    }
    return;
}

# Extract the address parts for $zipcode from the result page HTML.
#
#   $zipcode  - zip code string exactly as it appears in the result table
#   $html_src - decoded (character) HTML of the result page; may be undef
#
# Returns an array reference [prefecture, municipality, town area] when the
# result row for $zipcode is found, or an empty array reference otherwise.
sub extract_addr {
    my ($zipcode, $html_src) = @_;

    # decoded_content() can yield undef when the body cannot be decoded.
    return [] unless defined $html_src;

    # Collapse every whitespace run to one space so that the pattern below
    # does not depend on how the page is line-wrapped or indented.
    $html_src =~ s/\s+/ /g;

    # \Q...\E makes the zip code match literally; it is not captured, so the
    # three capture groups are exactly the three address parts.
    my $addr_re = qr{
        <td[ ]class="data"><small>\Q$zipcode\E</small></td>
        [ ]<td[ ]class="data"><small>([^\x00-\x7f]*?)</small></td>  # prefecture
        [ ]<td[ ]class="data"><small>([^\x00-\x7f]*?)</small></td>  # municipality
        [ ]<td>[\x00-\x7f]+([^\x00-\x7f]+)[\x00-\x7f]+              # town area
    }x;

    # In list context a successful match returns the captured groups.
    if (my @addr_parts = $html_src =~ $addr_re) {
        return \@addr_parts;
    }
    return [];
}

__DATA__
150-0002
105-0001
110-XXXX
110-0000
と書くと、次の通り
$ ./zip2addr.pl
150-0002 東京都 渋谷区 渋谷
105-0001 東京都 港区 虎ノ門(次のビルを除く)
110-XXXX
110-0000 東京都 台東区 以下に掲載がない場合