end0tknr's kipple - 新web写経開発

http://d.hatena.ne.jp/end0tknr/ から移転しました

postgisである地点から海岸線までの距離算出を高速化する

postgisである地点からの距離を算出 - end0tknrのkipple - web写経開発

全国の海岸線データをインポートした状態で、地点から海岸線までの距離算出を次のようなpostgissqlで算出すると、当然、遅い(私の環境で30s/回)。

select min(ST_Distance_Sphere(?,the_geom)) from kaigan

であれば、海岸線gisデータを包含するboundary boxを予め算出し、ST_Boundary()の対象にするgisデータを制限すればよいかと。

↓というテーブルを作成し

create table kaigan_box (
  gid integer primary key,
  st_xmin int,
  st_xmax int,
  st_ymin int,
  st_ymax int
);

↓というperl scriptで海岸線gisのboundary boxを算出し

#!/usr/local/bin/perl
use strict;
use DBI;
use Data::Dumper;

my $DB_CONF =
    {db_name => 'dbi:Pg:dbname=kaigan;host=localhost;port=5432;',
     user => 'postgres',
     passwd => '',
     option => {AutoCommit => 0,
	       pg_enable_utf8 => 1}};

main(@ARGV);

sub main {

    my $dbh = connect_db();

    my $box_cos = get_box_cos($dbh);
    for my $box_co ( @$box_cos ){
	set_box_co($dbh,$box_co);
    }

    $dbh->commit();
    $dbh->disconnect();
}

sub set_box_co {
    my ($dbh,$box_co) = @_;
    my $sql =<<EOF;
insert into kaigan_box (gid,st_xmin,st_xmax,st_ymin,st_ymax)
values(?,?,?,?,?)
EOF
    my $sth = $dbh->prepare($sql);
    $sth->execute($box_co->{gid},
		  int($box_co->{st_xmin}),
		  int($box_co->{st_xmax}),
		  int($box_co->{st_ymin}),
		  int($box_co->{st_ymax}));
}

sub get_box_cos {
    my ($dbh) = @_;
    my $sql =<<EOF;
select
gid,
ST_XMin(the_geom), ST_XMax(the_geom),ST_YMin(the_geom), ST_YMax(the_geom)
from kaigan
EOF
    my $sth = $dbh->prepare($sql);
    $sth->execute();
    my @ret;
    while(my $row = $sth->fetchrow_hashref() ){
	push(@ret,$row);
    }
    return \@ret;
}

sub connect_db {
    return DBI->connect($DB_CONF->{db_name},
			$DB_CONF->{user},
			$DB_CONF->{passwd},
			$DB_CONF->{option});
}

↓のようにkaiganとkaigan_boxでjoinすると、1回あたりの計算時間が30秒から3-4秒に短縮されました。

#!/usr/local/bin/perl
use strict;
use DBI;
use Data::Dumper;

my $DIFF_ROT = 1;
my $DB_CONF =
    {db_name => 'dbi:Pg:dbname=kaigan;host=localhost;port=5432;',
     user => 'postgres',
     passwd => '',
     option => {AutoCommit => 0,
	       pg_enable_utf8 => 1}};

main(@ARGV);

sub main {
    my ($lon_lat_file) = @_;

    my $dbh = connect_db();

    open (my $fh,"<:encoding(utf8)",$lon_lat_file) or
	die "can't open file: $lon_lat_file: $!";
    my @lines = <$fh>;
    close($fh) or die "can't close file: $lon_lat_file: $!";

    my $i = 0;
    for my $line ( @lines ){
	$i++;
	$line =~ s/\s+$//go;
	my ($teicode,
	    $address_org,
	    $address_parse,
	    $latitude, #緯度
	    $longitude, #経度
	    $accuracy)  #住所parseの精度?
	    = split("\t",$line);
	print STDOUT "$i $teicode ->";
	my $kaigan_dist = calc_kaigan_distance($dbh,$latitude,$longitude);
	print STDOUT join("\t",$teicode,$kaigan_dist),"\n";
    }

    $dbh->disconnect();
}

sub calc_kaigan_distance {
    my ($dbh,$latitude,$longitude) = @_;
    my $sql =<<EOF;
select min(ST_Distance_Sphere(?, k.the_geom))
from kaigan k
join kaigan_box kj on (k.gid=kj.gid)
where
(st_xmin between ? and ?) or
(st_xmax between ? and ?) or
(st_ymin between ? and ?) or
(st_ymax between ? and ?)
EOF
    my $sth = $dbh->prepare($sql);
    my $diff_rot = 1;
    my @vals = ("POINT($longitude $latitude)",
		int($longitude-$DIFF_ROT), int($longitude+$DIFF_ROT),
		int($longitude-$DIFF_ROT), int($longitude+$DIFF_ROT),
		int($latitude-$DIFF_ROT),  int($latitude+$DIFF_ROT),
		int($latitude-$DIFF_ROT),  int($latitude+$DIFF_ROT) );
    $sth->execute( @vals );
    return $sth->fetchrow_array();
}

sub connect_db {
    return DBI->connect($DB_CONF->{db_name},
			$DB_CONF->{user},
			$DB_CONF->{passwd},
			$DB_CONF->{option});
}

参考まで