end0tknr's kipple - 新web写経開発

http://d.hatena.ne.jp/end0tknr/ から移転しました

perlのread()やunpack()の練習にtiffのヘッダを解析してみた

http://d.hatena.ne.jp/end0tknr/20110707

以前のエントリに関連しますが、perlの主にunpack()の練習として、白黒画像のtiffのヘッダ、及びIFD部をパースしてみました。

私の場合、unpack()の方が気楽

perl-users.jp に Data::ParseBinary によるバイナリファイルの解析が紹介されていますが、私の場合、unpack()の方が気楽にパースできます。
http://perl-users.jp/articles/advent-calendar/2008/25.html

TIFFファイルの仕様はwww.snap-tck.comを参考

にさせていただきました。TIFFには、様々なオプションがありますが、今回はunpackの練習ですので、tiffの全ての仕様は満たしていません。
http://www.snap-tck.com/room03/c02/cg/cg.html

圧縮画像データもデコードしようとしましたが、気が進みませんでした

モノクロのtiffなので、CCITT Group 4 (T.6) の仕様書を読んで、圧縮画像も解凍しようかとも思いましたが、余りに時間がかかりそうなので、やめときました。libtiff がありますしね。

http://www.itu.int/rec/T-REC-T.6-198811-I/en
http://www.itu.int/rec/T-REC-T.4/en

script と実行結果

次の通りです。

#!/usr/local/bin/perl
use strict;
use Data::Dumper;

my $FILE_NAME;
my $CURRENT_POS;
my $BYTE_ORDER; #={II,MM}
my $MAX_IFD = 10;

main(@ARGV);

sub main {
    my ($tiff_file) = @_;

    my $tiff_info = {};
    $FILE_NAME = $tiff_file;
    #### PARSE HEADER
    $tiff_info->{header} = parse_header();
    #### PARSE IFD
    $tiff_info->{ifd} = [];
    my $ifd_pointer = $tiff_info->{header}->{ifd_pointer};

    my $i = 0;
    while($ifd_pointer and $i++ < $MAX_IFD){
	(my $ifd, $ifd_pointer) = parse_ifd($ifd_pointer);
	push(@{$tiff_info->{ifd}},$ifd) if $ifd;
    }
    #### PARSE BODY(image)
    #### いつか気が向いたら、画像部もparseするかも

    print Dumper($tiff_info);
}


sub parse_header {

    open(my $fh,'<',$FILE_NAME) or die "can't open $FILE_NAME $!";
    binmode $fh;
    $CURRENT_POS = 0;

    my $ret = {};
    $BYTE_ORDER = $ret->{byte_order} = read_bytes($fh,2,undef);

    #n:符号無16bit整数 big endian
    $ret->{version} = read_bytes($fh,2,'n');
    #N:符号無32bit整数 big endian
    $ret->{ifd_pointer} = read_bytes($fh,4,'N');

    close($fh) or die "can't close $FILE_NAME $!";

    return $ret;
}

sub parse_ifd {
    my ($ifd_pointer) = @_;

    open(my $fh,'<',$FILE_NAME) or die "can't open $FILE_NAME $!";
    binmode $fh;

    my $ret = {};
    read_bytes($fh,$ifd_pointer,undef,'MM'); #読飛ばし

    #符号無16bit整数 big endian
    $ret->{entry_count} = read_bytes($fh,2,'n');
    $ret->{entries} = {};
    my $i = 0;
    while($i++ < $ret->{entry_count}){
	my $tag_no = read_bytes($fh,2,'n');
	my $tag_name = entry_tag_no2name($tag_no);
	if(not $tag_name){
	    print STDERR "[WARN] unknown entry tag no=$tag_no\n";
	    next;
	}

	if ($tag_name eq 'XResolution' or
	    $tag_name eq 'YResolution'){
	    $ret->{entries}->{$tag_name} = parse_ifd_entry_Resolution($fh);
	} elsif ($tag_name eq 'BitsPerSample' ) {
	    $ret->{entries}->{$tag_name} = parse_ifd_entry_BitsPerSample($fh);
	} elsif ($tag_name eq 'StripByteCounts' ) {
	    $ret->{entries}->{$tag_name} = parse_ifd_entry_StripByteCounts($fh);
	} elsif ($tag_name eq 'StripOffsets' ) {
	    $ret->{entries}->{$tag_name} = parse_ifd_entry_StripOffsets($fh);
	} else {
	    $ret->{entries}->{$tag_name} = parse_ifd_entry_val($fh);
	}
    }

    $ifd_pointer = read_bytes($fh,4,'N');
    close($fh) or die "can't close $FILE_NAME $!";

    return ($ret,$ifd_pointer);
}




sub entry_tag_no2name {
    my ($tag_no) = @_;
    return "ImageWidth"			if $tag_no eq '256';
    return "ImageLength"		if $tag_no eq '257';
    return "BitsPerSample"		if $tag_no eq '258';

    return "Compression"		if $tag_no eq '259';
    #code=4 は fax互換のITU-T Group4

    return "PhotometricInterpretation"	if $tag_no eq '262';
    return "FillOrder"			if $tag_no eq '266';
    return "StripOffsets"		if $tag_no eq '273';
    return "Orientation"		if $tag_no eq '274';
    return "SamplesPerPixel"		if $tag_no eq '277';
    return "RowsPerStrip"		if $tag_no eq '278';
    return "StripByteCounts"		if $tag_no eq '279';
    return "XResolution"		if $tag_no eq '282';
    return "YResolution"		if $tag_no eq '283';
    return "PlanarConfiguration"	if $tag_no eq '284';

    return "ResolutionUnit"		if $tag_no eq '296';
    #解像度.	code=2→dot per inch , code=3→dot per センチメートル

    return "Software"			if $tag_no eq '305';
    return undef;
}


sub parse_ifd_entry_StripByteCounts {
    my ($fh) = @_;

    my $ret = {};
    my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh);
    $ret->{StripCounts} = read_bytes($fh,4,'N');

    $ret->{StripByteCounts} = read_bytes($fh,$data_size,$unpack_rule);
    read_bytes($fh, 4-$data_size, undef)	if $data_size<4;

    return $ret;
}
sub parse_ifd_entry_StripOffsets {
    my ($fh) = @_;

    my $ret = {};
    my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh);
    $ret->{StripCounts} = read_bytes($fh,4,'N');

    $ret->{StripOffsets} = read_bytes($fh,$data_size,$unpack_rule);
    read_bytes($fh, 4-$data_size, undef)	if $data_size<4;

    return $ret;
}

sub parse_ifd_entry_BitsPerSample {
    my ($fh) = @_;

    my $ret = {};
    my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh);
    $ret->{SamplesPerPixel} = read_bytes($fh,4,'N');

    if ($ret->{SamplesPerPixel} eq '1'){ #モノクロ, カラーマップモデル
	$ret->{PixelDepth} = read_bytes($fh,2,'n');
	read_bytes($fh,2,undef,'MM');
	return $ret;
    } elsif ($ret->{SamplesPerPixel} eq '3'){
	#ダイレクトカラーモデルは未対応
	print STDERR join(' ',
			  "SamplesPerPixel($ret->{SamplesPerPixel})",
			  "is not supported now\n");
    }
    print STDERR join(' ',
		      "unknown SamplesPerPixel($ret->{SamplesPerPixel})",
		      "in BitsPerSample \n");
    return undef;
}

sub parse_ifd_entry_Resolution {
    my ($fh) = @_;

    my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh);
    my $data_count = read_bytes($fh,4,'N');

    return undef if $unpack_rule ne 'RATIONAL';

    my $offset = read_bytes($fh,4,'N');

    open(my $fh_tmp,'<',$FILE_NAME) or die "can't open $FILE_NAME $!";
    binmode $fh_tmp;
    my $now_current_pos = $CURRENT_POS;

    read_bytes($fh_tmp,$offset,'N','MM');
    my $bunshi = read_bytes($fh_tmp,4,'N');
    my $bunbo  = read_bytes($fh_tmp,4,'N');

    close($fh_tmp) or die "can't close $FILE_NAME $!";
    $CURRENT_POS = $now_current_pos;

    return "$bunshi/$bunbo";
}

sub parse_ifd_entry_val {
    my ($fh) = @_;
    my $ret;
    my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh);
    my $data_count = read_bytes($fh,4,'N');

    if ($unpack_rule eq 'a*'){
	my $offset = read_bytes($fh,4,'N');
	open(my $fh_tmp,'<',$FILE_NAME) or die "can't open $FILE_NAME $!";
	binmode $fh_tmp;
	my $now_current_pos = $CURRENT_POS;

	read_bytes($fh_tmp,$offset,'N');
	my $str = read_bytes($fh_tmp,
			     $data_size * $data_count,
			     $unpack_rule,
			     'MM');
	close($fh_tmp) or die "can't close $FILE_NAME $!";
	$CURRENT_POS = $now_current_pos;

	return $str;
    }

    my $net_read_len = $data_size * $data_count;

    $ret = read_bytes($fh,
		      $net_read_len,
		      $unpack_rule);
    if ($net_read_len<4){
	read_bytes($fh,
		   4-$net_read_len,
		   undef);
    }
    return $ret;
}

sub parse_ifd_entry_data_type {
    my ($fh) = @_;

    my $type_no = read_bytes($fh,2,'n');
    return (1,'a*')		if($type_no eq '2');
    return (2,'n')		if($type_no eq '3');
    return (4,'N')		if($type_no eq '4');
    return (8,'RATIONAL')	if($type_no eq '5'); #分数 (分子4桁/分母4桁)

    print STDERR "[WARN] unknown data type no=$type_no\n";
    return undef;
}

sub read_bytes {
    my ($fh,$size,$unpack_rule,$byte_order) = @_;

    $byte_order = $BYTE_ORDER	unless $byte_order;

    my $ret_bytes;
    if($byte_order eq "II"){
	my $i = 0;
	while($i++ < $size ){
	    read($fh,my $tmp_byte,1);
	     $CURRENT_POS += 1;
	    $ret_bytes = $tmp_byte . $ret_bytes;
	}
    } else {
	read($fh,$ret_bytes,$size);
	$CURRENT_POS += $size;
    }

    return unpack($unpack_rule,$ret_bytes) if $unpack_rule;
    return $ret_bytes;
}

実行結果

[endo@colinux tmp]$ ./parse_tif.pl monochro.tif 
$VAR1 =
    {
     'ifd' =>[
	      {
	       'entries' =>
	       {
		'FillOrder' => 1,
		'PhotometricInterpretation' => 0,
		'ImageWidth' => 3968,
		'ResolutionUnit' => 2,
		'Compression' => 4,
		'BitsPerSample' => {
				    'PixelDepth' => 1,
				    'SamplesPerPixel' => 1
				   },
		'YResolution' => '1610612736/16777216',
		'SamplesPerPixel' => 1,
		'ImageLength' => 3174,
		'RowsPerStrip' => 3174,
		'StripByteCounts' => {
				      'StripByteCounts' => 42470,
				      'StripCounts' => 1
				     },
		'StripOffsets' => {
				   'StripCounts' => 1,
				   'StripOffsets' => 8
				  },
		'Orientation' => 1,
		'PlanarConfiguration' => 1,
		'XResolution' => '1610612736/16777216',
		'Software' => 'IrfanView'
	       },
	       'entry_count' => 16
	      }
	     ],
     'header' => {
		  'ifd_pointer' => 42478,
		  'version' => 42,
		  'byte_order' => 'II'
		 }
    };