http://d.hatena.ne.jp/end0tknr/20110707
以前のエントリに関連しますが、perlの主にunpack()の練習として、白黒画像のtiffのヘッダ、及びIFD部をパースしてみました。
私の場合、unpack()の方が気楽
perl-users.jp に Data::ParseBinary によるバイナリファイルの解析が紹介されていますが、私の場合、unpack()の方が気楽にパースできます。
http://perl-users.jp/articles/advent-calendar/2008/25.html
TIFFファイルの仕様はwww.snap-tck.comを参考
にさせていただきました。TIFFには、様々なオプションがありますが、今回はunpackの練習ですので、tiffの全ての仕様は満たしていません。
http://www.snap-tck.com/room03/c02/cg/cg.html
圧縮画像データもデコードしようとしましたが、気が進みませんでした
モノクロのtiffなので、CCITT Group 4 (T.6) の仕様書を読んで、圧縮画像も解凍しようかとも思いましたが、余りに時間がかかりそうなので、やめときました。libtiff がありますしね。
http://www.itu.int/rec/T-REC-T.6-198811-I/en
http://www.itu.int/rec/T-REC-T.4/en
script と実行結果
次の通りです。
#!/usr/local/bin/perl use strict; use Data::Dumper; my $FILE_NAME; my $CURRENT_POS; my $BYTE_ORDER; #={II,MM} my $MAX_IFD = 10; main(@ARGV); sub main { my ($tiff_file) = @_; my $tiff_info = {}; $FILE_NAME = $tiff_file; #### PARSE HEADER $tiff_info->{header} = parse_header(); #### PARSE IFD $tiff_info->{ifd} = []; my $ifd_pointer = $tiff_info->{header}->{ifd_pointer}; my $i = 0; while($ifd_pointer and $i++ < $MAX_IFD){ (my $ifd, $ifd_pointer) = parse_ifd($ifd_pointer); push(@{$tiff_info->{ifd}},$ifd) if $ifd; } #### PARSE BODY(image) #### いつか気が向いたら、画像部もparseするかも print Dumper($tiff_info); } sub parse_header { open(my $fh,'<',$FILE_NAME) or die "can't open $FILE_NAME $!"; binmode $fh; $CURRENT_POS = 0; my $ret = {}; $BYTE_ORDER = $ret->{byte_order} = read_bytes($fh,2,undef); #n:符号無16bit整数 big endian $ret->{version} = read_bytes($fh,2,'n'); #N:符号無32bit整数 big endian $ret->{ifd_pointer} = read_bytes($fh,4,'N'); close($fh) or die "can't close $FILE_NAME $!"; return $ret; } sub parse_ifd { my ($ifd_pointer) = @_; open(my $fh,'<',$FILE_NAME) or die "can't open $FILE_NAME $!"; binmode $fh; my $ret = {}; read_bytes($fh,$ifd_pointer,undef,'MM'); #読飛ばし #符号無16bit整数 big endian $ret->{entry_count} = read_bytes($fh,2,'n'); $ret->{entries} = {}; my $i = 0; while($i++ < $ret->{entry_count}){ my $tag_no = read_bytes($fh,2,'n'); my $tag_name = entry_tag_no2name($tag_no); if(not $tag_name){ print STDERR "[WARN] unknown entry tag no=$tag_no\n"; next; } if ($tag_name eq 'XResolution' or $tag_name eq 'YResolution'){ $ret->{entries}->{$tag_name} = parse_ifd_entry_Resolution($fh); } elsif ($tag_name eq 'BitsPerSample' ) { $ret->{entries}->{$tag_name} = parse_ifd_entry_BitsPerSample($fh); } elsif ($tag_name eq 'StripByteCounts' ) { $ret->{entries}->{$tag_name} = parse_ifd_entry_StripByteCounts($fh); } elsif ($tag_name eq 'StripOffsets' ) { $ret->{entries}->{$tag_name} = parse_ifd_entry_StripOffsets($fh); } else { $ret->{entries}->{$tag_name} = parse_ifd_entry_val($fh); } } $ifd_pointer = read_bytes($fh,4,'N'); close($fh) or die "can't close $FILE_NAME $!"; return ($ret,$ifd_pointer); } sub entry_tag_no2name { my ($tag_no) = @_; return "ImageWidth" if $tag_no eq '256'; return "ImageLength" if $tag_no eq '257'; return "BitsPerSample" if $tag_no eq '258'; return "Compression" if $tag_no eq '259'; #code=4 は fax互換のITU-T Group4 return "PhotometricInterpretation" if $tag_no eq '262'; return "FillOrder" if $tag_no eq '266'; return "StripOffsets" if $tag_no eq '273'; return "Orientation" if $tag_no eq '274'; return "SamplesPerPixel" if $tag_no eq '277'; return "RowsPerStrip" if $tag_no eq '278'; return "StripByteCounts" if $tag_no eq '279'; return "XResolution" if $tag_no eq '282'; return "YResolution" if $tag_no eq '283'; return "PlanarConfiguration" if $tag_no eq '284'; return "ResolutionUnit" if $tag_no eq '296'; #解像度. code=2→dot per inch , code=3→dot per センチメートル return "Software" if $tag_no eq '305'; return undef; } sub parse_ifd_entry_StripByteCounts { my ($fh) = @_; my $ret = {}; my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh); $ret->{StripCounts} = read_bytes($fh,4,'N'); $ret->{StripByteCounts} = read_bytes($fh,$data_size,$unpack_rule); read_bytes($fh, 4-$data_size, undef) if $data_size<4; return $ret; } sub parse_ifd_entry_StripOffsets { my ($fh) = @_; my $ret = {}; my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh); $ret->{StripCounts} = read_bytes($fh,4,'N'); $ret->{StripOffsets} = read_bytes($fh,$data_size,$unpack_rule); read_bytes($fh, 4-$data_size, undef) if $data_size<4; return $ret; } sub parse_ifd_entry_BitsPerSample { my ($fh) = @_; my $ret = {}; my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh); $ret->{SamplesPerPixel} = read_bytes($fh,4,'N'); if ($ret->{SamplesPerPixel} eq '1'){ #モノクロ, カラーマップモデル $ret->{PixelDepth} = read_bytes($fh,2,'n'); read_bytes($fh,2,undef,'MM'); return $ret; } elsif ($ret->{SamplesPerPixel} eq '3'){ #ダイレクトカラーモデルは未対応 print STDERR join(' ', "SamplesPerPixel($ret->{SamplesPerPixel})", "is not supported now\n"); } print STDERR join(' ', "unknown SamplesPerPixel($ret->{SamplesPerPixel})", "in BitsPerSample \n"); return undef; } sub parse_ifd_entry_Resolution { my ($fh) = @_; my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh); my $data_count = read_bytes($fh,4,'N'); return undef if $unpack_rule ne 'RATIONAL'; my $offset = read_bytes($fh,4,'N'); open(my $fh_tmp,'<',$FILE_NAME) or die "can't open $FILE_NAME $!"; binmode $fh_tmp; my $now_current_pos = $CURRENT_POS; read_bytes($fh_tmp,$offset,'N','MM'); my $bunshi = read_bytes($fh_tmp,4,'N'); my $bunbo = read_bytes($fh_tmp,4,'N'); close($fh_tmp) or die "can't close $FILE_NAME $!"; $CURRENT_POS = $now_current_pos; return "$bunshi/$bunbo"; } sub parse_ifd_entry_val { my ($fh) = @_; my $ret; my ($data_size,$unpack_rule) = parse_ifd_entry_data_type($fh); my $data_count = read_bytes($fh,4,'N'); if ($unpack_rule eq 'a*'){ my $offset = read_bytes($fh,4,'N'); open(my $fh_tmp,'<',$FILE_NAME) or die "can't open $FILE_NAME $!"; binmode $fh_tmp; my $now_current_pos = $CURRENT_POS; read_bytes($fh_tmp,$offset,'N'); my $str = read_bytes($fh_tmp, $data_size * $data_count, $unpack_rule, 'MM'); close($fh_tmp) or die "can't close $FILE_NAME $!"; $CURRENT_POS = $now_current_pos; return $str; } my $net_read_len = $data_size * $data_count; $ret = read_bytes($fh, $net_read_len, $unpack_rule); if ($net_read_len<4){ read_bytes($fh, 4-$net_read_len, undef); } return $ret; } sub parse_ifd_entry_data_type { my ($fh) = @_; my $type_no = read_bytes($fh,2,'n'); return (1,'a*') if($type_no eq '2'); return (2,'n') if($type_no eq '3'); return (4,'N') if($type_no eq '4'); return (8,'RATIONAL') if($type_no eq '5'); #分数 (分子4桁/分母4桁) print STDERR "[WARN] unknown data type no=$type_no\n"; return undef; } sub read_bytes { my ($fh,$size,$unpack_rule,$byte_order) = @_; $byte_order = $BYTE_ORDER unless $byte_order; my $ret_bytes; if($byte_order eq "II"){ my $i = 0; while($i++ < $size ){ read($fh,my $tmp_byte,1); $CURRENT_POS += 1; $ret_bytes = $tmp_byte . $ret_bytes; } } else { read($fh,$ret_bytes,$size); $CURRENT_POS += $size; } return unpack($unpack_rule,$ret_bytes) if $unpack_rule; return $ret_bytes; }
実行結果
[endo@colinux tmp]$ ./parse_tif.pl monochro.tif $VAR1 = { 'ifd' =>[ { 'entries' => { 'FillOrder' => 1, 'PhotometricInterpretation' => 0, 'ImageWidth' => 3968, 'ResolutionUnit' => 2, 'Compression' => 4, 'BitsPerSample' => { 'PixelDepth' => 1, 'SamplesPerPixel' => 1 }, 'YResolution' => '1610612736/16777216', 'SamplesPerPixel' => 1, 'ImageLength' => 3174, 'RowsPerStrip' => 3174, 'StripByteCounts' => { 'StripByteCounts' => 42470, 'StripCounts' => 1 }, 'StripOffsets' => { 'StripCounts' => 1, 'StripOffsets' => 8 }, 'Orientation' => 1, 'PlanarConfiguration' => 1, 'XResolution' => '1610612736/16777216', 'Software' => 'IrfanView' }, 'entry_count' => 16 } ], 'header' => { 'ifd_pointer' => 42478, 'version' => 42, 'byte_order' => 'II' } };