end0tknr's kipple - 新web写経開発

http://d.hatena.ne.jp/end0tknr/ から移転しました

aws s3にあるファイルをバージョンID付きで削除 - perl

aws s3には、バージョニングという機能があり、 単純にファイル削除した場合、実体は削除されず、 削除フラグ=ON(つまり、論理削除)の状態になるらしい。

物理削除したい場合、バージョンID付きで削除すればよいらしく、perlで書くと、以下の通り。

  • Net::Amazon::S3 * を使用したかったのですが、 Net::Amazon::S3 によるバージョンID付き削除方法が分からなかった為、 次のように素朴なperl scriptにしました。
#!/usr/local/perl/bin/perl
use strict;
use warnings;
use utf8;
use Digest::HMAC_SHA1;
use File::Basename;
use File::stat;
use HTTP::Date;
use HTTP::Request;
use HTTP::Request::Common;
use LWP::UserAgent;
use Log::Log4perl;
use Data::Dumper;

# https://docs.aws.amazon.com/ja_jp/AmazonS3/latest/dev/RESTAuthentication.html
# http://blog.yusuke.be/entry/2014/01/23/011321
my $CONF =
    {aws_s3=>
     {host=>                   'ないしょ',
      aws_access_key_id =>     'ないしょ',
      aws_secret_access_key => 'ないしょ',
      bucket=>                 'ないしょ'},
     chunk_size_limit => 1000,
     chunk_interval   => 10,  #sec
     log=>
     {'log4perl.rootLogger'=> 'DEBUG, LOGFILE, CONSOLE',
      'log4perl.appender.LOGFILE'=>'Log::Log4perl::Appender::File',
      'log4perl.appender.LOGFILE.dir'=>'/home/end0tknr/delete/',
      'log4perl.appender.LOGFILE.filename'=>
      '/home/end0tknr/delete/delete.log',
      'log4perl.appender.LOGFILE.mode'=>'append',
      'log4perl.appender.LOGFILE.layout'=>'Log::Log4perl::Layout::PatternLayout',
      'log4perl.appender.LOGFILE.layout.ConversionPattern'=>'%d [%p] %m %n',

      'log4perl.appender.CONSOLE'=> 'Log::Log4perl::Appender::Screen',
      'log4perl.appender.CONSOLE.layout' => 'Log::Log4perl::Layout::PatternLayout',
      'log4perl.appender.CONSOLE.layout.ConversionPattern' => '%d [%p] %m %n'}
     };
my $LOGGER;


main(@ARGV);

sub main {
    my (@del_files_lists) = @_;

    for my $del_files_list ( @del_files_lists ){
        # $del_files_list のfileには削除対象のobj keyを改行区切りで記載
        $LOGGER = init_logger($del_files_list);

        # 各削除file一覧は、chunk_size_limit 毎に分割して読込みます
        my $del_files_chunks = load_del_files_list($del_files_list);

        my $i = 0;
        for my $del_files_chunk ( @$del_files_chunks ){
            my $j = 0;
            $i++;
            for my $obj_key_org ( @$del_files_chunk ){
                next unless $obj_key_org;
                
                $j++;
                $LOGGER->info("$i $j $obj_key_org");
                
                # url encoding
                my $obj_key = $obj_key_org;
                $obj_key =~ s/([^ 0-9a-zA-Z])/"%".uc(unpack("H2",$1))/eg;
                
                # version id取得
                my $verion_id = get_version_id($obj_key);
                if( $verion_id ){
                    $LOGGER->info("get_version_id($obj_key_org) -> $verion_id");
                } else {
                    $LOGGER->error("fail get_version_id($obj_key_org)");
                    next;
                }

            }

            sleep( $CONF->{chunk_interval} );

            last;  # for debug
        }
    }
}

sub load_del_files_list {
    my ($del_files_list) = @_;

    open my $fh,"<", $del_files_list or
        die "fail open $del_files_list $!";
    my $ret = [];
    my $chunk_no = 0;
    my $chunk_size = 0;
    for my $line ( <$fh> ){
        chomp($line);
        if( not defined($ret->[$chunk_no]) ){
            $ret->[$chunk_no] = [];
        }
        if($chunk_size < $CONF->{chunk_size_limit}){
            push(@{$ret->[$chunk_no]}, $line);
            $chunk_size++;
        } else {
            $chunk_size = 0;
            $chunk_no++;
        }
    }
    close($fh);

    return $ret;
}


sub del_s3_obj {
    my ($obj_key, $verion_id) = @_;

    $verion_id = '' unless $verion_id;
    
    my $url = join('/',
                   "http://$CONF->{aws_s3}->{host}",
                   "$CONF->{aws_s3}->{bucket}",
                   $obj_key);
   $url .= "?versionId=$verion_id";

    my $date_str = time2str();
    my $str_to_sign_org =
        join("\n",
             'DELETE',
             '',
             '',
             $date_str,
             "/$CONF->{aws_s3}->{bucket}/$obj_key");
    $str_to_sign_org .= "?versionId=$verion_id";

    my $hmac = Digest::HMAC_SHA1->new($CONF->{aws_s3}->{aws_secret_access_key});
    $hmac->add($str_to_sign_org);
    my $sig = $hmac->b64digest . '=';
    
    my $header =
        [
         'Host' => $CONF->{aws_s3}->{host},
         'Date'=> $date_str,
         'Authorization'=>
         "AWS $CONF->{aws_s3}->{aws_access_key_id}:$sig"];
    
    my $req = HTTP::Request->new('DELETE', $url, $header);
    my $ua = LWP::UserAgent->new;
    my $res = $ua->request($req);

    if(not $res->is_success) {
        $LOGGER->error( $res->status_line );
        $LOGGER->error( Dumper($res) );
        return;
    }
    return $obj_key;
}


sub get_version_id {
    my ($obj_key) = @_;
    
    my $url = join('/',
                   "http://$CONF->{aws_s3}->{host}",
                   "$CONF->{aws_s3}->{bucket}",
                   $obj_key);
    my $date_str = time2str();
    my $str_to_sign_org =
        join("\n",
             'HEAD',
             '',
             '',
             $date_str,
             "/$CONF->{aws_s3}->{bucket}/$obj_key");

    my $hmac = Digest::HMAC_SHA1->new($CONF->{aws_s3}->{aws_secret_access_key});
    $hmac->add($str_to_sign_org);
    my $sig = $hmac->b64digest . '=';
    
    my $header =
        [
         'Host' => $CONF->{aws_s3}->{host},
         'Date'=> $date_str,
         'Authorization'=>
         "AWS $CONF->{aws_s3}->{aws_access_key_id}:$sig"];
    
    my $req = HTTP::Request->new('HEAD', $url, $header);
    my $ua = LWP::UserAgent->new;
    my $res = $ua->request($req);

    return $res->header('x-amz-version-id');
}

sub init_logger {
    my ($org_filepaths_list) = @_;

    if ($org_filepaths_list){
        my($filename, $dirs) = File::Basename::fileparse($org_filepaths_list);
        my $datetime = unixtime_to_str(time);
        $CONF->{log}->{'log4perl.appender.LOGFILE.filename'} =
            $CONF->{log}->{'log4perl.appender.LOGFILE.dir'} ."/".
            join(".",$filename,$datetime,"log");
    }
    
    Log::Log4perl::init($CONF->{log});
    my $logger = Log::Log4perl::get_logger("rootLogger");
    unless($logger){
        die "fail init_logger() $!";
    }
    return $logger;
}

sub unixtime_to_str {
    my ($unix_time) = @_;

    my ($sec,$min,$hour, $mday,$mon,$year,$wday,$yday,$isdst) =
        localtime($unix_time);
    return sprintf("%04d%02d%02dT%02d%02d%02d",
                   1900+$year,$mon+1,$mday,$hour,$min,$sec);
}

1;