end0tknr's kipple - 新web写経開発

http://d.hatena.ne.jp/end0tknr/ から移転しました

lwp , xml parse , rtmpdump , ffmpeg , flv->mp3 etc

#!/usr/local/bin/perl
use strict;
use utf8;
use File::Path;
use LWP::UserAgent;
use XML::Simple;
use Data::Dumper;

# Oldest broadcast date to fetch (compared numerically against each scramble
# entry's date); entries dated before it are skipped.
my $DL_LIMIT_DATE = '20120528';

# XML document whose <scramble> entries pair a broadcast date with the code
# that is used as a directory name in the playlist and stream URLs.
my $RANDOM_DIR_SRC = 'http://www47.atwiki.jp/jakago/pub/scramble.xml';

# Root URL of the programme pages; a channel key, a scramble code and
# $PLAYLIST_SRC are appended to it to locate each playlist.
my $CHANNEL_BASE_URL = 'http://www.nhk.or.jp/gogaku/english';

# Programmes to download. The keys serve as URL path segments and as local
# directory names / file-name prefixes.
my $CHANNELS = {
    basic1    => { name => '基礎英語1' },
    basic2    => { name => '基礎英語2' },
    basic3    => { name => '基礎英語3' },
    training  => { name => '英語5分間トレーニング' },
    kaiwa     => { name => 'ラジオ英会話' },
    business1 => { name => '入門ビジネス英語' },
    business2 => { name => '実践ビジネス英語' },
};

# File name of the playlist inside each channel/code directory, e.g.
#   www.nhk.or.jp/gogaku/english/basic1/0624ML6UEAWJDR/listdataflv.xml
my $PLAYLIST_SRC = 'listdataflv.xml';

# RTMP prefix of the streams; the scramble code and the FLV file name are
# appended to it. Example invocation:
#   $ rtmpdump -r "rtmp://flv9.nhk.or.jp/flv9/_definst_/flv:gogaku/streaming/flv/0158JU8Q6YFFG2/12-ek1-4252-591.flv" \
#              -o test.flv
my $FLV_PARENT_PATH =
    'rtmp://flv9.nhk.or.jp/flv9/_definst_/flv:gogaku/streaming/flv';

# External commands (program path followed by its fixed options).
my $RTMPDUMP_CMD = '/usr/local/bin/rtmpdump --quiet';
my $FFMPEG_CMD   = '/usr/local/bin/ffmpeg -loglevel quiet';

# Root directory under which <channel>/<date>/ directories are created.
my $SAVE_DIR = '/home/endo/dev_data/nhk_radio';

# Script entry point.
main();

# Download every broadcast listed in the scramble XML for every configured
# channel and leave one MP3 per playlist entry under
# $SAVE_DIR/<channel>/<date>/.
#
# For each channel and each scramble entry dated on or after $DL_LIMIT_DATE:
#   1. create the target directory if it is missing,
#   2. fetch and parse the channel's playlist XML,
#   3. for every <music> entry whose MP3 does not exist yet, fetch the FLV
#      with rtmpdump(), convert it with flv_to_mp3() and delete the FLV.
#
# Takes no arguments and returns nothing useful. Dies when the scramble XML or
# a playlist cannot be fetched, when a directory cannot be created, when the
# ffmpeg conversion fails, or when the temporary FLV cannot be removed. A
# failed rtmpdump only skips that entry.
sub main {
    my $random_def_xml = wget($RANDOM_DIR_SRC);
    die "can't get $RANDOM_DIR_SRC" unless $random_def_xml;

    # ForceArray keeps <scramble> an array ref even when the document holds a
    # single such element (XML::Simple would otherwise return a bare hash ref
    # and the @{...} below would die under strict refs). KeyAttr => [] stops
    # XML::Simple from folding that array into a hash keyed on name/key/id.
    my $random_def = XMLin($random_def_xml,
                           ForceArray => ['scramble'],
                           KeyAttr    => []);

    for my $channel (keys %$CHANNELS ){
        for my $scramble (@{ $random_def->{scramble} || [] }){
            next if $scramble->{date} < $DL_LIMIT_DATE;

            my $save_dir = join('/',
                                $SAVE_DIR,
                                $channel,
                                $scramble->{date});
            if(not -d $save_dir){
                die "can't mkpath $save_dir $!" unless mkpath($save_dir);
            }

            my $playlist_url = join('/',
                                    $CHANNEL_BASE_URL,
                                    $channel,
                                    $scramble->{code},
                                    $PLAYLIST_SRC);
            my $playlist_xml = wget($playlist_url);
            die "can't get $playlist_url" unless $playlist_xml;

            # Same single-element protection as above, for <music>.
            my $playlist = XMLin($playlist_xml,
                                 ForceArray => ['music'],
                                 KeyAttr    => []);
            for my $music ( @{ $playlist->{music} || [] } ){
                # Default name: <channel>_<file as given by the playlist>.
                my $save_flv_file = $channel.'_'.$music->{file};

                # NOTE(review): this captures the first digit and the digits
                # right after it from the first run of 2+ digits in hdate.
                # If hdate is meant to carry a separate month and day, confirm
                # this split is what was intended.
                if ($music->{hdate} =~ /(\d)(\d+)/){
                    $save_flv_file = $channel.'_'.sprintf("%02d%02d.flv",$1,$2);
                }
                my $save_flv_path = join('/',
                                         $save_dir,
                                         $save_flv_file);

                # Derive the MP3 path. If the name does not end in ".flv" the
                # substitution is a no-op; append ".mp3" then, so ffmpeg is
                # never given the same file as input and output and the
                # unlink below cannot delete the converted result.
                my $save_mp3_path = $save_flv_path;
                unless ($save_mp3_path =~ s/\.flv$/\.mp3/i) {
                    $save_mp3_path .= '.mp3';
                }
                next if -e $save_mp3_path;    # already converted earlier

                my $saved_flv_path = rtmpdump($scramble->{code},
                                              $music->{file},
                                              $save_flv_path);
                next unless($saved_flv_path);

                $save_mp3_path = flv_to_mp3($saved_flv_path,$save_mp3_path);
                unless($save_mp3_path){
                    my $msg = join(' ',
                                   "fail ffmpeg",
                                   "$channel/$scramble->{date}/$music->{file}",
                                   "$saved_flv_path");
                    die $msg;
                }

                # The FLV is only an intermediate file.
                unless( unlink($saved_flv_path)){
                    die "can't unlink $saved_flv_path";
                }
                print "DONE $save_mp3_path\n";
            }
        }
    }
}

# Extract the audio track of an FLV file into an MP3 file with ffmpeg
# ("-acodec copy", i.e. the audio stream is copied, not re-encoded).
#
# Arguments:
#   $flv_path - path of the input FLV file
#   $mp3_path - path of the output file
#
# Returns $mp3_path when ffmpeg exits with status 0, undef otherwise.
sub flv_to_mp3 {
    my ($flv_path,$mp3_path) = @_;

    # $FFMPEG_CMD holds the program followed by its fixed options, so split it
    # on whitespace and use the list form of system(): both paths are then
    # passed as single arguments and are never interpreted by a shell, so
    # spaces or shell metacharacters in them cannot break or alter the command.
    my @sys_cmd = (split(' ', $FFMPEG_CMD),
                   '-i', $flv_path,
                   '-acodec', 'copy',
                   $mp3_path);
    if( system(@sys_cmd) ){
        return undef;
    }
    return $mp3_path;
}

# Download one FLV stream with the rtmpdump command.
#
# Arguments:
#   $random_code   - directory component appended to $FLV_PARENT_PATH
#   $flv_file      - stream file name appended after $random_code
#   $save_flv_path - local path handed to rtmpdump's -o option
#
# Returns $save_flv_path when rtmpdump exits with status 0, undef otherwise.
sub rtmpdump {
    my ($random_code,$flv_file,$save_flv_path) = @_;

    my $flv_url = join('/',
                       $FLV_PARENT_PATH,
                       $random_code,
                       $flv_file);

    # $RTMPDUMP_CMD holds the program followed by its fixed options, so split
    # it on whitespace and use the list form of system(). The URL is built
    # from values read out of downloaded XML; passing it (and the output path)
    # as separate list elements keeps them away from the shell entirely.
    my @sys_cmd = (split(' ', $RTMPDUMP_CMD),
                   '-r', $flv_url,
                   '-o', $save_flv_path);
    if( system(@sys_cmd) ){
        return undef;
    }
    return $save_flv_path;
}

# Fetch $url with an HTTP GET request, using a 10-second timeout.
#
# Returns the response content on success. On failure the response's status
# line is printed to STDERR and undef is returned.
sub wget {
    my ($url) = @_;

    my $agent    = LWP::UserAgent->new(timeout => 10);
    my $response = $agent->request( HTTP::Request->new('GET', $url) );

    # Guard clause: report and bail out on anything but a successful response.
    unless ($response->is_success) {
        print STDERR $response->status_line, "\n";
        return undef;
    }

    return $response->content;
}