The data list on e-stat, the Japanese government statistics portal, can perhaps be downloaded from the URL above, but I couldn't figure out how, so I scraped it with BeautifulSoup for Python:
```python
#!python3
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import sys
import time

base_url = "https://www.e-stat.go.jp"
search_path = "/stat-search"
max_page_no = 13

conf = {
    "retry_limit": 10,   # max attempts per page
    "retry_sleep": 10,   # seconds to wait between retries
}


def main():
    # Walk the paginated search results and collect every dataset entry.
    data_list = []
    search_base_url = base_url + search_path
    for page_no in range(1, max_page_no + 1):
        print("page:", page_no, file=sys.stderr, flush=True)
        data_list.extend(get_data_list(search_base_url, page_no))
    disp_data_list(data_list)


def disp_data_list(data_items):
    # Print one tab-separated line per dataset: id, name, url_1, url_2.
    for data_item in data_items:
        disp_str = "\t".join([
            data_item["id"],
            data_item["name"],
            data_item["url_1"],
            data_item["url_2"],
        ])
        print(disp_str)


def get_data_list(search_base_url, page_no):
    # Fetch one result page and pull the id, title, and two links
    # out of each <li> in the search-result list.
    http_result = get_http_requests(search_base_url, page_no)
    if not http_result:
        return []
    soup = BeautifulSoup(http_result.content, "html.parser")
    data_lis = soup.select("ul.stat-search_result-list li")
    data_list = []
    for data_li in data_lis:
        data_id = data_li["data-value"]
        spans = data_li.select("div.stat-toukei_name_items span.stat-title")
        data_name = spans[0].text.strip()
        a_hrefs_1 = data_li.select("div.stat-search_result-item1-main a")
        data_url_1 = base_url + a_hrefs_1[0]["href"]
        a_hrefs_2 = data_li.select("div.stat-pc_detail a")
        data_url_2 = base_url + a_hrefs_2[0]["href"]
        data_list.append({
            "id": data_id,
            "name": data_name,
            "url_1": data_url_1,
            "url_2": data_url_2,
        })
    return data_list


def get_http_requests(search_base_url, page_no):
    # GET the page, retrying up to retry_limit times on network errors.
    result_url = search_base_url + "?page=" + str(page_no)
    for _ in range(conf["retry_limit"]):
        try:
            return requests.get(result_url)
        except requests.RequestException:
            print("WARN retry requests.get()", result_url, file=sys.stderr)
            time.sleep(conf["retry_sleep"])
    print("ERROR requests.get()", result_url, file=sys.stderr)
    return None


if __name__ == "__main__":
    main()
```
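The script prints tab-separated lines to stdout, so you can simply redirect the output to a file. If you would rather write a TSV file directly, here is a minimal sketch using the standard `csv` module; the function name `save_data_list` and the file name `data_list.tsv` are my own choices, and it assumes the same dict keys as `data_list` above:

```python
import csv


def save_data_list(data_items, path="data_list.tsv"):
    # Minimal sketch (not part of the script above): write the collected
    # entries to a tab-separated file with a header row. The file name
    # "data_list.tsv" is arbitrary.
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter="\t")
        writer.writerow(["id", "name", "url_1", "url_2"])
        for item in data_items:
            writer.writerow([item["id"], item["name"],
                             item["url_1"], item["url_2"]])
```

Calling `save_data_list(data_list)` at the end of `main()` instead of `disp_data_list(data_list)` would produce the same four columns as the stdout version, plus a header row.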