end0tknr's kipple - web写経開発

太宰府天満宮の狛犬って、妙にカワイイ

feedparser for python で https://jvndb.jvn.jp/ja/feed/ にある cveを集計

#!/usr/bin/python3
# -*- coding: utf-8 -*-

from functools import cmp_to_key

import io
import urllib.request
import feedparser
import re
import sys

# refer to https://jvndb.jvn.jp/ja/feed/
# URL template for the per-year JVNDB RDF feed; "%d" is filled with the year.
feed_url_tmpl = "https://jvndb.jvn.jp/ja/rss/years/jvndb_%d.rdf"

# Patterns applied to an entry title by find_mw_name(); group 1 is the text
# in front of the marker and is used as the product name.  ".+" is greedy,
# so group 1 extends up to the LAST occurrence of the marker in the title.
# Pattern 1: marker is "における" or "において" (roughly "in ...").
re_compile_1 = re.compile("^(.+)(における|において)")
# Pattern 2: fallback, marker is the bare particle "に".
re_compile_2 = re.compile("^(.+)に")


def main():
    """Count JVNDB feed entries per product name for each year and print them.

    For every year in the hard-coded list the yearly RDF feed is parsed with
    feedparser, each entry title is reduced to a product name by
    find_mw_name(), and the occurrences are tallied.  The tallies are written
    to stdout as tab-separated ``year<TAB>name<TAB>count`` lines, sorted per
    year with cmp_func (count descending, then name ascending).

    Progress messages and feed warnings go to stderr so that stdout contains
    only the tab-separated result.
    """
    years = [2017, 2018, 2019, 2020, 2021]
    cve_counts = {}

    for year in years:
        req_url = feed_url_tmpl % (year)
        print("parsing... %s" % (req_url), file=sys.stderr)
        feed = feedparser.parse(req_url)

        # feedparser does not raise on download/parse problems; it sets the
        # "bozo" flag instead.  Report it so that a year with missing rows is
        # not mistaken for a year without entries.  Processing continues
        # because a flagged feed may still carry usable entries.
        if feed.get("bozo"):
            print("warning: feed problem reported for %s: %s"
                  % (req_url, feed.get("bozo_exception")),
                  file=sys.stderr)

        counts = cve_counts[year] = {}
        for entry in feed['entries']:
            mw_name = find_mw_name(entry.title)
            counts[mw_name] = counts.get(mw_name, 0) + 1

    for year in years:
        ranked = sorted(cve_counts[year].items(), key=cmp_to_key(cmp_func))
        for mw_name, count in ranked:
            print("%d\t%s\t%d" % (year, mw_name, count))

        
def find_mw_name(cve_title):
    """Extract the product name that leads a feed entry title.

    The module-level patterns are tried in order (re_compile_1, then
    re_compile_2); the first one that matches supplies the name, taken from
    its first capture group with surrounding whitespace removed.

    Returns "?" when neither pattern matches.
    """
    for pattern in (re_compile_1, re_compile_2):
        found = pattern.search(cve_title)
        if found is not None:
            return found.group(1).strip()

    return "?"

def cmp_func(a, b):
    """Old-style comparator for (name, count) pairs, for use with cmp_to_key.

    Pairs with a larger count (index 1) sort first.  When the counts are
    equal the decision is delegated to cmp_func_2, which compares the names.

    Returns -1, 0 or 1.
    """
    count_a, count_b = a[1], b[1]

    if count_a != count_b:
        # Descending by count: the bigger count is "smaller" for the sort.
        return -1 if count_a > count_b else 1

    return cmp_func_2(a, b)

def cmp_func_2(a, b):
    """Old-style comparator on the first element (index 0) of two pairs.

    Returns 0 when the first elements are equal, -1 when a's is smaller,
    and 1 otherwise, i.e. ascending order.
    """
    name_a, name_b = a[0], b[0]

    if name_a == name_b:
        return 0

    return -1 if name_a < name_b else 1


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()