end0tknr's kipple - web写経開発

太宰府天満宮の狛犬って、妙にカワイイ

antlr4-python3-runtime for python3 による java source の parse / 構文解析

java (seasar2) の source を parse / 構文解析 する必要があり、 ANTLR + Python を利用した際のメモ。

qiita.com

と言っても、上記 url の写経です。 上記urlとの違いは、

BasicInfoListener (basic_info_listener.py) 内で ctx.getText() を呼ぶと、
空白や改行を含まない text しか取得できない為、
AstProcessor (ast_processor.py) 側で CommonTokenStream の getText() を使い、method body の source を取得している

という点かと思います。

dir 構成

以下に記載の通りですが 「ast_analyze_executor.py」「ast_processor.py」 「basic_info_listener.py」がメインで、 これらについては、以降に記載しています

$ tree
.
│     # src解析する際、command lineから以下のpyを実行
├── ast_analyze_executor.py  ★別途記載
│     # ANTLR から downloadしたjar
├── antlr-4.8-complete.jar
├── antlr-runtime-4.8.jar
│     # logとその設定ですが、自分でも怪しい
├── antlr.log
├── log_conf.yaml
├── ast
│   ├── __init__.py
│   ├── ast_processor.py        ★別途記載
│   ├── basic_info_listener.py  ★別途記載
│   ├── JavaLexer.py
│   ├── JavaParserListener.py
│   └── JavaParser.py
│    # ANTLR の github から downloadした文法file
├── grammar
│   ├── Java8Lexer.g4
│   ├── Java8Lexer.interp
│   ├── Java8Lexer.py
│   ├── Java8Lexer.tokens
│   ├── Java8Parser.g4
│   ├── Java8Parser.interp
│   ├── Java8ParserListener.py
│   ├── Java8Parser.py
│   ├── Java8Parser.tokens
│   ├── JavaLexer.g4
│   ├── JavaLexer.interp
│   ├── JavaLexer.py
│   ├── JavaLexer.tokens
│   ├── JavaParser.g4
│   ├── JavaParser.interp
│   ├── JavaParserListener.py
│   ├── JavaParser.py
│   └── JavaParser.tokens
│ # test解析に使用した
│ # 「Seasar2徹底入門 - SAStruts/S2JDBC対応 - 翔泳社」のサンプルコード
└── CsvAction.java

python script

ast_analyze_executor.py

# -*- coding: utf-8 -*-

import logging.config
from ast.ast_processor import AstProcessor
from ast.basic_info_listener import BasicInfoListener
import sys
import yaml
import pprint

log_conf = './log_conf.yaml'  # logging config path (author's note: not sure this log setup is right)

def main():
    """Parse the Java file named on the command line and print the result.

    Loads the logging configuration from the YAML file at ``log_conf``, runs
    ``AstProcessor`` with a fresh ``BasicInfoListener`` over ``sys.argv[1]``
    and pretty-prints the returned ``ast_info`` to stdout.

    Exits with a usage message when no source file path is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: python3 ast_analyze_executor.py <java source file>')

    # Use a context manager so the config file handle is closed right away.
    # yaml.safe_load(stream) is the shorthand for load(stream, SafeLoader).
    with open(log_conf) as conf_file:
        logging.config.dictConfig(yaml.safe_load(conf_file))

    target_file_path = sys.argv[1]
    # AstProcessor receives the logging *module*; it calls getLogger() itself.
    ast_info = \
        AstProcessor(logging, BasicInfoListener()).execute(target_file_path)

    pprint.pprint(ast_info, width=80)  # wrap the dump at 80 columns
#    pprint.pprint(ast_info)


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

ast/ast_processor.py

# -*- coding: utf-8 -*-
from antlr4 import FileStream, CommonTokenStream, ParseTreeWalker
from ast.JavaLexer import JavaLexer
from ast.JavaParser import JavaParser
import pprint

# Encoding handed to FileStream when the Java source to parse is read.
source_encode = "utf-8"

class AstProcessor:
    """Lex, parse and walk one Java source file with a given listener.

    The listener is expected to expose an ``ast_info`` dict whose
    ``'methods'`` entries each carry a ``'body_pos'`` dict with
    ``'start_index'`` / ``'stop_index'`` token indexes.
    """

    def __init__(self, logging, listener):
        """Keep the logging module, a class-named logger and the listener."""
        self.listener = listener
        self.logging = logging
        self.logger = logging.getLogger(type(self).__name__)

    def execute(self, input_source):
        """Parse ``input_source`` and return the listener's ``ast_info``.

        Every method entry additionally receives ``'body_src'``: the text of
        its body taken from the token stream.  The text is read here rather
        than via getText() inside the listener because the latter yields text
        without whitespace and line breaks.
        """
        tokens = CommonTokenStream(
            JavaLexer(FileStream(input_source, encoding=source_encode)))
        tree = JavaParser(tokens).compilationUnit()
        ParseTreeWalker().walk(self.listener, tree)

        collected = self.listener.ast_info
        for entry in collected['methods']:
            # Trace each method record before its body text is attached.
            pprint.pprint(entry)

            span = entry['body_pos']
            entry['body_src'] = tokens.getText(span['start_index'],
                                               span['stop_index'])
        return collected

ast/basic_info_listener.py

# -*- coding: utf-8 -*-
from ast.JavaParserListener import JavaParserListener
from ast.JavaParser import JavaParser
import re
import sys
import pprint

class BasicInfoListener(JavaParserListener):
    """Parse-tree listener that gathers basic facts about a Java source file.

    While the tree is walked, ``ast_info`` is filled with the package name,
    imports, class name, class annotations / modifiers, the ``extends`` and
    ``implements`` targets, and one record per field and per method.

    Annotations and modifiers are reported before the declaration they
    belong to, so they are buffered in ``tmp_annotation`` / ``tmp_modifier``
    and attached to the next class, field or method declaration entered.
    """

    def __init__(self):
        # Everything collected during the walk; read by the caller afterwards.
        self.ast_info = {'packageName': '',
                         'className'  : '',
                         'annotation' : [],
                         'modifier'   : [],
                         'implements' : [],
                         'extends'    : '',
                         'imports'    : [],
                         'fields'     : [],
                         'methods'    : []}
        # Annotations / modifiers seen since the last consumed declaration.
        self.tmp_annotation = []
        self.tmp_modifier   = []

    def enterPackageDeclaration(self, ctx):
        """Record the package name and echo it to stdout as a trace."""
        self.ast_info['packageName'] = ctx.qualifiedName().getText()
        pprint.pprint(self.ast_info['packageName'])

    def enterImportDeclaration(self, ctx):
        """Append the imported qualified name to ``ast_info['imports']``."""
        self.ast_info['imports'].append(ctx.qualifiedName().getText())

    def enterClassOrInterfaceModifier(self, ctx):
        """Buffer one modifier or annotation for the next declaration.

        Text starting with ``@`` is treated as an annotation, anything else
        as a modifier.  Each item is also echoed to stdout as a trace.
        """
        text = ctx.getText()
        pprint.pprint('enterClassOrInterfaceModifier ' + text)
        if text.startswith('@'):
            self.tmp_annotation.append(text)
        else:
            self.tmp_modifier.append(text)

    def enterClassDeclaration(self, ctx):
        """Record class name, ``extends`` target and ``implements`` list.

        The second child is taken as the class name.  The remaining children
        are scanned for the ``extends`` / ``implements`` keywords instead of
        dispatching on a fixed child count, so declarations with extra
        children (for example type parameters) no longer terminate the
        program.

        NOTE(review): every class declaration entered (nested ones included)
        writes to the same ``ast_info`` keys, as before -- confirm whether
        only the top-level class should be recorded.
        """
        self._attach_pending(self.ast_info)

        children = self._children(ctx)
        self.ast_info['className'] = children[1].getText()

        # Pair each child with its successor; the last child (assumed to be
        # the class body -- TODO confirm against the grammar) is therefore
        # never inspected as a keyword.
        for keyword, operand in zip(children, children[1:]):
            text = keyword.getText()
            if text == 'extends':
                # First child of the type node holds the super class name.
                self.ast_info['extends'] = operand.getChild(0).getText()
            elif text == 'implements':
                self.ast_info['implements'] = \
                    self.parse_implements_block(operand)

    def enterFieldDeclaration(self, ctx):
        """Append a field record built from the first two children.

        Child 0 is stored as ``fieldType`` and child 1 as
        ``fieldDefinition``; buffered annotations / modifiers are attached.
        """
        field = {'fieldType'      : ctx.getChild(0).getText(),
                 'fieldDefinition': ctx.getChild(1).getText(),
                 'annotation'     : [],
                 'modifier'       : []}
        self._attach_pending(field)
        self.ast_info['fields'].append(field)

    def enterMemberDeclaration(self, ctx):
        """Do nothing; kept as an explicit hook for tracing members."""
        pass

    def exitMemberDeclaration(self, ctx):
        """Drop buffered annotations / modifiers that nothing consumed.

        Only class, field and method declarations consume the buffers.  For
        any other member kind (presumably constructors and the like --
        confirm against the grammar) the buffered items would otherwise be
        attached to the next field or method.
        """
        self.tmp_annotation = []
        self.tmp_modifier = []

    def enterMethodDeclaration(self, ctx):
        """Append a method record, including the position of its body.

        Child 0 is stored as the return type, child 1 as the method name and
        child 2 is parsed as the parameter block.  ``body_pos`` keeps line,
        column and token index of the first and last token of the last
        child, so the caller can fetch the body text from the token stream.
        """
        body = ctx.getChild(ctx.getChildCount() - 1)

        method_info = {'returnType': ctx.getChild(0).getText(),
                       'methodName': ctx.getChild(1).getText(),
                       'annotation': [],
                       'modifier'  : [],
                       'params': self.parse_method_params_block(
                           ctx.getChild(2)),
                       'body_pos' : {
                           'start_line'  : body.start.line,
                           'start_column': body.start.column,
                           'start_index' : body.start.tokenIndex,
                           'stop_line'   : body.stop.line,
                           'stop_column' : body.stop.column,
                           'stop_index'  : body.stop.tokenIndex}}
        self._attach_pending(method_info)
        self.ast_info['methods'].append(method_info)

    def parse_implements_block(self, ctx):
        """Return the text of every even-indexed child of ``ctx``.

        Odd-indexed children are skipped (presumably the ``,`` separators of
        the type list -- confirm against the grammar).
        """
        return [ctx.getChild(i).getText()
                for i in range(0, ctx.getChildCount(), 2)]

    def parse_method_params_block(self, ctx):
        """Return ``[{'paramType': ..., 'paramName': ...}, ...]`` for ``ctx``.

        A block with exactly three children is read as "open paren,
        parameter list, close paren"; any other child count yields an empty
        list.  Within the list only even-indexed children are parameters.
        """
        if ctx.getChildCount() != 3:
            return []
        param_list = ctx.getChild(1)
        return [self._parse_param(param_list.getChild(i))
                for i in range(0, param_list.getChildCount(), 2)]

    def _parse_param(self, param):
        """Build one parameter record from a single parameter node.

        The name is the last child and the type the child before it, so
        leading children (``final``, annotations) do not shift the result.
        A ``...`` directly before the name is folded into the type.
        """
        last = param.getChildCount() - 1
        param_name = param.getChild(last).getText()
        param_type = param.getChild(last - 1).getText()
        if param_type == '...':
            param_type = param.getChild(last - 2).getText() + '...'
        return {'paramType': param_type,
                'paramName': param_name}

    def _attach_pending(self, target):
        """Move buffered annotations / modifiers into ``target``'s lists."""
        target['annotation'] += self.tmp_annotation
        target['modifier']   += self.tmp_modifier
        self.tmp_annotation = []
        self.tmp_modifier = []

    @staticmethod
    def _children(ctx):
        """Return the direct children of ``ctx`` as a list."""
        return [ctx.getChild(i) for i in range(ctx.getChildCount())]

実行結果

$ /usr/local/python3/bin/python3 ast_analyze_executor.py CsvAction.java
'org.seasar.sastruts.example.action'
'enterClassOrInterfaceModifier public'
'enterClassOrInterfaceModifier @Resource'
'enterClassOrInterfaceModifier @ActionForm'
'enterClassOrInterfaceModifier protected'
'enterClassOrInterfaceModifier @Resource'
'enterClassOrInterfaceModifier protected'
'enterClassOrInterfaceModifier @Resource'
'enterClassOrInterfaceModifier protected'
'enterClassOrInterfaceModifier @Resource'
'enterClassOrInterfaceModifier protected'
'enterClassOrInterfaceModifier public'
'enterClassOrInterfaceModifier public'
'enterClassOrInterfaceModifier @Execute(validator=false)'
'enterClassOrInterfaceModifier public'
'enterClassOrInterfaceModifier @Execute(validator=true,input="index.jsp")'
'enterClassOrInterfaceModifier public'
{'annotation': ['@Execute(validator=false)'],
 'body_pos': {'start_column': 45,
              'start_index': 310,
              'start_line': 48,
              'stop_column': 4,
              'stop_index': 380,
              'stop_line': 62},
 'methodName': 'index',
 'modifier': ['public'],
 'params': [],
 'returnType': 'String'}
{'annotation': ['@Execute(validator=true,input="index.jsp")'],
 'body_pos': {'start_column': 42,
              'start_index': 407,
              'start_line': 65,
              'stop_column': 4,
              'stop_index': 583,
              'stop_line': 90},
 'methodName': 'read',
 'modifier': ['public'],
 'params': [],
 'returnType': 'String'}
{'annotation': [],
 'className': 'CsvAction',
 'extends': '',
 'fields': [{'annotation': ['@Resource', '@ActionForm'],
             'fieldDefinition': 'csvForm',
             'fieldType': 'CsvForm',
             'modifier': ['protected']},
            {'annotation': ['@Resource'],
             'fieldDefinition': 'csvService',
             'fieldType': 'CsvService',
             'modifier': ['protected']},
            {'annotation': ['@Resource'],
             'fieldDefinition': 's2csvCtrlFactory',
             'fieldType': 'S2CSVCtrlFactory',
             'modifier': ['protected']},
            {'annotation': ['@Resource'],
             'fieldDefinition': 'request',
             'fieldType': 'HttpServletRequest',
             'modifier': ['protected']},
            {'annotation': [],
             'fieldDefinition': 'deptList',
             'fieldType': 'List<DeptCsv>',
             'modifier': ['public']},
            {'annotation': [],
             'fieldDefinition': 'csv',
             'fieldType': 'String',
             'modifier': ['public']}],
 'implements': [],
 'imports': ['java.io.IOException',
             'java.io.InputStream',
             'java.io.StringWriter',
             'java.util.List',
             'javax.annotation.Resource',
             'javax.servlet.http.HttpServletRequest',
             'org.apache.commons.io.IOUtils',
             'org.apache.struts.action.ActionErrors',
             'org.seasar.framework.util.StringUtil',
             'org.seasar.s2csv.csv.S2CSVWriteCtrl',
             'org.seasar.s2csv.csv.factory.S2CSVCtrlFactory',
             'org.seasar.sastruts.example.csv.DeptCsv',
             'org.seasar.sastruts.example.form.CsvForm',
             'org.seasar.sastruts.example.service.CsvService',
             'org.seasar.struts.annotation.ActionForm',
             'org.seasar.struts.annotation.Execute',
             'org.seasar.struts.util.ActionMessagesUtil'],
 'methods': [{'annotation': ['@Execute(validator=false)'],
              'body_pos': {'start_column': 45,
                           'start_index': 310,
                           'start_line': 48,
                           'stop_column': 4,
                           'stop_index': 380,
                           'stop_line': 62},
              'body_src': '{\r\n'
                          '\r\n'
                          '        InputStream in = Thread.currentThread()\r\n'
                          '            .getContextClassLoader()\r\n'
                          '            '
                          '.getResourceAsStream("/data/dept.csv");\r\n'
                          '\r\n'
                          '        try {\r\n'
                          '            this.csvForm.csvData = '
                          'IOUtils.toString(in, "UTF-8");\r\n'
                          '\r\n'
                          '        } finally {\r\n'
                          '            IOUtils.closeQuietly(in);\r\n'
                          '        }\r\n'
                          '\r\n'
                          '        return "index.jsp";\r\n'
                          '    }',
              'methodName': 'index',
              'modifier': ['public'],
              'params': [],
              'returnType': 'String'},
             {'annotation': ['@Execute(validator=true,input="index.jsp")'],
              'body_pos': {'start_column': 42,
                           'start_index': 407,
                           'start_line': 65,
                           'stop_column': 4,
                           'stop_index': 583,
                           'stop_line': 90},
              'body_src': '{\r\n'
                          '        ActionErrors errors = new '
                          'ActionErrors();\r\n'
                          '\r\n'
                          '        '
                          'if(StringUtil.isNotEmpty(this.csvForm.processAll)){\r\n'
                          '            // 一括で処理\r\n'
                          '            this.deptList = '
                          'csvService.parseAll(this.csvForm.csvData, '
                          'errors);\r\n'
                          '        } else {\r\n'
                          '            // 1件ずつ処理\r\n'
                          '            this.deptList = '
                          'csvService.parse(this.csvForm.csvData, errors);\r\n'
                          '        }\r\n'
                          '\r\n'
                          '        if(!errors.isEmpty()){\r\n'
                          '            ActionMessagesUtil.addErrors(request, '
                          'errors);\r\n'
                          '            return "index.jsp";\r\n'
                          '        }\r\n'
                          '\r\n'
                          '        StringWriter writer = new '
                          'StringWriter();\r\n'
                          '\r\n'
                          '        S2CSVWriteCtrl<DeptCsv> controller\r\n'
                          '            = '
                          's2csvCtrlFactory.getWriteController(DeptCsv.class, '
                          'writer);\r\n'
                          '\r\n'
                          '        controller.writeAll(this.deptList);\r\n'
                          '        this.csv = writer.toString();\r\n'
                          '\r\n'
                          '        return "read.jsp";\r\n'
                          '    }',
              'methodName': 'read',
              'modifier': ['public'],
              'params': [],
              'returnType': 'String'}],
 'modifier': ['public'],
 'packageName': 'org.seasar.sastruts.example.action'}