java (seasar2) の source を parse / 構文解析 する必要があり、 ANTLR + Python を利用した際のメモ。
と言っても、上記 url の写経です。 上記urlとの違いは、
BasicInfoListener (basic_info_listener.py) 内で getText() すると空白や改行が除かれた text が取得される為、AstProcessor (ast_processor.py) 側で CommonTokenStream から getText() している
という点かと思います。
dir 構成
構成は以下の通りです。「ast_analyze_executor.py」「ast_processor.py」「basic_info_listener.py」がメインで、これらの内容は後述します。
$ tree
.
│  # src解析する際、command lineから以下のpyを実行
├── ast_analyze_executor.py  ★別途記載
│  # ANTLR から downloadしたjar
├── antlr-4.8-complete.jar
├── antlr-runtime-4.8.jar
│  # logとその設定ですが、自分でも怪しい
├── antlr.log
├── log_conf.yaml
├── ast
│   ├── __init__.py
│   ├── ast_processor.py  ★別途記載
│   ├── basic_info_listener.py  ★別途記載
│   ├── JavaLexer.py
│   ├── JavaParserListener.py
│   └── JavaParser.py
│  # ANTLR の github から downloadした文法file
├── grammar
│   ├── Java8Lexer.g4
│   ├── Java8Lexer.interp
│   ├── Java8Lexer.py
│   ├── Java8Lexer.tokens
│   ├── Java8Parser.g4
│   ├── Java8Parser.interp
│   ├── Java8ParserListener.py
│   ├── Java8Parser.py
│   ├── Java8Parser.tokens
│   ├── JavaLexer.g4
│   ├── JavaLexer.interp
│   ├── JavaLexer.py
│   ├── JavaLexer.tokens
│   ├── JavaParser.g4
│   ├── JavaParser.interp
│   ├── JavaParserListener.py
│   ├── JavaParser.py
│   └── JavaParser.tokens
│  # test解析に使用した
│  # 「Seasar2徹底入門 - SAStruts/S2JDBC対応 - 翔泳社」のサンプルコード
└── CsvAction.java
python script
ast_analyze_executor.py
# -*- coding: utf-8 -*-
"""Command-line entry point: analyze one Java source file and print the result.

Usage: python3 ast_analyze_executor.py <java_source_file>
"""
import logging.config
# NOTE: `ast` below is the local package directory that sits next to this
# script, not the standard-library module of the same name. This relies on the
# script's own directory taking precedence on the import path.
from ast.ast_processor import AstProcessor
from ast.basic_info_listener import BasicInfoListener
import sys
import yaml
import pprint

# Logging configuration file, resolved against the current working directory.
# NOTE(review): the original author flagged this logging setup as unverified.
log_conf = './log_conf.yaml'


def main():
    """Configure logging, analyze the file named by argv[1], print the summary.

    Exits with a usage message (non-zero status) when no file path is given,
    instead of failing with a bare IndexError.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <java_source_file>'.format(sys.argv[0]))

    # The context manager closes the config file; the original left the
    # handle open. yaml.safe_load is the shorthand for SafeLoader.
    with open(log_conf) as conf_file:
        logging.config.dictConfig(yaml.safe_load(conf_file))

    target_file_path = sys.argv[1]
    # AstProcessor takes the logging module itself and derives its own logger.
    ast_info = AstProcessor(logging, BasicInfoListener()).execute(
        target_file_path)

    # Pretty-print the collected information, wrapped at 80 columns.
    print(pprint.pformat(ast_info, width=80))


if __name__ == "__main__":
    main()
ast.ast_processor.py
# -*- coding: utf-8 -*-
from antlr4 import FileStream, CommonTokenStream, ParseTreeWalker
from ast.JavaLexer import JavaLexer
from ast.JavaParser import JavaParser
import pprint

# Encoding used to read the Java source file being analyzed.
source_encode = "utf-8"


class AstProcessor:
    """Run the Java lexer/parser over a file and let a listener collect info.

    After the walk, each collected method entry is extended with the source
    text of its body, taken from the token stream.
    """

    def __init__(self, logging, listener):
        """Store the logging module, a logger named after this class, and the listener.

        Args:
            logging: object exposing ``getLogger`` (the caller passes the
                ``logging`` module itself).
            listener: parse-tree listener exposing an ``ast_info`` dict with a
                ``'methods'`` list after the walk.
        """
        self.logging = logging
        self.logger = logging.getLogger(self.__class__.__name__)
        self.listener = listener

    def execute(self, input_source):
        """Parse ``input_source`` and return the listener's ``ast_info`` dict.

        Args:
            input_source: path of the Java source file to analyze.

        Returns:
            The listener's ``ast_info``; every entry of ``ast_info['methods']``
            additionally carries ``'body_src'``.
        """
        char_stream = FileStream(input_source, encoding=source_encode)
        lexer = JavaLexer(char_stream)
        tokens = CommonTokenStream(lexer)
        tree = JavaParser(tokens).compilationUnit()
        ParseTreeWalker().walk(self.listener, tree)

        summary = self.listener.ast_info

        # Per the original author's note: getText() inside the listener yields
        # text with whitespace and newlines stripped, so the method body text
        # is fetched here from the CommonTokenStream by token index instead.
        for entry in summary['methods']:
            pprint.pprint(entry)
            span = entry['body_pos']
            first_token = span['start_index']
            last_token = span['stop_index']
            entry['body_src'] = tokens.getText(first_token, last_token)

        return summary
ast.basic_info_listener.py
# -*- coding: utf-8 -*-
from ast.JavaParserListener import JavaParserListener
from ast.JavaParser import JavaParser
import re
import sys
import pprint


class BasicInfoListener(JavaParserListener):
    """Parse-tree listener that gathers basic facts about a Java class.

    Results accumulate in ``self.ast_info`` (package, imports, class name,
    class annotations/modifiers, extends/implements, fields, methods).

    Annotations and modifiers are seen *before* the declaration they belong
    to, so they are buffered in ``tmp_annotation`` / ``tmp_modifier`` and
    attached to the next class, field or method declaration.
    """

    def __init__(self):
        self.ast_info = {'packageName': '',
                         'className': '',
                         'annotation': [],
                         'modifier': [],
                         'implements': [],
                         'extends': '',
                         'imports': [],
                         'fields': [],
                         'methods': []}
        # Annotations / modifiers waiting for the declaration that owns them.
        self.tmp_annotation = []
        self.tmp_modifier = []

    def _take_pending(self):
        """Return the buffered (annotations, modifiers) and clear both buffers."""
        annotations, modifiers = self.tmp_annotation, self.tmp_modifier
        self.tmp_annotation = []
        self.tmp_modifier = []
        return annotations, modifiers

    def enterPackageDeclaration(self, ctx):
        """Record the package name (and echo it as a debug trace)."""
        self.ast_info['packageName'] = ctx.qualifiedName().getText()
        pprint.pprint(self.ast_info['packageName'])

    def enterImportDeclaration(self, ctx):
        """Append the imported qualified name to ``ast_info['imports']``."""
        self.ast_info['imports'].append(ctx.qualifiedName().getText())

    def enterClassOrInterfaceModifier(self, ctx):
        """Buffer one annotation (text starting with '@') or modifier."""
        text = ctx.getText()
        # Debug trace kept from the original: "<handler name> <modifier text>".
        pprint.pprint('enterClassOrInterfaceModifier' + ' ' + text)
        if text.startswith('@'):
            self.tmp_annotation.append(text)
        else:
            self.tmp_modifier.append(text)

    def enterClassDeclaration(self, ctx):
        """Record class name, pending annotations/modifiers, extends, implements.

        The children are scanned for the ``extends`` / ``implements`` keywords
        instead of relying on an exact child count, so extra children between
        the class name and the body no longer abort the whole process.
        NOTE(review): the layout assumed is
        ``class Name [...] [extends Type] [implements TypeList] body``;
        the extra children are presumably type parameters of a generic
        class — confirm against the grammar in use.

        Raises:
            ValueError: if the node has fewer than the three children
                (keyword, name, body) a class declaration needs.
        """
        annotations, modifiers = self._take_pending()
        self.ast_info['annotation'] += annotations
        self.ast_info['modifier'] += modifiers

        child_count = ctx.getChildCount()
        if child_count < 3:
            raise ValueError(
                'unexpected classDeclaration child count: ' + str(child_count))

        # child 0 is the `class` keyword, child 1 the class name,
        # the last child the class body.
        self.ast_info['className'] = ctx.getChild(1).getText()

        body_index = child_count - 1
        index = 2
        while index < body_index:
            keyword = ctx.getChild(index).getText()
            has_operand = index + 1 < body_index
            if keyword == 'extends' and has_operand:
                # First child of the type node, as in the original code.
                self.ast_info['extends'] = \
                    ctx.getChild(index + 1).getChild(0).getText()
                index += 2
            elif keyword == 'implements' and has_operand:
                self.ast_info['implements'] = \
                    self.parse_implements_block(ctx.getChild(index + 1))
                index += 2
            else:
                # Anything else between name and body is skipped.
                index += 1

    def enterFieldDeclaration(self, ctx):
        """Append a field entry, attaching the pending annotations/modifiers."""
        annotations, modifiers = self._take_pending()
        self.ast_info['fields'].append(
            {'fieldType': ctx.getChild(0).getText(),
             'fieldDefinition': ctx.getChild(1).getText(),
             'annotation': annotations,
             'modifier': modifiers})

    def enterMemberDeclaration(self, ctx):
        """Intentionally a no-op (was used for ad-hoc debugging output)."""
        pass

    def exitMemberDeclaration(self, ctx):
        """Drop modifiers/annotations nobody claimed.

        Members without a dedicated handler here (e.g. constructors) would
        otherwise leave their modifiers buffered, and the next field or method
        would wrongly inherit them.
        """
        self._take_pending()

    def enterMethodDeclaration(self, ctx):
        """Append a method entry: return type, name, params, body position.

        The body text itself is not read here; only the token indices and
        line/column positions are stored under ``'body_pos'`` so the caller
        can fetch the text from the CommonTokenStream.
        """
        annotations, modifiers = self._take_pending()
        # The last child is the method body.
        body_ctx = ctx.getChild(ctx.getChildCount() - 1)
        first_token, last_token = body_ctx.start, body_ctx.stop

        self.ast_info['methods'].append(
            {'returnType': ctx.getChild(0).getText(),
             'methodName': ctx.getChild(1).getText(),
             'annotation': annotations,
             'modifier': modifiers,
             'params': self.parse_method_params_block(ctx.getChild(2)),
             'body_pos': {'start_line': first_token.line,
                          'start_column': first_token.column,
                          'start_index': first_token.tokenIndex,
                          'stop_line': last_token.line,
                          'stop_column': last_token.column,
                          'stop_index': last_token.tokenIndex}})

    def parse_implements_block(self, ctx):
        """Return the type names of an implements list.

        Children alternate between a type and a separator, so only the
        even-indexed children are read.
        """
        return [ctx.getChild(i).getText()
                for i in range(0, ctx.getChildCount(), 2)]

    def parse_method_params_block(self, ctx):
        """Return ``[{'paramType': ..., 'paramName': ...}, ...]`` for a method.

        ``ctx`` is the parenthesised parameter node: exactly three children
        ("(", list, ")") means parameters exist; otherwise the result is empty.
        """
        if ctx.getChildCount() != 3:
            return []
        param_list = ctx.getChild(1)
        # Even-indexed children are parameters; odd-indexed ones separators.
        return [self._parse_param(param_list.getChild(i))
                for i in range(0, param_list.getChildCount(), 2)]

    @staticmethod
    def _parse_param(param_ctx):
        """Build one parameter entry from a single parameter node.

        The name is the last child and the type is the child just before it,
        so leading modifiers/annotations (e.g. ``final``) are no longer
        mistaken for the type. For a varargs parameter the ``...`` token is
        folded into the type (``String...``).
        """
        texts = [param_ctx.getChild(i).getText()
                 for i in range(param_ctx.getChildCount())]
        param_name = texts[-1]
        leading = texts[:-1]
        if '...' in leading:
            param_type = leading[:leading.index('...')][-1] + '...'
        else:
            param_type = leading[-1]
        return {'paramType': param_type, 'paramName': param_name}
実行結果
$ /usr/local/python3/bin/python3 ast_analyze_executor.py CsvAction.java 'org.seasar.sastruts.example.action' 'enterClassOrInterfaceModifier public' 'enterClassOrInterfaceModifier @Resource' 'enterClassOrInterfaceModifier @ActionForm' 'enterClassOrInterfaceModifier protected' 'enterClassOrInterfaceModifier @Resource' 'enterClassOrInterfaceModifier protected' 'enterClassOrInterfaceModifier @Resource' 'enterClassOrInterfaceModifier protected' 'enterClassOrInterfaceModifier @Resource' 'enterClassOrInterfaceModifier protected' 'enterClassOrInterfaceModifier public' 'enterClassOrInterfaceModifier public' 'enterClassOrInterfaceModifier @Execute(validator=false)' 'enterClassOrInterfaceModifier public' 'enterClassOrInterfaceModifier @Execute(validator=true,input="index.jsp")' 'enterClassOrInterfaceModifier public' {'annotation': ['@Execute(validator=false)'], 'body_pos': {'start_column': 45, 'start_index': 310, 'start_line': 48, 'stop_column': 4, 'stop_index': 380, 'stop_line': 62}, 'methodName': 'index', 'modifier': ['public'], 'params': [], 'returnType': 'String'} {'annotation': ['@Execute(validator=true,input="index.jsp")'], 'body_pos': {'start_column': 42, 'start_index': 407, 'start_line': 65, 'stop_column': 4, 'stop_index': 583, 'stop_line': 90}, 'methodName': 'read', 'modifier': ['public'], 'params': [], 'returnType': 'String'} {'annotation': [], 'className': 'CsvAction', 'extends': '', 'fields': [{'annotation': ['@Resource', '@ActionForm'], 'fieldDefinition': 'csvForm', 'fieldType': 'CsvForm', 'modifier': ['protected']}, {'annotation': ['@Resource'], 'fieldDefinition': 'csvService', 'fieldType': 'CsvService', 'modifier': ['protected']}, {'annotation': ['@Resource'], 'fieldDefinition': 's2csvCtrlFactory', 'fieldType': 'S2CSVCtrlFactory', 'modifier': ['protected']}, {'annotation': ['@Resource'], 'fieldDefinition': 'request', 'fieldType': 'HttpServletRequest', 'modifier': ['protected']}, {'annotation': [], 'fieldDefinition': 'deptList', 'fieldType': 'List<DeptCsv>', 
'modifier': ['public']}, {'annotation': [], 'fieldDefinition': 'csv', 'fieldType': 'String', 'modifier': ['public']}], 'implements': [], 'imports': ['java.io.IOException', 'java.io.InputStream', 'java.io.StringWriter', 'java.util.List', 'javax.annotation.Resource', 'javax.servlet.http.HttpServletRequest', 'org.apache.commons.io.IOUtils', 'org.apache.struts.action.ActionErrors', 'org.seasar.framework.util.StringUtil', 'org.seasar.s2csv.csv.S2CSVWriteCtrl', 'org.seasar.s2csv.csv.factory.S2CSVCtrlFactory', 'org.seasar.sastruts.example.csv.DeptCsv', 'org.seasar.sastruts.example.form.CsvForm', 'org.seasar.sastruts.example.service.CsvService', 'org.seasar.struts.annotation.ActionForm', 'org.seasar.struts.annotation.Execute', 'org.seasar.struts.util.ActionMessagesUtil'], 'methods': [{'annotation': ['@Execute(validator=false)'], 'body_pos': {'start_column': 45, 'start_index': 310, 'start_line': 48, 'stop_column': 4, 'stop_index': 380, 'stop_line': 62}, 'body_src': '{\r\n' '\r\n' ' InputStream in = Thread.currentThread()\r\n' ' .getContextClassLoader()\r\n' ' ' '.getResourceAsStream("/data/dept.csv");\r\n' '\r\n' ' try {\r\n' ' this.csvForm.csvData = ' 'IOUtils.toString(in, "UTF-8");\r\n' '\r\n' ' } finally {\r\n' ' IOUtils.closeQuietly(in);\r\n' ' }\r\n' '\r\n' ' return "index.jsp";\r\n' ' }', 'methodName': 'index', 'modifier': ['public'], 'params': [], 'returnType': 'String'}, {'annotation': ['@Execute(validator=true,input="index.jsp")'], 'body_pos': {'start_column': 42, 'start_index': 407, 'start_line': 65, 'stop_column': 4, 'stop_index': 583, 'stop_line': 90}, 'body_src': '{\r\n' ' ActionErrors errors = new ' 'ActionErrors();\r\n' '\r\n' ' ' 'if(StringUtil.isNotEmpty(this.csvForm.processAll)){\r\n' ' // 一括で処理\r\n' ' this.deptList = ' 'csvService.parseAll(this.csvForm.csvData, ' 'errors);\r\n' ' } else {\r\n' ' // 1件ずつ処理\r\n' ' this.deptList = ' 'csvService.parse(this.csvForm.csvData, errors);\r\n' ' }\r\n' '\r\n' ' if(!errors.isEmpty()){\r\n' ' 
ActionMessagesUtil.addErrors(request, ' 'errors);\r\n' ' return "index.jsp";\r\n' ' }\r\n' '\r\n' ' StringWriter writer = new ' 'StringWriter();\r\n' '\r\n' ' S2CSVWriteCtrl<DeptCsv> controller\r\n' ' = ' 's2csvCtrlFactory.getWriteController(DeptCsv.class, ' 'writer);\r\n' '\r\n' ' controller.writeAll(this.deptList);\r\n' ' this.csv = writer.toString();\r\n' '\r\n' ' return "read.jsp";\r\n' ' }', 'methodName': 'read', 'modifier': ['public'], 'params': [], 'returnType': 'String'}], 'modifier': ['public'], 'packageName': 'org.seasar.sastruts.example.action'}