javalang 生成抽象语法树AST ----python源码分析
作者:互联网
维基百科中说:
在计算机科学中,抽象语法树(Abstract Syntax Tree,AST),或简称语法树(Syntax tree),是源代码语法结构的一种抽象表示。它以树状的形式表现编程语言的语法结构,树上的每个节点都表示源代码中的一种结构。之所以说语法是“抽象”的,是因为这里的语法并不会表示出真实语法中出现的每个细节。比如,嵌套括号被隐含在树的结构中,并没有以节点的形式呈现;而类似于 if-condition-then 这样的条件跳转语句,可以使用带有三个分支的节点来表示。
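例如,辗转相除法的代码如下:
while b ≠ 0
    if a > b
        a := a − b
    else
        b := b − a
return a
其抽象语法树为(原文配图缺失,结构示意如下):
语句序列
├── while
│   ├── 条件:b ≠ 0
│   └── 循环体:if 分支
│       ├── 条件:a > b
│       ├── then:a := a − b
│       └── else:b := b − a
└── return a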
目标文件code.txt:
// Bubble sort for a float array; each do-while iteration is one full pass.
// last_exchange records the index j of the last swap made in the pass, and
// the passes stop once that index is 0.
public static void BubbleSortFloat2(float[] num) {
    int last_exchange;
    int right_border = num.length - 1;
    do {
        last_exchange = 0;
        // NOTE(review): the scan always runs to num.length - 1; right_border
        // is only used in the loop condition below, not as the scan bound.
        for (int j = 0; j < num.length - 1; j++) {
            if (num[j] > num[j + 1])
            {
                // Swap the out-of-order neighbours and remember where.
                float temp = num[j];
                num[j] = num[j + 1];
                num[j + 1] = temp;
                last_exchange = j;
            }
        }
        right_border = last_exchange;
    } while (right_border > 0);
}
使用python包javalang对上述java文件进行解析:
import javalang
import os
# code.txt holds a single Java method, so it is tokenized and then parsed
# with parse_member_declaration().
# The context manager closes the file even if reading raises.
with open("file/code.txt", encoding='utf-8') as programfile:
    programtext = programfile.read()
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()
print(programast)
打印结果类型为<class 'javalang.tree.MethodDeclaration'>,具体内容如下
MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None)
换行后如下,可以看出 body 里面包含了 Java 源代码中的各种元素。
MethodDeclaration中包含:
LocalVariableDeclaration :局部变量 *3
Statement :for、if-else、do语句等
operator :操作符
MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None)
接下来,遍历以上信息新建一棵树,代码续上啊!
import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree
# Pre-process the code sample: read code.txt (a single Java method),
# tokenize it and parse it with parse_member_declaration().
# The context manager closes the file even if reading raises.
with open("file/code.txt", encoding='utf-8') as programfile:
    programtext = programfile.read()
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()
tree = programast
# Helpers that extract the data needed to rebuild the AST as a tree by
# recursively visiting every element.
def get_token(node):
    """Return the label used for one AST element.

    Args:
        node: a ``str``, a ``set`` or a ``Node`` instance; any other value
            is accepted as well.

    Returns:
        The string itself for ``str`` input, ``'Modifier'`` for a ``set``,
        the class name for a ``Node``, and ``''`` for anything else.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, set):
        return 'Modifier'
    if isinstance(node, Node):
        return node.__class__.__name__
    return ''
def get_child(root):
    """Return the flattened, truthy children of an AST element.

    Args:
        root: a ``Node`` (its ``children`` attribute is used), a ``set``
            (its members are the children) or any other value (no children).

    Returns:
        A flat list: nested lists are expanded recursively and falsy
        entries (``None``, ``''``, empty containers, ...) are dropped.
    """
    if isinstance(root, set):
        # Iteration order of a set of strings changes between interpreter
        # runs (hash randomisation); sort so that the node ids derived from
        # this order are reproducible.  Fall back to the raw order if the
        # members cannot be compared.
        try:
            children = sorted(root)
        except TypeError:
            children = list(root)
    elif isinstance(root, Node):
        children = root.children
    else:
        children = []

    def expand(nested_list):
        # Depth-first flattening that skips falsy leaves.
        for item in nested_list:
            if isinstance(item, list):
                yield from expand(item)
            elif item:
                yield item

    return list(expand(children))
def createtree(root, node, nodelist, parent=None):
    """Mirror the AST rooted at ``node`` as a tree of ``AnyNode`` objects.

    Elements are visited in pre-order and each one receives
    ``id == len(nodelist)`` at the moment it is visited.  The element that
    gets id 0 is written into the pre-built ``root`` (its ``token`` and
    ``data`` attributes are set); every other element becomes a new
    ``AnyNode`` attached to its parent.

    Args:
        root: existing ``AnyNode`` that represents the element with id 0.
        node: AST element to start from.
        nodelist: list that collects the visited elements; its length
            provides the next id.  Mutated in place.
        parent: tree node to attach ``node`` to when it is not the id-0
            element.

    Returns:
        None.
    """
    # An explicit stack keeps the traversal independent of Python's
    # recursion limit on deeply nested ASTs.
    pending = [(node, parent)]
    while pending:
        current, current_parent = pending.pop()
        node_id = len(nodelist)
        token, children = get_token(current), get_child(current)
        if node_id == 0:
            root.token = token
            root.data = current
            attached = root
        else:
            attached = AnyNode(id=node_id, token=token, data=current,
                               parent=current_parent)
        nodelist.append(current)
        # Push in reverse so the first child is popped (visited) first.
        pending.extend((child, attached) for child in reversed(children))
# Start from an empty root (id 0); createtree() sets its token/data and
# attaches every other element below it.
nodelist = []
newtree = AnyNode(id=0, token=None, data=None)
createtree(root=newtree, node=tree, nodelist=nodelist)
print(newtree)
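打印出结果,看起来与上面的输出差别不大:这是因为 print(newtree) 只打印根结点,其 data 属性就是整个 MethodDeclaration,只是末尾多出了 id=0 和 token='MethodDeclaration';若要查看整棵树的层次结构,可以使用已导入的 RenderTree(newtree):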
AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')
AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')
import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree
# Pre-process the code sample: read code.txt (a single Java method),
# tokenize it and parse it with parse_member_declaration().
# The context manager closes the file even if reading raises.
with open("file/code.txt", encoding='utf-8') as programfile:
    programtext = programfile.read()
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()
tree = programast
# Helpers that extract the data needed to rebuild the AST as a tree by
# recursively visiting every element.
def get_token(node):
    """Return the label used for one AST element.

    Args:
        node: a ``str``, a ``set`` or a ``Node`` instance; any other value
            is accepted as well.

    Returns:
        The string itself for ``str`` input, ``'Modifier'`` for a ``set``,
        the class name for a ``Node``, and ``''`` for anything else.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, set):
        return 'Modifier'
    if isinstance(node, Node):
        return node.__class__.__name__
    return ''
def get_child(root):
    """Return the flattened, truthy children of an AST element.

    Args:
        root: a ``Node`` (its ``children`` attribute is used), a ``set``
            (its members are the children) or any other value (no children).

    Returns:
        A flat list: nested lists are expanded recursively and falsy
        entries (``None``, ``''``, empty containers, ...) are dropped.
    """
    if isinstance(root, set):
        # Iteration order of a set of strings changes between interpreter
        # runs (hash randomisation); sort so that the node ids derived from
        # this order are reproducible.  Fall back to the raw order if the
        # members cannot be compared.
        try:
            children = sorted(root)
        except TypeError:
            children = list(root)
    elif isinstance(root, Node):
        children = root.children
    else:
        children = []

    def expand(nested_list):
        # Depth-first flattening that skips falsy leaves.
        for item in nested_list:
            if isinstance(item, list):
                yield from expand(item)
            elif item:
                yield item

    return list(expand(children))
def createtree(root, node, nodelist, parent=None):
    """Mirror the AST rooted at ``node`` as a tree of ``AnyNode`` objects.

    Elements are visited in pre-order and each one receives
    ``id == len(nodelist)`` at the moment it is visited.  The element that
    gets id 0 is written into the pre-built ``root`` (its ``token`` and
    ``data`` attributes are set); every other element becomes a new
    ``AnyNode`` attached to its parent.

    Args:
        root: existing ``AnyNode`` that represents the element with id 0.
        node: AST element to start from.
        nodelist: list that collects the visited elements; its length
            provides the next id.  Mutated in place.
        parent: tree node to attach ``node`` to when it is not the id-0
            element.

    Returns:
        None.
    """
    # An explicit stack keeps the traversal independent of Python's
    # recursion limit on deeply nested ASTs.
    pending = [(node, parent)]
    while pending:
        current, current_parent = pending.pop()
        node_id = len(nodelist)
        token, children = get_token(current), get_child(current)
        if node_id == 0:
            root.token = token
            root.data = current
            attached = root
        else:
            attached = AnyNode(id=node_id, token=token, data=current,
                               parent=current_parent)
        nodelist.append(current)
        # Push in reverse so the first child is popped (visited) first.
        pending.extend((child, attached) for child in reversed(children))
# Start from an empty root (id 0); createtree() sets its token/data and
# attaches every other element below it.
nodelist = []
newtree = AnyNode(id=0, token=None, data=None)
createtree(root=newtree, node=tree, nodelist=nodelist)
def get_sequence(node, sequence):
    """Append the pre-order token sequence of ``node`` to ``sequence``.

    Args:
        node: AST element to start from.
        sequence: list that receives one token (see ``get_token``) per
            visited element.  Mutated in place.

    Returns:
        None.
    """
    # Iterative pre-order walk; avoids hitting the recursion limit on
    # deeply nested ASTs.
    pending = [node]
    while pending:
        current = pending.pop()
        sequence.append(get_token(current))
        # Reverse so the first child ends up on top of the stack.
        pending.extend(reversed(get_child(current)))
alltokens = []
get_sequence(programast, alltokens)

# Count how often each kind of control statement occurs.  This must happen
# before alltokens is de-duplicated below.
ifcount = alltokens.count('IfStatement')
whilecount = alltokens.count('WhileStatement')
forcount = alltokens.count('ForStatement')
blockcount = alltokens.count('BlockStatement')
docount = alltokens.count('DoStatement')
switchcount = alltokens.count('SwitchStatement')

print('allnodes: ', len(alltokens))  # e.g. allnodes:  138

# Build the vocabulary from the distinct tokens.  Sorting makes the
# token -> id mapping identical on every run; the order of list(set(...))
# depends on string hashing and changes between interpreter runs.
alltokens = sorted(set(alltokens))
vocabsize = len(alltokens)
tokenids = range(vocabsize)
vocabdict = dict(zip(alltokens, tokenids))
print("vocabsize: ", vocabsize)  # number of distinct tokens, e.g. 37
def getnodeandedge_astonly(node, nodeindexlist, vocabdict, src, tgt):
    """Collect node features and bidirectional parent/child edges.

    The tree below ``node`` is walked in pre-order.  For every visited node
    ``[vocabdict[node.token]]`` is appended to ``nodeindexlist``; for every
    parent/child pair two directed edges are recorded, parent -> child and
    child -> parent, as matching entries of ``src`` and ``tgt``.

    Args:
        node: tree node exposing ``token``, ``id`` and ``children``.
        nodeindexlist: output list of one-element vocabulary-id lists.
        vocabdict: mapping from token to vocabulary id.
        src: output list of edge source ids.
        tgt: output list of edge target ids.

    Returns:
        None; the three output lists are mutated in place.

    Raises:
        KeyError: if a node's token is missing from ``vocabdict``.
    """
    # Explicit stack instead of recursion so deep trees cannot exhaust the
    # recursion limit; the emitted order is the same as a recursive walk.
    pending = [(None, node)]
    while pending:
        parent, current = pending.pop()
        if parent is not None:
            src.append(parent.id)
            tgt.append(current.id)
            src.append(current.id)
            tgt.append(parent.id)
        nodeindexlist.append([vocabdict[current.token]])
        # Push in reverse so the first child is processed first.
        pending.extend(
            (current, child) for child in reversed(tuple(current.children))
        )
# Walk the tree and collect every node and edge.
x = []
edgesrc = []
edgetgt = []
getnodeandedge_astonly(newtree, x, vocabdict, edgesrc, edgetgt)
print("edgesrc:", edgesrc)
print("edgetgt:", edgetgt)
# The sample output shown with this script also lists x, so print it too.
print("x:", x)
打印出所有的边,边的两端用结点 id 表示。每一对父子结点记录为两条有向边(父结点→子结点、子结点→父结点),edgesrc 与 edgetgt 两个列表按下标一一对应;x 则按遍历顺序记录每个结点的 token 在 vocabdict 中对应的 ID:
edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132]
x: [[34], [6], [16], [15], [19], [25], [28], [10], [9], [24], [28], [27], [14], [4], [24], [28], [27], [14], [23], [8], [11], [31], [9], [0], [12], [33], [29], [8], [1], [31], [23], [12], [21], [22], [7], [13], [31], [4], [12], [21], [2], [17], [5], [36], [28], [27], [14], [20], [12], [21], [8], [3], [31], [20], [8], [11], [31], [9], [0], [12], [33], [31], [18], [20], [22], [30], [8], [1], [31], [32], [31], [20], [9], [31], [32], [8], [35], [31], [20], [12], [33], [9], [22], [24], [28], [10], [14], [26], [31], [32], [31], [20], [9], [7], [13], [31], [32], [31], [20], [9], [31], [32], [8], [35], [31], [20], [12], [33], [9], [2], [7], [13], [31], [32], [8], [35], [31], [20], [12], [33], [9], [31], [26], [2], [7], [13], [31], [4], [31], [20], [2], [7], [13], [31], [23], [31], [4], [2]]
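词汇字典,各token及其ID(注意:该字典与上面的 x 并非同一次运行的输出,例如根结点 MethodDeclaration 在 x 中的 ID 为 34,而在此处为 29;这是因为 list(set(...)) 得到的顺序在不同运行之间并不固定):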
vocabdict: {'Modifier': 0, 'Assignment': 1, '+': 2, 'j': 3, '1': 4, '-': 5, 'BlockStatement': 6, '0': 7, 'right_border': 8, 'MemberReference': 9, '++': 10, 'ForStatement': 11, '>': 12, 'temp': 13, 'length': 14, 'DoStatement': 15, 'FormalParameter': 16, 'ForControl': 17, 'float': 18, 'static': 19, 'IfStatement': 20, 'int': 21, 'ArraySelector': 22, 'BubbleSortFloat2': 23, '<': 24, 'public': 25, 'VariableDeclarator': 26, 'BasicType': 27, 'num': 28, 'MethodDeclaration': 29, '=': 30, 'VariableDeclaration': 31, 'Literal': 32, 'BinaryOperation': 33, 'StatementExpression': 34, 'last_exchange': 35, 'LocalVariableDeclaration': 36}
以上是纯AST的边;下面是作者加入了 IfStatement、WhileStatement、ForStatement、SwitchStatement 等控制流边(edges)之后得到的“加强AST”的边:
edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137, 1, 4, 4, 5, 5, 9, 9, 14, 14, 26, 2, 3, 6, 8, 10, 12, 15, 17, 18, 19, 20, 21, 21, 24, 22, 23, 27, 33, 28, 29, 29, 31, 34, 41, 41, 131, 36, 38, 38, 40, 42, 64, 43, 50, 50, 61, 44, 46, 47, 48, 51, 52, 52, 54, 55, 56, 56, 59, 57, 58, 62, 63, 66, 82, 67, 68, 68, 73, 69, 72, 74, 81, 76, 77, 77, 79, 83, 93, 93, 110, 110, 124, 84, 86, 87, 88, 89, 92, 95, 100, 100, 109, 96, 99, 101, 108, 103, 104, 104, 106, 112, 121, 121, 123, 113, 120, 115, 116, 116, 118, 126, 128, 128, 130, 133, 135, 135, 137, 42, 64, 66, 82, 34, 41, 41, 131, 83, 93, 93, 110, 110, 124, 2, 3, 3, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 16, 16, 18, 18, 20, 20, 22, 22, 23, 23, 25, 25, 28, 28, 30, 30, 32, 32, 37, 37, 39, 39, 40, 40, 45, 45, 47, 47, 49, 49, 51, 51, 53, 53, 55, 55, 57, 57, 58, 58, 60, 60, 62, 62, 63, 63, 67, 67, 71, 71, 72, 72, 76, 76, 78, 78, 80, 80, 81, 81, 85, 85, 
87, 87, 91, 91, 92, 92, 98, 98, 99, 99, 103, 103, 105, 105, 107, 107, 108, 108, 109, 109, 115, 115, 117, 117, 119, 119, 120, 120, 122, 122, 123, 123, 127, 127, 129, 129, 130, 130, 134, 134, 136, 136, 137, 23, 58, 30, 134, 37, 127, 127, 136, 53, 63, 63, 71, 71, 78, 78, 91, 91, 98, 98, 105, 105, 117, 117, 129, 72, 81, 81, 92, 92, 99, 99, 108, 108, 120]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132, 4, 1, 5, 4, 9, 5, 14, 9, 26, 14, 3, 2, 8, 6, 12, 10, 17, 15, 19, 18, 21, 20, 24, 21, 23, 22, 33, 27, 29, 28, 31, 29, 41, 34, 131, 41, 38, 36, 40, 38, 64, 42, 50, 43, 61, 50, 46, 44, 48, 47, 52, 51, 54, 52, 56, 55, 59, 56, 58, 57, 63, 62, 82, 66, 68, 67, 73, 68, 72, 69, 81, 74, 77, 76, 79, 77, 93, 83, 110, 93, 124, 110, 86, 84, 88, 87, 92, 89, 100, 95, 109, 100, 99, 96, 108, 101, 104, 103, 106, 104, 121, 112, 123, 121, 120, 113, 116, 115, 118, 116, 128, 126, 130, 128, 135, 133, 137, 135, 64, 42, 82, 66, 41, 34, 131, 41, 93, 83, 110, 93, 124, 110, 3, 2, 4, 3, 7, 4, 8, 7, 11, 8, 13, 11, 16, 13, 18, 16, 20, 18, 22, 20, 23, 22, 25, 23, 28, 25, 30, 28, 32, 30, 37, 32, 39, 37, 40, 39, 45, 40, 47, 45, 49, 47, 51, 49, 53, 51, 55, 53, 57, 55, 58, 57, 60, 58, 62, 60, 63, 62, 67, 63, 71, 67, 72, 71, 76, 72, 78, 76, 80, 78, 81, 80, 85, 81, 87, 
85, 91, 87, 92, 91, 98, 92, 99, 98, 103, 99, 105, 103, 107, 105, 108, 107, 109, 108, 115, 109, 117, 115, 119, 117, 120, 119, 122, 120, 123, 122, 127, 123, 129, 127, 130, 129, 134, 130, 136, 134, 137, 136, 58, 23, 134, 30, 127, 37, 136, 127, 63, 53, 71, 63, 78, 71, 91, 78, 98, 91, 105, 98, 117, 105, 129, 117, 81, 72, 92, 81, 99, 92, 108, 99, 120, 108]
x: [[33], [36], [25], [31], [11], [28], [5], [18], [29], [3], [5], [9], [12], [35], [3], [5], [9], [12], [0], [2], [23], [19], [29], [8], [16], [26], [27], [2], [13], [19], [0], [16], [22], [30], [1], [32], [19], [35], [16], [22], [14], [15], [34], [7], [5], [9], [12], [17], [16], [22], [2], [10], [19], [17], [2], [23], [19], [29], [8], [16], [26], [19], [6], [17], [30], [21], [2], [13], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [30], [3], [5], [18], [12], [4], [19], [24], [19], [17], [29], [1], [32], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [14], [1], [32], [19], [24], [2], [20], [19], [17], [16], [26], [29], [19], [4], [14], [1], [32], [19], [35], [19], [17], [14], [1], [32], [19], [0], [19], [35], [14]]
加强边代码:
import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree
# Preprocess the code data: read the Java snippet and parse it with javalang.
# The context manager closes the file even if reading raises.
with open("file/code.txt", encoding='utf-8') as programfile:
    programtext = programfile.read()
# The whole text is already in memory, so the file is not needed past here.
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
# code.txt holds a single method rather than a full compilation unit, so it
# is parsed as a member declaration.
programast = parser.parse_member_declaration()
tree = programast
# Helpers that extract the data the AST needs; the tree is walked recursively,
# node by node.
def get_token(node):
    """Return the token string used to label one AST element.

    Args:
        node: A string, a set, a javalang ``Node``, or anything else found
            while walking the parsed tree.

    Returns:
        The string itself for a ``str``, ``'Modifier'`` for a ``set``, the
        class name for a ``Node`` instance, and ``''`` for any other type.
    """
    token = ''
    if isinstance(node, str):
        token = node
    elif isinstance(node, set):
        token = 'Modifier'
    elif isinstance(node, Node):
        token = node.__class__.__name__
    return token
def get_child(root):
    """Return the direct children of an AST element as a flat list.

    Args:
        root: A javalang ``Node``, a set, or any other value.

    Returns:
        A list built from ``root.children`` for a ``Node`` or from the set's
        members for a ``set``; an empty list for anything else. Nested lists
        are flattened and falsy entries (``None``, ``''``, empty sets,
        ``False``, ...) are dropped.
    """
    # A set is never a Node, so the order of these two checks does not matter.
    if isinstance(root, set):
        children = list(root)
    elif isinstance(root, Node):
        children = root.children
    else:
        children = []

    def expand(nested_list):
        # Depth-first flattening that skips falsy leaves.
        for item in nested_list:
            if isinstance(item, list):
                yield from expand(item)
            elif item:
                yield item

    return list(expand(children))
def createtree(root, node, nodelist, parent=None):
    """Mirror the AST rooted at ``node`` as a tree of ``AnyNode`` objects.

    Ids are handed out in visiting order (pre-order): each element gets
    ``len(nodelist)`` at the moment it is visited and is then appended to
    ``nodelist``.

    Args:
        root: Pre-built ``AnyNode`` that represents the element with id 0; its
            ``token`` and ``data`` attributes are filled in here.
        node: The AST element to convert (anything ``get_token`` and
            ``get_child`` accept).
        nodelist: List collecting every visited element; mutated in place.
        parent: ``AnyNode`` the new node is attached to. Ignored for the
            element that receives id 0.
    """
    # Named node_id so the builtin id() is not shadowed.
    node_id = len(nodelist)
    token, children = get_token(node), get_child(node)
    if node_id == 0:
        root.token = token
        root.data = node
        current = root
    else:
        current = AnyNode(id=node_id, token=token, data=node, parent=parent)
    nodelist.append(node)
    for child in children:
        createtree(root, child, nodelist, parent=current)
# nodelist records every visited AST element; its length supplies the next id.
nodelist = []
# Root of the AnyNode tree; createtree fills in its token and data.
newtree = AnyNode(id=0, token=None, data=None)
createtree(newtree, tree, nodelist)
def get_sequence(node, sequence):
    """Append the tokens of ``node`` and all its descendants in pre-order.

    Args:
        node: AST element to start from.
        sequence: List that receives the tokens; mutated in place.
    """
    token, children = get_token(node), get_child(node)
    sequence.append(token)
    for child in children:
        get_sequence(child, sequence)
alltokens = []
get_sequence(programast, alltokens)

# Count each kind of statement #################################
ifcount = alltokens.count('IfStatement')
whilecount = alltokens.count('WhileStatement')
forcount = alltokens.count('ForStatement')
blockcount = alltokens.count('BlockStatement')
docount = alltokens.count('DoStatement')
switchcount = alltokens.count('SwitchStatement')
print(ifcount,whilecount,forcount,blockcount,docount,switchcount)
################################################
print('allnodes: ', len(alltokens))  # 138 for the sample code.txt

# De-duplicate the tokens. They are sorted because plain set order for strings
# can change from one interpreter run to the next, which would give every run
# a different token -> id mapping.
alltokens = sorted(set(alltokens))
print("token set size:", len(alltokens))  # 37 after de-duplication
vocabsize = len(alltokens)
tokenids = range(vocabsize)
vocabdict = dict(zip(alltokens, tokenids))
print("vocabsize: ", vocabsize)  # number of distinct tokens (37 for the sample)
print("vocabdict:", vocabdict)
def getnodeandedge(node, nodeindexlist, vocabdict, src, tgt, edgetype):
    """Collect node features and parent/child edges for the whole subtree.

    Nodes are visited in pre-order. For every parent/child pair two directed
    edges are recorded (parent -> child, then child -> parent), both with
    edge type ``[0]``.

    Args:
        node: Tree node exposing ``token``, ``id`` and ``children``.
        nodeindexlist: Receives one ``[vocab_id]`` entry per visited node.
        vocabdict: Mapping from token string to integer id.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.

    Raises:
        KeyError: If a node's token is missing from ``vocabdict``.
    """
    nodeindexlist.append([vocabdict[node.token]])
    for child in node.children:
        # Downward edge: parent -> child.
        src.append(node.id)
        tgt.append(child.id)
        edgetype.append([0])
        # Upward edge: child -> parent.
        src.append(child.id)
        tgt.append(node.id)
        edgetype.append([0])
        getnodeandedge(child, nodeindexlist, vocabdict, src, tgt, edgetype)
# Tools
# Integer code of every named edge type. Types 0 (parent/child) and 1 (next
# sibling) have no entry here; getnodeandedge and getedge_nextsib append those
# two values as literals.
edges = {
    'Nexttoken': 2,
    'Prevtoken': 3,
    'Nextuse': 4,
    'Prevuse': 5,
    'If': 6,
    'Ifelse': 7,
    'While': 8,
    'For': 9,
    'Nextstmt': 10,
    'Prevstmt': 11,
    'Prevsib': 12,
}
def getedge_nextsib(node, vocabdict, src, tgt, edgetype):
    """Link every pair of adjacent siblings in the subtree of ``node``.

    For neighbouring children ``a, b`` a forward edge ``a -> b`` with type
    ``[1]`` and a backward edge ``b -> a`` with type ``[edges['Prevsib']]``
    are appended.

    Args:
        node: Tree node exposing ``id`` and ``children``.
        vocabdict: Unused; kept so all edge builders share one signature.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.
    """
    siblings = node.children
    for left, right in zip(siblings, siblings[1:]):
        src.append(left.id)
        tgt.append(right.id)
        edgetype.append([1])  # next-sibling type has no entry in `edges`
        src.append(right.id)
        tgt.append(left.id)
        edgetype.append([edges['Prevsib']])
    for child in siblings:
        getedge_nextsib(child, vocabdict, src, tgt, edgetype)
def getedge_flow(node, vocabdict, src, tgt, edgetype, ifedge=False, whileedge=False, foredge=False):
    """Add control-flow edges for while, for and if statements in the subtree.

    Each link is stored as two directed edges (a -> b, then b -> a) that carry
    the same type. Only ``WhileStatement``, ``ForStatement`` and
    ``IfStatement`` tokens are handled; other statement kinds (for example
    ``DoStatement`` or ``SwitchStatement``) get no edges here.

    Args:
        node: Tree node exposing ``token``, ``id`` and ``children``.
        vocabdict: Unused; kept so all edge builders share one signature.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.
        ifedge: Enable ``If`` / ``Ifelse`` edges.
        whileedge: Enable ``While`` edges.
        foredge: Enable ``For`` edges.
    """
    def link(first, second, kind):
        # Record first -> second and second -> first with the same edge type.
        src.append(first.id)
        tgt.append(second.id)
        edgetype.append([edges[kind]])
        src.append(second.id)
        tgt.append(first.id)
        edgetype.append([edges[kind]])

    token = node.token
    kids = node.children
    # NOTE(review): the positional indexing below presumably relies on
    # children[0] being the condition/control and children[1] the body
    # (children[2] the else branch) - confirm against javalang's child order,
    # in particular for labelled statements.
    if whileedge and token == 'WhileStatement':
        link(kids[0], kids[1], 'While')
    if foredge and token == 'ForStatement':
        link(kids[0], kids[1], 'For')
    if ifedge and token == 'IfStatement':
        link(kids[0], kids[1], 'If')
        # A third child is treated as the else branch.
        if len(kids) == 3:
            link(kids[0], kids[2], 'Ifelse')
    for child in kids:
        getedge_flow(child, vocabdict, src, tgt, edgetype, ifedge, whileedge, foredge)
def getedge_nextstmt(node, vocabdict, src, tgt, edgetype):
    """Link consecutive children of every ``BlockStatement`` in the subtree.

    For neighbouring children ``a, b`` of a block, ``a -> b`` is stored with
    type ``[edges['Nextstmt']]`` and ``b -> a`` with ``[edges['Prevstmt']]``.

    Args:
        node: Tree node exposing ``token``, ``id`` and ``children``.
        vocabdict: Unused; kept so all edge builders share one signature.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.
    """
    if node.token == 'BlockStatement':
        statements = node.children
        for current, following in zip(statements, statements[1:]):
            src.append(current.id)
            tgt.append(following.id)
            edgetype.append([edges['Nextstmt']])
            src.append(following.id)
            tgt.append(current.id)
            edgetype.append([edges['Prevstmt']])
    for child in node.children:
        getedge_nextstmt(child, vocabdict, src, tgt, edgetype)
def getedge_nexttoken(node, vocabdict, src, tgt, edgetype, tokenlist):
    """Chain the leaves of the subtree together in visiting order.

    Leaf ids (nodes without children) are appended to ``tokenlist`` in
    pre-order. Every adjacent pair ``a, b`` of the resulting list then gets
    ``a -> b`` with type ``[edges['Nexttoken']]`` and ``b -> a`` with
    ``[edges['Prevtoken']]``.

    Args:
        node: Tree node exposing ``id`` and ``children``.
        vocabdict: Unused; kept so all edge builders share one signature.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.
        tokenlist: Receives the leaf ids; mutated in place. Entries already
            present take part in the pairing as well.
    """
    def gettokenlist(current):
        # Collect leaf ids depth-first.
        if len(current.children) == 0:
            tokenlist.append(current.id)
        for child in current.children:
            gettokenlist(child)

    gettokenlist(node)
    for left, right in zip(tokenlist, tokenlist[1:]):
        src.append(left)
        tgt.append(right)
        edgetype.append([edges['Nexttoken']])
        src.append(right)
        tgt.append(left)
        edgetype.append([edges['Prevtoken']])
def getedge_nextuse(node, vocabdict, src, tgt, edgetype, variabledict):
    """Link successive uses of the same variable name.

    For every ``MemberReference`` node, the child whose token equals
    ``node.data.member`` is recorded under that name in ``variabledict``.
    Each adjacent pair ``a, b`` of recorded ids then gets ``a -> b`` with type
    ``[edges['Nextuse']]`` and ``b -> a`` with ``[edges['Prevuse']]``.

    Args:
        node: Tree node exposing ``token``, ``id``, ``children`` and ``data``.
        vocabdict: Unused; kept so all edge builders share one signature.
        src: Receives edge source ids.
        tgt: Receives edge target ids.
        edgetype: Receives one ``[type]`` entry per edge.
        variabledict: Maps a variable name to the list of node ids where it
            is used; mutated in place.
    """
    def getvariables(current):
        if current.token == 'MemberReference':
            variablenode = None
            for child in current.children:
                if child.token == current.data.member:
                    # No break: the last matching child wins.
                    variablenode = child
            # Skip references without a matching child instead of failing on
            # an unbound local.
            if variablenode is not None:
                variabledict.setdefault(variablenode.token, []).append(variablenode.id)
        for child in current.children:
            getvariables(child)

    getvariables(node)
    for uses in variabledict.values():
        for earlier, later in zip(uses, uses[1:]):
            src.append(earlier)
            tgt.append(later)
            edgetype.append([edges['Nextuse']])
            src.append(later)
            tgt.append(earlier)
            edgetype.append([edges['Prevuse']])
# Walk the tree and collect every node and edge.
x = []          # one [vocab_id] entry per node
edgesrc = []    # edge source ids
edgetgt = []    # edge target ids
edge_attr = []  # one [edge_type] entry per edge

# Switches for the optional edge families.
nextsib = True
ifedge = True
whileedge = True
foredge = True
blockedge = True
nexttoken = True
nextuse = True

# The parent/child edges are always generated.
getnodeandedge(newtree, x, vocabdict, edgesrc, edgetgt, edge_attr)
if nextsib:
    getedge_nextsib(newtree, vocabdict, edgesrc, edgetgt, edge_attr)
# The flow builder is always called; its three flags select what it adds.
getedge_flow(newtree, vocabdict, edgesrc, edgetgt, edge_attr, ifedge, whileedge, foredge)
if blockedge:
    getedge_nextstmt(newtree, vocabdict, edgesrc, edgetgt, edge_attr)
tokenlist = []
if nexttoken:
    getedge_nexttoken(newtree, vocabdict, edgesrc, edgetgt, edge_attr, tokenlist)
variabledict = {}
if nextuse:
    getedge_nextuse(newtree, vocabdict, edgesrc, edgetgt, edge_attr, variabledict)

# For the sample code.txt the original notes report 138 entries in x and 558
# entries in edgetgt.
edge_index = [edgesrc, edgetgt]
print("tree:", newtree)
edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137, 1, 4, 4, 5, 5, 9, 9, 14, 14, 26, 2, 3, 6, 8, 10, 12, 15, 17, 18, 19, 20, 21, 21, 24, 22, 23, 27, 33, 28, 29, 29, 31, 34, 41, 41, 131, 36, 38, 38, 40, 42, 64, 43, 50, 50, 61, 44, 46, 47, 48, 51, 52, 52, 54, 55, 56, 56, 59, 57, 58, 62, 63, 66, 82, 67, 68, 68, 73, 69, 72, 74, 81, 76, 77, 77, 79, 83, 93, 93, 110, 110, 124, 84, 86, 87, 88, 89, 92, 95, 100, 100, 109, 96, 99, 101, 108, 103, 104, 104, 106, 112, 121, 121, 123, 113, 120, 115, 116, 116, 118, 126, 128, 128, 130, 133, 135, 135, 137, 42, 64, 66, 82, 34, 41, 41, 131, 83, 93, 93, 110, 110, 124, 2, 3, 3, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 16, 16, 18, 18, 20, 20, 22, 22, 23, 23, 25, 25, 28, 28, 30, 30, 32, 32, 37, 37, 39, 39, 40, 40, 45, 45, 47, 47, 49, 49, 51, 51, 53, 53, 55, 55, 57, 57, 58, 58, 60, 60, 62, 62, 63, 63, 67, 67, 71, 71, 72, 72, 76, 76, 78, 78, 80, 80, 81, 81, 85, 85, 
87, 87, 91, 91, 92, 92, 98, 98, 99, 99, 103, 103, 105, 105, 107, 107, 108, 108, 109, 109, 115, 115, 117, 117, 119, 119, 120, 120, 122, 122, 123, 123, 127, 127, 129, 129, 130, 130, 134, 134, 136, 136, 137, 23, 58, 30, 134, 37, 127, 127, 136, 53, 63, 63, 71, 71, 78, 78, 91, 91, 98, 98, 105, 105, 117, 117, 129, 72, 81, 81, 92, 92, 99, 99, 108, 108, 120]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132, 4, 1, 5, 4, 9, 5, 14, 9, 26, 14, 3, 2, 8, 6, 12, 10, 17, 15, 19, 18, 21, 20, 24, 21, 23, 22, 33, 27, 29, 28, 31, 29, 41, 34, 131, 41, 38, 36, 40, 38, 64, 42, 50, 43, 61, 50, 46, 44, 48, 47, 52, 51, 54, 52, 56, 55, 59, 56, 58, 57, 63, 62, 82, 66, 68, 67, 73, 68, 72, 69, 81, 74, 77, 76, 79, 77, 93, 83, 110, 93, 124, 110, 86, 84, 88, 87, 92, 89, 100, 95, 109, 100, 99, 96, 108, 101, 104, 103, 106, 104, 121, 112, 123, 121, 120, 113, 116, 115, 118, 116, 128, 126, 130, 128, 135, 133, 137, 135, 64, 42, 82, 66, 41, 34, 131, 41, 93, 83, 110, 93, 124, 110, 3, 2, 4, 3, 7, 4, 8, 7, 11, 8, 13, 11, 16, 13, 18, 16, 20, 18, 22, 20, 23, 22, 25, 23, 28, 25, 30, 28, 32, 30, 37, 32, 39, 37, 40, 39, 45, 40, 47, 45, 49, 47, 51, 49, 53, 51, 55, 53, 57, 55, 58, 57, 60, 58, 62, 60, 63, 62, 67, 63, 71, 67, 72, 71, 76, 72, 78, 76, 80, 78, 81, 80, 85, 81, 87, 
85, 91, 87, 92, 91, 98, 92, 99, 98, 103, 99, 105, 103, 107, 105, 108, 107, 109, 108, 115, 109, 117, 115, 119, 117, 120, 119, 122, 120, 123, 122, 127, 123, 129, 127, 130, 129, 134, 130, 136, 134, 137, 136, 58, 23, 134, 30, 127, 37, 136, 127, 63, 53, 71, 63, 78, 71, 91, 78, 98, 91, 105, 98, 117, 105, 129, 117, 81, 72, 92, 81, 99, 92, 108, 99, 120, 108]
x: [[33], [36], [25], [31], [11], [28], [5], [18], [29], [3], [5], [9], [12], [35], [3], [5], [9], [12], [0], [2], [23], [19], [29], [8], [16], [26], [27], [2], [13], [19], [0], [16], [22], [30], [1], [32], [19], [35], [16], [22], [14], [15], [34], [7], [5], [9], [12], [17], [16], [22], [2], [10], [19], [17], [2], [23], [19], [29], [8], [16], [26], [19], [6], [17], [30], [21], [2], [13], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [30], [3], [5], [18], [12], [4], [19], [24], [19], [17], [29], [1], [32], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [14], [1], [32], [19], [24], [2], [20], [19], [17], [16], [26], [29], [19], [4], [14], [1], [32], [19], [35], [19], [17], [14], [1], [32], [19], [0], [19], [35], [14]]
vocabdict: {'num': 0, 'FormalParameter': 1, '++': 2, 'right_border': 3, 'StatementExpression': 4, '-': 5, '0': 6, 'ArraySelector': 7, 'public': 8, '>': 9, 'DoStatement': 10, '+': 11, 'MethodDeclaration': 12, 'int': 13, '1': 14, 'last_exchange': 15, 'length': 16, 'ForControl': 17, 'float': 18, 'IfStatement': 19, 'Assignment': 20, 'MemberReference': 21, 'VariableDeclaration': 22, 'BubbleSortFloat2': 23, 'BinaryOperation': 24, 'LocalVariableDeclaration': 25, 'j': 26, 'static': 27, 'Literal': 28, 'BasicType': 29, 'temp': 30, 'Modifier': 31, 'ForStatement': 32, 'BlockStatement': 33, '=': 34, 'VariableDeclarator': 35, '<': 36}
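又回到最初的起点,Let me 康康最后得到的“带BUFF”的AST数据:确实,打印出来的 tree 凭肉眼什么区别也看不出!!这是因为新增的边并没有写回树本身,而只是记录到了 edgesrc、edgetgt(即 edge_index)和 edge_attr 中,所以边确实变多了。最后得到的最有用的数据应该就是上面的 edgesrc、edgetgt、x、vocabdict 这四行了。注意:上面贴出的 x 与 vocabdict 并不是同一次运行的输出(根节点 MethodDeclaration 在 vocabdict 中的编号是 12,而 x 的第一项却是 33),因为 list(set(...)) 得到的顺序在每次运行时可能不同。over...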
tree: AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'static', 'public'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')
标签:node,qualifier,AST,postfix,operators,selectors,----,prefix,源码 来源: https://blog.csdn.net/qq_35294564/article/details/115342407