编程语言
首页 > 编程语言 > javalang 生成抽象语法树AST ----python源码分析

javalang 生成抽象语法树AST ----python源码分析

作者:互联网

维基百科中说:

        在计算机科学中,抽象语法树(Abstract Syntax Tree,AST),或简称语法树(Syntax tree),是源代码语法结构的一种抽象表示。它以树状的形式表现编程语言的语法结构,树上的每个节点都表示源代码中的一种结构。之所以说语法是“抽象”的,是因为这里的语法并不会表示出真实语法中出现的每个细节。比如,嵌套括号被隐含在树的结构中,并没有以节点的形式呈现;而类似于 if-condition-then 这样的条件跳转语句,可以使用带有三个分支的节点来表示。

如辗转相除法的下列代码:

    while b ≠ 0
        if a > b
            a := a − b
        else
            b := b − a
    return a

的抽象语法树为:

目标文件code.txt:

	// Sorts num in place into ascending order using repeated bubble-sort passes.
	public static void BubbleSortFloat2(float[] num) {
		// Index j of the most recent swap in the current pass (0 if none happened).
		int last_exchange;
		// NOTE(review): right_border only drives the do-while condition; the
		// inner for loop below always scans up to num.length - 1.
		int right_border = num.length - 1;
		do {
			last_exchange = 0;
			for (int j = 0; j < num.length - 1; j++) {
				// Swap adjacent elements that are out of order.
				if (num[j] > num[j + 1])
				{
					float temp = num[j];
					num[j] = num[j + 1];
					num[j + 1] = temp;
					last_exchange = j;
				}
			}
			right_border = last_exchange;
		} while (right_border > 0); // stop once a pass made no swap beyond index 0
	}

使用python包javalang对上述java文件进行解析:

import javalang
import os

# Read the Java source text; the context manager closes the file even if
# reading raises.
with open("file/code.txt",encoding='utf-8') as programfile:
    programtext = programfile.read()

# Tokenize the text and parse it as a single member declaration
# (for the sample file this yields a MethodDeclaration).
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()

print(programast)

打印结果类型为<class 'javalang.tree.MethodDeclaration'>,具体内容如下

MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None)

 换行后如下,可以看出 body 里面包含了 Java 源代码中的各种元素,

MethodDeclaration中包含:

LocalVariableDeclaration  :局部变量 *3

Statement  :for、if-else、do语句等

operator  :操作符


 MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None)


接下来,遍历以上信息新建一棵树,代码续上啊!

import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree

# Source preprocessing: read the Java file, tokenize it and parse one member
# declaration. The context manager closes the file even if reading raises.
with open("file/code.txt",encoding='utf-8') as programfile:
    programtext = programfile.read()

programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()
tree = programast
# The functions below walk this AST recursively to collect the data needed
# to build a new tree.

def get_token(node):
    """Return the label used for ``node`` in the token sequence.

    * a string is its own label;
    * a set is labelled ``'Modifier'``;
    * a javalang ``Node`` is labelled with its class name;
    * anything else gets the empty string.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, set):
        return 'Modifier'
    if isinstance(node, Node):
        return type(node).__name__
    return ''
def get_child(root):
    """Return the non-empty children of ``root`` as one flat list.

    * ``set`` (how the parsed modifiers appear): its elements, sorted so that
      the child order, and every node id derived from it, is the same on
      each run.
    * javalang ``Node``: its ``children`` with nested lists flattened.
    * anything else: no children.

    Falsy entries (``None``, ``''``, empty sets) are skipped.
    """
    if isinstance(root, set):
        # Plain set iteration order follows string hashing, which differs
        # between interpreter runs; sorting makes the output reproducible.
        children = sorted(root, key=str)
    elif isinstance(root, Node):
        children = root.children
    else:
        children = []

    def expand(nested_list):
        # Depth-first flattening of arbitrarily nested lists.
        for item in nested_list:
            if isinstance(item, list):
                yield from expand(item)
            elif item:
                yield item

    return list(expand(children))
def createtree(root,node,nodelist,parent=None):
    """Recursively mirror the AST rooted at ``node`` as an anytree tree.

    The first call (empty ``nodelist``) fills ``token`` and ``data`` on the
    pre-built ``root``; every later call creates a new ``AnyNode`` attached to
    ``parent``. Node ids are assigned in visiting order, taken from the
    current length of ``nodelist``, which receives every visited element.
    """
    node_id = len(nodelist)
    token = get_token(node)
    children = get_child(node)
    if node_id == 0:
        root.token = token
        root.data = node
        current = root
    else:
        current = AnyNode(id=node_id, token=token, data=node, parent=parent)
    nodelist.append(node)
    for child in children:
        createtree(root, child, nodelist, parent=current)


# nodelist collects every visited AST element; its length supplies the next node id.
nodelist = []
# Placeholder root; createtree fills in its token and data on the first call.
newtree=AnyNode(id=0,token=None,data=None)
createtree(newtree, tree, nodelist)

# Prints only the root AnyNode's repr (its data attribute holds the whole
# parsed declaration); the child hierarchy is not shown by this call.
print(newtree)

打印出结果,看起来与上面的输出差别不大——print(newtree) 只输出根结点 AnyNode 的属性,而其 data 属性保存的正是整个 MethodDeclaration:

AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')

AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'public', 'static'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')

import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree

# Source preprocessing: read the Java file, tokenize it and parse one member
# declaration. The context manager closes the file even if reading raises.
with open("file/code.txt",encoding='utf-8') as programfile:
    programtext = programfile.read()

programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
programast = parser.parse_member_declaration()
tree = programast
# The functions below walk this AST recursively to collect the data needed
# to build a new tree.

def get_token(node):
    """Return the label used for ``node`` in the token sequence.

    * a string is its own label;
    * a set is labelled ``'Modifier'``;
    * a javalang ``Node`` is labelled with its class name;
    * anything else gets the empty string.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, set):
        return 'Modifier'
    if isinstance(node, Node):
        return type(node).__name__
    return ''
def get_child(root):
    """Return the non-empty children of ``root`` as one flat list.

    * ``set`` (how the parsed modifiers appear): its elements, sorted so that
      the child order, and every node id derived from it, is the same on
      each run.
    * javalang ``Node``: its ``children`` with nested lists flattened.
    * anything else: no children.

    Falsy entries (``None``, ``''``, empty sets) are skipped.
    """
    if isinstance(root, set):
        # Plain set iteration order follows string hashing, which differs
        # between interpreter runs; sorting makes the output reproducible.
        children = sorted(root, key=str)
    elif isinstance(root, Node):
        children = root.children
    else:
        children = []

    def expand(nested_list):
        # Depth-first flattening of arbitrarily nested lists.
        for item in nested_list:
            if isinstance(item, list):
                yield from expand(item)
            elif item:
                yield item

    return list(expand(children))
def createtree(root,node,nodelist,parent=None):
    """Recursively mirror the AST rooted at ``node`` as an anytree tree.

    The first call (empty ``nodelist``) fills ``token`` and ``data`` on the
    pre-built ``root``; every later call creates a new ``AnyNode`` attached to
    ``parent``. Node ids are assigned in visiting order, taken from the
    current length of ``nodelist``, which receives every visited element.
    """
    node_id = len(nodelist)
    token = get_token(node)
    children = get_child(node)
    if node_id == 0:
        root.token = token
        root.data = node
        current = root
    else:
        current = AnyNode(id=node_id, token=token, data=node, parent=parent)
    nodelist.append(node)
    for child in children:
        createtree(root, child, nodelist, parent=current)


# nodelist collects every visited AST element; its length supplies the next node id.
nodelist = []
# Placeholder root; createtree fills in its token and data on the first call.
newtree=AnyNode(id=0,token=None,data=None)
createtree(newtree, tree, nodelist)

#print(newtree)


def get_sequence(node, sequence):
    """Append the tokens of ``node`` and all its descendants to ``sequence``.

    Tokens are appended in pre-order: a node's own token first, then the
    tokens of each child subtree from first child to last.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        sequence.append(get_token(current))
        # Push children reversed so the first child is popped (visited) first.
        pending.extend(reversed(get_child(current)))



# Flatten the AST into a pre-order token sequence.
alltokens = []
get_sequence(programast, alltokens)

# Count how often each control-flow statement type occurs.
ifcount = alltokens.count('IfStatement')
whilecount = alltokens.count('WhileStatement')
forcount = alltokens.count('ForStatement')
blockcount = alltokens.count('BlockStatement')
docount = alltokens.count('DoStatement')
switchcount = alltokens.count('SwitchStatement')

print('allnodes: ',len(alltokens))  # e.g. allnodes:  138

# Build the vocabulary from the distinct tokens. Sorting gives every token
# the same id on every run; a bare list(set(...)) follows string hashing,
# which changes between interpreter runs.
alltokens = sorted(set(alltokens))
vocabsize = len(alltokens)
tokenids = range(vocabsize)
vocabdict = dict(zip(alltokens, tokenids))
print("vocabsize: ",vocabsize)  # number of distinct tokens, e.g. vocabsize:  37

def getnodeandedge_astonly(node,nodeindexlist,vocabdict,src,tgt):
    """Collect node features and bidirectional edges of the tree under ``node``.

    In pre-order, appends ``[vocabdict[token]]`` for every node to
    ``nodeindexlist``. For every parent/child pair it records the edge in
    both directions: ``src``/``tgt`` receive ``parent -> child`` followed by
    ``child -> parent``, using the nodes' ``id`` attributes.
    """
    nodeindexlist.append([vocabdict[node.token]])
    parent_id = node.id
    for child in node.children:
        child_id = child.id
        src.extend((parent_id, child_id))
        tgt.extend((child_id, parent_id))
        getnodeandedge_astonly(child, nodeindexlist, vocabdict, src, tgt)
# Walk the tree and collect all of its nodes and edges:
# x holds one [vocabulary id] entry per node, in traversal order;
# edgesrc / edgetgt hold the edge endpoints as parallel lists of node ids.
x = []
edgesrc = []
edgetgt = []
getnodeandedge_astonly(newtree, x, vocabdict, edgesrc, edgetgt)

print("edgesrc:", edgesrc)
print("edgetgt:", edgetgt)

打印出所有的边,用结点 id 表示。每条边都按两个方向各记录一次(父结点到子结点、子结点到父结点),edgesrc 与 edgetgt 两个列表按位置一一对应;x 按遍历顺序记录每个结点的 token 在 vocabdict 中对应的 ID:

edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132]
x:       [[34], [6], [16], [15], [19], [25], [28], [10], [9], [24], [28], [27], [14], [4], [24], [28], [27], [14], [23], [8], [11], [31], [9], [0], [12], [33], [29], [8], [1], [31], [23], [12], [21], [22], [7], [13], [31], [4], [12], [21], [2], [17], [5], [36], [28], [27], [14], [20], [12], [21], [8], [3], [31], [20], [8], [11], [31], [9], [0], [12], [33], [31], [18], [20], [22], [30], [8], [1], [31], [32], [31], [20], [9], [31], [32], [8], [35], [31], [20], [12], [33], [9], [22], [24], [28], [10], [14], [26], [31], [32], [31], [20], [9], [7], [13], [31], [32], [31], [20], [9], [31], [32], [8], [35], [31], [20], [12], [33], [9], [2], [7], [13], [31], [32], [8], [35], [31], [20], [12], [33], [9], [31], [26], [2], [7], [13], [31], [4], [31], [20], [2], [7], [13], [31], [23], [31], [4], [2]]

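 词汇字典,即各 token 及其 ID(vocabdict 由 set 去重后生成,编号在不同次运行中可能不同;例如上面 x 中根结点的 ID 为 34,而下面这份字典中 MethodDeclaration 的 ID 为 29,说明二者并非同一次运行的输出):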

vocabdict: {'Modifier': 0, 'Assignment': 1, '+': 2, 'j': 3, '1': 4, '-': 5, 'BlockStatement': 6, '0': 7, 'right_border': 8, 'MemberReference': 9, '++': 10, 'ForStatement': 11, '>': 12, 'temp': 13, 'length': 14, 'DoStatement': 15, 'FormalParameter': 16, 'ForControl': 17, 'float': 18, 'static': 19, 'IfStatement': 20, 'int': 21, 'ArraySelector': 22, 'BubbleSortFloat2': 23, '<': 24, 'public': 25, 'VariableDeclarator': 26, 'BasicType': 27, 'num': 28, 'MethodDeclaration': 29, '=': 30, 'VariableDeclaration': 31, 'Literal': 32, 'BinaryOperation': 33, 'StatementExpression': 34, 'last_exchange': 35, 'LocalVariableDeclaration': 36}

以上是纯AST的边。下面是在纯AST的基础上,加入 IfStatement、WhileStatement、ForStatement 等控制流边,以及相邻兄弟节点、相邻语句、相邻token、变量前后使用等边之后,得到的“加强AST”的边(SwitchStatement、DoStatement 在下面的代码中只统计了数量,并没有加边):

edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137, 1, 4, 4, 5, 5, 9, 9, 14, 14, 26, 2, 3, 6, 8, 10, 12, 15, 17, 18, 19, 20, 21, 21, 24, 22, 23, 27, 33, 28, 29, 29, 31, 34, 41, 41, 131, 36, 38, 38, 40, 42, 64, 43, 50, 50, 61, 44, 46, 47, 48, 51, 52, 52, 54, 55, 56, 56, 59, 57, 58, 62, 63, 66, 82, 67, 68, 68, 73, 69, 72, 74, 81, 76, 77, 77, 79, 83, 93, 93, 110, 110, 124, 84, 86, 87, 88, 89, 92, 95, 100, 100, 109, 96, 99, 101, 108, 103, 104, 104, 106, 112, 121, 121, 123, 113, 120, 115, 116, 116, 118, 126, 128, 128, 130, 133, 135, 135, 137, 42, 64, 66, 82, 34, 41, 41, 131, 83, 93, 93, 110, 110, 124, 2, 3, 3, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 16, 16, 18, 18, 20, 20, 22, 22, 23, 23, 25, 25, 28, 28, 30, 30, 32, 32, 37, 37, 39, 39, 40, 40, 45, 45, 47, 47, 49, 49, 51, 51, 53, 53, 55, 55, 57, 57, 58, 58, 60, 60, 62, 62, 63, 63, 67, 67, 71, 71, 72, 72, 76, 76, 78, 78, 80, 80, 81, 81, 85, 85, 
87, 87, 91, 91, 92, 92, 98, 98, 99, 99, 103, 103, 105, 105, 107, 107, 108, 108, 109, 109, 115, 115, 117, 117, 119, 119, 120, 120, 122, 122, 123, 123, 127, 127, 129, 129, 130, 130, 134, 134, 136, 136, 137, 23, 58, 30, 134, 37, 127, 127, 136, 53, 63, 63, 71, 71, 78, 78, 91, 91, 98, 98, 105, 105, 117, 117, 129, 72, 81, 81, 92, 92, 99, 99, 108, 108, 120]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132, 4, 1, 5, 4, 9, 5, 14, 9, 26, 14, 3, 2, 8, 6, 12, 10, 17, 15, 19, 18, 21, 20, 24, 21, 23, 22, 33, 27, 29, 28, 31, 29, 41, 34, 131, 41, 38, 36, 40, 38, 64, 42, 50, 43, 61, 50, 46, 44, 48, 47, 52, 51, 54, 52, 56, 55, 59, 56, 58, 57, 63, 62, 82, 66, 68, 67, 73, 68, 72, 69, 81, 74, 77, 76, 79, 77, 93, 83, 110, 93, 124, 110, 86, 84, 88, 87, 92, 89, 100, 95, 109, 100, 99, 96, 108, 101, 104, 103, 106, 104, 121, 112, 123, 121, 120, 113, 116, 115, 118, 116, 128, 126, 130, 128, 135, 133, 137, 135, 64, 42, 82, 66, 41, 34, 131, 41, 93, 83, 110, 93, 124, 110, 3, 2, 4, 3, 7, 4, 8, 7, 11, 8, 13, 11, 16, 13, 18, 16, 20, 18, 22, 20, 23, 22, 25, 23, 28, 25, 30, 28, 32, 30, 37, 32, 39, 37, 40, 39, 45, 40, 47, 45, 49, 47, 51, 49, 53, 51, 55, 53, 57, 55, 58, 57, 60, 58, 62, 60, 63, 62, 67, 63, 71, 67, 72, 71, 76, 72, 78, 76, 80, 78, 81, 80, 85, 81, 87, 
85, 91, 87, 92, 91, 98, 92, 99, 98, 103, 99, 105, 103, 107, 105, 108, 107, 109, 108, 115, 109, 117, 115, 119, 117, 120, 119, 122, 120, 123, 122, 127, 123, 129, 127, 130, 129, 134, 130, 136, 134, 137, 136, 58, 23, 134, 30, 127, 37, 136, 127, 63, 53, 71, 63, 78, 71, 91, 78, 98, 91, 105, 98, 117, 105, 129, 117, 81, 72, 92, 81, 99, 92, 108, 99, 120, 108]
x:       [[33], [36], [25], [31], [11], [28], [5], [18], [29], [3], [5], [9], [12], [35], [3], [5], [9], [12], [0], [2], [23], [19], [29], [8], [16], [26], [27], [2], [13], [19], [0], [16], [22], [30], [1], [32], [19], [35], [16], [22], [14], [15], [34], [7], [5], [9], [12], [17], [16], [22], [2], [10], [19], [17], [2], [23], [19], [29], [8], [16], [26], [19], [6], [17], [30], [21], [2], [13], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [30], [3], [5], [18], [12], [4], [19], [24], [19], [17], [29], [1], [32], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [14], [1], [32], [19], [24], [2], [20], [19], [17], [16], [26], [29], [19], [4], [14], [1], [32], [19], [35], [19], [17], [14], [1], [32], [19], [0], [19], [35], [14]]

加强边代码:

import javalang
from javalang.ast import Node
import os
from anytree import AnyNode, RenderTree

# Preprocessing: read the Java snippet and parse it into a javalang AST.
# The context manager releases the file handle even if reading raises.
with open("file/code.txt", encoding='utf-8') as programfile:
    programtext = programfile.read()
# Tokenizing works on the in-memory text, so the file is not needed any more.
programtokens = javalang.tokenizer.tokenize(programtext)
parser = javalang.parse.Parser(programtokens)
# code.txt holds a single method, so it is parsed as a member declaration.
programast = parser.parse_member_declaration()
tree = programast
# The helpers below recursively walk this AST to collect the data for the tree.

def get_token(node):
    """Return the vocabulary token that represents one AST element.

    A string is its own token, a set is represented by the fixed token
    'Modifier', and a javalang ``Node`` is represented by its class name.
    Any other value yields the empty string.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, set):
        return 'Modifier'
    if isinstance(node, Node):
        return type(node).__name__
    return ''
def get_child(root):
    """Return the children of *root* as one flat list.

    For a javalang ``Node`` the children come from ``root.children``; for a
    set they are the set's elements; anything else has no children.
    Nested lists are flattened to any depth, and falsy non-list entries
    (``None``, empty strings, empty sets, ...) are dropped.
    """
    if isinstance(root, Node):
        raw_children = root.children
    elif isinstance(root, set):
        raw_children = list(root)
    else:
        raw_children = []

    flattened = []

    def _flatten(entries):
        # Depth-first walk that keeps the left-to-right order of the entries.
        for entry in entries:
            if isinstance(entry, list):
                _flatten(entry)
            elif entry:
                flattened.append(entry)

    _flatten(raw_children)
    return flattened
def createtree(root,node,nodelist,parent=None):
    """Mirror the AST rooted at *node* as an anytree structure under *root*.

    Each visited element receives the current length of *nodelist* as its
    id, so ids follow the order in which elements are visited. On the first
    call (empty *nodelist*) the pre-built *root* is filled in with the token
    and data; every later element becomes a new ``AnyNode`` attached to
    *parent*. Every visited element is appended to *nodelist*.
    """
    node_id = len(nodelist)
    token = get_token(node)
    children = get_child(node)
    if node_id == 0:
        root.token = token
        root.data = node
        current = root
    else:
        current = AnyNode(id=node_id, token=token, data=node, parent=parent)
    nodelist.append(node)
    # Children of the first element hang off root, all others off the new node.
    for child in children:
        createtree(root, child, nodelist, parent=current)


# Build the anytree copy of the parsed AST: newtree is the pre-made root
# (id 0) that createtree fills in, nodelist collects every visited element.
newtree = AnyNode(id=0, token=None, data=None)
nodelist = []
createtree(root=newtree, node=tree, nodelist=nodelist)

#print(newtree)


def get_sequence(node, sequence):
    """Append the tokens of the subtree rooted at *node* to *sequence*.

    Tokens are appended parent first, then each child's subtree from left
    to right.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        token = get_token(current)
        children = get_child(current)
        sequence.append(token)
        # Push in reverse so the left-most child is popped (visited) first.
        pending.extend(reversed(children))



# Flatten the AST into its token sequence (one token per tree node).
alltokens = []
get_sequence(programast, alltokens)

# Count how many statements of each kind occur in the method.
ifcount = alltokens.count('IfStatement')
whilecount = alltokens.count('WhileStatement')
forcount = alltokens.count('ForStatement')
blockcount = alltokens.count('BlockStatement')
docount = alltokens.count('DoStatement')
switchcount = alltokens.count('SwitchStatement')
print(ifcount,whilecount,forcount,blockcount,docount,switchcount)

print('allnodes: ', len(alltokens))  # 138 nodes for the sample method
# Deduplicate and sort the tokens. Iterating a bare set of strings follows
# the per-process string hashing, so the token -> id mapping used to change
# from run to run; sorting makes the vocabulary reproducible.
alltokens = sorted(set(alltokens))
print("token set size:", len(alltokens))  # 37 distinct tokens for the sample
vocabsize = len(alltokens)
tokenids = range(vocabsize)
vocabdict = dict(zip(alltokens, tokenids))
print("vocabsize: ", vocabsize)  # number of distinct tokens
print("vocabdict:", vocabdict)

def getnodeandedge(node,nodeindexlist,vocabdict,src,tgt,edgetype):
    """Collect node features and parent/child edges of the tree under *node*.

    For every node, parent first, the vocabulary id of its token is appended
    to *nodeindexlist* as a one-element list. For every parent/child pair
    two edges of type 0 are recorded, parent -> child and child -> parent,
    by appending to *src*, *tgt* and *edgetype* in parallel.
    """
    nodeindexlist.append([vocabdict[node.token]])
    for kid in node.children:
        # Both directions are stored so the edge list describes an undirected link.
        src.extend((node.id, kid.id))
        tgt.extend((kid.id, node.id))
        edgetype.extend(([0], [0]))
        getnodeandedge(kid, nodeindexlist, vocabdict, src, tgt, edgetype)

# Edge-type ids for the extra (non-AST) edges.
# Types 0 (parent/child) and 1 (next sibling) are written as literals by the
# functions that create those edges, so they have no entry here.
edges = {
    'Nexttoken': 2,
    'Prevtoken': 3,
    'Nextuse': 4,
    'Prevuse': 5,
    'If': 6,
    'Ifelse': 7,
    'While': 8,
    'For': 9,
    'Nextstmt': 10,
    'Prevstmt': 11,
    'Prevsib': 12,
}
def getedge_nextsib(node,vocabdict,src,tgt,edgetype):
    """Link every pair of adjacent siblings in the tree under *node*.

    For each two neighbouring children, an edge of type 1 is recorded from
    the left one to the right one and an ``edges['Prevsib']`` edge in the
    opposite direction. *vocabdict* is only passed along the recursion.
    """
    siblings = node.children
    for left, right in zip(siblings, siblings[1:]):
        src.append(left.id)
        tgt.append(right.id)
        edgetype.append([1])
        src.append(right.id)
        tgt.append(left.id)
        edgetype.append([edges['Prevsib']])
    for child in siblings:
        getedge_nextsib(child, vocabdict, src, tgt, edgetype)
def getedge_flow(node,vocabdict,src,tgt,edgetype,ifedge=False,whileedge=False,foredge=False):
    """Add control-flow edges for while, for and if statements under *node*.

    For each enabled statement kind, the statement's first child is linked
    with its second child in both directions using the matching edge type
    ('While', 'For' or 'If'). An IfStatement with exactly three children
    additionally links the first child with the third using 'Ifelse'.
    *vocabdict* is only passed along the recursion.
    """
    def _link_both_ways(first, second, label):
        # Record first -> second and second -> first with the same edge type.
        src.append(first.id)
        tgt.append(second.id)
        edgetype.append([edges[label]])
        src.append(second.id)
        tgt.append(first.id)
        edgetype.append([edges[label]])

    kind = node.token
    kids = node.children
    if whileedge == True and kind == 'WhileStatement':
        _link_both_ways(kids[0], kids[1], 'While')
    if foredge == True and kind == 'ForStatement':
        # NOTE: finer-grained 'For_loopstart' / 'For_loopend' edges into the
        # loop body were sketched here at one point but were never enabled.
        _link_both_ways(kids[0], kids[1], 'For')
    if ifedge == True and kind == 'IfStatement':
        _link_both_ways(kids[0], kids[1], 'If')
        if len(kids) == 3:
            _link_both_ways(kids[0], kids[2], 'Ifelse')
    for child in kids:
        getedge_flow(child, vocabdict, src, tgt, edgetype, ifedge, whileedge, foredge)
def getedge_nextstmt(node,vocabdict,src,tgt,edgetype):
    """Link consecutive children of every BlockStatement under *node*.

    Inside a node whose token is 'BlockStatement', each child is connected
    to the following one with a 'Nextstmt' edge and back with a 'Prevstmt'
    edge. Other nodes only pass the recursion on to their children.
    """
    kids = node.children
    if node.token == 'BlockStatement':
        for earlier, later in zip(kids, kids[1:]):
            src.append(earlier.id)
            tgt.append(later.id)
            edgetype.append([edges['Nextstmt']])
            src.append(later.id)
            tgt.append(earlier.id)
            edgetype.append([edges['Prevstmt']])
    for child in kids:
        getedge_nextstmt(child, vocabdict, src, tgt, edgetype)
def getedge_nexttoken(node,vocabdict,src,tgt,edgetype,tokenlist):
    """Chain the leaves of the tree under *node* in left-to-right order.

    The ids of all childless nodes are appended to *tokenlist* in the order
    a depth-first, left-to-right walk reaches them. Every two consecutive
    entries of *tokenlist* are then connected with a 'Nexttoken' edge and a
    'Prevtoken' edge in the opposite direction.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        kids = current.children
        if len(kids) == 0:
            tokenlist.append(current.id)
        else:
            # Reverse so the left-most child is handled first.
            pending.extend(reversed(kids))
    for before, after in zip(tokenlist, tokenlist[1:]):
        src.append(before)
        tgt.append(after)
        edgetype.append([edges['Nexttoken']])
        src.append(after)
        tgt.append(before)
        edgetype.append([edges['Prevtoken']])
def getedge_nextuse(node,vocabdict,src,tgt,edgetype,variabledict):
    """Link successive uses of the same variable in the tree under *node*.

    For every 'MemberReference' node, the child whose token equals
    ``node.data.member`` is treated as the variable occurrence and its id is
    appended to ``variabledict[name]``. Consecutive occurrences of each
    name are then connected with a 'Nextuse' edge and a 'Prevuse' edge in
    the opposite direction. *vocabdict* is accepted for signature
    compatibility and is not used.
    """
    def getvariables(current, variabledict):
        if current.token == 'MemberReference':
            variablenode = None
            for child in current.children:
                if child.token == current.data.member:
                    # The last matching child wins, as in the original code.
                    variablenode = child
            # Skip references without a matching child instead of failing
            # on an unbound local variable.
            if variablenode is not None:
                variabledict.setdefault(variablenode.token, []).append(variablenode.id)
        for child in current.children:
            getvariables(child, variabledict)

    getvariables(node, variabledict)
    for uses in variabledict.values():
        for earlier, later in zip(uses, uses[1:]):
            src.append(earlier)
            tgt.append(later)
            edgetype.append([edges['Nextuse']])
            src.append(later)
            tgt.append(earlier)
            edgetype.append([edges['Prevuse']])
# Walk the tree and collect all of its nodes and edges.
x, edgesrc, edgetgt, edge_attr = [], [], [], []

# Switches for the optional edge families.
nextsib = True
ifedge = True
whileedge = True
foredge = True
blockedge = True
nexttoken = True
nextuse = True

# Node features plus the plain parent/child edges always come first.
getnodeandedge(newtree, x, vocabdict, edgesrc, edgetgt, edge_attr)
if nextsib:
    getedge_nextsib(newtree, vocabdict, edgesrc, edgetgt, edge_attr)
# The control-flow pass always runs; its flags select which statements count.
getedge_flow(newtree, vocabdict, edgesrc, edgetgt, edge_attr, ifedge, whileedge, foredge)
if blockedge:
    getedge_nextstmt(newtree, vocabdict, edgesrc, edgetgt, edge_attr)
tokenlist = []
if nexttoken:
    getedge_nexttoken(newtree, vocabdict, edgesrc, edgetgt, edge_attr, tokenlist)
variabledict = {}
if nextuse:
    getedge_nextuse(newtree, vocabdict, edgesrc, edgetgt, edge_attr, variabledict)

# Per the original author's notes for the sample method: edgetgt has 558
# entries and x has 138 entries.
edge_index = [edgesrc, edgetgt]

print("tree:",newtree)
edgesrc: [0, 1, 1, 2, 1, 3, 0, 4, 0, 5, 5, 6, 6, 7, 5, 8, 0, 9, 9, 10, 10, 11, 9, 12, 12, 13, 0, 14, 14, 15, 15, 16, 14, 17, 17, 18, 17, 19, 19, 20, 19, 21, 21, 22, 21, 23, 19, 24, 24, 25, 0, 26, 26, 27, 27, 28, 27, 29, 29, 30, 27, 31, 31, 32, 26, 33, 33, 34, 34, 35, 35, 36, 36, 37, 35, 38, 38, 39, 35, 40, 33, 41, 41, 42, 42, 43, 43, 44, 44, 45, 43, 46, 46, 47, 46, 48, 48, 49, 42, 50, 50, 51, 50, 52, 52, 53, 50, 54, 54, 55, 54, 56, 56, 57, 56, 58, 54, 59, 59, 60, 42, 61, 61, 62, 61, 63, 41, 64, 64, 65, 65, 66, 66, 67, 66, 68, 68, 69, 69, 70, 70, 71, 68, 72, 66, 73, 73, 74, 74, 75, 75, 76, 75, 77, 77, 78, 75, 79, 79, 80, 73, 81, 65, 82, 82, 83, 83, 84, 84, 85, 83, 86, 86, 87, 86, 88, 88, 89, 89, 90, 90, 91, 88, 92, 82, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 95, 99, 94, 100, 100, 101, 101, 102, 102, 103, 102, 104, 104, 105, 102, 106, 106, 107, 100, 108, 94, 109, 82, 110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 114, 116, 116, 117, 114, 118, 118, 119, 112, 120, 111, 121, 121, 122, 111, 123, 82, 124, 124, 125, 125, 126, 126, 127, 125, 128, 128, 129, 125, 130, 33, 131, 131, 132, 132, 133, 133, 134, 132, 135, 135, 136, 132, 137, 1, 4, 4, 5, 5, 9, 9, 14, 14, 26, 2, 3, 6, 8, 10, 12, 15, 17, 18, 19, 20, 21, 21, 24, 22, 23, 27, 33, 28, 29, 29, 31, 34, 41, 41, 131, 36, 38, 38, 40, 42, 64, 43, 50, 50, 61, 44, 46, 47, 48, 51, 52, 52, 54, 55, 56, 56, 59, 57, 58, 62, 63, 66, 82, 67, 68, 68, 73, 69, 72, 74, 81, 76, 77, 77, 79, 83, 93, 93, 110, 110, 124, 84, 86, 87, 88, 89, 92, 95, 100, 100, 109, 96, 99, 101, 108, 103, 104, 104, 106, 112, 121, 121, 123, 113, 120, 115, 116, 116, 118, 126, 128, 128, 130, 133, 135, 135, 137, 42, 64, 66, 82, 34, 41, 41, 131, 83, 93, 93, 110, 110, 124, 2, 3, 3, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 16, 16, 18, 18, 20, 20, 22, 22, 23, 23, 25, 25, 28, 28, 30, 30, 32, 32, 37, 37, 39, 39, 40, 40, 45, 45, 47, 47, 49, 49, 51, 51, 53, 53, 55, 55, 57, 57, 58, 58, 60, 60, 62, 62, 63, 63, 67, 67, 71, 71, 72, 72, 76, 76, 78, 78, 80, 80, 81, 81, 85, 85, 
87, 87, 91, 91, 92, 92, 98, 98, 99, 99, 103, 103, 105, 105, 107, 107, 108, 108, 109, 109, 115, 115, 117, 117, 119, 119, 120, 120, 122, 122, 123, 123, 127, 127, 129, 129, 130, 130, 134, 134, 136, 136, 137, 23, 58, 30, 134, 37, 127, 127, 136, 53, 63, 63, 71, 71, 78, 78, 91, 91, 98, 98, 105, 105, 117, 117, 129, 72, 81, 81, 92, 92, 99, 99, 108, 108, 120]
edgetgt: [1, 0, 2, 1, 3, 1, 4, 0, 5, 0, 6, 5, 7, 6, 8, 5, 9, 0, 10, 9, 11, 10, 12, 9, 13, 12, 14, 0, 15, 14, 16, 15, 17, 14, 18, 17, 19, 17, 20, 19, 21, 19, 22, 21, 23, 21, 24, 19, 25, 24, 26, 0, 27, 26, 28, 27, 29, 27, 30, 29, 31, 27, 32, 31, 33, 26, 34, 33, 35, 34, 36, 35, 37, 36, 38, 35, 39, 38, 40, 35, 41, 33, 42, 41, 43, 42, 44, 43, 45, 44, 46, 43, 47, 46, 48, 46, 49, 48, 50, 42, 51, 50, 52, 50, 53, 52, 54, 50, 55, 54, 56, 54, 57, 56, 58, 56, 59, 54, 60, 59, 61, 42, 62, 61, 63, 61, 64, 41, 65, 64, 66, 65, 67, 66, 68, 66, 69, 68, 70, 69, 71, 70, 72, 68, 73, 66, 74, 73, 75, 74, 76, 75, 77, 75, 78, 77, 79, 75, 80, 79, 81, 73, 82, 65, 83, 82, 84, 83, 85, 84, 86, 83, 87, 86, 88, 86, 89, 88, 90, 89, 91, 90, 92, 88, 93, 82, 94, 93, 95, 94, 96, 95, 97, 96, 98, 97, 99, 95, 100, 94, 101, 100, 102, 101, 103, 102, 104, 102, 105, 104, 106, 102, 107, 106, 108, 100, 109, 94, 110, 82, 111, 110, 112, 111, 113, 112, 114, 113, 115, 114, 116, 114, 117, 116, 118, 114, 119, 118, 120, 112, 121, 111, 122, 121, 123, 111, 124, 82, 125, 124, 126, 125, 127, 126, 128, 125, 129, 128, 130, 125, 131, 33, 132, 131, 133, 132, 134, 133, 135, 132, 136, 135, 137, 132, 4, 1, 5, 4, 9, 5, 14, 9, 26, 14, 3, 2, 8, 6, 12, 10, 17, 15, 19, 18, 21, 20, 24, 21, 23, 22, 33, 27, 29, 28, 31, 29, 41, 34, 131, 41, 38, 36, 40, 38, 64, 42, 50, 43, 61, 50, 46, 44, 48, 47, 52, 51, 54, 52, 56, 55, 59, 56, 58, 57, 63, 62, 82, 66, 68, 67, 73, 68, 72, 69, 81, 74, 77, 76, 79, 77, 93, 83, 110, 93, 124, 110, 86, 84, 88, 87, 92, 89, 100, 95, 109, 100, 99, 96, 108, 101, 104, 103, 106, 104, 121, 112, 123, 121, 120, 113, 116, 115, 118, 116, 128, 126, 130, 128, 135, 133, 137, 135, 64, 42, 82, 66, 41, 34, 131, 41, 93, 83, 110, 93, 124, 110, 3, 2, 4, 3, 7, 4, 8, 7, 11, 8, 13, 11, 16, 13, 18, 16, 20, 18, 22, 20, 23, 22, 25, 23, 28, 25, 30, 28, 32, 30, 37, 32, 39, 37, 40, 39, 45, 40, 47, 45, 49, 47, 51, 49, 53, 51, 55, 53, 57, 55, 58, 57, 60, 58, 62, 60, 63, 62, 67, 63, 71, 67, 72, 71, 76, 72, 78, 76, 80, 78, 81, 80, 85, 81, 87, 
85, 91, 87, 92, 91, 98, 92, 99, 98, 103, 99, 105, 103, 107, 105, 108, 107, 109, 108, 115, 109, 117, 115, 119, 117, 120, 119, 122, 120, 123, 122, 127, 123, 129, 127, 130, 129, 134, 130, 136, 134, 137, 136, 58, 23, 134, 30, 127, 37, 136, 127, 63, 53, 71, 63, 78, 71, 91, 78, 98, 91, 105, 98, 117, 105, 129, 117, 81, 72, 92, 81, 99, 92, 108, 99, 120, 108]
x:       [[33], [36], [25], [31], [11], [28], [5], [18], [29], [3], [5], [9], [12], [35], [3], [5], [9], [12], [0], [2], [23], [19], [29], [8], [16], [26], [27], [2], [13], [19], [0], [16], [22], [30], [1], [32], [19], [35], [16], [22], [14], [15], [34], [7], [5], [9], [12], [17], [16], [22], [2], [10], [19], [17], [2], [23], [19], [29], [8], [16], [26], [19], [6], [17], [30], [21], [2], [13], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [30], [3], [5], [18], [12], [4], [19], [24], [19], [17], [29], [1], [32], [19], [24], [19], [17], [29], [19], [24], [2], [20], [19], [17], [16], [26], [29], [14], [1], [32], [19], [24], [2], [20], [19], [17], [16], [26], [29], [19], [4], [14], [1], [32], [19], [35], [19], [17], [14], [1], [32], [19], [0], [19], [35], [14]]
vocabdict: {'num': 0, 'FormalParameter': 1, '++': 2, 'right_border': 3, 'StatementExpression': 4, '-': 5, '0': 6, 'ArraySelector': 7, 'public': 8, '>': 9, 'DoStatement': 10, '+': 11, 'MethodDeclaration': 12, 'int': 13, '1': 14, 'last_exchange': 15, 'length': 16, 'ForControl': 17, 'float': 18, 'IfStatement': 19, 'Assignment': 20, 'MemberReference': 21, 'VariableDeclaration': 22, 'BubbleSortFloat2': 23, 'BinaryOperation': 24, 'LocalVariableDeclaration': 25, 'j': 26, 'static': 27, 'Literal': 28, 'BasicType': 29, 'temp': 30, 'Modifier': 31, 'ForStatement': 32, 'BlockStatement': 33, '=': 34, 'VariableDeclarator': 35, '<': 36}

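 又回到最初的起点,Let me 康康最后得到的“带BUFF”的AST数据:凭肉眼很难看出区别,但对比一下就能发现,边确实变多了——纯AST的 edgetgt 以 137, 132 结尾,而加强后的 edgesrc/edgetgt 在这之后又追加了新的边,并最终记录到 edge_index 中。x 和 vocabdict 里的编号与前面不同,只是因为每次运行时 set 的顺序不同,token 本身并没有变化。最后得到的最有用的数据应该就是上面这四行了。over...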

tree: AnyNode(data=MethodDeclaration(annotations=[], body=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=None, name=last_exchange)], modifiers=set(), type=BasicType(dimensions=[], name=int)), LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), name=right_border)], modifiers=set(), type=BasicType(dimensions=[], name=int)), DoStatement(body=BlockStatement(label=None, statements=[StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0)), label=None), ForStatement(body=BlockStatement(label=None, statements=[IfStatement(condition=BinaryOperation(operandl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), operandr=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), operator=>), else_statement=None, label=None, then_statement=BlockStatement(label=None, statements=[LocalVariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=[], initializer=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, 
selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), name=temp)], modifiers=set(), type=BasicType(dimensions=[], name=float)), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]))]), type==, value=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=num, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[ArraySelector(index=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=1), operator=+))]), type==, value=MemberReference(member=temp, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]))]), control=ForControl(condition=BinaryOperation(operandl=MemberReference(member=j, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=BinaryOperation(operandl=MemberReference(member=length, postfix_operators=[], prefix_operators=[], qualifier=num, selectors=[]), operandr=Literal(postfix_operators=[], 
prefix_operators=[], qualifier=None, selectors=[], value=1), operator=-), operator=<), init=VariableDeclaration(annotations=[], declarators=[VariableDeclarator(dimensions=None, initializer=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), name=j)], modifiers=set(), type=BasicType(dimensions=[], name=int)), update=[MemberReference(member=j, postfix_operators=['++'], prefix_operators=[], qualifier=, selectors=[])]), label=None), StatementExpression(expression=Assignment(expressionl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), type==, value=MemberReference(member=last_exchange, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[])), label=None)]), condition=BinaryOperation(operandl=MemberReference(member=right_border, postfix_operators=[], prefix_operators=[], qualifier=, selectors=[]), operandr=Literal(postfix_operators=[], prefix_operators=[], qualifier=None, selectors=[], value=0), operator=>), label=None)], documentation=None, modifiers={'static', 'public'}, name=BubbleSortFloat2, parameters=[FormalParameter(annotations=[], modifiers=set(), name=num, type=BasicType(dimensions=[None], name=float), varargs=False)], return_type=None, throws=None, type_parameters=None), id=0, token='MethodDeclaration')

 

 

 

 

 

标签:node,qualifier,AST,postfix,operators,selectors,----,prefix,源码
来源: https://blog.csdn.net/qq_35294564/article/details/115342407