Skip to content

Commit

Permalink
more powerful and faster version
Browse files Browse the repository at this point in the history
  • Loading branch information
Yunlongs committed Mar 13, 2022
1 parent e5710db commit 24e02d2
Show file tree
Hide file tree
Showing 44 changed files with 37,943 additions and 35,516 deletions.
190 changes: 58 additions & 132 deletions auto_extract_func.py
Original file line number Diff line number Diff line change
@@ -1,172 +1,98 @@
import os
import multiprocessing
import subprocess
from parse_call_graph import remove_dup_caller, to_json
import argparse
#project_dir = "/home/lyl/source_code/openssl"
from parse_call_graph import remove_dup_caller
import config


def shell(cmd):
os.system(cmd)
print(cmd)

def dir_shell(dir,cmd):

def dir_shell(dir, cmd):
os.chdir(dir)
print(cmd)
#os.system(cmd)
subprocess.call(cmd,timeout=120, shell=True)

#plugin_dir = "/home/lyl/plugins"

def parser_cmd(in_file, flag):
assert flag == "print-fns" or flag == "point-memory" or flag == "point-memory-free-1" or "point-memory-free-2" or "free-check", "please choose the correct plugin name: print-fns or point-memory"
with open(in_file, "r") as f:
cmd = f.readline()
try:
cmd = cmd[cmd.index(".o.d") + 5:]
except:
print(cmd)
return ""
if cmd.find(".c") == -1:
return ""
if flag == "print-fns":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/PrintFunctionNames.so -Xclang -plugin -Xclang print-fns -Xclang -plugin-arg-print-fns -Xclang {0}/call_graph.json -Xclang -plugin-arg-print-fns -Xclang {0}/indirect_call.json ".format(plugin_dir)
elif flag == "point-memory":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlow.so -Xclang -plugin -Xclang point-memory -Xclang -plugin-arg-point-memory -Xclang {0}/allocation_set -Xclang -plugin-arg-point-memory -Xclang {0}/memory_flow_alloc.json ".format(plugin_dir)
elif flag == "point-memory-free-1":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlowFree.so -Xclang -plugin -Xclang point-memory-free -Xclang -plugin-arg-point-memory-free -Xclang {0}/free_set.txt -Xclang -plugin-arg-point-memory-free -Xclang {0}/seed_free.txt -Xclang -plugin-arg-point-memory-free -Xclang {0}/memory_flow_free.json ".format(plugin_dir)
elif flag == "point-memory-free-2":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlowFree.so -Xclang -plugin -Xclang point-memory-free -Xclang -plugin-arg-point-memory-free -Xclang {0}/overlap_func.txt -Xclang -plugin-arg-point-memory-free -Xclang 2 -Xclang -plugin-arg-point-memory-free -Xclang {0}/memory_flow_free_checked.json ".format(plugin_dir)
else:
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/FreeNullCheck.so -Xclang -plugin -Xclang free-check -Xclang -plugin-arg-free-check -Xclang {0}/free_funcs.txt -Xclang -plugin-arg-free-check -Xclang {0}/free_check.txt ".format(plugin_dir)
new_cmd += cmd
return new_cmd
subprocess.call(cmd, timeout=120, shell=True)


def walk_dir(in_dir, flag):
    """Dispatch one plugin command per ``.cmd`` file found below *in_dir*.

    The directory tree is walked with ``os.walk``; every file whose name ends
    in ``.cmd`` is turned into a command line by ``parser_cmd`` and handed to
    ``shell`` through a process pool sized to the CPU count. The call returns
    once all submitted jobs have finished.

    :param in_dir: root directory to search.
    :param flag: plugin selector forwarded unchanged to ``parser_cmd``.
    """
    workers = multiprocessing.Pool(multiprocessing.cpu_count())
    for current_dir, _subdirs, names in os.walk(in_dir):
        for name in names:
            if not name.endswith(".cmd"):
                continue
            plugin_cmd = parser_cmd(os.path.join(current_dir, name), flag)
            workers.apply_async(shell, (plugin_cmd,))
    # No more submissions: wait for the outstanding jobs to drain.
    workers.close()
    workers.join()
def AddFreePluginArg(arg):
    """Return the clang option fragment passing *arg* to ``point-memory-free``.

    The fragment starts with a space so it can be appended directly to a
    command line. Because the finished command is executed through a shell,
    the value is shell-quoted: plain values such as ``"1"`` or an ordinary
    path come out unchanged, while values containing spaces or shell
    metacharacters stay a single argument.

    :param arg: plugin argument value; converted with ``str()`` first.
    :return: ``" -Xclang -plugin-arg-point-memory-free -Xclang <arg>"``.
    """
    import shlex  # local import so the block does not depend on file-level imports

    return " -Xclang -plugin-arg-point-memory-free -Xclang " + shlex.quote(str(arg))


def FreePluginCmd(step):
    """Build the clang command prefix that runs the ``point-memory-free`` plugin.

    The prefix loads ``MemoryDataFlowFree.so`` from ``config.plugin_dir`` and
    passes six plugin arguments in this order: *step*, then the candidate-free,
    seed-free, MOS-seed, MOS-output and visited-file paths from ``config``.

    :param step: value passed as the first plugin argument.
    :return: the command prefix, ending with a space so the compile arguments
        can be appended directly.
    """
    prefix = (
        "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlowFree.so -Xclang -plugin "
        "-Xclang point-memory-free "
    ).format(config.plugin_dir)
    plugin_values = (
        step,
        config.candidate_free_path,
        config.seed_free_path,
        config.mos_seed_path,
        config.mos_free_outpath,
        config.visited_file_path,
    )
    pieces = [prefix]
    for value in plugin_values:
        pieces.append(AddFreePluginArg(value))
    return " ".join(pieces) + " "


def walk_compile_database(file, flag, plugin_dir, temp_dir):
assert flag == "print-fns" or flag == "point-memory" or flag == "point-memory-free-1" or flag =="point-memory-free-2" or flag == "free-check", "please choose the correct plugin name: print-fns or point-memory"
def format_clang_command(plugin_dir, temp_dir, plugin, plugin_name, *plugin_args):
    """Assemble a ``clang -fsyntax-only`` command prefix that loads a plugin.

    *plugin*, *plugin_name* and every entry of *plugin_args* may contain the
    placeholders ``{0}`` and ``{1}``; after the pieces are joined with single
    spaces they are substituted with *plugin_dir* and *temp_dir* respectively.

    :param plugin_dir: value substituted for ``{0}``.
    :param temp_dir: value substituted for ``{1}``.
    :param plugin: shared-object path given to ``-load``.
    :param plugin_name: plugin name given to ``-plugin``; also used to form
        the ``-plugin-arg-<name>`` switch.
    :param plugin_args: values passed to the plugin, one switch each.
    :return: the command prefix, ending with a single space.
    """
    pieces = ["clang -fsyntax-only -Xclang -load -Xclang", plugin, "-Xclang -plugin -Xclang", plugin_name]
    for value in plugin_args:
        pieces += ["-Xclang -plugin-arg-" + plugin_name, "-Xclang", value]
    template = " ".join(pieces)
    return template.format(plugin_dir, temp_dir) + " "


def walking_compile_database(file, flag, plugin_dir=config.plugin_dir, temp_dir=config.temp_dir, next_setp_TU=None):
assert flag == "extract-funcs" or flag == "point-memory-alloc" or flag == "point-memory-free-1" or flag == "point-memory-free-2" or flag == "free-check", "please choose the correct plugin name: print-fns or point-memory"
pool = multiprocessing.Pool(multiprocessing.cpu_count())
import json
with open(file,"r") as f:
compile_database = json.load(f)
if next_setp_TU is None:
with open(file, "r") as f:
compile_database = json.load(f)
else:
compile_database = next_setp_TU
for i, command in enumerate(compile_database):
cmd = " ".join(command["command"].split(" ")[1:])
if flag == "print-fns":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/PrintFunctionNames.so -Xclang -plugin -Xclang print-fns -Xclang -plugin-arg-print-fns -Xclang {1}/call_graph.json -Xclang -plugin-arg-print-fns -Xclang {1}/indirect_call.json ".format(
plugin_dir, temp_dir)
elif flag == "point-memory":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlow.so -Xclang -plugin -Xclang point-memory -Xclang -plugin-arg-point-memory -Xclang {1}/allocation_set -Xclang -plugin-arg-point-memory -Xclang {1}/memory_flow_alloc.json ".format(
plugin_dir, temp_dir)
if flag == "extract-funcs":
new_cmd = format_clang_command(plugin_dir, temp_dir, "{0}/ExtractFunctionPrototypes.so", "extract-funcs", "{1}/call_graph.json", "{1}/indirect_call.json")
elif flag == "point-memory-alloc":
new_cmd = format_clang_command(plugin_dir, temp_dir, "{0}/MemoryDataFlow.so", "point-memory", "{1}/allocation_set", "{1}/memory_flow_alloc.json")
elif flag == "point-memory-free-1":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlowFree.so -Xclang -plugin -Xclang point-memory-free -Xclang -plugin-arg-point-memory-free -Xclang {1}/free_set.txt -Xclang -plugin-arg-point-memory-free -Xclang {1}/seed_free.txt -Xclang -plugin-arg-point-memory-free -Xclang {1}/memory_flow_free.json ".format(
plugin_dir, temp_dir)
new_cmd = FreePluginCmd(step="1")
elif flag == "point-memory-free-2":
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/MemoryDataFlowFree.so -Xclang -plugin -Xclang point-memory-free -Xclang -plugin-arg-point-memory-free -Xclang {1}/overlap_func.txt -Xclang -plugin-arg-point-memory-free -Xclang 2 -Xclang -plugin-arg-point-memory-free -Xclang {1}/memory_flow_free_checked.json ".format(
plugin_dir,temp_dir)
new_cmd = FreePluginCmd(step="2")
else:
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/FreeNullCheck.so -Xclang -plugin -Xclang free-check -Xclang -plugin-arg-free-check -Xclang {1}/free_funcs.txt -Xclang -plugin-arg-free-check -Xclang {1}/free_check.txt ".format(plugin_dir, temp_dir)
new_cmd = "clang -fsyntax-only -Xclang -load -Xclang {0}/FreeNullCheck.so -Xclang -plugin -Xclang free-check -Xclang -plugin-arg-free-check -Xclang {1}/candidate_free.txt -Xclang -plugin-arg-free-check -Xclang {1}/free_check.txt -Xclang -plugin-arg-free-check -Xclang {2} ".format(
plugin_dir, temp_dir, config.visited_file_path)

new_cmd += cmd
dir = command["directory"]
pool.apply_async(dir_shell, (dir,new_cmd,))
if i%1000 ==0 and i!=0 and flag == "print-fns":
pool.apply_async(dir_shell, (dir, new_cmd,))
if i % 3000 == 0 and i != 0 and flag == "extract-funcs":
pool.close()
pool.join()
call_graph_path = temp_dir + os.sep + "call_graph.json"
to_json(call_graph_path)
call_graph_path = config.call_graph_path
remove_dup_caller(call_graph_path, call_graph_path)
pool = multiprocessing.Pool(multiprocessing.cpu_count())
pool.close()
pool.join()


def delete_exist_file(file):
    """Delete *file*, doing nothing when it is already absent.

    The removal is attempted directly instead of being guarded by an
    ``os.path.exists`` check, so a file that disappears between check and
    removal (for example, deleted by another process) no longer raises.
    A dangling symlink at *file* is removed as well.

    :param file: path of the file to delete.
    :raises OSError: for failures other than "does not exist", e.g. *file*
        is a directory or permissions are insufficient.
    """
    try:
        os.remove(file)
    except (FileNotFoundError, NotADirectoryError):
        # Already gone, or a parent path component is not a directory.
        pass

# Post-process {plugin_dir}/call_graph.json once it has grown large.
# Reads the module-level name `plugin_dir`; returns immediately when the file
# does not exist.
# NOTE(review): indentation is lost in this view, so it cannot be determined
# here whether remove_dup_caller() is guarded by the size check or runs
# unconditionally -- confirm against the real file.
def check_size():
func_file = "{}/call_graph.json".format(plugin_dir)
if not os.path.exists(func_file):
return
size = os.path.getsize(func_file)
# size is in bytes; the threshold is therefore 4 * 10**9 bytes.
if size / 1000000000 > 4:
to_json(func_file)
# Input and output path are the same file, so it is rewritten in place.
remove_dup_caller(func_file, func_file)

# Run the plugin selected by `flag` over each immediate subdirectory of
# `project_dir`, after clearing leftover state from earlier runs.
#   plugin_dir  -- directory holding the generated JSON files removed below
#   temp_dir    -- accepted but not used in this body
#   project_dir -- project root; becomes the working directory
#   flag        -- plugin selector forwarded to walk_dir()
# NOTE(review): indentation is lost in this view, so whether check_size() runs
# after every subdirectory or once after the loop cannot be determined here.
def for_kernel(plugin_dir, temp_dir, project_dir, flag):
#flag = "point-memory-free-2"
# Start from an empty /tmp/visited: delete any existing file, then recreate it.
if os.path.exists("/tmp/visited"):
os.remove("/tmp/visited")
f = open("/tmp/visited", "w")
f.close()
# Remove result files left in plugin_dir by a previous run.
if os.path.exists("{0}/memory_flow_free.json".format(plugin_dir)):
os.remove("{0}/memory_flow_free.json".format(plugin_dir))
if os.path.exists("{0}/call_graph.json".format(plugin_dir)):
os.remove("{0}/call_graph.json".format(plugin_dir))
if os.path.exists("{0}/memory_flow_alloc.json".format(plugin_dir)):
os.remove("{0}/memory_flow_alloc.json".format(plugin_dir))

#in_dir = "/home/lyl/source_code/linux-5.9.10"
in_dir = project_dir
os.chdir(in_dir)
path_list = os.listdir(in_dir)
# Only directories are processed; plain files directly under in_dir are ignored.
for path in path_list:
p = os.path.join(in_dir, path)
if os.path.isdir(p):
print(p)
walk_dir(p, flag)
# check_size() is only called for the "print-fns" plugin.
if flag == "print-fns":
check_size()

def for_others(plugin_dir, temp_dir, project_dir, flag):
#flag = "print-fns"
if os.path.exists("/tmp/visited"):
os.remove("/tmp/visited")
f = open("/tmp/visited", "w")
f.close()
if os.path.exists("{0}/memory_flow_free.json".format(plugin_dir)):
os.remove("{0}/memory_flow_free.json".format(plugin_dir))
if os.path.exists("{0}/call_graph.json".format(plugin_dir)):
os.remove("{0}/call_graph.json".format(plugin_dir))
if os.path.exists("{0}/memory_flow_alloc.json".format(plugin_dir)):
os.remove("{0}/memory_flow_alloc.json".format(plugin_dir))

def plugin_run(project_dir, flag, next_setp_TU=None):
delete_exist_file(config.visited_file_path)
os.system("touch %s" % config.visited_file_path)

compile_database_file = project_dir + os.sep + "compilation.json"
if not os.path.exists(compile_database_file):
print("compile database not exist!")
walk_compile_database(compile_database_file, flag, plugin_dir, temp_dir)
print("\ncompile database not exist! Please make sure that there is a compilation.json under the project "
"directory!\n")
exit(-1)
walking_compile_database(compile_database_file, flag, next_setp_TU=next_setp_TU)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process CSA data flow plugins.')
parser.add_argument('plugin_dir', metavar='/home/lyl/plugins', type=str, nargs=1, default="/home/lyl/plugins",
help='The dir of plugins you are stored in.')
parser.add_argument("project_dir", metavar="/xxx/linux-5.12", type=str, nargs=1,
help="The dir of project you want to analyze.")
parser.add_argument("flag", type=str, nargs=1,
help="print-fns or point-memory or point-memory-free-1 or point-memory-free-2")
parser.add_argument("isKernel", type=int, nargs=1, help="Whether this project is a huge project, such as kernel.")
args = parser.parse_args()
plugin_dir = args.plugin_dir[0]
print("plugin_dir:", plugin_dir)
project_dir = args.project_dir[0]
print("project_dir:", project_dir)
flag = args.flag[0]
print("flag: ", flag)
isKernel = args.isKernel[0]
print("is Kernel: ",isKernel)
temp_dir = plugin_dir

if isKernel == 1:
for_kernel(plugin_dir,temp_dir, project_dir,flag)
else:
for_others(plugin_dir,temp_dir, project_dir,flag)
cmd = format_clang_command(config.plugin_dir, config.temp_dir, "{0}/ExtractFunctionPrototypes.so", "extract-funcs", "{1}/call_graph.json", "{1}/indirect_call.json")
print(cmd)
29 changes: 29 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,32 @@

# Frontend Checker config
strong_belief_threshold = 0.95
#min_call = 10
min_reset = 20 # kernel for 20, others for 2


import os
# Every absolute location below is derived from the working directory of the
# process at the moment this module is imported (os.getcwd() is read once).
cur_work_dir = os.getcwd()
plugin_dir = cur_work_dir + "/plugins"
temp_dir = cur_work_dir + "/temp"

# Call graph file, stored inside temp_dir.
call_graph_path= temp_dir + os.sep + "call_graph.json"

# MOS Free Plugin: file locations under <cwd>/temp.
candidate_free_path = cur_work_dir + "/temp/candidate_free.txt"
seed_free_path = cur_work_dir + "/temp/seed_free.txt"
mos_seed_path = cur_work_dir + "/temp/last_step_mos.json"
mos_free_outpath = cur_work_dir + "/temp/memory_flow_free.json"
visited_file_path = cur_work_dir + "/temp/visited.txt"

# MOS Alloc Plugin: file locations under <cwd>/temp.
candidate_alloc_path = cur_work_dir + "/temp/candidate_alloc.txt"
seed_alloc_path = cur_work_dir + "/temp/seed_alloc.txt"
mos_alloc_outpath = cur_work_dir + "/temp/memory_flow_alloc.json"

# NOTE(review): presumably an upper bound on analysis iterations -- the
# consumer is not visible here; confirm before changing.
max_iteration = 15

# Temporary files. Unlike the paths above these are relative, so they resolve
# against whatever the working directory is when they are opened.
free_check_file = "temp/free_check.txt"
time_record_file = "temp/time_record.txt"
66 changes: 66 additions & 0 deletions csa_report_clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
class csa_report_cleaner(object):
    """Remove duplicated bug rows from an HTML report index, in place.

    The report is expected to contain table rows delimited by the exact tags
    ``<tr>`` and ``</tr>``. A row describing a bug carries a
    ``reportHash=<hash>">`` fragment; rows sharing the same hash are
    duplicates and only the first is kept. The first ``<td>`` cell of every
    kept bug row is rewritten with a fresh 1-based running number. Rows
    without a ``reportHash=`` fragment (e.g. a header row) are kept verbatim.

    Typical use: ``csa_report_cleaner(path).clean()``.
    """

    _ROW_OPEN = "<tr>"
    _ROW_CLOSE = "</tr>"
    _HASH_MARKER = "reportHash="

    def __init__(self, html_path):
        """Remember *html_path*; no file access happens until ``init``/``clean``."""
        self.html_path = html_path
        self.table_list = []       # every row found by parse_table
        self.new_table_list = []   # rows kept after de-duplication
        self.table_content = ""    # text from the first <tr> to the last </tr>
        self.html_front = ""       # document text before the rows
        self.html_back = ""        # document text after the rows
        self.hash_list = []        # report hashes already seen, in order

    def init(self):
        """Read the report and split it into front / rows / back.

        When the document has no complete ``<tr>...</tr>`` span, a message is
        printed and the whole document is kept as ``html_front`` so that a
        later ``format_new_report`` writes the file back unchanged.
        """
        with open(self.html_path, "r") as f:
            data = f.read()

        # Start from a clean slate so clean() can be called more than once.
        self.table_list = []
        self.new_table_list = []
        self.hash_list = []

        table_start = data.find(self._ROW_OPEN)
        last_close = data.rfind(self._ROW_CLOSE)
        if table_start == -1 or last_close < table_start:
            print("There is no bug report.")
            self.table_content = ""
            self.html_front = data
            self.html_back = ""
            return

        table_end = last_close + len(self._ROW_CLOSE)
        self.table_content = data[table_start:table_end]
        self.html_front = data[:table_start]
        self.html_back = data[table_end:]

    def parse_table(self):
        """Split ``table_content`` into rows and drop rows with a repeated hash.

        Fills ``table_list`` with every row, ``new_table_list`` with the rows
        to keep and ``hash_list`` with the hashes encountered. Whatever text
        follows the last complete row is left in ``table_content``.
        """
        content = self.table_content
        pos = 0
        while True:
            tr_start = content.find(self._ROW_OPEN, pos)
            if tr_start == -1:
                break
            # Look for the closing tag after the opening one, never before it.
            close_at = content.find(self._ROW_CLOSE, tr_start)
            if close_at == -1:
                break
            pos = close_at + len(self._ROW_CLOSE)
            self.table_list.append(content[tr_start:pos])
        self.table_content = content[pos:]

        seen = set(self.hash_list)
        kept = 0
        for row in self.table_list:
            marker_at = row.find(self._HASH_MARKER)
            if marker_at == -1:
                # Not a bug row: keep untouched, do not number it.
                self.new_table_list.append(row)
                continue
            tail = row[marker_at + len(self._HASH_MARKER):]
            hash_end = tail.find("\">")
            # Use the complete hash up to the closing quote; without a closing
            # quote the rest of the row serves as the key.
            report_hash = tail if hash_end == -1 else tail[:hash_end]
            if report_hash in seen:
                continue
            seen.add(report_hash)
            self.hash_list.append(report_hash)
            kept += 1
            self.new_table_list.append(self.replace_table_id(row, kept))

    def replace_table_id(self, table, new_id):
        """Return *table* with the content of its first ``<td>`` cell replaced.

        :param table: HTML text of one row.
        :param new_id: value written into the cell (converted with ``str``).
        :return: the rewritten row, or *table* unchanged when it has no
            complete ``<td>...</td>`` cell.
        """
        open_at = table.find("<td>")
        if open_at == -1:
            return table
        td_start = open_at + len("<td>")
        td_end = table.find("</td>", td_start)
        if td_end == -1:
            return table
        return table[:td_start] + str(new_id) + table[td_end:]

    def format_new_report(self):
        """Write front + kept rows + back to ``html_path``.

        Bug rows are numbered consecutively from 1 as they are written.
        """
        parts = [self.html_front]
        index = 0
        for row in self.new_table_list:
            if self._HASH_MARKER in row:
                index += 1
                row = self.replace_table_id(row, index)
            parts.append(row)
        parts.append(self.html_back)
        with open(self.html_path, "w") as f:
            f.write("".join(parts))

    def clean(self):
        """Run the whole pipeline: read, de-duplicate, write back."""
        self.init()
        self.parse_table()
        self.format_new_report()
Loading

0 comments on commit 24e02d2

Please sign in to comment.