Objective:
Extract function information from Java code (function name, name of the returned value, return type, and function parameters)
Approach
1. Match the function header (e.g. public static void name())
Normal function headers start with public, private, or protected - (public|private|protected)+
Not all functions have static - \s(static)?
Function return type (string) - (\w+)
Function name (string) - (\w+)
Space between different keywords - \s
Parameters (optional, must start with a word character) - (\w+.*\w*)?
2. Match the name of the returned variable
First, only non-void functions have a return value
Next, the return keyword comes immediately before the returned value
The return statement is enclosed in the function's {} body
It lies between two function headers (after extracting the function names, match between the current function's name and the next function's name)
code implementation
information extraction
#Regular expression matching method
def process_method(text):
    """Extract method headers and returned-variable names from Java source.

    Args:
        text: Java source code as a single string.

    Returns:
        A list with one 6-tuple per matched method header, in source order:
        ``(return_names, modifier, static, return_type, name, params)``.

        * ``modifier`` is ``public``, ``private`` or ``protected``.
        * ``static`` is ``"static"`` or ``""`` when the keyword is absent.
        * ``params`` is the raw text between the parentheses (``""`` if empty).
        * ``return_names`` is every identifier captured from a
          ``return <identifier>;`` statement, each followed by a backslash,
          with the last match first. It is a single backslash for ``void``
          methods and ``""`` when a non-void method yields no match.
    """
    # Header: access modifier, optional "static", return type, method name,
    # then an optional parameter list that must start with a word character.
    header_re = re.compile(
        r"(public|private|protected)+\s(static)?\s?(\w+)\s(\w+)\((\w+.*\w*)?\)"
    )
    headers = header_re.findall(text)

    # Appended to a method name: parameter list, opening brace, a
    # "return <identifier>;" statement and a closing brace.
    body_fragment = r"\(.*\).*\s*\{.*return\s(\w+);.*\}"

    results = []
    for index, header in enumerate(headers):
        return_type, method_name = header[2], header[3]

        # Only non-void methods can return a value; void keeps the placeholder.
        return_names = [""]
        if return_type != "void":
            regex = method_name + body_fragment
            if index + 1 < len(headers):
                # Not the last method: anchor the match on the next method's
                # header as well.
                regex += r".*" + headers[index + 1][3] + r"\(.*\)"
            # re.S lets "." cross line breaks, since bodies span several lines.
            # Search the text passed in, not an unrelated global.
            return_names = re.findall(regex, text, re.S)

        # Each name is followed by a backslash; later matches come first.
        joined = "".join(found + "\\" for found in reversed(return_names))
        results.append((joined,) + header)
    return results
Save the extracted information to Excel
#Store method information in excel
def saveInExcel(method_info,name,path):
    """Write extracted method tuples to an ``.xls`` workbook.

    Args:
        method_info: Iterable of indexable records; indices 4, 3, 0 and 5
            fill the method-name, return-type, return-variable and parameter
            columns respectively.
        name: Used both as the sheet name and as the file's base name.
        path: Prefix prepended directly to ``name`` (no separator is added).
    """
    workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = workbook.add_sheet(name, cell_overwrite_ok=True)

    # Header row.
    headings = ("method_name", "return_type", "return_vname", "method_para")
    for column, heading in enumerate(headings):
        sheet.write(0, column, heading)

    # Index into each record that feeds each column, in column order.
    field_order = (4, 3, 0, 5)
    for row, method in enumerate(method_info, 1):
        print(method)
        for column, field in enumerate(field_order):
            sheet.write(row, column, method[field])

    workbook.save(path + name + ".xls")