Update code extractor of Java.

This commit is contained in:
krahets 2023-02-07 19:05:19 +08:00
parent 95bffcfb36
commit 8f512c2840
9 changed files with 99 additions and 87 deletions

View file

@ -8,7 +8,7 @@
#define MAX_SIZE 5000 #define MAX_SIZE 5000
// 大顶堆 /* 大顶堆 */
typedef struct maxHeap { typedef struct maxHeap {
// size 代表的是实际元素的个数 // size 代表的是实际元素的个数
int size; int size;
@ -20,14 +20,6 @@ void siftDown(maxHeap *h, int i);
void siftUp(maxHeap *h, int i); void siftUp(maxHeap *h, int i);
/* Construct an empty max-heap.
 * Returns a newly allocated heap whose size is 0, or NULL when the
 * allocation fails. The caller owns the returned pointer. */
maxHeap *newEmptyMaxHeap(void) {
    // Allocate the heap structure only; no elements are inserted here
    maxHeap *h = (maxHeap *) malloc(sizeof(maxHeap));
    // malloc may return NULL; report the failure instead of dereferencing it
    if (h == NULL) {
        return NULL;
    }
    h->size = 0;
    return h;
}
/* 构造函数,根据切片建堆 */ /* 构造函数,根据切片建堆 */
maxHeap *newMaxHeap(int nums[], int size) { maxHeap *newMaxHeap(int nums[], int size) {
// 所有元素入堆 // 所有元素入堆

View file

@ -6,7 +6,7 @@
#include "../include/include.hpp" #include "../include/include.hpp"
/* 大堆 */ /* 堆 */
class MaxHeap { class MaxHeap {
private: private:
// 使用动态数组,这样无需考虑扩容问题 // 使用动态数组,这样无需考虑扩容问题
@ -62,9 +62,6 @@ private:
} }
public: public:
/* Constructor: build an empty heap */
MaxHeap() {}
/* 构造函数,根据输入列表建堆 */ /* 构造函数,根据输入列表建堆 */
MaxHeap(vector<int> nums) { MaxHeap(vector<int> nums) {
// 将列表元素原封不动添加进堆 // 将列表元素原封不动添加进堆

View file

@ -9,16 +9,11 @@ package chapter_heap;
import include.*; import include.*;
import java.util.*; import java.util.*;
/* 大堆 */ /* 堆 */
class MaxHeap { class MaxHeap {
// 使用列表而非数组这样无需考虑扩容问题 // 使用列表而非数组这样无需考虑扩容问题
private List<Integer> maxHeap; private List<Integer> maxHeap;
/* Constructor: build an empty heap */
public MaxHeap() {
// Back the heap with a fresh, empty list
maxHeap = new ArrayList<>();
}
/* 构造函数,根据输入列表建堆 */ /* 构造函数,根据输入列表建堆 */
public MaxHeap(List<Integer> nums) { public MaxHeap(List<Integer> nums) {
// 将列表元素原封不动添加进堆 // 将列表元素原封不动添加进堆

View file

@ -6,14 +6,10 @@
import utils import utils
/* */
class MaxHeap { class MaxHeap {
private var maxHeap: [Int] private var maxHeap: [Int]
/* Initializer: build an empty heap */
init() {
maxHeap = []
}
/* */ /* */
init(nums: [Int]) { init(nums: [Int]) {
// //

View file

@ -260,11 +260,6 @@ comments: true
=== "Java" === "Java"
```java title="my_heap.java" ```java title="my_heap.java"
// 使用列表而非数组,这样无需考虑扩容问题
List<Integer> maxHeap;
[class]{MaxHeap}-[func]{MaxHeap}
[class]{MaxHeap}-[func]{left} [class]{MaxHeap}-[func]{left}
[class]{MaxHeap}-[func]{right} [class]{MaxHeap}-[func]{right}

View file

@ -15,7 +15,7 @@ comments: true
=== "Java" === "Java"
```java title="linear_search.java" ```java title="linear_search.java"
[class]{hashing_search}-[func]{linearSearchArray} [class]{linear_search}-[func]{linearSearchArray}
``` ```
=== "C++" === "C++"
@ -145,7 +145,7 @@ comments: true
=== "Java" === "Java"
```java title="linear_search.java" ```java title="linear_search.java"
[class]{hashing_search}-[func]{linearSearchLinkedList} [class]{linear_search}-[func]{linearSearchLinkedList}
``` ```
=== "C++" === "C++"

View file

@ -8,6 +8,7 @@ import re
import os import os
import os.path as osp import os.path as osp
class ExtractCodeBlocksJava: class ExtractCodeBlocksJava:
def __init__(self) -> None: def __init__(self) -> None:
self.langs = ["java"] self.langs = ["java"]
@ -19,37 +20,35 @@ class ExtractCodeBlocksJava:
self.block_start_pattern = '^\s{ind}\/\*.+\*\/' self.block_start_pattern = '^\s{ind}\/\*.+\*\/'
def extract(self, file_path): def extract(self, file_path):
"""
Extract classes and functions from a Java source file
"""
self.file_path = file_path self.file_path = file_path
with open(file_path) as f: with open(file_path) as f:
self.lines = f.readlines() self.lines = f.readlines()
self.content = "".join(self.lines) self.content = "".join(self.lines)
# Detect and extract all the classes along with their functions
# Detect and extract all the classes and functions
classes = self.extract_class_blocks() classes = self.extract_class_blocks()
# Remove 'static' funcs = self.extract_function_blocks()
self.post_process(classes)
self.post_process(classes, funcs)
return { return {
"classes": classes "classes": classes,
"funcs": funcs,
} }
def post_process(self, classes):
    """
    Strip the 'static ' keyword from the extracted class functions

    For every function of every class, only the first line of its block
    that contains 'static ' is rewritten; the block list is modified in place.
    """
    for class_info in classes.values():
        for func_info in class_info["funcs"].values():
            block = func_info["block"]
            # Position of the first line mentioning the keyword, if any
            hit = next(
                (idx for idx, text in enumerate(block) if "static " in text),
                None)
            if hit is not None:
                block[hit] = block[hit].replace("static ", "")
def search_block(self, header_line, indentation): def search_block(self, header_line, indentation):
""" """
Search class/function block given the header_line and indentation Search class/function block given the header_line and indentation
""" """
start_line, end_line = 0, len(self.lines) start_line, end_line = 0, len(self.lines)
block_end_pattern = re.compile(self.block_end_pattern.replace("ind", str(indentation))) block_end_pattern = re.compile(
block_start_pattern = re.compile(self.block_start_pattern.replace("ind", str(indentation))) self.block_end_pattern.replace("ind", str(indentation)))
block_start_pattern = re.compile(
self.block_start_pattern.replace("ind", str(indentation)))
# Search the code # Search the code
for i in range(header_line + 1, len(self.lines)): for i in range(header_line + 1, len(self.lines)):
@ -61,16 +60,9 @@ class ExtractCodeBlocksJava:
if re.search(block_start_pattern, self.lines[i]) is not None: if re.search(block_start_pattern, self.lines[i]) is not None:
start_line = i start_line = i
break break
code_block = self.lines[start_line:end_line + 1]
# Remove empty lines at bottom
for i in range(len(code_block) - 1, -1, -1):
if re.search("^\s*\n", code_block[i]) is None:
break
end_line -= 1
return start_line, end_line, self.lines[start_line:end_line + 1] return start_line, end_line, self.lines[start_line:end_line + 1]
def extract_function_blocks(self, indentation=4, start_line=-1, end_line=-1): def extract_function_blocks(self, indentation=4, start_line=-1, end_line=-1):
""" """
Extract all the functions with given indentation Extract all the functions with given indentation
@ -87,13 +79,16 @@ class ExtractCodeBlocksJava:
for line_num in range(start_line, end_line + 1): for line_num in range(start_line, end_line + 1):
# Search the function header # Search the function header
func_match = func_pattern.match(self.lines[line_num]) func_match = func_pattern.match(self.lines[line_num])
if func_match is None: continue if func_match is None:
continue
# The function should match the input indentation # The function should match the input indentation
if len(func_match.group(1)) != indentation: continue if len(func_match.group(1)) != indentation:
continue
header_line = line_num header_line = line_num
# Search the block from the header line # Search the block from the header line
start_line, end_line, func_block = self.search_block(header_line, indentation) start_line, end_line, func_block = self.search_block(
header_line, indentation)
# Construct the funcs dict # Construct the funcs dict
func_label = func_match.group(5) func_label = func_match.group(5)
funcs[func_label] = { funcs[func_label] = {
@ -119,11 +114,13 @@ class ExtractCodeBlocksJava:
for line_num, line in enumerate(self.lines): for line_num, line in enumerate(self.lines):
# Search the class header # Search the class header
class_match = class_pattern.match(line) class_match = class_pattern.match(line)
if class_match is None: continue if class_match is None:
continue
header_line = line_num header_line = line_num
# Search the block from the header line # Search the block from the header line
start_line, end_line, class_block = self.search_block(header_line, 0) start_line, end_line, class_block = self.search_block(
header_line, 0)
# Construct the classes dict # Construct the classes dict
class_label = class_match.group(2) class_label = class_match.group(2)
classes[class_label] = { classes[class_label] = {
@ -140,6 +137,21 @@ class ExtractCodeBlocksJava:
return classes return classes
def post_process(self, classes, funcs):
    """
    Process the classes and functions

    Removes the `static`, `public` and `private` modifiers from the header
    line of every extracted function: the functions of each class in
    `classes` and the entries of `funcs`. Blocks are modified in place and
    nothing is returned.
    """
    # Match the modifiers only as whole words, so identifiers that merely
    # end with one of them (e.g. a type named "Nonstatic") are left intact
    modifier_pattern = re.compile(r"\b(?:static|public|private) ")

    def remove_keyword(func):
        block = func["block"]
        # The block may start above the header (at its header comment),
        # so convert the absolute header line to an index into the block
        header_line = func["line_number"]["header"] - \
            func["line_number"]["start"]
        block[header_line] = modifier_pattern.sub("", block[header_line])

    for clas in classes.values():
        for func in clas["funcs"].values():
            remove_keyword(func)
    for func in funcs.values():
        remove_keyword(func)
# ext = ExtractCodeBlocksJava() # ext = ExtractCodeBlocksJava()
# ext.extract("codes/java/chapter_array_and_linkedlist/array.java")
# ext.extract("codes/java/chapter_array_and_linkedlist/my_list.java") # ext.extract("codes/java/chapter_array_and_linkedlist/my_list.java")

View file

@ -19,6 +19,9 @@ class ExtractCodeBlocksPython:
self.block_start_pattern = '^\s{ind}""".+' self.block_start_pattern = '^\s{ind}""".+'
def extract(self, file_path): def extract(self, file_path):
"""
Extract classes and functions from a Python source file
"""
self.file_path = file_path self.file_path = file_path
with open(file_path) as f: with open(file_path) as f:
self.lines = f.readlines() self.lines = f.readlines()
@ -28,6 +31,8 @@ class ExtractCodeBlocksPython:
classes = self.extract_class_blocks() classes = self.extract_class_blocks()
funcs = self.extract_function_blocks() funcs = self.extract_function_blocks()
self.post_process(classes, funcs)
return { return {
"classes": classes, "classes": classes,
"funcs": funcs, "funcs": funcs,
@ -45,21 +50,15 @@ class ExtractCodeBlocksPython:
# Search the code # Search the code
for i in range(header_line + 1, len(self.lines)): for i in range(header_line + 1, len(self.lines)):
if re.match(block_end_pattern, self.lines[i]) is not None: if re.match(block_end_pattern, self.lines[i]) is not None:
end_line = i end_line = i - 1
break break
# Search the header comment # Search the header comment
for i in range(header_line - 1, -1, -1): for i in range(header_line - 1, -1, -1):
if re.search(block_start_pattern, self.lines[i]) is not None: if re.search(block_start_pattern, self.lines[i]) is not None:
start_line = i start_line = i
break break
func_block = self.lines[start_line:end_line]
# Remove empty lines at bottom
for i in range(len(func_block) - 1, -1, -1):
if re.search("^\s*\n", func_block[i]) is None:
break
end_line -= 1
return start_line, end_line, self.lines[start_line:end_line] return start_line, end_line, self.lines[start_line:end_line + 1]
def extract_function_blocks(self, indentation=0, start_line=-1, end_line=-1): def extract_function_blocks(self, indentation=0, start_line=-1, end_line=-1):
@ -130,3 +129,29 @@ class ExtractCodeBlocksPython:
} }
return classes return classes
def post_process(self, classes, funcs):
    """
    Process the classes and functions

    Removes the blank lines at the bottom of every extracted block: each
    class in `classes`, each of its functions, and each entry of `funcs`.
    The entry's "block" is replaced by the trimmed list and its
    "line_number"["end"] is decreased by the number of lines removed.
    Entries are modified in place and nothing is returned.
    """
    # A line counts as empty when it holds only whitespace up to its newline
    blank_line = re.compile(r"^\s*\n")

    def remove_empty_lines(func):
        block = func["block"]
        # Count the empty lines at the bottom
        trailing = 0
        for line in reversed(block):
            if blank_line.search(line) is None:
                break
            trailing += 1
        # Trim relative to the block itself rather than the recorded line
        # numbers, so the blank lines are dropped even when the recorded
        # end index does not line up with the block length
        func["line_number"]["end"] -= trailing
        func["block"] = block[:len(block) - trailing]

    for clas in classes.values():
        remove_empty_lines(clas)
        for func in clas["funcs"].values():
            remove_empty_lines(func)
    for func in funcs.values():
        remove_empty_lines(func)
# ext = ExtractCodeBlocksPython()
# ext.extract("codes/python/chapter_array_and_linkedlist/my_list.py")