Bug fixes and improvements (#1348)
* Add "reference" for EN version. Bug fixes. * Unify the figure reference as "the figure below" and "the figure above". Bug fixes. * Format the EN markdown files. * Replace "" with <u></u> for EN version and bug fixes * Fix biary_tree_dfs.png * Fix biary_tree_dfs.png * Fix zh-hant/biary_tree_dfs.png * Fix heap_sort_step1.png * Sync zh and zh-hant versions. * Bug fixes * Fix EN figures * Bug fixes * Fix the figure labels for EN version
@@ -50,7 +50,7 @@ int rotHash(char *key) {

 /* Driver Code */
 int main() {
-    char *key = "Hello dsad3241241dsa算123法";
+    char *key = "Hello 算法";

     int hash = addHash(key);
     printf("加法哈希值为 %d\n", hash);

@@ -48,7 +48,7 @@ int rotHash(string key) {

 /* Driver Code */
 int main() {
-    string key = "Hello dsad3241241dsa算123法";
+    string key = "Hello 算法";

     int hash = addHash(key);
     cout << "加法哈希值为 " << hash << endl;
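For readers skimming these hunks: `addHash` in this chapter is an additive hash over the key's characters. A minimal sketch of the idea, in TypeScript for uniformity with the examples below (the modulus value is an assumption, not copied from the repo):

```ts
// Sketch of an additive hash: sum the character codes modulo a large prime.
function addHash(key: string): number {
    const MODULUS = 1000000007; // assumed modulus; a large prime keeps values bounded
    let hash = 0;
    for (const c of key) {
        hash = (hash + c.codePointAt(0)!) % MODULUS;
    }
    return hash;
}

console.log(addHash('Hello 算法')); // same input as the updated driver code
```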
@@ -31,7 +31,7 @@ function coinChangeDP(coins, amt) {
     return dp[n][amt] !== MAX ? dp[n][amt] : -1;
 }

-/* 零钱兑换:状态压缩后的动态规划 */
+/* 零钱兑换:空间优化后的动态规划 */
 function coinChangeDPComp(coins, amt) {
     const n = coins.length;
     const MAX = amt + 1;

@@ -61,6 +61,6 @@ const amt = 4;
 let res = coinChangeDP(coins, amt);
 console.log(`凑到目标金额所需的最少硬币数量为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = coinChangeDPComp(coins, amt);
 console.log(`凑到目标金额所需的最少硬币数量为 ${res}`);
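These hunks only rename a comment ("状态压缩" / "state compression" → "空间优化" / "space optimization"); the function they annotate is the rolling-array variant of coin change. A sketch of what such a `coinChangeDPComp` typically contains (reconstructed for context, not copied from the repo):

```ts
/* Coin change, space-optimized DP: one dp row of length amt + 1
   instead of an (n + 1) × (amt + 1) table. */
function coinChangeDPComp(coins: number[], amt: number): number {
    const MAX = amt + 1; // sentinel meaning "unreachable"
    const dp = new Array<number>(amt + 1).fill(MAX);
    dp[0] = 0;
    for (const coin of coins) {
        // reuse of a coin is allowed, so traverse amounts in increasing order
        for (let a = coin; a <= amt; a++) {
            dp[a] = Math.min(dp[a], dp[a - coin] + 1);
        }
    }
    return dp[amt] !== MAX ? dp[amt] : -1;
}
```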
@@ -30,7 +30,7 @@ function coinChangeIIDP(coins, amt) {
     return dp[n][amt];
 }

-/* 零钱兑换 II:状态压缩后的动态规划 */
+/* 零钱兑换 II:空间优化后的动态规划 */
 function coinChangeIIDPComp(coins, amt) {
     const n = coins.length;
     // 初始化 dp 表

@@ -59,6 +59,6 @@ const amt = 5;
 let res = coinChangeIIDP(coins, amt);
 console.log(`凑出目标金额的硬币组合数量为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = coinChangeIIDPComp(coins, amt);
 console.log(`凑出目标金额的硬币组合数量为 ${res}`);

@@ -82,7 +82,7 @@ function editDistanceDP(s, t) {
     return dp[n][m];
 }

-/* 编辑距离:状态压缩后的动态规划 */
+/* 编辑距离:空间优化后的动态规划 */
 function editDistanceDPComp(s, t) {
     const n = s.length,
         m = t.length;

@@ -130,6 +130,6 @@ console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);
 res = editDistanceDP(s, t);
 console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = editDistanceDPComp(s, t);
 console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);
@@ -69,7 +69,7 @@ function knapsackDP(wgt, val, cap) {
     return dp[n][cap];
 }

-/* 0-1 背包:状态压缩后的动态规划 */
+/* 0-1 背包:空间优化后的动态规划 */
 function knapsackDPComp(wgt, val, cap) {
     const n = wgt.length;
     // 初始化 dp 表

@@ -108,6 +108,6 @@ console.log(`不超过背包容量的最大物品价值为 ${res}`);
 res = knapsackDP(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = knapsackDPComp(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);
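For the 0-1 knapsack, the space-optimized variant the renamed comment refers to has one distinctive detail worth calling out: the capacity loop runs backwards. A reconstructed sketch (not the repo listing):

```ts
/* 0-1 knapsack, space-optimized DP: a single dp row, filled right-to-left. */
function knapsackDPComp(wgt: number[], val: number[], cap: number): number {
    const n = wgt.length;
    const dp = new Array<number>(cap + 1).fill(0);
    for (let i = 0; i < n; i++) {
        // traverse capacities in reverse so dp[c - wgt[i]] still holds the
        // previous item's row, which is what forbids taking item i twice
        for (let c = cap; c >= wgt[i]; c--) {
            dp[c] = Math.max(dp[c], dp[c - wgt[i]] + val[i]);
        }
    }
    return dp[cap];
}
```

The unbounded-knapsack hunks further down differ only in traversing capacities in increasing order, which is exactly what permits reusing an item.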
@@ -22,7 +22,7 @@ function minCostClimbingStairsDP(cost) {
     return dp[n];
 }

-/* 爬楼梯最小代价:状态压缩后的动态规划 */
+/* 爬楼梯最小代价:空间优化后的动态规划 */
 function minCostClimbingStairsDPComp(cost) {
     const n = cost.length - 1;
     if (n === 1 || n === 2) {

@@ -69,7 +69,7 @@ function minPathSumDP(grid) {
     return dp[n - 1][m - 1];
 }

-/* 最小路径和:状态压缩后的动态规划 */
+/* 最小路径和:空间优化后的动态规划 */
 function minPathSumDPComp(grid) {
     const n = grid.length,
         m = grid[0].length;

@@ -116,6 +116,6 @@ console.log(`从左上角到右下角的最小路径和为 ${res}`);
 res = minPathSumDP(grid);
 console.log(`从左上角到右下角的最小路径和为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = minPathSumDPComp(grid);
 console.log(`从左上角到右下角的最小路径和为 ${res}`);

@@ -29,7 +29,7 @@ function unboundedKnapsackDP(wgt, val, cap) {
     return dp[n][cap];
 }

-/* 完全背包:状态压缩后的动态规划 */
+/* 完全背包:空间优化后的动态规划 */
 function unboundedKnapsackDPComp(wgt, val, cap) {
     const n = wgt.length;
     // 初始化 dp 表

@@ -58,6 +58,6 @@ const cap = 4;
 let res = unboundedKnapsackDP(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = unboundedKnapsackDPComp(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);
@@ -9,7 +9,6 @@ class QuickSort
   class << self
     ### 哨兵划分 ###
     def partition(nums, left, right)
-
       # 以 nums[left] 为基准数
       i, j = left, right
       while i < j

@@ -116,7 +115,7 @@ class QuickSortTailCall
       i # 返回基准数的索引
     end

-    ### 快速排序(尾递归优化)
+    ### 快速排序(尾递归优化)###
     def quick_sort(nums, left, right)
       # 子数组长度不为 1 时递归
       while left < right
@@ -31,7 +31,7 @@ function coinChangeDP(coins: Array<number>, amt: number): number {
     return dp[n][amt] !== MAX ? dp[n][amt] : -1;
 }

-/* 零钱兑换:状态压缩后的动态规划 */
+/* 零钱兑换:空间优化后的动态规划 */
 function coinChangeDPComp(coins: Array<number>, amt: number): number {
     const n = coins.length;
     const MAX = amt + 1;

@@ -61,7 +61,7 @@ const amt = 4;
 let res = coinChangeDP(coins, amt);
 console.log(`凑到目标金额所需的最少硬币数量为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = coinChangeDPComp(coins, amt);
 console.log(`凑到目标金额所需的最少硬币数量为 ${res}`);

@@ -30,7 +30,7 @@ function coinChangeIIDP(coins: Array<number>, amt: number): number {
     return dp[n][amt];
 }

-/* 零钱兑换 II:状态压缩后的动态规划 */
+/* 零钱兑换 II:空间优化后的动态规划 */
 function coinChangeIIDPComp(coins: Array<number>, amt: number): number {
     const n = coins.length;
     // 初始化 dp 表

@@ -59,7 +59,7 @@ const amt = 5;
 let res = coinChangeIIDP(coins, amt);
 console.log(`凑出目标金额的硬币组合数量为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = coinChangeIIDPComp(coins, amt);
 console.log(`凑出目标金额的硬币组合数量为 ${res}`);

@@ -90,7 +90,7 @@ function editDistanceDP(s: string, t: string): number {
     return dp[n][m];
 }

-/* 编辑距离:状态压缩后的动态规划 */
+/* 编辑距离:空间优化后的动态规划 */
 function editDistanceDPComp(s: string, t: string): number {
     const n = s.length,
         m = t.length;

@@ -141,7 +141,7 @@ console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);
 res = editDistanceDP(s, t);
 console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = editDistanceDPComp(s, t);
 console.log(`将 ${s} 更改为 ${t} 最少需要编辑 ${res} 步`);

@@ -84,7 +84,7 @@ function knapsackDP(
     return dp[n][cap];
 }

-/* 0-1 背包:状态压缩后的动态规划 */
+/* 0-1 背包:空间优化后的动态规划 */
 function knapsackDPComp(
     wgt: Array<number>,
     val: Array<number>,

@@ -127,7 +127,7 @@ console.log(`不超过背包容量的最大物品价值为 ${res}`);
 res = knapsackDP(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = knapsackDPComp(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);

@@ -22,7 +22,7 @@ function minCostClimbingStairsDP(cost: Array<number>): number {
     return dp[n];
 }

-/* 爬楼梯最小代价:状态压缩后的动态规划 */
+/* 爬楼梯最小代价:空间优化后的动态规划 */
 function minCostClimbingStairsDPComp(cost: Array<number>): number {
     const n = cost.length - 1;
     if (n === 1 || n === 2) {

@@ -78,7 +78,7 @@ function minPathSumDP(grid: Array<Array<number>>): number {
     return dp[n - 1][m - 1];
 }

-/* 最小路径和:状态压缩后的动态规划 */
+/* 最小路径和:空间优化后的动态规划 */
 function minPathSumDPComp(grid: Array<Array<number>>): number {
     const n = grid.length,
         m = grid[0].length;

@@ -125,7 +125,7 @@ console.log(`从左上角到右下角的最小路径和为 ${res}`);
 res = minPathSumDP(grid);
 console.log(`从左上角到右下角的最小路径和为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = minPathSumDPComp(grid);
 console.log(`从左上角到右下角的最小路径和为 ${res}`);

@@ -33,7 +33,7 @@ function unboundedKnapsackDP(
     return dp[n][cap];
 }

-/* 完全背包:状态压缩后的动态规划 */
+/* 完全背包:空间优化后的动态规划 */
 function unboundedKnapsackDPComp(
     wgt: Array<number>,
     val: Array<number>,

@@ -66,7 +66,7 @@ const cap = 4;
 let res = unboundedKnapsackDP(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);

-// 状态压缩后的动态规划
+// 空间优化后的动态规划
 res = unboundedKnapsackDPComp(wgt, val, cap);
 console.log(`不超过背包容量的最大物品价值为 ${res}`);
@@ -18,7 +18,7 @@

 也就是说,我们可以采取逐行放置策略:从第一行开始,在每行放置一个皇后,直至最后一行结束。

-下图所示为 $4$ 皇后问题的逐行放置过程。受画幅限制,下图仅展开了第一行的其中一个搜索分支,并且将不满足列约束和对角线约束的方案都进行了剪枝。
+下图所示为 4 皇后问题的逐行放置过程。受画幅限制,下图仅展开了第一行的其中一个搜索分支,并且将不满足列约束和对角线约束的方案都进行了剪枝。

 ![逐行放置策略](n_queens_problem.assets/n_queens_placing.png)

Binary figures updated (sizes): 23 KiB → 17 KiB; 42 KiB → 42 KiB; 86 KiB → 93 KiB.
@@ -20,7 +20,7 @@ As shown in the figure below, there is an "edit icon" in the upper right corner

 ![Edit page button](contribution.assets/edit_markdown.png)

-Images cannot be directly modified and require the creation of a new [Issue](https://github.com/krahets/hello-algo/issues) or a comment to describe the problem. We will redraw and replace the images as soon as possible.
+Figures cannot be directly modified and require the creation of a new [Issue](https://github.com/krahets/hello-algo/issues) or a comment to describe the problem. We will redraw and replace the figures as soon as possible.

 ### Content creation

Binary figures updated (sizes): 124 KiB → 123 KiB; 103 KiB → 103 KiB.
@@ -6,7 +6,7 @@ We recommend using the open-source, lightweight VS Code as your local Integrated

 ![Download VS Code from the official website](installation.assets/vscode_installation.png)

-VS Code has a powerful extension ecosystem, supporting the execution and debugging of most programming languages. For example, after installing the "Python Extension Pack," you can debug Python code. The installation steps are shown in the following figure.
+VS Code has a powerful extension ecosystem, supporting the execution and debugging of most programming languages. For example, after installing the "Python Extension Pack," you can debug Python code. The installation steps are shown in the figure below.

 ![Install VS Code Extension Pack](installation.assets/vscode_extension_installation.png)
@@ -1,6 +1,6 @@
 # Array

-An "array" is a linear data structure that operates as a lineup of similar items, stored together in a computer's memory in contiguous spaces. It's like a sequence that maintains organized storage. Each item in this lineup has its unique 'spot' known as an "index". Please refer to the figure below to observe how arrays work and grasp these key terms.
+An <u>array</u> is a linear data structure that operates as a lineup of similar items, stored together in a computer's memory in contiguous spaces. It's like a sequence that maintains organized storage. Each item in this lineup has its unique 'spot' known as an <u>index</u>. Please refer to the figure below to observe how arrays work and grasp these key terms.

 ![Array definition and storage method](array.assets/array_definition.png)

@@ -125,7 +125,7 @@ Elements in an array are stored in contiguous memory spaces, making it simpler t

 ![Memory address calculation for array elements](array.assets/array_memory_location_calculation.png)

-As observed in the above illustration, array indexing conventionally begins at $0$. While this might appear counterintuitive, considering counting usually starts at $1$, within the address calculation formula, **an index is essentially an offset from the memory address**. For the first element's address, this offset is $0$, validating its index as $0$.
+As observed in the figure above, array indexing conventionally begins at $0$. While this might appear counterintuitive, considering counting usually starts at $1$, within the address calculation formula, **an index is essentially an offset from the memory address**. For the first element's address, this offset is $0$, validating its index as $0$.

 Accessing elements in an array is highly efficient, allowing us to randomly access any element in $O(1)$ time.
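As context for the wording change above: the address formula that paragraph relies on can be written out as follows. An illustrative sketch only; the helper name and the 4-byte element size are assumptions, not repo code:

```ts
// elementAddress = firstElementAddress + elementSize * index
function elementAddress(base: number, index: number, elemSize: number = 4): number {
    return base + elemSize * index; // index 0 yields the base address itself
}

console.log(elementAddress(1000, 0)); // 1000 — offset 0, hence index 0
console.log(elementAddress(1000, 3)); // 1012
```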
@@ -135,7 +135,7 @@ Accessing elements in an array is highly efficient, allowing us to randomly acce

 ### Inserting elements

-Array elements are tightly packed in memory, with no space available to accommodate additional data between them. Illustrated in Figure below, inserting an element in the middle of an array requires shifting all subsequent elements back by one position to create room for the new element.
+Array elements are tightly packed in memory, with no space available to accommodate additional data between them. As illustrated in the figure below, inserting an element in the middle of an array requires shifting all subsequent elements back by one position to create room for the new element.

 ![Array element insertion example](array.assets/array_insert_element.png)
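The shifting described in this hunk can be sketched as follows (reconstructed for illustration; `insert` is a name chosen here, not copied from the book's listings):

```ts
/* Insert `num` at `index` by shifting the tail one slot right (the last element is dropped). */
function insert(nums: number[], num: number, index: number): void {
    for (let i = nums.length - 1; i > index; i--) {
        nums[i] = nums[i - 1]; // move each subsequent element back by one position
    }
    nums[index] = num; // place the new element in the freed slot
}

const nums = [1, 3, 2, 5, 4];
insert(nums, 6, 3);
console.log(nums); // [1, 3, 2, 6, 5]
```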
@@ -2,13 +2,13 @@

 Memory space is a shared resource among all programs. In a complex system environment, available memory can be dispersed throughout the memory space. We understand that the memory allocated for an array must be continuous. However, for very large arrays, finding a sufficiently large contiguous memory space might be challenging. This is where the flexible advantage of linked lists becomes evident.

-A "linked list" is a linear data structure in which each element is a node object, and the nodes are interconnected through "references". These references hold the memory addresses of subsequent nodes, enabling navigation from one node to the next.
+A <u>linked list</u> is a linear data structure in which each element is a node object, and the nodes are interconnected through "references". These references hold the memory addresses of subsequent nodes, enabling navigation from one node to the next.

 The design of linked lists allows for their nodes to be distributed across memory locations without requiring contiguous memory addresses.

 ![Linked list definition and storage method](linked_list.assets/linkedlist_definition.png)

-As shown in the figure, we see that the basic building block of a linked list is the "node" object. Each node comprises two key components: the node's "value" and a "reference" to the next node.
+As shown in the figure above, we see that the basic building block of a linked list is the <u>node</u> object. Each node comprises two key components: the node's "value" and a "reference" to the next node.

 - The first node in a linked list is the "head node", and the final one is the "tail node".
 - The tail node points to "null", designated as `null` in Java, `nullptr` in C++, and `None` in Python.

@@ -406,7 +406,7 @@ The array as a whole is a variable, for instance, the array `nums` includes elem

 ### Inserting nodes

-Inserting a node into a linked list is very easy. As shown in the figure, let's assume we aim to insert a new node `P` between two adjacent nodes `n0` and `n1`. **This can be achieved by simply modifying two node references (pointers)**, with a time complexity of $O(1)$.
+Inserting a node into a linked list is very easy. As shown in the figure below, let's assume we aim to insert a new node `P` between two adjacent nodes `n0` and `n1`. **This can be achieved by simply modifying two node references (pointers)**, with a time complexity of $O(1)$.

 By comparison, inserting an element into an array has a time complexity of $O(n)$, which becomes less efficient when dealing with large data volumes.
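A minimal sketch of the two-reference rewiring this hunk describes (the `ListNode` shape follows the chapter's convention; `insertAfter` is a name chosen here):

```ts
class ListNode {
    val: number;
    next: ListNode | null = null;
    constructor(val: number) { this.val = val; }
}

/* Insert node P right after node n0 by rewiring two references. */
function insertAfter(n0: ListNode, P: ListNode): void {
    P.next = n0.next; // P now points to n1
    n0.next = P;      // n0 now points to P
}
```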
@@ -418,7 +418,7 @@ By comparison, inserting an element into an array has a time complexity of $O(n)

 ### Deleting nodes

-As shown in the figure, deleting a node from a linked list is also very easy, **involving only the modification of a single node's reference (pointer)**.
+As shown in the figure below, deleting a node from a linked list is also very easy, **involving only the modification of a single node's reference (pointer)**.

 It's important to note that even though node `P` continues to point to `n1` after being deleted, it becomes inaccessible during linked list traversal. This effectively means that `P` is no longer a part of the linked list.
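And the single-reference change for deletion, continuing the same sketch (it reuses the `ListNode` class above; `removeAfter` is likewise a name chosen here):

```ts
/* Remove the node after n0 by bypassing it. */
function removeAfter(n0: ListNode): void {
    if (n0.next === null) return; // nothing to remove
    n0.next = n0.next.next;       // n0 now skips P and points straight to n1
}
```

Note that, exactly as the hunk says, the removed node may still point at `n1`, but traversal from the head can no longer reach it.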
@@ -461,7 +461,7 @@ The table below summarizes the characteristics of arrays and linked lists, and i

 ## Common types of linked lists

-As shown in the figure, there are three common types of linked lists.
+As shown in the figure below, there are three common types of linked lists.

 - **Singly linked list**: This is the standard linked list described earlier. Nodes in a singly linked list include a value and a reference to the next node. The first node is known as the head node, and the last node, which points to null (`None`), is the tail node.
 - **Circular linked list**: This is formed when the tail node of a singly linked list points back to the head node, creating a loop. In a circular linked list, any node can function as the head node.

@@ -1,13 +1,13 @@
 # List

-A "list" is an abstract data structure concept that represents an ordered collection of elements, supporting operations such as element access, modification, addition, deletion, and traversal, without requiring users to consider capacity limitations. Lists can be implemented based on linked lists or arrays.
+A <u>list</u> is an abstract data structure concept that represents an ordered collection of elements, supporting operations such as element access, modification, addition, deletion, and traversal, without requiring users to consider capacity limitations. Lists can be implemented based on linked lists or arrays.

 - A linked list inherently serves as a list, supporting operations for adding, deleting, searching, and modifying elements, with the flexibility to dynamically adjust its size.
 - Arrays also support these operations, but due to their immutable length, they can be considered as a list with a length limit.

 When implementing lists using arrays, **the immutability of length reduces the practicality of the list**. This is because predicting the amount of data to be stored in advance is often challenging, making it difficult to choose an appropriate list length. If the length is too small, it may not meet the requirements; if too large, it may waste memory space.

-To solve this problem, we can implement lists using a "dynamic array." It inherits the advantages of arrays and can dynamically expand during program execution.
+To solve this problem, we can implement lists using a <u>dynamic array</u>. It inherits the advantages of arrays and can dynamically expand during program execution.

 In fact, **many programming languages' standard libraries implement lists using dynamic arrays**, such as Python's `list`, Java's `ArrayList`, C++'s `vector`, and C#'s `List`. In the following discussion, we will consider "list" and "dynamic array" as synonymous concepts.
@@ -6,7 +6,7 @@ In fact, **the physical structure largely determines the efficiency of a program

 ## Computer storage devices

-There are three types of storage devices in computers: "hard disk," "random-access memory (RAM)," and "cache memory." The following table shows their different roles and performance characteristics in computer systems.
+There are three types of storage devices in computers: <u>hard disk</u>, <u>random-access memory (RAM)</u>, and <u>cache memory</u>. The following table shows their different roles and performance characteristics in computer systems.

 <p align="center"> Table <id> Computer storage devices </p>

@@ -45,9 +45,9 @@ On the other hand, during program execution, **as memory is repeatedly allocated

 ## Cache efficiency of data structures

-Although caches are much smaller in space capacity than memory, they are much faster and play a crucial role in program execution speed. Since the cache's capacity is limited and can only store a small part of frequently accessed data, when the CPU tries to access data not in the cache, a "cache miss" occurs, forcing the CPU to load the needed data from slower memory.
+Although caches are much smaller in space capacity than memory, they are much faster and play a crucial role in program execution speed. Since the cache's capacity is limited and can only store a small part of frequently accessed data, when the CPU tries to access data not in the cache, a <u>cache miss</u> occurs, forcing the CPU to load the needed data from slower memory.

-Clearly, **the fewer the cache misses, the higher the CPU's data read-write efficiency**, and the better the program performance. The proportion of successful data retrieval from the cache by the CPU is called the "cache hit rate," a metric often used to measure cache efficiency.
+Clearly, **the fewer the cache misses, the higher the CPU's data read-write efficiency**, and the better the program performance. The proportion of successful data retrieval from the cache by the CPU is called the <u>cache hit rate</u>, a metric often used to measure cache efficiency.

 To achieve higher efficiency, caches adopt the following data loading mechanisms.

@@ -42,9 +42,9 @@ From a garbage collection perspective, for languages with automatic garbage coll

 If an element is searched first and then deleted, the time complexity is indeed `O(n)`. However, the `O(1)` advantage of linked lists in insertion and deletion can be realized in other applications. For example, in the implementation of double-ended queues using linked lists, we maintain pointers always pointing to the head and tail nodes, making each insertion and deletion operation `O(1)`.

-**Q**: In the image "Linked List Definition and Storage Method", do the light blue storage nodes occupy a single memory address, or do they share half with the node value?
+**Q**: In the figure "Linked List Definition and Storage Method", do the light blue storage nodes occupy a single memory address, or do they share half with the node value?

-The diagram is just a qualitative representation; quantitative analysis depends on specific situations.
+The figure is just a qualitative representation; quantitative analysis depends on specific situations.

 - Different types of node values occupy different amounts of space, such as int, long, double, and object instances.
 - The memory space occupied by pointer variables depends on the operating system and compilation environment used, usually 8 bytes or 4 bytes.
@@ -8,7 +8,7 @@ Backtracking typically employs "depth-first search" to traverse the solution spa

 Given a binary tree, search and record all nodes with a value of $7$, please return a list of nodes.

-For this problem, we traverse this tree in preorder and check if the current node's value is $7$. If it is, we add the node's value to the result list `res`. The relevant process is shown in the following diagram and code:
+For this problem, we traverse this tree in preorder and check if the current node's value is $7$. If it is, we add the node's value to the result list `res`. The relevant process is shown in the figure below:

 ```src
 [file]{preorder_traversal_i_compact}-[class]{}-[func]{pre_order}

@@ -36,7 +36,7 @@ Based on the code from Example One, we need to use a list `path` to record the v

 In each "try", we record the path by adding the current node to `path`; before "retreating", we need to pop the node from `path` **to restore the state before this attempt**.

-Observe the process shown below, **we can understand trying and retreating as "advancing" and "undoing"**, two operations that are reverse to each other.
+Observe the process shown in the figure below, **we can understand trying and retreating as "advancing" and "undoing"**, two operations that are reverse to each other.

 === "<1>"
     ![Trying and retreating](backtracking_algorithm.assets/preorder_find_paths_step1.png)

@@ -85,7 +85,7 @@ To meet the above constraints, **we need to add a pruning operation**: during th
 [file]{preorder_traversal_iii_compact}-[class]{}-[func]{pre_order}
 ```

-"Pruning" is a very vivid noun. As shown in the diagram below, in the search process, **we "cut off" the search branches that do not meet the constraints**, avoiding many meaningless attempts, thus enhancing the search efficiency.
+"Pruning" is a very vivid noun. As shown in the figure below, in the search process, **we "cut off" the search branches that do not meet the constraints**, avoiding many meaningless attempts, thus enhancing the search efficiency.

 ![Pruning based on constraints](backtracking_algorithm.assets/preorder_find_constrained_paths.png)

@@ -421,7 +421,7 @@ Next, we solve Example Three based on the framework code. The `state` is the nod
 [file]{preorder_traversal_iii_template}-[class]{}-[func]{backtrack}
 ```

-As per the requirements, after finding a node with a value of $7$, the search should continue, **thus the `return` statement after recording the solution should be removed**. The following diagram compares the search processes with and without retaining the `return` statement.
+As per the requirements, after finding a node with a value of $7$, the search should continue, **thus the `return` statement after recording the solution should be removed**. The figure below compares the search processes with and without retaining the `return` statement.

 ![Comparison of retaining and removing the return in the search process](backtracking_algorithm.assets/backtrack_remove_return_or_not.png)
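The hunks above revolve around the try/record/retreat pattern of Example Two. A compact sketch of that pattern, assuming the chapter's binary-tree setup (the `TreeNode` shape and the target value $7$ come from the text; the rest is reconstructed):

```ts
class TreeNode {
    val: number;
    left: TreeNode | null = null;
    right: TreeNode | null = null;
    constructor(val: number) { this.val = val; }
}

function preOrder(root: TreeNode | null, path: TreeNode[], res: TreeNode[][]): void {
    if (root === null) return;
    path.push(root);                          // try: advance into this node
    if (root.val === 7) res.push([...path]);  // record a solution (copy the path)
    preOrder(root.left, path, res);
    preOrder(root.right, path, res);
    path.pop();                               // retreat: undo, restoring prior state
}
```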
@@ -8,7 +8,7 @@ As shown in the figure below, when $n = 4$, there are two solutions. From the pe

 ![Solution to the 4 queens problem](n_queens_problem.assets/solution_4_queens.png)

-The following image shows the three constraints of this problem: **multiple queens cannot be on the same row, column, or diagonal**. It is important to note that diagonals are divided into the main diagonal `\` and the secondary diagonal `/`.
+The figure below shows the three constraints of this problem: **multiple queens cannot be on the same row, column, or diagonal**. It is important to note that diagonals are divided into the main diagonal `\` and the secondary diagonal `/`.

 ![Constraints of the n queens problem](n_queens_problem.assets/n_queens_constraints.png)

@@ -18,7 +18,7 @@ As the number of queens equals the number of rows on the chessboard, both being

 This means that we can adopt a row-by-row placing strategy: starting from the first row, place one queen per row until the last row is reached.

-The image below shows the row-by-row placing process for the 4 queens problem. Due to space limitations, the image only expands one search branch of the first row, and prunes any placements that do not meet the column and diagonal constraints.
+The figure below shows the row-by-row placing process for the 4 queens problem. Due to space limitations, the figure only expands one search branch of the first row, and prunes any placements that do not meet the column and diagonal constraints.

 ![Row-by-row placing strategy](n_queens_problem.assets/n_queens_placing.png)

@@ -30,7 +30,7 @@ To satisfy column constraints, we can use a boolean array `cols` of length $n$ t

 How about the diagonal constraints? Let the row and column indices of a cell on the chessboard be $(row, col)$. By selecting a specific main diagonal, we notice that the difference $row - col$ is the same for all cells on that diagonal, **meaning that $row - col$ is a constant value on that diagonal**.

-Thus, if two cells satisfy $row_1 - col_1 = row_2 - col_2$, they are definitely on the same main diagonal. Using this pattern, we can utilize the array `diags1` shown below to track whether a queen is on any main diagonal.
+Thus, if two cells satisfy $row_1 - col_1 = row_2 - col_2$, they are definitely on the same main diagonal. Using this pattern, we can utilize the array `diags1` shown in the figure below to track whether a queen is on any main diagonal.

 Similarly, **the sum $row + col$ is a constant value for all cells on a secondary diagonal**. We can also use the array `diags2` to handle secondary diagonal constraints.
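A sketch of how the two diagonal observations above translate into array indexing. Since $row - col$ ranges over $[-(n-1), n-1]$, an offset of $n - 1$ maps it into $[0, 2n-2]$; the helper below is reconstructed for illustration, not repo code:

```ts
// Mark the constraints hit by placing a queen at (row, col) on an n×n board.
function placeQueen(row: number, col: number, n: number,
                    cols: boolean[], diags1: boolean[], diags2: boolean[]): void {
    cols[col] = true;                  // column constraint
    diags1[row - col + n - 1] = true;  // main diagonal: row - col is constant
    diags2[row + col] = true;          // secondary diagonal: row + col is constant
}
```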
@@ -6,7 +6,7 @@ The table below lists several example data, including the input arrays and their

 <p align="center"> Table <id> Permutation examples </p>

-| Input array | Permutations |
+| Input array | Permutations                                                        |
 | :---------- | :----------------------------------------------------------------- |
 | $[1]$       | $[1]$                                                               |
 | $[1, 2]$    | $[1, 2], [2, 1]$                                                    |

@@ -22,7 +22,7 @@ From the perspective of the backtracking algorithm, **we can imagine the process

 From the code perspective, the candidate set `choices` contains all elements of the input array, and the state `state` contains elements that have been selected so far. Please note that each element can only be chosen once, **thus all elements in `state` must be unique**.

-As shown in the following figure, we can unfold the search process into a recursive tree, where each node represents the current state `state`. Starting from the root node, after three rounds of choices, we reach the leaf nodes, each corresponding to a permutation.
+As shown in the figure below, we can unfold the search process into a recursive tree, where each node represents the current state `state`. Starting from the root node, after three rounds of choices, we reach the leaf nodes, each corresponding to a permutation.

 ![Permutation recursive tree](permutations_problem.assets/permutations_i.png)

@@ -33,11 +33,11 @@ To ensure that each element is selected only once, we consider introducing a boo
 - After making the choice `choice[i]`, we set `selected[i]` to $\text{True}$, indicating it has been chosen.
 - When iterating through the choice list `choices`, skip all nodes that have already been selected, i.e., prune.

-As shown in the following figure, suppose we choose 1 in the first round, 3 in the second round, and 2 in the third round, we need to prune the branch of element 1 in the second round and elements 1 and 3 in the third round.
+As shown in the figure below, suppose we choose 1 in the first round, 3 in the second round, and 2 in the third round, we need to prune the branch of element 1 in the second round and elements 1 and 3 in the third round.

 ![Permutation pruning example](permutations_problem.assets/permutations_i_pruning.png)

-Observing the above figure, this pruning operation reduces the search space size from $O(n^n)$ to $O(n!)$.
+Observing the figure above, this pruning operation reduces the search space size from $O(n^n)$ to $O(n!)$.

 ### Code implementation
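Putting the pieces of these hunks together, Permutations I has roughly this shape (a reconstruction based on the text's `state`/`selected` names, not the repo listing):

```ts
function permutationsI(nums: number[]): number[][] {
    const res: number[][] = [];
    const selected: boolean[] = new Array(nums.length).fill(false);
    const backtrack = (state: number[]): void => {
        if (state.length === nums.length) {
            res.push([...state]); // a full permutation reached at a leaf
            return;
        }
        for (let i = 0; i < nums.length; i++) {
            if (selected[i]) continue; // prune: element already in state
            selected[i] = true;        // try
            state.push(nums[i]);
            backtrack(state);
            state.pop();               // retreat
            selected[i] = false;
        }
    };
    backtrack([]);
    return res;
}

console.log(permutationsI([1, 2, 3])); // 3! = 6 permutations
```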
@@ -55,7 +55,7 @@ After understanding the above information, we can "fill in the blanks" in the fr

 Suppose the input array is $[1, 1, 2]$. To differentiate the two duplicate elements $1$, we mark the second $1$ as $\hat{1}$.

-As shown in the following figure, half of the permutations generated by the above method are duplicates.
+As shown in the figure below, half of the permutations generated by the above method are duplicates.

 ![Duplicate permutations](permutations_problem.assets/permutations_ii.png)

@@ -63,7 +63,7 @@ So, how do we eliminate duplicate permutations? Most directly, consider using a

 ### Pruning of equal elements

-Observing the following figure, in the first round, choosing $1$ or $\hat{1}$ results in identical permutations under both choices, thus we should prune $\hat{1}$.
+Observing the figure below, in the first round, choosing $1$ or $\hat{1}$ results in identical permutations under both choices, thus we should prune $\hat{1}$.

 Similarly, after choosing $2$ in the first round, choosing $1$ and $\hat{1}$ in the second round also produces duplicate branches, so we should also prune $\hat{1}$ in the second round.

@@ -90,6 +90,6 @@ Please note, although both `selected` and `duplicated` are used for pruning, the
 - **Repeated choice pruning**: There is only one `selected` throughout the search process. It records which elements are currently in the state, aiming to prevent an element from appearing repeatedly in `state`.
 - **Equal element pruning**: Each round of choices (each call to the `backtrack` function) contains a `duplicated`. It records which elements have been chosen in the current traversal (`for` loop), aiming to ensure equal elements are selected only once.

-The following figure shows the scope of the two pruning conditions. Note, each node in the tree represents a choice, and the nodes from the root to the leaf form a permutation.
+The figure below shows the scope of the two pruning conditions. Note, each node in the tree represents a choice, and the nodes from the root to the leaf form a permutation.

 ![Scope of the two pruning conditions](permutations_problem.assets/permutations_ii_pruning_summary.png)
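For Permutations II, the distinction the last hunk draws between the two prunings shows up as exactly one extra set per round. A sketch under the same assumptions as the previous one:

```ts
function permutationsII(nums: number[]): number[][] {
    const res: number[][] = [];
    const selected: boolean[] = new Array(nums.length).fill(false);
    const backtrack = (state: number[]): void => {
        if (state.length === nums.length) {
            res.push([...state]);
            return;
        }
        const duplicated = new Set<number>(); // scoped to this round's for loop
        for (let i = 0; i < nums.length; i++) {
            // repeated-choice pruning (selected) + equal-element pruning (duplicated)
            if (selected[i] || duplicated.has(nums[i])) continue;
            duplicated.add(nums[i]); // each equal value may be tried once per round
            selected[i] = true;
            state.push(nums[i]);
            backtrack(state);
            state.pop();
            selected[i] = false;
        }
    };
    backtrack([]);
    return res;
}
```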
@@ -23,7 +23,7 @@ Unlike the permutation problem, **elements in this problem can be chosen an unli

 Inputting the array $[3, 4, 5]$ and target element $9$ into the above code yields the results $[3, 3, 3], [4, 5], [5, 4]$. **Although it successfully finds all subsets with a sum of $9$, it includes the duplicate subset $[4, 5]$ and $[5, 4]$**.

-This is because the search process distinguishes the order of choices, however, subsets do not distinguish the choice order. As shown in the following figure, choosing $4$ before $5$ and choosing $5$ before $4$ are different branches, but correspond to the same subset.
+This is because the search process distinguishes the order of choices, however, subsets do not distinguish the choice order. As shown in the figure below, choosing $4$ before $5$ and choosing $5$ before $4$ are different branches, but correspond to the same subset.

 ![Subset search and pruning out of bounds](subset_sum_problem.assets/subset_sum_i_naive.png)

@@ -34,7 +34,7 @@ To eliminate duplicate subsets, **a straightforward idea is to deduplicate the r

 ### Duplicate subset pruning

-**We consider deduplication during the search process through pruning**. Observing the following figure, duplicate subsets are generated when choosing array elements in different orders, for example in the following situations.
+**We consider deduplication during the search process through pruning**. Observing the figure below, duplicate subsets are generated when choosing array elements in different orders, for example in the following situations.

 1. When choosing $3$ in the first round and $4$ in the second round, all subsets containing these two elements are generated, denoted as $[3, 4, \dots]$.
 2. Later, when $4$ is chosen in the first round, **the second round should skip $3$** because the subset $[4, 3, \dots]$ generated by this choice completely duplicates the subset from step `1.`.

@@ -62,7 +62,7 @@ Besides, we have made the following two optimizations to the code.
 [file]{subset_sum_i}-[class]{}-[func]{subset_sum_i}
 ```

-The following figure shows the overall backtracking process after inputting the array $[3, 4, 5]$ and target element $9$ into the above code.
+The figure below shows the overall backtracking process after inputting the array $[3, 4, 5]$ and target element $9$ into the above code.

 ![Subset sum I backtracking process](subset_sum_problem.assets/subset_sum_i.png)

@@ -74,7 +74,7 @@ The following figure shows the overall backtracking process after inputting the

 Compared to the previous question, **this question's input array may contain duplicate elements**, introducing new problems. For example, given the array $[4, \hat{4}, 5]$ and target element $9$, the existing code's output results in $[4, 5], [\hat{4}, 5]$, resulting in duplicate subsets.

-**The reason for this duplication is that equal elements are chosen multiple times in a certain round**. In the following figure, the first round has three choices, two of which are $4$, generating two duplicate search branches, thus outputting duplicate subsets; similarly, the two $4$s in the second round also produce duplicate subsets.
+**The reason for this duplication is that equal elements are chosen multiple times in a certain round**. In the figure below, the first round has three choices, two of which are $4$, generating two duplicate search branches, thus outputting duplicate subsets; similarly, the two $4$s in the second round also produce duplicate subsets.

 ![Duplicate subsets caused by equal elements](subset_sum_problem.assets/subset_sum_ii_repeat.png)

@@ -90,6 +90,6 @@ At the same time, **this question stipulates that each array element can only be
 [file]{subset_sum_ii}-[class]{}-[func]{subset_sum_ii}
 ```

-The following figure shows the backtracking process for the array $[4, 4, 5]$ and target element $9$, including four types of pruning operations. Please combine the illustration with the code comments to understand the entire search process and how each type of pruning operation works.
+The figure below shows the backtracking process for the array $[4, 4, 5]$ and target element $9$, including four types of pruning operations. Please combine the illustration with the code comments to understand the entire search process and how each type of pruning operation works.

 ![Subset sum II backtracking process](subset_sum_problem.assets/subset_sum_ii.png)
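A sketch of Subset Sum I as these hunks describe it: a `start` index makes each round consider only elements at or after the previous choice, so $[4, 5]$ and $[5, 4]$ collapse into one branch. Reconstructed code; reading the sort-then-break pruning as one of the "two optimizations" the text mentions is an inference on my part:

```ts
function subsetSumI(nums: number[], target: number): number[][] {
    const res: number[][] = [];
    nums.sort((a, b) => a - b); // sorting enables the early break below
    const backtrack = (state: number[], remaining: number, start: number): void => {
        if (remaining === 0) {
            res.push([...state]); // subset sums exactly to target
            return;
        }
        for (let i = start; i < nums.length; i++) {
            if (nums[i] > remaining) break; // prune: sorted, so the rest overshoot too
            state.push(nums[i]);
            backtrack(state, remaining - nums[i], i); // reuse allowed: pass i, not i + 1
            state.pop();
        }
    };
    backtrack([], target, 0);
    return res;
}

console.log(subsetSumI([3, 4, 5], 9)); // [[3, 3, 3], [4, 5]] — no [5, 4] duplicate
```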
@@ -4,7 +4,7 @@ In algorithms, the repeated execution of a task is quite common and is closely r

 ## Iteration

-"Iteration" is a control structure for repeatedly performing a task. In iteration, a program repeats a block of code as long as a certain condition is met until this condition is no longer satisfied.
+<u>Iteration</u> is a control structure for repeatedly performing a task. In iteration, a program repeats a block of code as long as a certain condition is met until this condition is no longer satisfied.

 ### For loops

@@ -16,11 +16,11 @@ The following function uses a `for` loop to perform a summation of $1 + 2 + \dot
 [file]{iteration}-[class]{}-[func]{for_loop}
 ```

-The flowchart below represents this sum function.
+The figure below represents this sum function.

 ![Flowchart of the sum function](iteration_and_recursion.assets/iteration.png)

-The number of operations in this summation function is proportional to the size of the input data $n$, or in other words, it has a "linear relationship." This "linear relationship" is what time complexity describes. This topic will be discussed in more detail in the next section.
+The number of operations in this summation function is proportional to the size of the input data $n$, or in other words, it has a linear relationship. **This "linear relationship" is what time complexity describes**. This topic will be discussed in more detail in the next section.

 ### While loops

@@ -32,7 +32,7 @@ Below we use a `while` loop to implement the sum $1 + 2 + \dots + n$.
 [file]{iteration}-[class]{}-[func]{while_loop}
 ```

-**`While` loops provide more flexibility than `for` loops**, especially since they allow for custom initialization and modification of the condition variable at each step.
+**`while` loops provide more flexibility than `for` loops**, especially since they allow for custom initialization and modification of the condition variable at each step.

 For example, in the following code, the condition variable $i$ is updated twice each round, which would be inconvenient to implement with a `for` loop.

@@ -50,7 +50,7 @@ We can nest one loop structure within another. Below is an example using `for` l
 [file]{iteration}-[class]{}-[func]{nested_for_loop}
 ```

-The flowchart below represents this nested loop.
+The figure below represents this nested loop.

 ![Flowchart of the nested loop](iteration_and_recursion.assets/nested_iteration.png)
@@ -60,7 +60,7 @@ We can further increase the complexity by adding more nested loops, each level o

 ## Recursion

-"Recursion" is an algorithmic strategy where a function solves a problem by calling itself. It primarily involves two phases:
+<u>Recursion</u> is an algorithmic strategy where a function solves a problem by calling itself. It primarily involves two phases:

 1. **Calling**: This is where the program repeatedly calls itself, often with progressively smaller or simpler arguments, moving towards the "termination condition."
 2. **Returning**: Upon triggering the "termination condition," the program begins to return from the deepest recursive function, aggregating the results of each layer.

@@ -106,7 +106,7 @@ In practice, the depth of recursion allowed by programming languages is usually

 ### Tail recursion

-Interestingly, **if a function performs its recursive call as the very last step before returning,** it can be optimized by the compiler or interpreter to be as space-efficient as iteration. This scenario is known as "tail recursion."
+Interestingly, **if a function performs its recursive call as the very last step before returning,** it can be optimized by the compiler or interpreter to be as space-efficient as iteration. This scenario is known as <u>tail recursion</u>.

 - **Regular recursion**: In standard recursion, when the function returns to the previous level, it continues to execute more code, requiring the system to save the context of the previous call.
 - **Tail recursion**: Here, the recursive call is the final operation before the function returns. This means that upon returning to the previous level, no further actions are needed, so the system does not need to save the context of the previous level.
@@ -117,7 +117,7 @@ For example, in calculating $1 + 2 + \dots + n$, we can make the result variable
 [file]{recursion}-[class]{}-[func]{tail_recur}
 ```

-The execution process of tail recursion is shown in the following figure. Comparing regular recursion and tail recursion, the point of the summation operation is different.
+The execution process of tail recursion is shown in the figure below. Comparing regular recursion and tail recursion, the point of the summation operation is different.

 - **Regular recursion**: The summation operation occurs during the "returning" phase, requiring another summation after each layer returns.
 - **Tail recursion**: The summation operation occurs during the "calling" phase, and the "returning" phase only involves returning through each layer.
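A side-by-side sketch of the two variants this hunk contrasts (illustrative, not the book's listing; note that most JavaScript engines do not actually perform tail-call optimization, so the space benefit here is conceptual):

```ts
function recur(n: number): number {
    if (n === 1) return 1;
    return n + recur(n - 1); // regular: the summation happens while returning
}

function tailRecur(n: number, res: number = 0): number {
    if (n === 0) return res;
    return tailRecur(n - 1, res + n); // tail: the summation happens while calling
}

console.log(recur(100));     // 5050
console.log(tailRecur(100)); // 5050
```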
@@ -147,7 +147,7 @@ Using the recursive relation, and considering the first two numbers as terminati
 [file]{recursion}-[class]{}-[func]{fib}
 ```

-Observing the above code, we see that it recursively calls two functions within itself, **meaning that one call generates two branching calls**. As illustrated below, this continuous recursive calling eventually creates a "recursion tree" with a depth of $n$.
+Observing the above code, we see that it recursively calls two functions within itself, **meaning that one call generates two branching calls**. As illustrated in the figure below, this continuous recursive calling eventually creates a <u>recursion tree</u> with a depth of $n$.

 ![Fibonacci sequence recursion tree](iteration_and_recursion.assets/recursion_tree.png)
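For reference, the `fib` the hunk refers to has this shape (a sketch assuming the chapter's convention that the first two numbers, $0$ and $1$, are the termination conditions):

```ts
function fib(n: number): number {
    if (n === 1 || n === 2) return n - 1; // termination: fib(1) = 0, fib(2) = 1
    return fib(n - 1) + fib(n - 2);       // one call → two branching calls
}

console.log(fib(10)); // 34
```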
@@ -24,11 +24,11 @@ On the other hand, **conducting a full test is very resource-intensive**. As the

 ## Theoretical estimation

-Due to the significant limitations of actual testing, we can consider evaluating algorithm efficiency solely through calculations. This estimation method is known as "asymptotic complexity analysis," or simply "complexity analysis."
+Due to the significant limitations of actual testing, we can consider evaluating algorithm efficiency solely through calculations. This estimation method is known as <u>asymptotic complexity analysis</u>, or simply <u>complexity analysis</u>.

 Complexity analysis reflects the relationship between the time and space resources required for algorithm execution and the size of the input data. **It describes the trend of growth in the time and space required by the algorithm as the size of the input data increases**. This definition might sound complex, but we can break it down into three key points to understand it better.

-- "Time and space resources" correspond to "time complexity" and "space complexity," respectively.
+- "Time and space resources" correspond to <u>time complexity</u> and <u>space complexity</u>, respectively.
 - "As the size of input data increases" means that complexity reflects the relationship between algorithm efficiency and the volume of input data.
 - "The trend of growth in time and space" indicates that complexity analysis focuses not on the specific values of runtime or space occupied but on the "rate" at which time or space grows.

@@ -1,6 +1,6 @@
 # Space complexity

-"Space complexity" is used to measure the growth trend of the memory space occupied by an algorithm as the amount of data increases. This concept is very similar to time complexity, except that "running time" is replaced with "occupied memory space".
+<u>Space complexity</u> is used to measure the growth trend of the memory space occupied by an algorithm as the amount of data increases. This concept is very similar to time complexity, except that "running time" is replaced with "occupied memory space".

 ## Space related to algorithms
@@ -725,12 +725,12 @@ The time complexity of both `loop()` and `recur()` functions is $O(n)$, but thei

 ## Common types

-Let the size of the input data be $n$, the following chart displays common types of space complexities (arranged from low to high).
+Let the size of the input data be $n$, the figure below displays common types of space complexities (arranged from low to high).

 $$
 \begin{aligned}
-O(1) < O(\log n) < O(n) < O(n^2) < O(2^n) \newline
-\text{Constant Order} < \text{Logarithmic Order} < \text{Linear Order} < \text{Quadratic Order} < \text{Exponential Order}
+& O(1) < O(\log n) < O(n) < O(n^2) < O(2^n) \newline
+& \text{Constant} < \text{Logarithmic} < \text{Linear} < \text{Quadratic} < \text{Exponential}
 \end{aligned}
 $$
@@ -754,7 +754,7 @@ Linear order is common in arrays, linked lists, stacks, queues, etc., where the
 [file]{space_complexity}-[class]{}-[func]{linear}
 ```

-As shown below, this function's recursive depth is $n$, meaning there are $n$ instances of unreturned `linear_recur()` function, using $O(n)$ size of stack frame space:
+As shown in the figure below, this function's recursive depth is $n$, meaning there are $n$ instances of unreturned `linear_recur()` function, using $O(n)$ size of stack frame space:

 ```src
 [file]{space_complexity}-[class]{}-[func]{linear_recur}

@@ -770,7 +770,7 @@ Quadratic order is common in matrices and graphs, where the number of elements i
 [file]{space_complexity}-[class]{}-[func]{quadratic}
 ```

-As shown below, the recursive depth of this function is $n$, and in each recursive call, an array is initialized with lengths $n$, $n-1$, $\dots$, $2$, $1$, averaging $n/2$, thus overall occupying $O(n^2)$ space:
+As shown in the figure below, the recursive depth of this function is $n$, and in each recursive call, an array is initialized with lengths $n$, $n-1$, $\dots$, $2$, $1$, averaging $n/2$, thus overall occupying $O(n^2)$ space:

 ```src
 [file]{space_complexity}-[class]{}-[func]{quadratic_recur}

@@ -780,7 +780,7 @@ As shown below, the recursive depth of this function is $n$, and in each recursi

 ### Exponential order $O(2^n)$

-Exponential order is common in binary trees. Observe the below image, a "full binary tree" with $n$ levels has $2^n - 1$ nodes, occupying $O(2^n)$ space:
+Exponential order is common in binary trees. Observe the figure below, a "full binary tree" with $n$ levels has $2^n - 1$ nodes, occupying $O(2^n)$ space:

 ```src
 [file]{space_complexity}-[class]{}-[func]{build_tree}
@@ -11,7 +11,7 @@
 **Time Complexity**

 - Time complexity measures the trend of an algorithm's running time with the increase in data volume, effectively assessing algorithm efficiency. However, it can fail in certain cases, such as with small input data volumes or when time complexities are the same, making it challenging to precisely compare the efficiency of algorithms.
-- Worst-case time complexity is denoted using big O notation, representing the asymptotic upper bound, reflecting the growth level of the number of operations $T(n)$ as $n$ approaches infinity.
+- Worst-case time complexity is denoted using big-$O$ notation, representing the asymptotic upper bound, reflecting the growth level of the number of operations $T(n)$ as $n$ approaches infinity.
 - Calculating time complexity involves two steps: first counting the number of operations, then determining the asymptotic upper bound.
 - Common time complexities, arranged from low to high, include $O(1)$, $O(\log n)$, $O(n)$, $O(n \log n)$, $O(n^2)$, $O(2^n)$, and $O(n!)$, among others.
 - The time complexity of some algorithms is not fixed and depends on the distribution of input data. Time complexities are divided into worst, best, and average cases. The best case is rarely used because input data generally needs to meet strict conditions to achieve the best case.

@@ -32,7 +32,7 @@ Theoretically, the space complexity of a tail-recursive function can be optimize

 **Q**: What is the difference between the terms "function" and "method"?

-A "function" can be executed independently, with all parameters passed explicitly. A "method" is associated with an object and is implicitly passed to the object calling it, able to operate on the data contained within an instance of a class.
+A <u>function</u> can be executed independently, with all parameters passed explicitly. A <u>method</u> is associated with an object and is implicitly passed to the object calling it, able to operate on the data contained within an instance of a class.

 Here are some examples from common programming languages:
@@ -464,7 +464,7 @@ Let's understand this concept of "time growth trend" with an example. Assume the
 }
 ```

-The following figure shows the time complexities of these three algorithms.
+The figure below shows the time complexities of these three algorithms.

 - Algorithm `A` has just one print operation, and its run time does not grow with $n$. Its time complexity is considered "constant order."
 - Algorithm `B` involves a print operation looping $n$ times, and its run time grows linearly with $n$. Its time complexity is "linear order."

@@ -661,7 +661,7 @@ $$
 T(n) = 3 + 2n
 $$

-Since $T(n)$ is a linear function, its growth trend is linear, and therefore, its time complexity is of linear order, denoted as $O(n)$. This mathematical notation, known as "big-O notation," represents the "asymptotic upper bound" of the function $T(n)$.
+Since $T(n)$ is a linear function, its growth trend is linear, and therefore, its time complexity is of linear order, denoted as $O(n)$. This mathematical notation, known as <u>big-O notation</u>, represents the <u>asymptotic upper bound</u> of the function $T(n)$.

 In essence, time complexity analysis is about finding the asymptotic upper bound of the "number of operations $T(n)$". It has a precise mathematical definition.
@@ -669,7 +669,7 @@ In essence, time complexity analysis is about finding the asymptotic upper bound

 If there exist positive real numbers $c$ and $n_0$ such that for all $n > n_0$, $T(n) \leq c \cdot f(n)$, then $f(n)$ is considered an asymptotic upper bound of $T(n)$, denoted as $T(n) = O(f(n))$.

-As illustrated below, calculating the asymptotic upper bound involves finding a function $f(n)$ such that, as $n$ approaches infinity, $T(n)$ and $f(n)$ have the same growth order, differing only by a constant factor $c$.
+As shown in the figure below, calculating the asymptotic upper bound involves finding a function $f(n)$ such that, as $n$ approaches infinity, $T(n)$ and $f(n)$ have the same growth order, differing only by a constant factor $c$.

 ![Asymptotic upper bound of a function](time_complexity.assets/asymptotic_upper_bound.png)
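To make the definition in this hunk concrete, one worked instance (an added example, reusing the count $T(n) = 3 + 2n$ that this same file derives earlier): choose $f(n) = n$. For all $n > 1$,

$$
T(n) = 3 + 2n \leq 3n + 2n = 5n = 5 f(n),
$$

so the constants $c = 5$ and $n_0 = 1$ witness $T(n) = O(n)$.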
@ -951,12 +951,12 @@ The following table illustrates examples of different operation counts and their
|
|||
|
||||
## Common types of time complexity
|
||||
|
||||
Let's consider the input data size as $n$. The common types of time complexities are illustrated below, arranged from lowest to highest:
|
||||
Let's consider the input data size as $n$. The common types of time complexities are shown in the figure below, arranged from lowest to highest:
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
O(1) < O(\log n) < O(n) < O(n \log n) < O(n^2) < O(2^n) < O(n!) \newline
|
||||
\text{Constant Order} < \text{Logarithmic Order} < \text{Linear Order} < \text{Linear-Logarithmic Order} < \text{Quadratic Order} < \text{Exponential Order} < \text{Factorial Order}
|
||||
& O(1) < O(\log n) < O(n) < O(n \log n) < O(n^2) < O(2^n) < O(n!) \newline
|
||||
& \text{Constant} < \text{Log} < \text{Linear} < \text{Linear-Log} < \text{Quadratic} < \text{Exp} < \text{Factorial}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
|
@@ -994,7 +994,7 @@ Quadratic order means the number of operations grows quadratically with the inpu
[file]{time_complexity}-[class]{}-[func]{quadratic}
```

-The following image compares constant order, linear order, and quadratic order time complexities.
+The figure below compares constant order, linear order, and quadratic order time complexities.

![Constant, linear, and quadratic order time complexities](time_complexity.assets/time_complexity_constant_linear_quadratic.png)
@@ -1008,7 +1008,7 @@ For instance, in bubble sort, the outer loop runs $n - 1$ times, and the inner l

Biological "cell division" is a classic example of exponential order growth: starting with one cell, it becomes two after one division, four after two divisions, and so on, resulting in $2^n$ cells after $n$ divisions.

-The following image and code simulate the cell division process, with a time complexity of $O(2^n)$:
+The figure and code below simulate the cell division process, with a time complexity of $O(2^n)$:

```src
[file]{time_complexity}-[class]{}-[func]{exponential}
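The placeholder above expands to per-language listings at build time. As a reading aid, here is a minimal JavaScript sketch of the doubling loop it describes (illustrative names; the repository's actual listing may differ):

```javascript
/* Exponential order (loop implementation) */
function exponential(n) {
    let count = 0;
    let base = 1;
    // each round, every existing "cell" splits into two: 1, 2, 4, 8, ...
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < base; j++) {
            count++;
        }
        base *= 2;
    }
    // count = 1 + 2 + 4 + ... + 2^(n-1) = 2^n - 1
    return count;
}

console.log(exponential(4)); // 15
```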
@@ -1028,7 +1028,7 @@ Exponential order growth is extremely rapid and is commonly seen in exhaustive s

In contrast to exponential order, logarithmic order reflects situations where "the size is halved each round." Given an input data size $n$, since the size is halved each round, the number of iterations is $\log_2 n$, the inverse function of $2^n$.

-The following image and code simulate the "halving each round" process, with a time complexity of $O(\log_2 n)$, commonly abbreviated as $O(\log n)$:
+The figure and code below simulate the "halving each round" process, with a time complexity of $O(\log_2 n)$, commonly abbreviated as $O(\log n)$:

```src
[file]{time_complexity}-[class]{}-[func]{logarithmic}
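A minimal JavaScript sketch of the halving loop (ours; the repository's listing may differ):

```javascript
/* Logarithmic order (loop implementation): halve n until it reaches 1 */
function logarithmic(n) {
    let count = 0;
    while (n > 1) {
        n = n / 2;
        count++;
    }
    return count;
}

console.log(logarithmic(1024)); // 10, since log2(1024) = 10
```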
@@ -1062,7 +1062,7 @@ Linear-logarithmic order often appears in nested loops, with the complexities of
[file]{time_complexity}-[class]{}-[func]{linear_log_recur}
```

-The image below demonstrates how linear-logarithmic order is generated. Each level of a binary tree has $n$ operations, and the tree has $\log_2 n + 1$ levels, resulting in a time complexity of $O(n \log n)$.
+The figure below demonstrates how linear-logarithmic order is generated. Each level of a binary tree has $n$ operations, and the tree has $\log_2 n + 1$ levels, resulting in a time complexity of $O(n \log n)$.

![Linear-logarithmic order time complexity](time_complexity.assets/time_complexity_logarithmic_linear.png)
@@ -1076,7 +1076,7 @@ $$
n! = n \times (n - 1) \times (n - 2) \times \dots \times 2 \times 1
$$

-Factorials are typically implemented using recursion. As shown in the image and code below, the first level splits into $n$ branches, the second level into $n - 1$ branches, and so on, stopping after the $n$th level:
+Factorials are typically implemented using recursion. As shown in the code and the figure below, the first level splits into $n$ branches, the second level into $n - 1$ branches, and so on, stopping after the $n$th level:

```src
[file]{time_complexity}-[class]{}-[func]{factorial_recur}
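A minimal JavaScript sketch of the branching recursion just described (ours; the repository's listing may differ):

```javascript
/* Factorial order (recursive implementation) */
function factorialRecur(n) {
    if (n === 0) return 1;
    let count = 0;
    // the current level splits into n branches, each of size n - 1
    for (let i = 0; i < n; i++) {
        count += factorialRecur(n - 1);
    }
    return count;
}

console.log(factorialRecur(4)); // 24, i.e., 4!
```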
@@ -1,22 +1,22 @@
# Character encoding *

-In the computer system, all data is stored in binary form, and characters (represented by char) are no exception. To represent characters, we need to develop a "character set" that defines a one-to-one mapping between each character and binary numbers. With the character set, computers can convert binary numbers to characters by looking up the table.
+In the computer system, all data is stored in binary form, and `char` is no exception. To represent characters, we need to develop a "character set" that defines a one-to-one mapping between each character and binary numbers. With the character set, computers can convert binary numbers to characters by looking up the table.

## ASCII character set

-The "ASCII code" is one of the earliest character sets, officially known as the American Standard Code for Information Interchange. It uses 7 binary digits (the lower 7 bits of a byte) to represent a character, allowing for a maximum of 128 different characters. As shown in the figure below, ASCII includes uppercase and lowercase English letters, numbers 0 ~ 9, various punctuation marks, and certain control characters (such as newline and tab).
+The <u>ASCII code</u> is one of the earliest character sets, officially known as the American Standard Code for Information Interchange. It uses 7 binary digits (the lower 7 bits of a byte) to represent a character, allowing for a maximum of 128 different characters. As shown in the figure below, ASCII includes uppercase and lowercase English letters, numbers 0 ~ 9, various punctuation marks, and certain control characters (such as newline and tab).

![ASCII code](character_encoding.assets/ascii_table.png)

-However, **ASCII can only represent English characters**. With the globalization of computers, a character set called "EASCII" was developed to represent more languages. It expands from the 7-bit structure of ASCII to 8 bits, enabling the representation of 256 characters.
+However, **ASCII can only represent English characters**. With the globalization of computers, a character set called <u>EASCII</u> was developed to represent more languages. It expands from the 7-bit structure of ASCII to 8 bits, enabling the representation of 256 characters.

Globally, various region-specific EASCII character sets have been introduced. The first 128 characters of these sets are consistent with the ASCII, while the remaining 128 characters are defined differently to accommodate the requirements of different languages.

## GBK character set

-Later, it was found that **EASCII still could not meet the character requirements of many languages**. For instance, there are nearly a hundred thousand Chinese characters, with several thousand used regularly. In 1980, the Standardization Administration of China released the "GB2312" character set, which included 6763 Chinese characters, essentially fulfilling the computer processing needs for the Chinese language.
+Later, it was found that **EASCII still could not meet the character requirements of many languages**. For instance, there are nearly a hundred thousand Chinese characters, with several thousand used regularly. In 1980, the Standardization Administration of China released the <u>GB2312</u> character set, which included 6763 Chinese characters, essentially fulfilling the computer processing needs for the Chinese language.

-However, GB2312 could not handle some rare and traditional characters. The "GBK" character set expands GB2312 and includes 21886 Chinese characters. In the GBK encoding scheme, ASCII characters are represented with one byte, while Chinese characters use two bytes.
+However, GB2312 could not handle some rare and traditional characters. The <u>GBK</u> character set expands GB2312 and includes 21886 Chinese characters. In the GBK encoding scheme, ASCII characters are represented with one byte, while Chinese characters use two bytes.

## Unicode character set
@@ -24,13 +24,13 @@ With the rapid evolution of computer technology and a plethora of character sets

Researchers of that era thought: **What if a comprehensive character set encompassing all global languages and symbols was developed? Wouldn't this resolve the issues associated with cross-linguistic environments and garbled text?** Inspired by this idea, the extensive character set, Unicode, was born.

-"Unicode" is referred to as "统一码" (Unified Code) in Chinese, theoretically capable of accommodating over a million characters. It aims to incorporate characters from all over the world into a single set, providing a universal character set for processing and displaying various languages and reducing the issues of garbled text due to different encoding standards.
+<u>Unicode</u> is referred to as "统一码" (Unified Code) in Chinese, theoretically capable of accommodating over a million characters. It aims to incorporate characters from all over the world into a single set, providing a universal character set for processing and displaying various languages and reducing the issues of garbled text due to different encoding standards.

Since its release in 1991, Unicode has continually expanded to include new languages and characters. As of September 2022, Unicode contains 149,186 characters, including characters, symbols, and even emojis from various languages. In the vast Unicode character set, commonly used characters occupy 2 bytes, while some rare characters may occupy 3 or even 4 bytes.

Unicode is a universal character set that assigns a number (called a "code point") to each character, **but it does not specify how these character code points should be stored in a computer system**. One might ask: How does a system interpret Unicode code points of varying lengths within a text? For example, given a 2-byte code, how does the system determine if it represents a single 2-byte character or two 1-byte characters?

-A straightforward solution to this problem is to store all characters as equal-length encodings. As shown in the figure below, each character in "Hello" occupies 1 byte, while each character in "算法" (algorithm) occupies 2 bytes. We could encode all characters in "Hello 算法" as 2 bytes by padding the higher bits with zeros. This method would enable the system to interpret a character every 2 bytes, recovering the content of the phrase.
+**A straightforward solution to this problem is to store all characters as equal-length encodings**. As shown in the figure below, each character in "Hello" occupies 1 byte, while each character in "算法" (algorithm) occupies 2 bytes. We could encode all characters in "Hello 算法" as 2 bytes by padding the higher bits with zeros. This method would enable the system to interpret a character every 2 bytes, recovering the content of the phrase.

![Unicode encoding example](character_encoding.assets/unicode_hello_algo.png)
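JavaScript strings expose code points directly, which makes the 1-byte versus 2-byte contrast above easy to inspect. A small sketch (ours):

```javascript
/* Print the Unicode code points of "Hello 算法" */
const s = 'Hello 算法';
for (const ch of s) {
    // codePointAt(0) returns the character's Unicode code point
    const cp = ch.codePointAt(0).toString(16).toUpperCase().padStart(4, '0');
    console.log(`${ch} -> U+${cp}`);
}
// 'H' -> U+0048 (fits in 1 byte); '算' -> U+7B97, '法' -> U+6CD5 (need 2 bytes)
```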
@@ -11,14 +11,13 @@ As shown in the figure below, logical structures can be divided into two major c
- **Linear data structures**: Arrays, Linked Lists, Stacks, Queues, Hash Tables.
- **Non-linear data structures**: Trees, Heaps, Graphs, Hash Tables.

-![Linear and non-linear data structures](classification_of_data_structure.assets/classification_logic_structure.png)

Non-linear data structures can be further divided into tree structures and network structures.

- **Linear structures**: Arrays, linked lists, queues, stacks, and hash tables, where elements have a one-to-one sequential relationship.
- **Tree structures**: Trees, Heaps, Hash Tables, where elements have a one-to-many relationship.
- **Network structures**: Graphs, where elements have many-to-many relationships.

+![Linear and non-linear data structures](classification_of_data_structure.assets/classification_logic_structure.png)

## Physical structure: contiguous and dispersed

**During the execution of an algorithm, the data being processed is stored in memory**. The figure below shows a computer memory stick where each black square is a physical memory space. We can think of memory as a vast Excel spreadsheet, with each cell capable of storing a certain amount of data.
@@ -38,6 +37,7 @@ As illustrated in the figure below, **the physical structure reflects the way da
![Contiguous space storage and dispersed space storage](classification_of_data_structure.assets/classification_phisical_structure.png)

**It is worth noting that all data structures are implemented based on arrays, linked lists, or a combination of both**. For example, stacks and queues can be implemented using either arrays or linked lists; while implementations of hash tables may involve both arrays and linked lists.

- **Array-based implementations**: Stacks, Queues, Hash Tables, Trees, Heaps, Graphs, Matrices, Tensors (arrays with dimensions $\geq 3$).
- **Linked-list-based implementations**: Stacks, Queues, Hash Tables, Trees, Heaps, Graphs, etc.
@@ -14,11 +14,11 @@ Firstly, it's important to note that **numbers are stored in computers using the
- **One's complement**: The one's complement of a positive number is the same as its sign-magnitude. For negative numbers, it's obtained by inverting all bits except the sign bit.
- **Two's complement**: The two's complement of a positive number is the same as its sign-magnitude. For negative numbers, it's obtained by adding $1$ to their one's complement.

-The following diagram illustrates the conversions among sign-magnitude, one's complement, and two's complement:
+The figure below illustrates the conversions among sign-magnitude, one's complement, and two's complement:

![Conversions between sign-magnitude, one's complement, and two's complement](number_encoding.assets/1s_2s_complement.png)

-Although sign-magnitude is the most intuitive, it has limitations. For one, **negative numbers in sign-magnitude cannot be directly used in calculations**. For example, in sign-magnitude, calculating $1 + (-2)$ results in $-3$, which is incorrect.
+Although <u>sign-magnitude</u> is the most intuitive, it has limitations. For one, **negative numbers in sign-magnitude cannot be directly used in calculations**. For example, in sign-magnitude, calculating $1 + (-2)$ results in $-3$, which is incorrect.

$$
\begin{aligned}
@@ -29,7 +29,7 @@ $$
\end{aligned}
$$

-To address this, computers introduced the **one's complement**. If we convert to one's complement and calculate $1 + (-2)$, then convert the result back to sign-magnitude, we get the correct result of $-1$.
+To address this, computers introduced the <u>one's complement</u>. If we convert to one's complement and calculate $1 + (-2)$, then convert the result back to sign-magnitude, we get the correct result of $-1$.

$$
\begin{aligned}
@@ -51,7 +51,7 @@ $$
\end{aligned}
$$

-Like sign-magnitude, one's complement also suffers from the positive and negative zero ambiguity. Therefore, computers further introduced the **two's complement**. Let's observe the conversion process for negative zero in sign-magnitude, one's complement, and two's complement:
+Like sign-magnitude, one's complement also suffers from the positive and negative zero ambiguity. Therefore, computers further introduced the <u>two's complement</u>. Let's observe the conversion process for negative zero in sign-magnitude, one's complement, and two's complement:

$$
\begin{aligned}
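The practical payoff of two's complement is that hardware can add signed numbers without special cases. A small JavaScript check of the $1 + (-2) = -1$ example (our snippet; `Int8Array` stores its elements in 8-bit two's complement):

```javascript
/* Two's complement arithmetic on 8-bit integers */
const bytes = new Int8Array(2);
bytes[0] = 1;
bytes[1] = -2;
// the raw bit pattern of -2 is its two's complement: 11111110
console.log((bytes[1] & 0xff).toString(2).padStart(8, '0')); // "11111110"
// plain addition of the stored values yields the correct result
console.log(bytes[0] + bytes[1]); // -1
```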
@@ -125,7 +125,7 @@ $$

![Example calculation of a float in IEEE 754 standard](number_encoding.assets/ieee_754_float.png)

-Observing the diagram, given an example data $\mathrm{S} = 0$, $\mathrm{E} = 124$, $\mathrm{N} = 2^{-2} + 2^{-3} = 0.375$, we have:
+Observing the figure above, given an example data $\mathrm{S} = 0$, $\mathrm{E} = 124$, $\mathrm{N} = 2^{-2} + 2^{-3} = 0.375$, we have:

$$
\text{val} = (-1)^0 \times 2^{124 - 127} \times (1 + 0.375) = 0.171875
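This value is easy to verify numerically; the snippet below (ours) plugs the example's sign, exponent, and fraction into the formula:

```javascript
/* Verify the IEEE 754 example: S = 0, E = 124, fraction N = 0.375 */
const S = 0, E = 124, N = 0.375;
const val = (-1) ** S * 2 ** (E - 127) * (1 + N);
console.log(val); // 0.171875
```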
@@ -34,7 +34,7 @@ Starting from the original problem $f(0, n-1)$, perform the binary search throug
2. Recursively solve the subproblem reduced by half in size, which could be $f(i, m-1)$ or $f(m+1, j)$.
3. Repeat steps `1.` and `2.` until `target` is found, or return once the interval is empty.

-The diagram below shows the divide-and-conquer process of binary search for element $6$ in an array.
+The figure below shows the divide-and-conquer process of binary search for element $6$ in an array.

![The divide-and-conquer process of binary search](binary_search_recur.assets/binary_search_recur.png)
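A minimal JavaScript sketch of the recursive scheme $f(i, j)$ described above (our illustration; the repository's listing may differ):

```javascript
/* Binary search as divide and conquer: solve subproblem f(i, j) */
function dfs(nums, target, i, j) {
    // empty interval: the target is absent
    if (i > j) return -1;
    const m = Math.floor((i + j) / 2); // the midpoint splits the problem
    if (nums[m] < target) {
        return dfs(nums, target, m + 1, j); // recurse into f(m+1, j)
    } else if (nums[m] > target) {
        return dfs(nums, target, i, m - 1); // recurse into f(i, m-1)
    }
    return m; // found the target
}

function binarySearch(nums, target) {
    // the original problem is f(0, n-1)
    return dfs(nums, target, 0, nums.length - 1);
}

console.log(binarySearch([1, 3, 6, 8, 12, 15, 23, 26, 31, 35], 6)); // 2
```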
@@ -2,7 +2,7 @@

!!! question

-    Given the preorder traversal `preorder` and inorder traversal `inorder` of a binary tree, construct the binary tree and return the root node of the binary tree. Assume that there are no duplicate values in the nodes of the binary tree (as shown in the diagram below).
+    Given the preorder traversal `preorder` and inorder traversal `inorder` of a binary tree, construct the binary tree and return the root node of the binary tree. Assume that there are no duplicate values in the nodes of the binary tree (as shown in the figure below).

    ![Example data for building a binary tree](build_binary_tree_problem.assets/build_tree_example.png)
@@ -20,10 +20,10 @@ Based on the above analysis, this problem can be solved using divide and conquer

By definition, `preorder` and `inorder` can be divided into three parts.

-- Preorder traversal: `[ Root | Left Subtree | Right Subtree ]`, for example, the tree in the diagram corresponds to `[ 3 | 9 | 2 1 7 ]`.
-- Inorder traversal: `[ Left Subtree | Root | Right Subtree ]`, for example, the tree in the diagram corresponds to `[ 9 | 3 | 1 2 7 ]`.
+- Preorder traversal: `[ Root | Left Subtree | Right Subtree ]`, for example, the tree in the figure corresponds to `[ 3 | 9 | 2 1 7 ]`.
+- Inorder traversal: `[ Left Subtree | Root | Right Subtree ]`, for example, the tree in the figure corresponds to `[ 9 | 3 | 1 2 7 ]`.

-Using the data in the diagram above, we can obtain the division results as shown in the steps below.
+Using the data in the figure above, we can obtain the division results as shown in the figure below.

1. The first element 3 in the preorder traversal is the value of the root node.
2. Find the index of the root node 3 in `inorder`, and use this index to divide `inorder` into `[ 9 | 3 | 1 2 7 ]`.
@@ -49,7 +49,7 @@ As shown in the table below, the above variables can represent the index of the
| Left subtree | $i + 1$ | $[l, m-1]$ |
| Right subtree | $i + 1 + (m - l)$ | $[m+1, r]$ |

-Please note, the meaning of $(m-l)$ in the right subtree root index is "the number of nodes in the left subtree", which is suggested to be understood in conjunction with the diagram below.
+Please note, the meaning of $(m-l)$ in the right subtree root index is "the number of nodes in the left subtree", which is suggested to be understood in conjunction with the figure below.

![Indexes of the root node and left and right subtrees](build_binary_tree_problem.assets/build_tree_division_pointers.png)
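Putting the table and figure together, here is a compact JavaScript sketch of the division logic (our illustration, with a hypothetical `TreeNode` shape; the repository's listing may differ):

```javascript
/* Hypothetical binary tree node */
class TreeNode {
    constructor(val) {
        this.val = val;
        this.left = null;
        this.right = null;
    }
}

/* i: root index in preorder; [l, r]: subtree interval in inorder */
function dfs(preorder, inorderMap, i, l, r) {
    if (r - l < 0) return null; // empty interval
    const root = new TreeNode(preorder[i]);
    const m = inorderMap.get(preorder[i]); // root position in inorder
    // left subtree: root at i + 1, interval [l, m - 1]
    root.left = dfs(preorder, inorderMap, i + 1, l, m - 1);
    // right subtree: root at i + 1 + (m - l), interval [m + 1, r]
    root.right = dfs(preorder, inorderMap, i + 1 + (m - l), m + 1, r);
    return root;
}

function buildTree(preorder, inorder) {
    // hash table mapping each value to its inorder index (no duplicates)
    const inorderMap = new Map();
    inorder.forEach((val, idx) => inorderMap.set(val, idx));
    return dfs(preorder, inorderMap, 0, 0, inorder.length - 1);
}

const root = buildTree([3, 9, 2, 1, 7], [9, 3, 1, 2, 7]);
console.log(root.val); // 3
```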
@@ -61,7 +61,7 @@ To improve the efficiency of querying $m$, we use a hash table `hmap` to store t
[file]{build_tree}-[class]{}-[func]{build_tree}
```

-The diagram below shows the recursive process of building the binary tree, where each node is established during the "descending" process, and each edge (reference) is established during the "ascending" process.
+The figure below shows the recursive process of building the binary tree, where each node is established during the "descending" process, and each edge (reference) is established during the "ascending" process.

=== "<1>"
    ![Recursive process of building a binary tree](build_binary_tree_problem.assets/built_tree_step1.png)
@@ -90,7 +90,7 @@ The diagram below shows the recursive process of building the binary tree, where
=== "<9>"
    ![built_tree_step9](build_binary_tree_problem.assets/built_tree_step9.png)

-Each recursive function's division results of `preorder` and `inorder` are shown in the diagram below.
+Each recursive function's division results of `preorder` and `inorder` are shown in the figure below.

![Division results in each recursive function](build_binary_tree_problem.assets/built_tree_overall.png)
@@ -4,7 +4,7 @@ In both merge sorting and building binary trees, we decompose the original probl

!!! question

-    Given three pillars, denoted as `A`, `B`, and `C`. Initially, pillar `A` is stacked with $n$ discs, arranged in order from top to bottom from smallest to largest. Our task is to move these $n$ discs to pillar `C`, maintaining their original order (as shown below). The following rules must be followed during the disc movement process:
+    Given three pillars, denoted as `A`, `B`, and `C`. Initially, pillar `A` is stacked with $n$ discs, arranged in order from top to bottom from smallest to largest. Our task is to move these $n$ discs to pillar `C`, maintaining their original order (as shown in the figure below). The following rules must be followed during the disc movement process:

    1. A disc can only be picked up from the top of a pillar and placed on top of another pillar.
    2. Only one disc can be moved at a time.
@@ -16,7 +16,7 @@ In both merge sorting and building binary trees, we decompose the original probl

### Consider the base case

-As shown below, for the problem $f(1)$, i.e., when there is only one disc, we can directly move it from `A` to `C`.
+As shown in the figure below, for the problem $f(1)$, i.e., when there is only one disc, we can directly move it from `A` to `C`.

=== "<1>"
    ![Solution for a problem of size 1](hanota_problem.assets/hanota_f1_step1.png)
@@ -24,7 +24,7 @@ As shown below, for the problem $f(1)$, i.e., when there is only one disc, we ca
=== "<2>"
    ![hanota_f1_step2](hanota_problem.assets/hanota_f1_step2.png)

-As shown below, for the problem $f(2)$, i.e., when there are two discs, **since the smaller disc must always be above the larger disc, `B` is needed to assist in the movement**.
+As shown in the figure below, for the problem $f(2)$, i.e., when there are two discs, **since the smaller disc must always be above the larger disc, `B` is needed to assist in the movement**.

1. First, move the smaller disc from `A` to `B`.
2. Then move the larger disc from `A` to `C`.
@@ -48,7 +48,7 @@ The process of solving the problem $f(2)$ can be summarized as: **moving two dis

For the problem $f(3)$, i.e., when there are three discs, the situation becomes slightly more complicated.

-Since we already know the solutions to $f(1)$ and $f(2)$, we can think from a divide-and-conquer perspective and **consider the two top discs on `A` as a unit**, performing the steps shown below. This way, the three discs are successfully moved from `A` to `C`.
+Since we already know the solutions to $f(1)$ and $f(2)$, we can think from a divide-and-conquer perspective and **consider the two top discs on `A` as a unit**, performing the steps shown in the figure below. This way, the three discs are successfully moved from `A` to `C`.

1. Let `B` be the target pillar and `C` the buffer pillar, and move the two discs from `A` to `B`.
2. Move the remaining disc from `A` directly to `C`.
@@ -68,7 +68,7 @@ Since we already know the solutions to $f(1)$ and $f(2)$, we can think from a di

Essentially, **we divide the problem $f(3)$ into two subproblems $f(2)$ and one subproblem $f(1)$**. By solving these three subproblems in order, the original problem is resolved. This indicates that the subproblems are independent, and their solutions can be merged.

-From this, we can summarize the divide-and-conquer strategy for solving the Tower of Hanoi shown in the following image: divide the original problem $f(n)$ into two subproblems $f(n-1)$ and one subproblem $f(1)$, and solve these three subproblems in the following order.
+From this, we can summarize the divide-and-conquer strategy for solving the Tower of Hanoi shown in the figure below: divide the original problem $f(n)$ into two subproblems $f(n-1)$ and one subproblem $f(1)$, and solve these three subproblems in the following order.

1. Move $n-1$ discs with the help of `C` from `A` to `B`.
2. Move the remaining one disc directly from `A` to `C`.
@@ -86,7 +86,7 @@ In the code, we declare a recursive function `dfs(i, src, buf, tar)` whose role is
[file]{hanota}-[class]{}-[func]{solve_hanota}
```

-As shown below, the Tower of Hanoi forms a recursive tree with a height of $n$, each node representing a subproblem, corresponding to an open `dfs()` function, **thus the time complexity is $O(2^n)$, and the space complexity is $O(n)$**.
+As shown in the figure below, the Tower of Hanoi forms a recursive tree with a height of $n$, each node representing a subproblem, corresponding to an open `dfs()` function, **thus the time complexity is $O(2^n)$, and the space complexity is $O(n)$**.

![Recursive tree of the Tower of Hanoi](hanota_problem.assets/hanota_recursive_tree.png)
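For reference while reading this hunk, here is a minimal JavaScript rendition of the `dfs(i, src, buf, tar)` scheme described above (our sketch; the repository's listing may differ in details):

```javascript
/* Move the top disc of src to tar */
function move(src, tar) {
    tar.push(src.pop());
}

/* Move i discs from src to tar with the help of buffer pillar buf */
function dfs(i, src, buf, tar) {
    if (i === 1) {
        move(src, tar); // base case f(1): move one disc directly
        return;
    }
    dfs(i - 1, src, tar, buf); // subproblem f(i-1): src -> buf
    move(src, tar);            // subproblem f(1):  src -> tar
    dfs(i - 1, buf, src, tar); // subproblem f(i-1): buf -> tar
}

const A = [5, 4, 3, 2, 1], B = [], C = []; // list tail is the pillar top
dfs(A.length, A, B, C);
console.log(C); // [5, 4, 3, 2, 1]
```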
@@ -35,7 +35,7 @@ To illustrate the problem-solving steps more vividly, we use a classic problem,

    Given an $n \times m$ two-dimensional grid `grid`, each cell in the grid contains a non-negative integer representing the cost of that cell. The robot starts from the top-left cell and can only move down or right at each step until it reaches the bottom-right cell. Return the minimum path sum from the top-left to the bottom-right.

-    The following figure shows an example, where the given grid's minimum path sum is $13$.
+    The figure below shows an example, where the given grid's minimum path sum is $13$.

    ![Minimum Path Sum Example Data](dp_solution_pipeline.assets/min_path_sum_example.png)
@@ -45,7 +45,7 @@ Each round of decisions in this problem is to move one step down or right from t

The state $[i, j]$ corresponds to the subproblem: the minimum path sum from the starting point $[0, 0]$ to $[i, j]$, denoted as $dp[i, j]$.

-Thus, we obtain the two-dimensional $dp$ matrix shown below, whose size is the same as the input grid $grid$.
+Thus, we obtain the two-dimensional $dp$ matrix shown in the figure below, whose size is the same as the input grid $grid$.

![State definition and DP table](dp_solution_pipeline.assets/min_path_sum_solution_state_definition.png)
@@ -59,7 +59,7 @@ Thus, we obtain the two-dimensional $dp$ matrix shown below, whose size is the s

For the state $[i, j]$, it can only be derived from the cell above $[i-1, j]$ or the cell to the left $[i, j-1]$. Therefore, the optimal substructure is: the minimum path sum to reach $[i, j]$ is determined by the smaller of the minimum path sums of $[i, j-1]$ and $[i-1, j]$.

-Based on the above analysis, the state transition equation shown in the following figure can be derived:
+Based on the above analysis, the state transition equation shown in the figure below can be derived:

$$
dp[i, j] = \min(dp[i-1, j], dp[i, j-1]) + grid[i, j]
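As a quick numeric check of this transition (our example, using a hypothetical $2 \times 2$ grid $[[1, 3], [2, 1]]$ rather than the figure's data):

$$
\begin{aligned}
dp[0, 0] &= 1 \newline
dp[0, 1] &= dp[0, 0] + 3 = 4 \newline
dp[1, 0] &= dp[0, 0] + 2 = 3 \newline
dp[1, 1] &= \min(dp[0, 1], dp[1, 0]) + 1 = 4
\end{aligned}
$$

so the minimum path sum for this small grid is $4$, realized by the path $1 \rightarrow 2 \rightarrow 1$.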
@@ -104,7 +104,7 @@ Implementation code as follows:
[file]{min_path_sum}-[class]{}-[func]{min_path_sum_dfs}
```

-The following figure shows the recursive tree rooted at $dp[2, 1]$, which includes some overlapping subproblems, the number of which increases sharply as the size of the grid `grid` increases.
+The figure below shows the recursive tree rooted at $dp[2, 1]$, which includes some overlapping subproblems, the number of which increases sharply as the size of the grid `grid` increases.

Essentially, the reason for overlapping subproblems is: **there are multiple paths to reach a certain cell from the top-left corner**.
@@ -132,7 +132,7 @@ Implement the dynamic programming solution iteratively, code as shown below:
[file]{min_path_sum}-[class]{}-[func]{min_path_sum_dp}
```

-The following figures show the state transition process of the minimum path sum, traversing the entire grid, **thus the time complexity is $O(nm)$**.
+The figure below shows the state transition process of the minimum path sum, traversing the entire grid, **thus the time complexity is $O(nm)$**.

The array `dp` is of size $n \times m$, **therefore the space complexity is $O(nm)$**.
@@ -39,7 +39,7 @@ From this, we obtain a two-dimensional $dp$ table of size $(i+1) \times (j+1)$.

**Step two: Identify the optimal substructure and then derive the state transition equation**

-Consider the subproblem $dp[i, j]$, whose corresponding tail characters of the two strings are $s[i-1]$ and $t[j-1]$, which can be divided into three scenarios as shown below.
+Consider the subproblem $dp[i, j]$, whose corresponding tail characters of the two strings are $s[i-1]$ and $t[j-1]$, which can be divided into three scenarios as shown in the figure below.

1. Add $t[j-1]$ after $s[i-1]$, then the remaining subproblem is $dp[i, j-1]$.
2. Delete $s[i-1]$, then the remaining subproblem is $dp[i-1, j]$.
@@ -71,7 +71,7 @@ Observing the state transition equation, solving $dp[i, j]$ depends on the solut
[file]{edit_distance}-[class]{}-[func]{edit_distance_dp}
```

-As shown below, the process of state transition in the edit distance problem is very similar to that in the knapsack problem, which can be seen as filling a two-dimensional grid.
+As shown in the figure below, the process of state transition in the edit distance problem is very similar to that in the knapsack problem, which can be seen as filling a two-dimensional grid.

=== "<1>"
    ![Dynamic programming process of edit distance](edit_distance_problem.assets/edit_distance_dp_step1.png)
@@ -36,7 +36,7 @@ $$
dp[i] = dp[i-1] + dp[i-2]
$$

-This means that in the stair climbing problem, there is a recursive relationship between the subproblems, **the solution to the original problem can be constructed from the solutions to the subproblems**. The following image shows this recursive relationship.
+This means that in the stair climbing problem, there is a recursive relationship between the subproblems, **the solution to the original problem can be constructed from the solutions to the subproblems**. The figure below shows this recursive relationship.

![Recursive relationship of solution counts](intro_to_dynamic_programming.assets/climbing_stairs_state_transfer.png)
@@ -48,11 +48,11 @@ Observe the following code, which, like standard backtracking code, belongs to d
[file]{climbing_stairs_dfs}-[class]{}-[func]{climbing_stairs_dfs}
```

-The following image shows the recursive tree formed by brute force search. For the problem $dp[n]$, the depth of its recursive tree is $n$, with a time complexity of $O(2^n)$. Exponential order represents explosive growth, and entering a long wait if a relatively large $n$ is input.
+The figure below shows the recursive tree formed by brute force search. For the problem $dp[n]$, the depth of its recursive tree is $n$, with a time complexity of $O(2^n)$. Exponential order represents explosive growth, causing a long wait if a relatively large $n$ is input.

![Recursive tree for climbing stairs](intro_to_dynamic_programming.assets/climbing_stairs_dfs_tree.png)

-Observing the above image, **the exponential time complexity is caused by 'overlapping subproblems'**. For example, $dp[9]$ is decomposed into $dp[8]$ and $dp[7]$, $dp[8]$ into $dp[7]$ and $dp[6]$, both containing the subproblem $dp[7]$.
+Observing the figure above, **the exponential time complexity is caused by 'overlapping subproblems'**. For example, $dp[9]$ is decomposed into $dp[8]$ and $dp[7]$, $dp[8]$ into $dp[7]$ and $dp[6]$, both containing the subproblem $dp[7]$.

Thus, subproblems include even smaller overlapping subproblems, endlessly. A vast majority of computational resources are wasted on these overlapping subproblems.
@@ -69,7 +69,7 @@ The code is as follows:
[file]{climbing_stairs_dfs_mem}-[class]{}-[func]{climbing_stairs_dfs_mem}
```

-Observe the following image, **after memoization, all overlapping subproblems need to be calculated only once, optimizing the time complexity to $O(n)$**, which is a significant leap.
+Observe the figure below, **after memoization, all overlapping subproblems need to be calculated only once, optimizing the time complexity to $O(n)$**, which is a significant leap.

![Recursive tree with memoized search](intro_to_dynamic_programming.assets/climbing_stairs_dfs_memo_tree.png)
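A minimal JavaScript sketch of the memoized search (our illustration; the repository's listing may differ):

```javascript
/* Memoized search for the number of ways to climb n stairs */
function dfs(i, mem) {
    // base cases: dp[1] = 1, dp[2] = 2
    if (i === 1 || i === 2) return i;
    // return the cached result if this subproblem was already solved
    if (mem[i] !== -1) return mem[i];
    // state transition: dp[i] = dp[i-1] + dp[i-2]
    mem[i] = dfs(i - 1, mem) + dfs(i - 2, mem);
    return mem[i];
}

function climbingStairsDFSMem(n) {
    const mem = new Array(n + 1).fill(-1); // -1 marks "not computed yet"
    return dfs(n, mem);
}

console.log(climbingStairsDFSMem(9)); // 55
```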
@@ -85,7 +85,7 @@ Since dynamic programming does not include a backtracking process, it only requi
[file]{climbing_stairs_dp}-[class]{}-[func]{climbing_stairs_dp}
```

-The image below simulates the execution process of the above code.
+The figure below simulates the execution process of the above code.

![Dynamic programming process for climbing stairs](intro_to_dynamic_programming.assets/climbing_stairs_dp.png)
@@ -8,7 +8,7 @@ In this section, we will first solve the most common 0-1 knapsack problem.

    Given $n$ items, the weight of the $i$-th item is $wgt[i-1]$ and its value is $val[i-1]$, and a knapsack with a capacity of $cap$. Each item can be chosen only once. What is the maximum value of items that can be placed in the knapsack under the capacity limit?

-Observe the following figure, since the item number $i$ starts counting from 1, and the array index starts from 0, thus the weight of item $i$ corresponds to $wgt[i-1]$ and the value corresponds to $val[i-1]$.
+Observe the figure below, since the item number $i$ starts counting from 1, and the array index starts from 0, thus the weight of item $i$ corresponds to $wgt[i-1]$ and the value corresponds to $val[i-1]$.

![Example data of the 0-1 knapsack](knapsack_problem.assets/knapsack_example.png)
@@ -76,19 +76,19 @@ After introducing memoization, **the time complexity depends on the number of su
[file]{knapsack}-[class]{}-[func]{knapsack_dfs_mem}
```

-The following figure shows the search branches that are pruned in memoized search.
+The figure below shows the search branches that are pruned in memoized search.

![The memoized search recursive tree of the 0-1 knapsack problem](knapsack_problem.assets/knapsack_dfs_mem.png)

### Method three: Dynamic programming

-Dynamic programming essentially involves filling the $dp$ table during the state transition, the code is shown below:
+Dynamic programming essentially involves filling the $dp$ table during the state transition, as shown in the code below:

```src
[file]{knapsack}-[class]{}-[func]{knapsack_dp}
```

-As shown in the figures below, both the time complexity and space complexity are determined by the size of the array `dp`, i.e., $O(n \times cap)$.
+As shown in the figure below, both the time complexity and space complexity are determined by the size of the array `dp`, i.e., $O(n \times cap)$.

=== "<1>"
    ![The dynamic programming process of the 0-1 knapsack problem](knapsack_problem.assets/knapsack_dp_step1.png)
@@ -40,7 +40,7 @@ Comparing the code for the two problems, the state transition changes from $i-1$

Since the current state comes from the state to the left and above, **the space-optimized solution should perform a forward traversal for each row in the $dp$ table**.

-This traversal order is the opposite of that for the 0-1 knapsack. Please refer to the following figures to understand the difference.
+This traversal order is the opposite of that for the 0-1 knapsack. Please refer to the figure below to understand the difference.

=== "<1>"
    ![Dynamic programming process for the unbounded knapsack problem after space optimization](unbounded_knapsack_problem.assets/unbounded_knapsack_dp_comp_step1.png)
@@ -117,7 +117,7 @@ For this reason, we use the number $amt + 1$ to represent an invalid solution, b
[file]{coin_change}-[class]{}-[func]{coin_change_dp}
```

-The following images show the dynamic programming process for the coin change problem, which is very similar to the unbounded knapsack problem.
+The figure below shows the dynamic programming process for the coin change problem, which is very similar to the unbounded knapsack problem.

=== "<1>"
    ![Dynamic programming process for the coin change problem](unbounded_knapsack_problem.assets/coin_change_dp_step1.png)
@@ -176,7 +176,7 @@ The space optimization for the coin change problem is handled in the same way as

!!! question

-    Given $n$ types of coins, where the denomination of the $i^{th}$ type of coin is $coins[i - 1]$, and the target amount is $amt$. **Each type of coin can be selected multiple times**, **ask how many combinations of coins can make up the target amount**. See the example below.
+    Given $n$ types of coins, where the denomination of the $i^{th}$ type of coin is $coins[i - 1]$, and the target amount is $amt$. Each type of coin can be selected multiple times, **ask how many combinations of coins can make up the target amount**. See the example below.

    ![Example data for Coin Change Problem II](unbounded_knapsack_problem.assets/coin_change_ii_example.png)
@@ -1,6 +1,6 @@
# Graph

-A "graph" is a type of nonlinear data structure, consisting of "vertices" and "edges". A graph $G$ can be abstractly represented as a collection of a set of vertices $V$ and a set of edges $E$. The following example shows a graph containing 5 vertices and 7 edges.
+A <u>graph</u> is a type of nonlinear data structure, consisting of <u>vertices</u> and <u>edges</u>. A graph $G$ can be abstractly represented as a collection of a set of vertices $V$ and a set of edges $E$. The following example shows a graph containing 5 vertices and 7 edges.

$$
\begin{aligned}
@@ -10,35 +10,35 @@ G & = \{ V, E \} \newline
\end{aligned}
$$

-If vertices are viewed as nodes and edges as references (pointers) connecting the nodes, graphs can be seen as a data structure that extends from linked lists. As shown below, **compared to linear relationships (linked lists) and divide-and-conquer relationships (trees), network relationships (graphs) are more complex due to their higher degree of freedom**.
+If vertices are viewed as nodes and edges as references (pointers) connecting the nodes, graphs can be seen as a data structure that extends from linked lists. As shown in the figure below, **compared to linear relationships (linked lists) and divide-and-conquer relationships (trees), network relationships (graphs) are more complex due to their higher degree of freedom**.

![Relationship between linked lists, trees, and graphs](graph.assets/linkedlist_tree_graph.png)

## Common types of graphs

-Based on whether edges have direction, graphs can be divided into "undirected graphs" and "directed graphs", as shown below.
+Based on whether edges have direction, graphs can be divided into <u>undirected graphs</u> and <u>directed graphs</u>, as shown in the figure below.

- In undirected graphs, edges represent a "bidirectional" connection between two vertices, for example, the "friendship" in WeChat or QQ.
- In directed graphs, edges have directionality, that is, the edges $A \rightarrow B$ and $A \leftarrow B$ are independent of each other, for example, the "follow" and "be followed" relationship on Weibo or TikTok.

![Directed and undirected graphs](graph.assets/directed_graph.png)

-Based on whether all vertices are connected, graphs can be divided into "connected graphs" and "disconnected graphs", as shown below.
+Based on whether all vertices are connected, graphs can be divided into <u>connected graphs</u> and <u>disconnected graphs</u>, as shown in the figure below.

- For connected graphs, it is possible to reach any other vertex starting from a certain vertex.
- For disconnected graphs, there is at least one vertex that cannot be reached from a certain starting vertex.

![Connected and disconnected graphs](graph.assets/connected_graph.png)

-We can also add a "weight" variable to edges, resulting in "weighted graphs" as shown below. For example, in mobile games like "Honor of Kings", the system calculates the "closeness" between players based on shared gaming time, and this closeness network can be represented with a weighted graph.
+We can also add a weight variable to edges, resulting in <u>weighted graphs</u> as shown in the figure below. For example, in mobile games like "Honor of Kings", the system calculates the "closeness" between players based on shared gaming time, and this closeness network can be represented with a weighted graph.

![Weighted and unweighted graphs](graph.assets/weighted_graph.png)

Graph data structures include the following commonly used terms.

-- "Adjacency": When there is an edge connecting two vertices, these two vertices are said to be "adjacent". In the above figure, the adjacent vertices of vertex 1 are vertices 2, 3, and 5.
-- "Path": The sequence of edges passed from vertex A to vertex B is called a "path" from A to B. In the above figure, the edge sequence 1-5-2-4 is a path from vertex 1 to vertex 4.
-- "Degree": The number of edges a vertex has. For directed graphs, "in-degree" refers to how many edges point to the vertex, and "out-degree" refers to how many edges point out from the vertex.
+- <u>Adjacency</u>: When there is an edge connecting two vertices, these two vertices are said to be "adjacent". In the figure above, the adjacent vertices of vertex 1 are vertices 2, 3, and 5.
+- <u>Path</u>: The sequence of edges passed from vertex A to vertex B is called a path from A to B. In the figure above, the edge sequence 1-5-2-4 is a path from vertex 1 to vertex 4.
+- <u>Degree</u>: The number of edges a vertex has. For directed graphs, <u>in-degree</u> refers to how many edges point to the vertex, and <u>out-degree</u> refers to how many edges point out from the vertex.

## Representation of graphs
@@ -46,9 +46,9 @@ Common representations of graphs include "adjacency matrices" and "adjacency lis

### Adjacency matrix

-Let the number of vertices in the graph be $n$, the "adjacency matrix" uses an $n \times n$ matrix to represent the graph, where each row (column) represents a vertex, and the matrix elements represent edges, with $1$ or $0$ indicating whether there is an edge between two vertices.
+Let the number of vertices in the graph be $n$, the <u>adjacency matrix</u> uses an $n \times n$ matrix to represent the graph, where each row (column) represents a vertex, and the matrix elements represent edges, with $1$ or $0$ indicating whether there is an edge between two vertices.

-As shown below, let the adjacency matrix be $M$, and the list of vertices be $V$, then the matrix element $M[i, j] = 1$ indicates there is an edge between vertex $V[i]$ and vertex $V[j]$, conversely $M[i, j] = 0$ indicates there is no edge between the two vertices.
+As shown in the figure below, let the adjacency matrix be $M$, and the list of vertices be $V$, then the matrix element $M[i, j] = 1$ indicates there is an edge between vertex $V[i]$ and vertex $V[j]$, conversely $M[i, j] = 0$ indicates there is no edge between the two vertices.

![Representation of a graph with an adjacency matrix](graph.assets/adjacency_matrix.png)
@@ -62,13 +62,13 @@ When representing graphs with adjacency matrices, it is possible to directly acc

### Adjacency list

-The "adjacency list" uses $n$ linked lists to represent the graph, with each linked list node representing a vertex. The $i$-th linked list corresponds to vertex $i$ and contains all adjacent vertices (vertices connected to that vertex). The figure below shows an example of a graph stored using an adjacency list.
+The <u>adjacency list</u> uses $n$ linked lists to represent the graph, with each linked list node representing a vertex. The $i$-th linked list corresponds to vertex $i$ and contains all adjacent vertices (vertices connected to that vertex). The figure below shows an example of a graph stored using an adjacency list.

![Representation of a graph with an adjacency list](graph.assets/adjacency_list.png)

The adjacency list only stores actual edges, and the total number of edges is often much less than $n^2$, making it more space-efficient. However, finding edges in the adjacency list requires traversing the linked list, so its time efficiency is not as good as that of the adjacency matrix.

-Observing the above figure, **the structure of the adjacency list is very similar to the "chaining" in hash tables, hence we can use similar methods to optimize efficiency**. For example, when the linked list is long, it can be transformed into an AVL tree or red-black tree, thus optimizing the time efficiency from $O(n)$ to $O(\log n)$; the linked list can also be transformed into a hash table, thus reducing the time complexity to $O(1)$.
+Observing the figure above, **the structure of the adjacency list is very similar to the "chaining" in hash tables, hence we can use similar methods to optimize efficiency**. For example, when the linked list is long, it can be transformed into an AVL tree or red-black tree, thus optimizing the time efficiency from $O(n)$ to $O(\log n)$; the linked list can also be transformed into a hash table, thus reducing the time complexity to $O(1)$.
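To make the structure tangible, here is a small JavaScript sketch of an adjacency list built with a `Map` (our illustration, using an example graph consistent with the adjacencies named above; real implementations vary):

```javascript
/* Adjacency list via a Map: vertex -> list of adjacent vertices */
const adjList = new Map([
    [1, [2, 3, 5]],
    [2, [1, 4, 5]],
    [3, [1]],
    [4, [2, 5]],
    [5, [1, 2, 4]],
]);

/* Add an undirected edge between u and v */
function addEdge(u, v) {
    adjList.get(u).push(v);
    adjList.get(v).push(u);
}

/* Check whether an edge exists: requires traversing the list, O(n) */
function hasEdge(u, v) {
    return adjList.get(u).includes(v);
}

console.log(hasEdge(1, 5)); // true
```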
## Common applications of graphs
@@ -57,7 +57,7 @@ Given an undirected graph with a total of $n$ vertices and $m$ edges, the variou
=== "Remove a vertex"
    ![adjacency_list_remove_vertex](graph_operations.assets/adjacency_list_step5_remove_vertex.png)

-Below is the adjacency list code implementation. Compared to the above diagram, the actual code has the following differences.
+Below is the adjacency list code implementation. Compared to the figure above, the actual code has the following differences.

- For convenience in adding and removing vertices, and to simplify the code, we use lists (dynamic arrays) instead of linked lists.
- Use a hash table to store the adjacency list, `key` being the vertex instance, `value` being the list (linked list) of adjacent vertices of that vertex.
@@ -2,7 +2,7 @@

Trees represent a "one-to-many" relationship, while graphs have a higher degree of freedom and can represent any "many-to-many" relationship. Therefore, we can consider trees as a special case of graphs. Clearly, **tree traversal operations are also a special case of graph traversal operations**.

-Both graphs and trees require the application of search algorithms to implement traversal operations. Graph traversal can be divided into two types: "Breadth-First Search (BFS)" and "Depth-First Search (DFS)".
+Both graphs and trees require the application of search algorithms to implement traversal operations. Graph traversal can be divided into two types: <u>Breadth-First Search (BFS)</u> and <u>Depth-First Search (DFS)</u>.

## Breadth-first search
@@ -24,7 +24,7 @@ To prevent revisiting vertices, we use a hash set `visited` to record which node
[file]{graph_bfs}-[class]{}-[func]{graph_bfs}
```

-The code is relatively abstract, it is suggested to compare with the following figure to deepen the understanding.
+The code is relatively abstract, so it is suggested to compare it with the figure below to deepen the understanding.

=== "<1>"
    ![Steps of breadth-first search of a graph](graph_traversal.assets/graph_bfs_step1.png)
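As a companion to the figure's steps, here is a minimal JavaScript sketch of BFS over the `Map`-based adjacency list sketched earlier (our illustration; the repository's listing may differ):

```javascript
/* Breadth-first search: traverse "from near to far" with a queue */
function graphBFS(adjList, startVet) {
    const res = [];                      // records the visit order
    const visited = new Set([startVet]); // hash set: prevents revisits
    const queue = [startVet];
    while (queue.length > 0) {
        const vet = queue.shift();       // dequeue the vertex at the head
        res.push(vet);
        for (const adjVet of adjList.get(vet)) {
            if (visited.has(adjVet)) continue;
            queue.push(adjVet);          // enqueue unvisited neighbors
            visited.add(adjVet);
        }
    }
    return res;
}

// usage with the Map-based adjacency list sketched earlier:
// graphBFS(adjList, 1) -> [1, 2, 3, 5, 4]
```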
@@ -61,7 +61,7 @@ The code is relatively abstract, it is suggested to compare with the following f

!!! question "Is the sequence of breadth-first traversal unique?"

-    Not unique. Breadth-first traversal only requires traversing in a "from near to far" order, **and the traversal order of multiple vertices at the same distance can be arbitrarily shuffled**. For example, in the above figure, the visitation order of vertices $1$ and $3$ can be switched, as can the order of vertices $2$, $4$, and $6$.
+    Not unique. Breadth-first traversal only requires traversing in a "from near to far" order, **and the traversal order of multiple vertices at the same distance can be arbitrarily shuffled**. For example, in the figure above, the visitation order of vertices $1$ and $3$ can be switched, as can the order of vertices $2$, $4$, and $6$.

### Complexity analysis
@@ -83,12 +83,12 @@ This "go as far as possible and then return" algorithm paradigm is usually imple
[file]{graph_dfs}-[class]{}-[func]{graph_dfs}
```

-The algorithm process of depth-first search is shown in the following figure.
+The algorithm process of depth-first search is shown in the figure below.

- **Dashed lines represent downward recursion**, indicating that a new recursive method has been initiated to visit a new vertex.
- **Curved dashed lines represent upward backtracking**, indicating that this recursive method has returned to the position where this method was initiated.

-To deepen the understanding, it is suggested to combine the following figure with the code to simulate (or draw) the entire DFS process in your mind, including when each recursive method is initiated and when it returns.
+To deepen the understanding, it is suggested to combine the figure below with the code to simulate (or draw) the entire DFS process in your mind, including when each recursive method is initiated and when it returns.

=== "<1>"
    ![Steps of depth-first search of a graph](graph_traversal.assets/graph_dfs_step1.png)
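For symmetry with the BFS sketch, a minimal JavaScript rendition of the recursive DFS paradigm (ours; the repository's listing may differ):

```javascript
/* Depth-first search: go as far as possible, then backtrack */
function dfs(adjList, visited, res, vet) {
    res.push(vet);    // record the visited vertex
    visited.add(vet);
    for (const adjVet of adjList.get(vet)) {
        if (visited.has(adjVet)) continue;
        dfs(adjList, visited, res, adjVet); // recurse before trying siblings
    }
}

function graphDFS(adjList, startVet) {
    const res = [];
    const visited = new Set();
    dfs(adjList, visited, res, startVet);
    return res;
}

// usage with the Map-based adjacency list sketched earlier:
// graphDFS(adjList, 1) -> [1, 2, 4, 5, 3]
```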
@@ -2,7 +2,7 @@

!!! question

-    Given $n$ items, the weight of the $i$-th item is $wgt[i-1]$ and its value is $val[i-1]$, and a knapsack with a capacity of $cap$. Each item can be chosen only once, **but a part of the item can be selected, with its value calculated based on the proportion of the weight chosen**, what is the maximum value of the items in the knapsack under the limited capacity? An example is shown below.
+    Given $n$ items, the weight of the $i$-th item is $wgt[i-1]$ and its value is $val[i-1]$, and a knapsack with a capacity of $cap$. Each item can be chosen only once, **but a part of the item can be selected, with its value calculated based on the proportion of the weight chosen**, what is the maximum value of the items in the knapsack under the limited capacity? An example is shown in the figure below.

    ![Example data of the fractional knapsack problem](fractional_knapsack_problem.assets/fractional_knapsack_example.png)
@@ -17,7 +17,7 @@ The difference is that, in this problem, only a part of an item can be chosen. A

### Greedy strategy determination

-Maximizing the total value of the items in the knapsack essentially means maximizing the value per unit weight. From this, the greedy strategy shown below can be deduced.
+Maximizing the total value of the items in the knapsack **essentially means maximizing the value per unit weight**. From this, the greedy strategy shown in the figure below can be deduced.

1. Sort the items by their unit value from high to low.
2. Iterate over all items, **greedily choosing the item with the highest unit value in each round**.
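The two-step strategy above translates directly into code. A minimal JavaScript sketch (our illustration with classic example data; the repository's listing may differ):

```javascript
/* Fractional knapsack: greedily take the highest unit-value item */
function fractionalKnapsack(wgt, val, cap) {
    // pair up items and sort by unit value val/wgt, descending
    const items = wgt.map((w, i) => [w, val[i]])
        .sort((a, b) => b[1] / b[0] - a[1] / a[0]);
    let res = 0;
    for (const [w, v] of items) {
        if (w <= cap) {
            res += v;             // take the whole item
            cap -= w;
        } else {
            res += (v / w) * cap; // take only a fraction of the item
            break;                // the knapsack is now full
        }
    }
    return res;
}

console.log(fractionalKnapsack([10, 20, 30], [60, 100, 120], 50)); // 240
```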
@@ -13,7 +13,7 @@ Let's first understand the working principle of the greedy algorithm through the

    Given $n$ types of coins, where the denomination of the $i$th type of coin is $coins[i - 1]$, and the target amount is $amt$, with each type of coin available indefinitely, what is the minimum number of coins needed to make up the target amount? If it is not possible to make up the target amount, return $-1$.

-The greedy strategy adopted in this problem is shown in the following figure. Given the target amount, **we greedily choose the coin that is closest to and not greater than it**, repeatedly following this step until the target amount is met.
+The greedy strategy adopted in this problem is shown in the figure below. Given the target amount, **we greedily choose the coin that is closest to and not greater than it**, repeatedly following this step until the target amount is met.

![Greedy strategy for coin change](greedy_algorithm.assets/coin_change_greedy_strategy.png)
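A minimal JavaScript sketch of this greedy choice (ours; it assumes `coins` is sorted in ascending order, and the repository's listing may differ):

```javascript
/* Greedy coin change: always pick the largest coin that fits */
function coinChangeGreedy(coins, amt) {
    let i = coins.length - 1;
    let count = 0;
    while (amt > 0) {
        // find the largest coin not greater than the remaining amount
        while (i > 0 && coins[i] > amt) {
            i--;
        }
        // if even coins[0] exceeds amt, this drives amt negative and ends the loop
        amt -= coins[i];
        count++;
    }
    // amt === 0 means the target was met exactly; otherwise no solution
    return amt === 0 ? count : -1;
}

console.log(coinChangeGreedy([1, 5, 10, 20, 50, 100], 186)); // 6
```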
@@ -29,7 +29,7 @@ You might exclaim: So clean! The greedy algorithm solves the coin change problem

**Greedy algorithms are not only straightforward and simple to implement, but they are also usually very efficient**. In the code above, if the smallest coin denomination is $\min(coins)$, the greedy choice loops at most $amt / \min(coins)$ times, giving a time complexity of $O(amt / \min(coins))$. This is an order of magnitude smaller than the time complexity of the dynamic programming solution, which is $O(n \times amt)$.

-However, **for some combinations of coin denominations, greedy algorithms cannot find the optimal solution**. The following figure provides two examples.
+However, **for some combinations of coin denominations, greedy algorithms cannot find the optimal solution**. The figure below provides two examples.

- **Positive example $coins = [1, 5, 10, 20, 50, 100]$**: In this coin combination, given any $amt$, the greedy algorithm can find the optimal solution.
- **Negative example $coins = [1, 20, 50]$**: Suppose $amt = 60$, the greedy algorithm can only find the combination $50 + 1 \times 10$, totaling 11 coins, but dynamic programming can find the optimal solution of $20 + 20 + 20$, needing only 3 coins.
@@ -6,7 +6,7 @@

    The capacity of the container is the product of the height and the width (area), where the height is determined by the shorter partition, and the width is the difference in array indices between the two partitions.

-    Please select two partitions in the array that maximize the container's capacity and return this maximum capacity. An example is shown in the following figure.
+    Please select two partitions in the array that maximize the container's capacity and return this maximum capacity. An example is shown in the figure below.

    ![Example data for the maximum capacity problem](max_capacity_problem.assets/max_capacity_example.png)
@@ -22,11 +22,11 @@ Assuming the length of the array is $n$, the number of combinations of two parti

### Determination of a greedy strategy

-There is a more efficient solution to this problem. As shown in the following figure, we select a state $[i, j]$ where the indices $i < j$ and the height $ht[i] < ht[j]$, meaning $i$ is the shorter partition, and $j$ is the taller one.
+There is a more efficient solution to this problem. As shown in the figure below, we select a state $[i, j]$ where the indices $i < j$ and the height $ht[i] < ht[j]$, meaning $i$ is the shorter partition, and $j$ is the taller one.

![Initial state](max_capacity_problem.assets/max_capacity_initial_state.png)

-As shown in the following figure, **if we move the taller partition $j$ closer to the shorter partition $i$, the capacity will definitely decrease**.
+As shown in the figure below, **if we move the taller partition $j$ closer to the shorter partition $i$, the capacity will definitely decrease**.

This is because when moving the taller partition $j$, the width $j-i$ definitely decreases; and since the height is determined by the shorter partition, the height can only remain the same (if $i$ remains the shorter partition) or decrease (if the moved $j$ becomes the shorter partition).
@ -38,7 +38,7 @@ Conversely, **we can only possibly increase the capacity by moving the shorter p
|
|||
|
||||
This leads us to the greedy strategy for this problem: initialize two pointers at the ends of the container, and in each round, move the pointer corresponding to the shorter partition inward until the two pointers meet.
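A minimal Python sketch of this two-pointer strategy follows; the name `max_capacity` and the sample heights are illustrative assumptions, not the book's reference code:

```python
def max_capacity(ht: list[int]) -> int:
    """Two pointers: always move the one at the shorter partition inward."""
    i, j = 0, len(ht) - 1
    res = 0
    while i < j:
        # capacity = width * height of the shorter partition
        cap = (j - i) * min(ht[i], ht[j])
        res = max(res, cap)
        # moving the shorter partition may raise the height; moving the taller cannot
        if ht[i] < ht[j]:
            i += 1
        else:
            j -= 1
    return res

print(max_capacity([3, 8, 5, 2, 7, 7, 3, 4]))  # 28
```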
|
||||
|
||||
The following figures illustrate the execution of the greedy strategy.
|
||||
The figure below illustrates the execution of the greedy strategy.
|
||||
|
||||
1. Initially, the pointers $i$ and $j$ are positioned at the ends of the array.
|
||||
2. Calculate the current state's capacity $cap[i, j]$ and update the maximum capacity.
|
||||
|
@ -86,7 +86,7 @@ The variables $i$, $j$, and $res$ use a constant amount of extra space, **thus t
|
|||
|
||||
The reason why the greedy method is faster than enumeration is that each round of greedy selection "skips" some states.
|
||||
|
||||
For example, under the state $cap[i, j]$ where $i$ is the shorter partition and $j$ is the taller partition, greedily moving the shorter partition $i$ inward by one step leads to the "skipped" states shown below. **This means that these states' capacities cannot be verified later**.
|
||||
For example, under the state $cap[i, j]$ where $i$ is the shorter partition and $j$ is the taller partition, greedily moving the shorter partition $i$ inward by one step leads to the "skipped" states shown in the figure below. **This means that these states' capacities cannot be verified later**.
|
||||
|
||||
$$
|
||||
cap[i, i+1], cap[i, i+2], \dots, cap[i, j-2], cap[i, j-1]
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
!!! question
|
||||
|
||||
Given a positive integer $n$, split it into at least two positive integers that sum up to $n$, and find the maximum product of these integers, as illustrated below.
|
||||
Given a positive integer $n$, split it into at least two positive integers that sum up to $n$, and find the maximum product of these integers, as illustrated in the figure below.
|
||||
|
||||
![Definition of the maximum product cutting problem](max_product_cutting_problem.assets/max_product_cutting_definition.png)
|
||||
|
||||
|
@ -32,7 +32,7 @@ n & \geq 4
|
|||
\end{aligned}
|
||||
$$
|
||||
|
||||
As shown below, when $n \geq 4$, splitting out a $2$ increases the product, **which indicates that integers greater than or equal to $4$ should be split**.
|
||||
As shown in the figure below, when $n \geq 4$, splitting out a $2$ increases the product, **which indicates that integers greater than or equal to $4$ should be split**.
|
||||
|
||||
**Greedy strategy one**: If the splitting scheme includes factors $\geq 4$, they should be further split. The final split should only include factors $1$, $2$, and $3$.
|
||||
|
||||
|
@ -40,7 +40,7 @@ As shown below, when $n \geq 4$, splitting out a $2$ increases the product, **wh
|
|||
|
||||
Next, consider which factor is optimal. Among the factors $1$, $2$, and $3$, clearly $1$ is the worst, as $1 \times (n-1) < n$ always holds, meaning splitting out $1$ actually decreases the product.
|
||||
|
||||
As shown below, when $n = 6$, $3 \times 3 > 2 \times 2 \times 2$. **This means splitting out $3$ is better than splitting out $2$**.
|
||||
As shown in the figure below, when $n = 6$, $3 \times 3 > 2 \times 2 \times 2$. **This means splitting out $3$ is better than splitting out $2$**.
|
||||
|
||||
**Greedy strategy two**: In the splitting scheme, there should be at most two $2$s. Because three $2$s can always be replaced by two $3$s to obtain a higher product.
|
||||
|
||||
|
@ -55,7 +55,7 @@ From the above, the following greedy strategies can be derived.
|
|||
|
||||
### Code implementation
|
||||
|
||||
As shown below, we do not need to use loops to split the integer but can use the floor division operation to get the number of $3$s, $a$, and the modulo operation to get the remainder, $b$, thus:
|
||||
As shown in the figure below, we do not need to use loops to split the integer but can use the floor division operation to get the number of $3$s, $a$, and the modulo operation to get the remainder, $b$, thus:
|
||||
|
||||
$$
|
||||
n = 3a + b
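Assuming this decomposition, a compact Python sketch might look as follows (the function name and test value are ours):

```python
def max_product_cutting(n: int) -> int:
    """Cut n into factors of 3 (and at most two 2s) to maximize the product."""
    if n <= 3:
        return 1 * (n - 1)       # n <= 3 forces a factor of 1 to be split out
    a, b = n // 3, n % 3         # n = 3a + b
    if b == 1:
        return 3 ** (a - 1) * 4  # replace one 3 + 1 with 2 + 2
    if b == 2:
        return 3 ** a * 2        # keep the remainder 2 as a factor
    return 3 ** a                # b == 0: split entirely into 3s

print(max_product_cutting(10))  # 36, from 3 + 3 + 2 + 2
```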
|
||||
|
|
|
@ -11,7 +11,7 @@ There are mainly two methods for improving the structure of hash tables: "Separa
|
|||
|
||||
## Separate chaining
|
||||
|
||||
In the original hash table, each bucket can store only one key-value pair. "Separate chaining" converts a single element into a linked list, treating key-value pairs as list nodes, storing all colliding key-value pairs in the same linked list. The figure below shows an example of a hash table with separate chaining.
|
||||
In the original hash table, each bucket can store only one key-value pair. <u>Separate chaining</u> converts a single element into a linked list, treating key-value pairs as list nodes, storing all colliding key-value pairs in the same linked list. The figure below shows an example of a hash table with separate chaining.
|
||||
|
||||
![Separate chaining hash table](hash_collision.assets/hash_table_chaining.png)
|
||||
|
||||
|
@ -26,7 +26,6 @@ Separate chaining has the following limitations:
|
|||
- **Increased Space Usage**: The linked list contains node pointers, which consume more memory space than arrays.
|
||||
- **Reduced Query Efficiency**: This is because linear traversal of the linked list is required to find the corresponding element.
|
||||
|
||||
|
||||
The code below provides a simple implementation of a separate chaining hash table, with two things to note:
|
||||
|
||||
- Lists (dynamic arrays) are used instead of linked lists for simplicity. In this setup, the hash table (array) contains multiple buckets, each of which is a list.
|
||||
|
@ -40,7 +39,7 @@ It's worth noting that when the linked list is very long, the query efficiency $
|
|||
|
||||
## Open addressing
|
||||
|
||||
"Open addressing" does not introduce additional data structures but instead handles hash collisions through "multiple probing". The probing methods mainly include linear probing, quadratic probing, and double hashing.
|
||||
<u>Open addressing</u> does not introduce additional data structures but instead handles hash collisions through "multiple probing". The probing methods mainly include linear probing, quadratic probing, and double hashing.
|
||||
|
||||
Let's use linear probing as an example to introduce the mechanism of open addressing hash tables.
|
||||
|
||||
|
@ -51,7 +50,6 @@ Linear probing uses a fixed-step linear search for probing, differing from ordin
|
|||
- **Inserting Elements**: Calculate the bucket index using the hash function. If the bucket already contains an element, linearly traverse forward from the conflict position (usually with a step size of $1$) until an empty bucket is found, then insert the element.
|
||||
- **Searching for Elements**: If a hash collision is encountered, use the same step size to linearly traverse forward until the corresponding element is found and return `value`; if an empty bucket is encountered, it means the target element is not in the hash table, so return `None`.
|
||||
|
||||
|
||||
The figure below shows the distribution of key-value pairs in an open addressing (linear probing) hash table. According to this hash function, keys with the same last two digits will be mapped to the same bucket. Through linear probing, they are stored sequentially in that bucket and the buckets below it.
|
||||
|
||||
![Distribution of key-value pairs in open addressing (linear probing) hash table](hash_collision.assets/hash_table_linear_probing.png)
|
||||
|
@ -62,7 +60,7 @@ It's important to note that **we cannot directly delete elements in an open addr
|
|||
|
||||
![Query issues caused by deletion in open addressing](hash_collision.assets/hash_table_open_addressing_deletion.png)
|
||||
|
||||
To solve this problem, we can adopt the "lazy deletion" mechanism: instead of directly removing elements from the hash table, **use a constant `TOMBSTONE` to mark the bucket**. In this mechanism, both `None` and `TOMBSTONE` represent empty buckets and can hold key-value pairs. However, when linear probing encounters `TOMBSTONE`, it should continue traversing since there may still be key-value pairs below it.
|
||||
To solve this problem, we can adopt the <u>lazy deletion</u> mechanism: instead of directly removing elements from the hash table, **use a constant `TOMBSTONE` to mark the bucket**. In this mechanism, both `None` and `TOMBSTONE` represent empty buckets and can hold key-value pairs. However, when linear probing encounters `TOMBSTONE`, it should continue traversing since there may still be key-value pairs below it.
|
||||
|
||||
However, **lazy deletion may accelerate the performance degradation of the hash table**. Every deletion operation produces a delete mark, and as `TOMBSTONE` increases, the search time will also increase because linear probing may need to skip multiple `TOMBSTONE` to find the target element.
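To illustrate, here is a bare-bones Python sketch of linear probing with lazy deletion; the class layout, the `TOMBSTONE` sentinel object, and the fixed capacity are simplifying assumptions (no resizing, and `put` does not search past tombstones for an existing key):

```python
class HashMapOpenAddressing:
    """Linear probing with lazy deletion; fixed capacity, no resizing (a sketch)."""

    def __init__(self, capacity: int = 13):
        self.capacity = capacity
        self.buckets = [None] * capacity  # None means "never occupied"
        self.TOMBSTONE = object()         # marks buckets whose pair was removed

    def _index(self, key: int) -> int:
        return key % self.capacity

    def put(self, key: int, val: str):
        # simplification: stops at the first free slot, so it does not detect
        # an existing key that lies beyond a TOMBSTONE
        i = self._index(key)
        while self.buckets[i] is not None and self.buckets[i] is not self.TOMBSTONE:
            if self.buckets[i][0] == key:
                self.buckets[i] = (key, val)  # update existing key
                return
            i = (i + 1) % self.capacity       # probe forward with step 1
        self.buckets[i] = (key, val)

    def get(self, key: int):
        i = self._index(key)
        # stop only at None; skip TOMBSTONE, as pairs may still lie beyond it
        while self.buckets[i] is not None:
            if self.buckets[i] is not self.TOMBSTONE and self.buckets[i][0] == key:
                return self.buckets[i][1]
            i = (i + 1) % self.capacity
        return None

    def remove(self, key: int):
        i = self._index(key)
        while self.buckets[i] is not None:
            if self.buckets[i] is not self.TOMBSTONE and self.buckets[i][0] == key:
                self.buckets[i] = self.TOMBSTONE  # mark instead of clearing
                return
            i = (i + 1) % self.capacity
```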
|
||||
|
||||
|
@ -95,7 +93,6 @@ As the name suggests, the double hashing method uses multiple hash functions $f_
|
|||
- **Inserting Elements**: If hash function $f_1(x)$ encounters a conflict, it tries $f_2(x)$, and so on, until an empty position is found and the element is inserted.
|
||||
- **Searching for Elements**: Search in the same order of hash functions until the target element is found and returned; if an empty position is encountered or all hash functions have been tried, it indicates the element is not in the hash table, then return `None`.
|
||||
|
||||
|
||||
Compared to linear probing, the double hashing method is less prone to clustering, but multiple hash functions introduce additional computational overhead.
|
||||
|
||||
!!! tip
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Hash table
|
||||
|
||||
A "hash table", also known as a "hash map", achieves efficient element querying by establishing a mapping between keys and values. Specifically, when we input a `key` into the hash table, we can retrieve the corresponding `value` in $O(1)$ time.
|
||||
A <u>hash table</u> achieves efficient element querying by establishing a mapping between keys and values. Specifically, when we input a `key` into the hash table, we can retrieve the corresponding `value` in $O(1)$ time.
|
||||
|
||||
As shown in the figure below, given $n$ students, each with two pieces of data: "name" and "student number". If we want to implement a query feature that returns the corresponding name when given a student number, we can use the hash table shown in the figure below.
|
||||
|
||||
|
@ -486,9 +486,9 @@ There are three common ways to traverse a hash table: traversing key-value pairs
|
|||
|
||||
## Simple implementation of hash table
|
||||
|
||||
First, let's consider the simplest case: **implementing a hash table using just an array**. In the hash table, each empty slot in the array is called a "bucket", and each bucket can store one key-value pair. Therefore, the query operation involves finding the bucket corresponding to the `key` and retrieving the `value` from it.
|
||||
First, let's consider the simplest case: **implementing a hash table using just an array**. In the hash table, each empty slot in the array is called a <u>bucket</u>, and each bucket can store one key-value pair. Therefore, the query operation involves finding the bucket corresponding to the `key` and retrieving the `value` from it.
|
||||
|
||||
So, how do we locate the appropriate bucket based on the `key`? This is achieved through a "hash function". The role of the hash function is to map a larger input space to a smaller output space. In a hash table, the input space is all possible keys, and the output space is all buckets (array indices). In other words, input a `key`, **and we can use the hash function to determine the storage location of the corresponding key-value pair in the array**.
|
||||
So, how do we locate the appropriate bucket based on the `key`? This is achieved through a <u>hash function</u>. The role of the hash function is to map a larger input space to a smaller output space. In a hash table, the input space is all possible keys, and the output space is all buckets (array indices). In other words, input a `key`, **and we can use the hash function to determine the storage location of the corresponding key-value pair in the array**.
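For instance, a hash function that keeps the last two digits of the key can be sketched in a few lines of Python (the student number and name are illustrative):

```python
def hash_func(key: int) -> int:
    """Map a student number to a bucket index via its last two digits."""
    return key % 100

buckets = [None] * 100               # the array: 100 buckets, one pair each
buckets[hash_func(12836)] = "Alice"  # store: key 12836 -> bucket 36
print(buckets[hash_func(12836)])     # query: Alice
```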
|
||||
|
||||
The calculation process of the hash function for a given `key` is divided into the following two steps:
|
||||
|
||||
|
@ -522,7 +522,7 @@ For the hash function in the above example, if the last two digits of the input
|
|||
20336 % 100 = 36
|
||||
```
|
||||
|
||||
As shown in the figure below, both student numbers point to the same name, which is obviously incorrect. This situation where multiple inputs correspond to the same output is known as "hash collision".
|
||||
As shown in the figure below, both student numbers point to the same name, which is obviously incorrect. This situation where multiple inputs correspond to the same output is known as <u>hash collision</u>.
|
||||
|
||||
![Example of hash collision](hash_map.assets/hash_collision.png)
|
||||
|
||||
|
@ -534,4 +534,4 @@ As shown in the figure below, before expansion, key-value pairs `(136, A)` and `
|
|||
|
||||
Similar to array expansion, resizing a hash table requires migrating all key-value pairs from the original hash table to the new one, which is time-consuming. Furthermore, since the capacity `capacity` of the hash table changes, we need to recalculate the storage positions of all key-value pairs using the hash function, which adds to the computational overhead of the resizing process. Therefore, programming languages often reserve a sufficiently large capacity for the hash table to prevent frequent resizing.
|
||||
|
||||
The "load factor" is an important concept for hash tables. It is defined as the ratio of the number of elements in the hash table to the number of buckets. It is used to measure the severity of hash collisions and **is often used as a trigger for resizing the hash table**. For example, in Java, when the load factor exceeds $0.75$, the system will resize the hash table to twice its original size.
|
||||
The <u>load factor</u> is an important concept for hash tables. It is defined as the ratio of the number of elements in the hash table to the number of buckets. It is used to measure the severity of hash collisions and **is often used as a trigger for resizing the hash table**. For example, in Java, when the load factor exceeds $0.75$, the system will resize the hash table to twice its original size.
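As a worked example with numbers of our own choosing: a hash table holding $12$ key-value pairs in $16$ buckets has a load factor of

$$
\frac{12}{16} = 0.75
$$

which is exactly Java's default threshold, so inserting one more pair would trigger a resize to $32$ buckets.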
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# Heap
|
||||
|
||||
A "heap" is a complete binary tree that satisfies specific conditions and can be mainly divided into two types, as shown in the figure below.
|
||||
A <u>heap</u> is a complete binary tree that satisfies specific conditions and can be mainly divided into two types, as shown in the figure below.
|
||||
|
||||
- "Min heap": The value of any node $\leq$ the values of its child nodes.
|
||||
- "Max heap": The value of any node $\geq$ the values of its child nodes.
|
||||
- <u>Min heap</u>: The value of any node $\leq$ the values of its child nodes.
|
||||
- <u>Max heap</u>: The value of any node $\geq$ the values of its child nodes.
|
||||
|
||||
![Min heap and max heap](heap.assets/min_heap_and_max_heap.png)
|
||||
|
||||
|
@ -15,7 +15,7 @@ As a special case of a complete binary tree, heaps have the following characteri
|
|||
|
||||
## Common operations on heaps
|
||||
|
||||
It should be noted that many programming languages provide a "priority queue," which is an abstract data structure defined as a queue with priority sorting.
|
||||
It should be noted that many programming languages provide a <u>priority queue</u>, which is an abstract data structure defined as a queue with priority sorting.
|
||||
|
||||
In fact, **heaps are often used to implement priority queues, with max heaps equivalent to priority queues where elements are dequeued in descending order**. From a usage perspective, we can consider "priority queue" and "heap" as equivalent data structures. Therefore, this book does not make a special distinction between the two, uniformly referring to them as "heap."
|
||||
|
||||
|
@ -448,7 +448,7 @@ The top element of the heap is the root node of the binary tree, which is also t
|
|||
|
||||
### Inserting an element into the heap
|
||||
|
||||
Given an element `val`, we first add it to the bottom of the heap. After addition, since `val` may be larger than other elements in the heap, the heap's integrity might be compromised, **thus it's necessary to repair the path from the inserted node to the root node**. This operation is called "heapifying".
|
||||
Given an element `val`, we first add it to the bottom of the heap. After addition, since `val` may be larger than other elements in the heap, the heap's integrity might be compromised, **thus it's necessary to repair the path from the inserted node to the root node**. This operation is called <u>heapifying</u>.
|
||||
|
||||
Starting from the inserted node, **we perform heapify from bottom to top**. As shown in the figure below, we compare the value of the inserted node with that of its parent node, and if the inserted node is larger, we swap them. We then repeat this operation, repairing each node along the path from bottom to top, until we pass the root node or encounter a node that does not need to be swapped.
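A minimal Python sketch of this bottom-to-top heapify for an array-based max heap (names and driver data are our own):

```python
def sift_up(heap: list[int], i: int):
    """Max heap: repair the path from node i up toward the root."""
    while i > 0:
        p = (i - 1) // 2          # parent index in the array representation
        if heap[p] >= heap[i]:
            break                 # heap property holds, stop early
        heap[p], heap[i] = heap[i], heap[p]
        i = p

def push(heap: list[int], val: int):
    heap.append(val)              # add to the bottom of the heap
    sift_up(heap, len(heap) - 1)  # then heapify from bottom to top

h = []
for v in [1, 3, 2, 5, 4]:
    push(h, v)
print(h)  # root h[0] is the maximum: 5
```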
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ This essential skill for elementary students, looking up a dictionary, is actual
|
|||
|
||||
The above method of organizing playing cards is essentially the "Insertion Sort" algorithm, which is very efficient for small datasets. The sorting functions of many programming languages include insertion sort.
|
||||
|
||||
**Example 3: Making Change**. Suppose we buy goods worth $69$ yuan at a supermarket and give the cashier $100$ yuan, then the cashier needs to give us $31$ yuan in change. They would naturally complete the thought process as shown below.
|
||||
**Example 3: Making Change**. Suppose we buy goods worth $69$ yuan at a supermarket and give the cashier $100$ yuan, then the cashier needs to give us $31$ yuan in change. They would naturally complete the thought process as shown in the figure below.
|
||||
|
||||
1. The options are currencies smaller than $31$, including $1$, $5$, $10$, and $20$.
|
||||
2. Take out the largest $20$ from the options, leaving $31 - 20 = 11$.
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
## Definition of an algorithm
|
||||
|
||||
An "algorithm" is a set of instructions or steps to solve a specific problem within a finite amount of time. It has the following characteristics:
|
||||
An <u>algorithm</u> is a set of instructions or steps to solve a specific problem within a finite amount of time. It has the following characteristics:
|
||||
|
||||
- The problem is clearly defined, including unambiguous definitions of input and output.
|
||||
- The algorithm is feasible, meaning it can be completed within a finite number of steps, time, and memory space.
|
||||
|
@ -10,7 +10,7 @@ An "algorithm" is a set of instructions or steps to solve a specific problem wit
|
|||
|
||||
## Definition of a data structure
|
||||
|
||||
A "data structure" is a way of organizing and storing data in a computer, with the following design goals:
|
||||
A <u>data structure</u> is a way of organizing and storing data in a computer, with the following design goals:
|
||||
|
||||
- Minimize space occupancy to save computer memory.
|
||||
- Make data operations as fast as possible, covering data access, addition, deletion, updating, etc.
|
||||
|
|
|
@ -20,7 +20,7 @@ If you are an algorithm expert, we look forward to receiving your valuable sugge
|
|||
|
||||
## Content structure
|
||||
|
||||
The main content of the book is shown in the following figure.
|
||||
The main content of the book is shown in the figure below.
|
||||
|
||||
- **Complexity analysis**: explores aspects and methods for evaluating data structures and algorithms. Covers methods of deriving time complexity and space complexity, along with common types and examples.
|
||||
- **Data structures**: focuses on fundamental data types, classification methods, definitions, pros and cons, common operations, types, applications, and implementation methods of data structures such as array, linked list, stack, queue, hash table, tree, heap, graph, etc.
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
!!! question
|
||||
|
||||
Given an array `nums` of length $n$, with elements arranged in ascending order and non-repeating. Please find and return the index of element `target` in this array. If the array does not contain the element, return $-1$. An example is shown below.
|
||||
Given an array `nums` of length $n$, whose elements are arranged in ascending order and contain no duplicates, please find and return the index of element `target` in this array. If the array does not contain the element, return $-1$. An example is shown in the figure below.
|
||||
|
||||
![Binary search example data](binary_search.assets/binary_search_example.png)
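A standard double-closed-interval implementation might look like this in Python (a sketch with our own driver data, not the book's listing):

```python
def binary_search(nums: list[int], target: int) -> int:
    """Search a sorted, non-repeating array; return the index or -1."""
    i, j = 0, len(nums) - 1          # search the closed interval [i, j]
    while i <= j:
        m = (i + j) // 2
        if nums[m] < target:
            i = m + 1                # target lies in [m + 1, j]
        elif nums[m] > target:
            j = m - 1                # target lies in [i, m - 1]
        else:
            return m
    return -1                        # not found

print(binary_search([1, 3, 6, 8, 12, 15, 23, 26, 31, 35], 6))  # 2
```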
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ This method has a time complexity of $O(n^2)$ and a space complexity of $O(1)$,
|
|||
|
||||
## Hash search: trading space for time
|
||||
|
||||
Consider using a hash table, with key-value pairs being the array elements and their indices, respectively. Loop through the array, performing the steps shown in the figures below each round.
|
||||
Consider using a hash table, with key-value pairs being the array elements and their indices, respectively. Loop through the array, performing the steps shown in the figure below each round.
|
||||
|
||||
1. Check if the number `target - nums[i]` is in the hash table. If so, directly return the indices of these two elements.
|
||||
2. Add the key-value pair `nums[i]` and index `i` to the hash table.
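Putting the two steps together, a one-pass Python sketch (names and driver data are ours):

```python
def two_sum_hash(nums: list[int], target: int) -> list[int]:
    """One pass: check the complement, then record the current element."""
    seen = {}                        # value -> index
    for i, num in enumerate(nums):
        if target - num in seen:     # step 1: complement already stored?
            return [seen[target - num], i]
        seen[num] = i                # step 2: record (value, index)
    return []

print(two_sum_hash([2, 7, 11, 15], 13))  # [0, 2], since 2 + 11 = 13
```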
|
||||
|
|
|
@ -38,7 +38,7 @@ However, **using these algorithms often requires data preprocessing**. For examp
|
|||
|
||||
## Choosing a search method
|
||||
|
||||
Given a set of data of size $n$, we can use linear search, binary search, tree search, hash search, and other methods to search for the target element from it. The working principles of these methods are shown in the following figure.
|
||||
Given a set of data of size $n$, we can use linear search, binary search, tree search, hash search, and other methods to search for the target element from it. The working principles of these methods are shown in the figure below.
|
||||
|
||||
![Various search strategies](searching_algorithm_revisited.assets/searching_algorithms.png)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
<u>Bubble sort</u> achieves sorting by continuously comparing and swapping adjacent elements. This process resembles bubbles rising from the bottom to the top, hence the name bubble sort.
|
||||
|
||||
As shown in the following figures, the bubbling process can be simulated using element swap operations: starting from the leftmost end of the array and moving right, sequentially compare the size of adjacent elements. If "left element > right element," then swap them. After the traversal, the largest element will be moved to the far right end of the array.
|
||||
As shown in the figure below, the bubbling process can be simulated using element swap operations: starting from the leftmost end of the array and moving right, sequentially compare the size of adjacent elements. If "left element > right element," then swap them. After the traversal, the largest element will be moved to the far right end of the array.
|
||||
|
||||
=== "<1>"
|
||||
![Simulating bubble process using element swap](bubble_sort.assets/bubble_operation_step1.png)
|
||||
|
@ -27,7 +27,7 @@ As shown in the following figures, the bubbling process can be simulated using e
|
|||
|
||||
## Algorithm process
|
||||
|
||||
Assuming the length of the array is $n$, the steps of bubble sort are shown below.
|
||||
Assuming the length of the array is $n$, the steps of bubble sort are shown in the figure below.
|
||||
|
||||
1. First, perform a "bubble" on $n$ elements, **swapping the largest element to its correct position**.
|
||||
2. Next, perform a "bubble" on the remaining $n - 1$ elements, **swapping the second largest element to its correct position**.
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
## Simple implementation
|
||||
|
||||
Let's start with a simple example. Given an array `nums` of length $n$, where all elements are "non-negative integers", the overall process of counting sort is illustrated in the following diagram.
|
||||
Let's start with a simple example. Given an array `nums` of length $n$, where all elements are "non-negative integers", the overall process of counting sort is illustrated in the figure below.
|
||||
|
||||
1. Traverse the array to find the maximum number, denoted as $m$, then create an auxiliary array `counter` of length $m + 1$.
|
||||
2. **Use `counter` to count the occurrence of each number in `nums`**, where `counter[num]` corresponds to the occurrence of the number `num`. The counting method is simple, just traverse `nums` (suppose the current number is `num`), and increase `counter[num]` by $1$ each round.
|
||||
|
@ -37,7 +37,7 @@ $$
|
|||
1. Fill `num` into the array `res` at the index `prefix[num] - 1`.
|
||||
2. Reduce the prefix sum `prefix[num]` by $1$, thus obtaining the next index to place `num`.
|
||||
|
||||
After the traversal, the array `res` contains the sorted result, and finally, `res` replaces the original array `nums`. The complete counting sort process is shown in the figures below.
|
||||
After the traversal, the array `res` contains the sorted result, and finally, `res` replaces the original array `nums`. The complete counting sort process is shown in the figure below.
|
||||
|
||||
=== "<1>"
|
||||
![Counting sort process](counting_sort.assets/counting_sort_step1.png)
|
||||
|
|
|
@ -10,7 +10,7 @@ The figure below shows the process of inserting an element into an array. Assumi
|
|||
|
||||
## Algorithm process
|
||||
|
||||
The overall process of insertion sort is shown in the following figure.
|
||||
The overall process of insertion sort is shown in the figure below.
|
||||
|
||||
1. Initially, the first element of the array is sorted.
|
||||
2. The second element of the array is taken as `base`, and after inserting it into the correct position, **the first two elements of the array are sorted**.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Merge sort
|
||||
|
||||
<u>Merge sort</u> is a sorting algorithm based on the divide-and-conquer strategy, involving the "divide" and "merge" phases shown in the following figure.
|
||||
<u>Merge sort</u> is a sorting algorithm based on the divide-and-conquer strategy, involving the "divide" and "merge" phases shown in the figure below.
|
||||
|
||||
1. **Divide phase**: Recursively split the array from the midpoint, transforming the sorting problem of a long array into that of shorter arrays.
|
||||
2. **Merge phase**: Stop dividing when the length of the sub-array is 1, start merging, and continuously combine two shorter ordered arrays into one longer ordered array until the process is complete.
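A short Python sketch of the two phases; unlike an in-place implementation, this version returns a new list for brevity:

```python
def merge_sort(nums: list[int]) -> list[int]:
    """Divide at the midpoint, then merge two ordered halves."""
    if len(nums) <= 1:
        return nums                   # a length-1 array needs no sorting
    mid = len(nums) // 2
    left = merge_sort(nums[:mid])     # divide phase: split recursively
    right = merge_sort(nums[mid:])
    res = []                          # merge phase: combine two ordered arrays
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            res.append(left[i])
            i += 1
        else:
            res.append(right[j])
            j += 1
    return res + left[i:] + right[j:]

print(merge_sort([7, 3, 2, 6, 0, 1, 5, 4]))  # [0, 1, 2, 3, 4, 5, 6, 7]
```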
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
<u>Quick sort</u> is a sorting algorithm based on the divide and conquer strategy, known for its efficiency and wide application.
|
||||
|
||||
The core operation of quick sort is "pivot partitioning," aiming to: select an element from the array as the "pivot," move all elements smaller than the pivot to its left, and move elements greater than the pivot to its right. Specifically, the pivot partitioning process is illustrated as follows.
|
||||
The core operation of quick sort is "pivot partitioning," aiming to: select an element from the array as the "pivot," move all elements smaller than the pivot to its left, and move elements greater than the pivot to its right. Specifically, the pivot partitioning process is illustrated in the figure below.
|
||||
|
||||
1. Select the leftmost element of the array as the pivot, and initialize two pointers `i` and `j` at both ends of the array.
|
||||
2. Set up a loop where each round uses `i` (`j`) to find the first element larger (smaller) than the pivot, then swap these two elements.
|
||||
|
@ -47,7 +47,7 @@ After the pivot partitioning, the original array is divided into three parts: le
|
|||
|
||||
## Algorithm process
|
||||
|
||||
The overall process of quick sort is shown in the following figure.
|
||||
The overall process of quick sort is shown in the figure below.
|
||||
|
||||
1. First, perform a "pivot partitioning" on the original array to obtain the unsorted left and right sub-arrays.
|
||||
2. Then, recursively perform "pivot partitioning" on both the left and right sub-arrays.
|
||||
|
|
|
@ -6,7 +6,7 @@ The previous section introduced counting sort, which is suitable for scenarios w
|
|||
|
||||
## Algorithm process
|
||||
|
||||
Taking the student ID data as an example, assuming the least significant digit is the $1^{st}$ and the most significant is the $8^{th}$, the radix sort process is illustrated in the following diagram.
|
||||
Taking the student ID data as an example, assuming the least significant digit is the $1^{st}$ and the most significant is the $8^{th}$, the radix sort process is illustrated in the figure below.
|
||||
|
||||
1. Initialize digit $k = 1$.
|
||||
2. Perform "counting sort" on the $k^{th}$ digit of the student IDs. After completion, the data will be sorted from smallest to largest based on the $k^{th}$ digit.
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
<u>Selection sort</u> works on a very simple principle: it starts a loop where each iteration selects the smallest element from the unsorted interval and moves it to the end of the sorted interval.
|
||||
|
||||
Suppose the length of the array is $n$, the algorithm flow of selection sort is as shown below.
|
||||
Suppose the length of the array is $n$; the algorithm flow of selection sort is shown in the figure below.
|
||||
|
||||
1. Initially, all elements are unsorted, i.e., the unsorted (index) interval is $[0, n-1]$.
|
||||
2. Select the smallest element in the interval $[0, n-1]$ and swap it with the element at index $0$. After this, the first element of the array is sorted.
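A Python sketch of this selection loop (driver data is illustrative):

```python
def selection_sort(nums: list[int]):
    """Each round selects the smallest unsorted element and swaps it forward."""
    n = len(nums)
    for i in range(n - 1):
        k = i
        for j in range(i + 1, n):  # find the smallest element in [i, n-1]
            if nums[j] < nums[k]:
                k = j
        nums[i], nums[k] = nums[k], nums[i]  # append it to the sorted interval

nums = [4, 1, 3, 1, 5, 2]
selection_sort(nums)
print(nums)  # [1, 1, 2, 3, 4, 5]
```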
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
<u>Sorting algorithms</u> are used to arrange a set of data in a specific order. Sorting algorithms have a wide range of applications because ordered data can usually be searched, analyzed, and processed more efficiently.
|
||||
|
||||
As shown in the following figure, the data types in sorting algorithms can be integers, floating point numbers, characters, or strings, etc. Sorting rules can be set according to needs, such as numerical size, character ASCII order, or custom rules.
|
||||
As shown in the figure below, the data types in sorting algorithms can be integers, floating point numbers, characters, or strings, etc. Sorting rules can be set according to needs, such as numerical size, character ASCII order, or custom rules.
|
||||
|
||||
![Data types and comparator examples](sorting_algorithm.assets/sorting_examples.png)
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
- Counting sort is a special case of bucket sort, which sorts by counting the occurrences of each data point. Counting sort is suitable for large datasets with a limited range of data and requires that data can be converted to positive integers.
|
||||
- Radix sort sorts data by sorting digit by digit, requiring data to be represented as fixed-length numbers.
|
||||
- Overall, we hope to find a sorting algorithm that has high efficiency, stability, in-place operation, and positive adaptability. However, like other data structures and algorithms, no sorting algorithm can meet all these conditions simultaneously. In practical applications, we need to choose the appropriate sorting algorithm based on the characteristics of the data.
|
||||
- The following figure compares mainstream sorting algorithms in terms of efficiency, stability, in-place nature, and adaptability.
|
||||
- The figure below compares mainstream sorting algorithms in terms of efficiency, stability, in-place nature, and adaptability.
|
||||
|
||||
![Sorting Algorithm Comparison](summary.assets/sorting_algorithms_comparison.png)
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Double-ended queue
|
||||
|
||||
In a queue, we can only delete elements from the head or add elements to the tail. As shown in the following diagram, a "double-ended queue (deque)" offers more flexibility, allowing the addition or removal of elements at both the head and the tail.
|
||||
In a queue, we can only delete elements from the head or add elements to the tail. As shown in the figure below, a <u>double-ended queue (deque)</u> offers more flexibility, allowing the addition or removal of elements at both the head and the tail.
|
||||
|
||||
![Operations in double-ended queue](deque.assets/deque_operations.png)
|
||||
|
||||
|
@ -10,14 +10,14 @@ The common operations in a double-ended queue are listed below, and the names of
|
|||
|
||||
<p align="center"> Table <id> Efficiency of double-ended queue operations </p>
|
||||
|
||||
| Method Name | Description | Time Complexity |
|
||||
| ------------- | --------------------------- | --------------- |
|
||||
| Method Name | Description | Time Complexity |
|
||||
| ------------- | -------------------------- | --------------- |
|
||||
| `pushFirst()` | Add an element to the head | $O(1)$ |
|
||||
| `pushLast()` | Add an element to the tail | $O(1)$ |
|
||||
| `popFirst()` | Remove the first element | $O(1)$ |
|
||||
| `popLast()` | Remove the last element | $O(1)$ |
|
||||
| `peekFirst()` | Access the first element | $O(1)$ |
|
||||
| `peekLast()` | Access the last element | $O(1)$ |
|
||||
| `pushLast()` | Add an element to the tail | $O(1)$ |
|
||||
| `popFirst()` | Remove the first element | $O(1)$ |
|
||||
| `popLast()` | Remove the last element | $O(1)$ |
|
||||
| `peekFirst()` | Access the first element | $O(1)$ |
|
||||
| `peekLast()` | Access the last element | $O(1)$ |
|
||||
|
||||
Similarly, we can directly use the double-ended queue classes implemented in programming languages:
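In Python, for example, `collections.deque` provides all six operations from the table above (a brief usage sketch):

```python
from collections import deque

d = deque()
d.append(2)         # pushLast: add an element to the tail
d.append(5)
d.appendleft(3)     # pushFirst: add an element to the head
print(d[0], d[-1])  # peekFirst, peekLast: 3 5
print(d.popleft())  # popFirst: 3
print(d.pop())      # popLast: 5
```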
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Queue
|
||||
|
||||
"Queue" is a linear data structure that follows the First-In-First-Out (FIFO) rule. As the name suggests, a queue simulates the phenomenon of lining up, where newcomers join the queue at the rear, and the person at the front leaves the queue first.
|
||||
A <u>queue</u> is a linear data structure that follows the First-In-First-Out (FIFO) rule. As the name suggests, a queue simulates the phenomenon of lining up, where newcomers join the queue at the rear, and the person at the front leaves the queue first.
|
||||
|
||||
As shown in the figure below, we call the front of the queue the "head" and the back the "tail." The operation of adding elements to the rear of the queue is termed "enqueue," and the operation of removing elements from the front is termed "dequeue."
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Stack
|
||||
|
||||
A "Stack" is a linear data structure that follows the principle of Last-In-First-Out (LIFO).
|
||||
A <u>stack</u> is a linear data structure that follows the principle of Last-In-First-Out (LIFO).
|
||||
|
||||
We can compare a stack to a pile of plates on a table. To access the bottom plate, one must first remove the plates on top. By replacing the plates with various types of elements (such as integers, characters, objects, etc.), we obtain the data structure known as a stack.
|
||||
|
||||
|
|
|
@ -24,8 +24,8 @@ A double-ended queue, which is a combination of a stack and a queue or two stack
|
|||
|
||||
**Q**: How exactly are undo and redo implemented?
|
||||
|
||||
Undo and redo operations are implemented using two stacks: Stack A for undo and Stack B for redo.
|
||||
Undo and redo operations are implemented using two stacks: Stack `A` for undo and Stack `B` for redo.
|
||||
|
||||
1. Each time a user performs an operation, it is pushed onto Stack A, and Stack B is cleared.
|
||||
2. When the user executes an "undo", the most recent operation is popped from Stack A and pushed onto Stack B.
|
||||
3. When the user executes a "redo", the most recent operation is popped from Stack B and pushed back onto Stack A.
|
||||
1. Each time a user performs an operation, it is pushed onto Stack `A`, and Stack `B` is cleared.
|
||||
2. When the user executes an "undo", the most recent operation is popped from Stack `A` and pushed onto Stack `B`.
|
||||
3. When the user executes a "redo", the most recent operation is popped from Stack `B` and pushed back onto Stack `A`.
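These three rules fit in a few lines of Python (a sketch with hypothetical operation strings):

```python
undo_stack, redo_stack = [], []  # Stack A and Stack B

def perform(op: str):
    undo_stack.append(op)  # 1. push the new operation onto A
    redo_stack.clear()     #    and clear B

def undo():
    if undo_stack:
        redo_stack.append(undo_stack.pop())  # 2. pop from A, push onto B

def redo():
    if redo_stack:
        undo_stack.append(redo_stack.pop())  # 3. pop from B, push back onto A

perform("type 'a'")
perform("type 'b'")
undo()
print(undo_stack, redo_stack)  # ["type 'a'"] ["type 'b'"]
```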
|
||||
|
|
|
@ -22,7 +22,7 @@ As shown in the figure below, given a non-perfect binary tree, the above method
|
|||
|
||||
![Level-order traversal sequence corresponds to multiple binary tree possibilities](array_representation_of_tree.assets/array_representation_without_empty.png)
|
||||
|
||||
To solve this problem, **we can consider explicitly writing out all `None` values in the level-order traversal sequence**. As shown in the following figure, after this treatment, the level-order traversal sequence can uniquely represent a binary tree. Example code is as follows:
|
||||
To solve this problem, **we can consider explicitly writing out all `None` values in the level-order traversal sequence**. As shown in the figure below, after this treatment, the level-order traversal sequence can uniquely represent a binary tree. Example code is as follows:
|
||||
|
||||
=== "Python"
|
||||
|
||||
|
|
|
@ -10,11 +10,11 @@ For example, in the perfect binary tree shown in the figure below, after inserti
|
|||
|
||||
![Degradation of an AVL tree after inserting nodes](avl_tree.assets/avltree_degradation_from_inserting_node.png)
|
||||
|
||||
In 1962, G. M. Adelson-Velsky and E. M. Landis proposed the "AVL Tree" in their paper "An algorithm for the organization of information". The paper detailed a series of operations to ensure that after continuously adding and removing nodes, the AVL tree would not degrade, thus maintaining the time complexity of various operations at $O(\log n)$ level. In other words, in scenarios where frequent additions, removals, searches, and modifications are needed, the AVL tree can always maintain efficient data operation performance, which has great application value.
|
||||
In 1962, G. M. Adelson-Velsky and E. M. Landis proposed the <u>AVL Tree</u> in their paper "An algorithm for the organization of information". The paper detailed a series of operations to ensure that after continuously adding and removing nodes, the AVL tree would not degrade, thus maintaining the time complexity of various operations at $O(\log n)$ level. In other words, in scenarios where frequent additions, removals, searches, and modifications are needed, the AVL tree can always maintain efficient data operation performance, which has great application value.
|
||||
|
||||
## Common terminology in AVL trees
|
||||
|
||||
An AVL tree is both a binary search tree and a balanced binary tree, satisfying all properties of these two types of binary trees, hence it is a "balanced binary search tree".
|
||||
An AVL tree is both a binary search tree and a balanced binary tree, satisfying all properties of these two types of binary trees, hence it is a <u>balanced binary search tree</u>.
|
||||
|
||||
### Node height
|
||||
|
||||
|
@ -231,7 +231,7 @@ The "node height" refers to the distance from that node to its farthest leaf nod
|
|||
|
||||
### Node balance factor
|
||||
|
||||
The "balance factor" of a node is defined as the height of the node's left subtree minus the height of its right subtree, with the balance factor of a null node defined as $0$. We will also encapsulate the functionality of obtaining the node balance factor into a function for easy use later on:
|
||||
The <u>balance factor</u> of a node is defined as the height of the node's left subtree minus the height of its right subtree, with the balance factor of a null node defined as $0$. We will also encapsulate the functionality of obtaining the node balance factor into a function for easy use later on:
|
||||
|
||||
```src
|
||||
[file]{avl_tree}-[class]{avl_tree}-[func]{balance_factor}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# Binary search tree
|
||||
|
||||
As shown in the figure below, a "binary search tree" satisfies the following conditions.
|
||||
As shown in the figure below, a <u>binary search tree</u> satisfies the following conditions.
|
||||
|
||||
1. For the root node, the value of all nodes in the left subtree < the value of the root node < the value of all nodes in the right subtree.
|
||||
1. For the root node, the value of all nodes in the left subtree $<$ the value of the root node $<$ the value of all nodes in the right subtree.
|
||||
2. The left and right subtrees of any node are also binary search trees, i.e., they satisfy condition `1.` as well.
|
||||
|
||||
![Binary search tree](binary_search_tree.assets/binary_search_tree.png)
|
||||
|
@ -69,7 +69,7 @@ As shown in the figure below, when the degree of the node to be removed is $1$,
|
|||
|
||||
![Removing a node in a binary search tree (degree 1)](binary_search_tree.assets/bst_remove_case2.png)
|
||||
|
||||
When the degree of the node to be removed is $2$, we cannot remove it directly, but need to use a node to replace it. To maintain the property of the binary search tree "left subtree < root node < right subtree," **this node can be either the smallest node of the right subtree or the largest node of the left subtree**.
|
||||
When the degree of the node to be removed is $2$, we cannot remove it directly, but need to use a node to replace it. To maintain the property of the binary search tree "left subtree $<$ root node $<$ right subtree," **this node can be either the smallest node of the right subtree or the largest node of the left subtree**.
|
||||
|
||||
Assuming we choose the smallest node of the right subtree (the next node in in-order traversal), then the removal operation proceeds as shown in the figure below.
|
||||
|
||||
|
@ -96,7 +96,7 @@ The operation of removing a node also uses $O(\log n)$ time, where finding the n
|
|||
|
||||
### In-order traversal is ordered
|
||||
|
||||
As shown in the figure below, the in-order traversal of a binary tree follows the "left $\rightarrow$ root $\rightarrow$ right" traversal order, and a binary search tree satisfies the size relationship "left child node < root node < right child node".
|
||||
As shown in the figure below, the in-order traversal of a binary tree follows the "left $\rightarrow$ root $\rightarrow$ right" traversal order, and a binary search tree satisfies the size relationship "left child node $<$ root node $<$ right child node".
|
||||
|
||||
This means that in-order traversal in a binary search tree always traverses the next smallest node first, thus deriving an important property: **The in-order traversal sequence of a binary search tree is ascending**.
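A quick Python sketch that checks this property on a three-node tree (the node class and values are our own):

```python
class TreeNode:
    def __init__(self, val: int):
        self.val = val
        self.left = None
        self.right = None

def in_order(root) -> list[int]:
    """Left -> root -> right; on a BST this yields an ascending sequence."""
    if root is None:
        return []
    return in_order(root.left) + [root.val] + in_order(root.right)

# a small binary search tree:  4
#                             / \
#                            2   6
root = TreeNode(4)
root.left, root.right = TreeNode(2), TreeNode(6)
print(in_order(root))  # [2, 4, 6], ascending
```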
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Binary tree
|
||||
|
||||
A "binary tree" is a non-linear data structure that represents the hierarchical relationship between ancestors and descendants, embodying the divide-and-conquer logic of "splitting into two". Similar to a linked list, the basic unit of a binary tree is a node, each containing a value, a reference to the left child node, and a reference to the right child node.
|
||||
A <u>binary tree</u> is a non-linear data structure that represents the hierarchical relationship between ancestors and descendants, embodying the divide-and-conquer logic of "splitting into two". Similar to a linked list, the basic unit of a binary tree is a node, each containing a value, a reference to the left child node, and a reference to the right child node.
|
||||
|
||||
=== "Python"
|
||||
|
||||
|
@ -198,7 +198,7 @@ A "binary tree" is a non-linear data structure that represents the hierarchical
|
|||
|
||||
```
|
||||
|
||||
Each node has two references (pointers), pointing to the "left-child node" and "right-child node," respectively. This node is called the "parent node" of these two child nodes. When given a node of a binary tree, we call the tree formed by this node's left child and all nodes under it the "left subtree" of this node. Similarly, the "right subtree" can be defined.
|
||||
Each node has two references (pointers), pointing to the <u>left-child node</u> and <u>right-child node</u>, respectively. This node is called the <u>parent node</u> of these two child nodes. When given a node of a binary tree, we call the tree formed by this node's left child and all nodes under it the <u>left subtree</u> of this node. Similarly, the <u>right subtree</u> can be defined.
|
||||
|
||||
**In a binary tree, except for leaf nodes, all other nodes contain child nodes and non-empty subtrees.** As shown in the figure below, if "Node 2" is considered as the parent node, then its left and right child nodes are "Node 4" and "Node 5," respectively. The left subtree is "the tree formed by Node 4 and all nodes under it," and the right subtree is "the tree formed by Node 5 and all nodes under it."
|
||||
|
||||
|
@ -206,16 +206,16 @@ Each node has two references (pointers), pointing to the "left-child node" and "
|
|||
|
||||
## Common terminology of binary trees
|
||||
|
||||
The commonly used terminology of binary trees is shown in the following figure.
|
||||
The commonly used terminology of binary trees is shown in the figure below.
|
||||
|
||||
- "Root node": The node at the top level of the binary tree, which has no parent node.
|
||||
- "Leaf node": A node with no children, both of its pointers point to `None`.
|
||||
- "Edge": The line segment connecting two nodes, i.e., node reference (pointer).
|
||||
- The "level" of a node: Incrementing from top to bottom, with the root node's level being 1.
|
||||
- The "degree" of a node: The number of children a node has. In a binary tree, the degree can be 0, 1, or 2.
|
||||
- The "height" of a binary tree: The number of edges passed from the root node to the farthest leaf node.
|
||||
- The "depth" of a node: The number of edges passed from the root node to the node.
|
||||
- The "height" of a node: The number of edges from the farthest leaf node to the node.
|
||||
- <u>Root node</u>: The node at the top level of the binary tree, which has no parent node.
|
||||
- <u>Leaf node</u>: A node with no children, both of its pointers point to `None`.
|
||||
- <u>Edge</u>: The line segment connecting two nodes, i.e., node reference (pointer).
|
||||
- The <u>level</u> of a node: Incrementing from top to bottom, with the root node's level being 1.
|
||||
- The <u>degree</u> of a node: The number of children a node has. In a binary tree, the degree can be 0, 1, or 2.
|
||||
- The <u>height</u> of a binary tree: The number of edges passed from the root node to the farthest leaf node.
|
||||
- The <u>depth</u> of a node: The number of edges passed from the root node to the node.
|
||||
- The <u>height</u> of a node: The number of edges from the farthest leaf node to the node.
|
||||
|
||||
![Common Terminology of Binary Trees](binary_tree.assets/binary_tree_terminology.png)
|
||||
|
||||
|
@ -615,29 +615,29 @@ Similar to a linked list, inserting and removing nodes in a binary tree can be a
|
|||
|
||||
### Perfect binary tree
|
||||
|
||||
As shown in the figure below, in a "perfect binary tree," all levels of nodes are fully filled. In a perfect binary tree, the degree of leaf nodes is $0$, while the degree of all other nodes is $2$; if the tree's height is $h$, then the total number of nodes is $2^{h+1} - 1$, showing a standard exponential relationship, reflecting the common phenomenon of cell division in nature.
|
||||
As shown in the figure below, in a <u>perfect binary tree</u>, all levels of nodes are fully filled. In a perfect binary tree, the degree of leaf nodes is $0$, while the degree of all other nodes is $2$; if the tree's height is $h$, then the total number of nodes is $2^{h+1} - 1$, showing a standard exponential relationship, reflecting the common phenomenon of cell division in nature.
|
||||
|
||||
!!! tip
|
||||
|
||||
Please note that in the Chinese community, a perfect binary tree is often referred to as a "full binary tree."
|
||||
Please note that in the Chinese community, a perfect binary tree is often referred to as a <u>full binary tree</u>.
|
||||
|
||||
![Perfect binary tree](binary_tree.assets/perfect_binary_tree.png)
|
||||
|
||||
### Complete binary tree
|
||||
|
||||
As shown in the figure below, a "complete binary tree" has only the bottom level nodes not fully filled, and the bottom level nodes are filled as far left as possible.
|
||||
As shown in the figure below, a <u>complete binary tree</u> has only the bottom level nodes not fully filled, and the bottom level nodes are filled as far left as possible.
|
||||
|
||||
![Complete binary tree](binary_tree.assets/complete_binary_tree.png)
|
||||
|
||||
### Full binary tree
|
||||
|
||||
As shown in the figure below, a "full binary tree" has all nodes except leaf nodes having two children.
|
||||
As shown in the figure below, a <u>full binary tree</u> has all nodes except leaf nodes having two children.
|
||||
|
||||
![Full binary tree](binary_tree.assets/full_binary_tree.png)
|
||||
|
||||
### Balanced binary tree
|
||||
|
||||
As shown in the figure below, in a "balanced binary tree," the absolute difference in height between the left and right subtrees of any node does not exceed 1.
|
||||
As shown in the figure below, in a <u>balanced binary tree</u>, the absolute difference in height between the left and right subtrees of any node does not exceed 1.
|
||||
|
||||
![Balanced binary tree](binary_tree.assets/balanced_binary_tree.png)
|
||||
|
||||
|
|
|
@ -6,9 +6,9 @@ Common traversal methods for binary trees include level-order traversal, preorde
|
|||
|
||||
## Level-order traversal
|
||||
|
||||
As shown in the figure below, "level-order traversal" traverses the binary tree from top to bottom, layer by layer, and accesses nodes in each layer in a left-to-right order.
|
||||
As shown in the figure below, <u>level-order traversal</u> traverses the binary tree from top to bottom, layer by layer, and accesses nodes in each layer in a left-to-right order.
|
||||
|
||||
Level-order traversal essentially belongs to "breadth-first traversal", also known as "breadth-first search (BFS)", which embodies a "circumferentially outward expanding" layer-by-layer traversal method.
|
||||
Level-order traversal essentially belongs to <u>breadth-first traversal</u>, also known as <u>breadth-first search (BFS)</u>, which embodies a "circumferentially outward expanding" layer-by-layer traversal method.
|
||||
|
||||
![Level-order traversal of a binary tree](binary_tree_traversal.assets/binary_tree_bfs.png)
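A minimal Python sketch of this queue-based traversal (the `TreeNode` class and sample tree are illustrative):

```python
from collections import deque

class TreeNode:
    def __init__(self, val):
        self.val, self.left, self.right = val, None, None

def level_order(root) -> list[int]:
    """Breadth-first traversal with a queue: visit nodes layer by layer."""
    if root is None:
        return []
    queue, res = deque([root]), []
    while queue:
        node = queue.popleft()         # dequeue the front node
        res.append(node.val)           # visit it
        if node.left:
            queue.append(node.left)    # enqueue children left to right
        if node.right:
            queue.append(node.right)
    return res

root = TreeNode(1)
root.left, root.right = TreeNode(2), TreeNode(3)
root.left.left = TreeNode(4)
print(level_order(root))  # [1, 2, 3, 4]
```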
|
||||
|
||||
|
@ -27,7 +27,7 @@ Breadth-first traversal is usually implemented with the help of a "queue". The q
|
|||
|
||||
## Preorder, inorder, and postorder traversal
|
||||
|
||||
Correspondingly, preorder, inorder, and postorder traversal all belong to "depth-first traversal", also known as "depth-first search (DFS)", which embodies a "proceed to the end first, then backtrack and continue" traversal method.
|
||||
Correspondingly, preorder, inorder, and postorder traversal all belong to <u>depth-first traversal</u>, also known as <u>depth-first search (DFS)</u>, which embodies a "proceed to the end first, then backtrack and continue" traversal method.
|
||||
|
||||
The figure below shows the working principle of performing a depth-first traversal on a binary tree. **Depth-first traversal is like walking around the perimeter of the entire binary tree**, encountering three positions at each node, corresponding to preorder traversal, inorder traversal, and postorder traversal.
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ Taking the binary search tree as an example, the operation of removing a node ne
|
|||
|
||||
**Q**: Why are there three sequences: pre-order, in-order, and post-order for DFS traversal of a binary tree, and what are their uses?
|
||||
|
||||
Similar to sequential and reverse traversal of arrays, pre-order, in-order, and post-order traversals are three methods of traversing a binary tree, allowing us to obtain a traversal result in a specific order. For example, in a binary search tree, since the node sizes satisfy `left child node value < root node value < right child node value`, we can obtain an ordered node sequence by traversing the tree in the "left → root → right" priority.
|
||||
Similar to sequential and reverse traversal of arrays, pre-order, in-order, and post-order traversals are three methods of traversing a binary tree, allowing us to obtain a traversal result in a specific order. For example, in a binary search tree, since the node sizes satisfy `left child node value < root node value < right child node value`, we can obtain an ordered node sequence by traversing the tree in the "left $\rightarrow$ root $\rightarrow$ right" priority.
|
||||
|
||||
**Q**: In a right rotation operation that deals with the relationship between the imbalance nodes `node`, `child`, `grand_child`, isn't the connection between `node` and its parent node and the original link of `node` lost after the right rotation?
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ edit_uri: tree/main/docs
|
|||
version: 1.1.0
|
||||
|
||||
# Copyright
|
||||
copyright: Copyright © 2022-2024 krahets<br>The website content is licensed under <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC BY-NC-SA 4.0</a>
|
||||
copyright: Copyright © 2024 krahets<br>The website content is licensed under <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC BY-NC-SA 4.0</a>
|
||||
|
||||
# Configuration
|
||||
theme:
|
||||
|
|
|
@ -15,6 +15,7 @@ func backtrack(row, n int, state *[][]string, res *[][][]string, cols, diags1, d
|
|||
|
||||
}
|
||||
*res = append(*res, newState)
|
||||
return
|
||||
}
|
||||
// 走訪所有列
|
||||
for col := 0; col < n; col++ {
|
||||
|
|
|
@ -23,7 +23,7 @@ func backtrackII(state *[]int, choices *[]int, selected *[]bool, res *[][]int) {
|
|||
(*selected)[i] = true
|
||||
*state = append(*state, choice)
|
||||
// 進行下一輪選擇
|
||||
backtrackI(state, choices, selected, res)
|
||||
backtrackII(state, choices, selected, res)
|
||||
// 回退:撤銷選擇,恢復到之前的狀態
|
||||
(*selected)[i] = false
|
||||
*state = (*state)[:len(*state)-1]
|
||||
|
|
51
zh-hant/codes/ruby/chapter_sorting/bubble_sort.rb
Normal file
|
@ -0,0 +1,51 @@
|
|||
=begin
|
||||
File: bubble_sort.rb
|
||||
Created Time: 2024-05-02
|
||||
Author: Xuan Khoa Tu Nguyen (ngxktuzkai2000@gmail.com)
|
||||
=end
|
||||
|
||||
### 泡沫排序 ###
|
||||
def bubble_sort(nums)
|
||||
n = nums.length
|
||||
# 外迴圈:未排序區間為 [0, i]
|
||||
for i in (n - 1).downto(1)
|
||||
# 內迴圈:將未排序區間 [0, i] 中的最大元素交換至該區間的最右端
|
||||
for j in 0...i
|
||||
if nums[j] > nums[j + 1]
|
||||
# 交換 nums[j] 與 nums[j + 1]
|
||||
nums[j], nums[j + 1] = nums[j + 1], nums[j]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
### 泡沫排序(標誌最佳化)###
|
||||
def bubble_sort_with_flag(nums)
|
||||
n = nums.length
|
||||
# 外迴圈:未排序區間為 [0, i]
|
||||
for i in (n - 1).downto(1)
|
||||
flag = false # 初始化標誌位
|
||||
|
||||
# 內迴圈:將未排序區間 [0, i] 中的最大元素交換至該區間的最右端
|
||||
for j in 0...i
|
||||
if nums[j] > nums[j + 1]
|
||||
# 交換 nums[j] 與 nums[j + 1]
|
||||
nums[j], nums[j + 1] = nums[j + 1], nums[j]
|
||||
flag = true # 記錄交換元素
|
||||
end
|
||||
end
|
||||
|
||||
break unless flag # 此輪“冒泡”未交換任何元素,直接跳出
|
||||
end
|
||||
end
|
||||
|
||||
### Driver Code ###
|
||||
if __FILE__ == $0
|
||||
nums = [4, 1, 3, 1, 5, 2]
|
||||
bubble_sort(nums)
|
||||
puts "泡沫排序完成後 nums = #{nums}"
|
||||
|
||||
nums1 = [4, 1, 3, 1, 5, 2]
|
||||
bubble_sort_with_flag(nums1)
|
||||
puts "泡沫排序完成後 nums = #{nums1}"
|
||||
end
|
43
zh-hant/codes/ruby/chapter_sorting/bucket_sort.rb
Normal file
|
@ -0,0 +1,43 @@
|
|||
=begin
File: bucket_sort.rb
Created Time: 2024-04-17
Author: Martin Xu (martin.xus@gmail.com)
=end

### Bucket sort ###
def bucket_sort(nums)
  # Initialize k = n/2 buckets; each bucket is expected to receive about 2 elements
  k = nums.length / 2
  buckets = Array.new(k) { [] }

  # 1. Distribute the array elements into the buckets
  nums.each do |num|
    # The input range is [0, 1); num * k maps it to the index range [0, k-1]
    i = (num * k).to_i
    # Append num to bucket i
    buckets[i] << num
  end

  # 2. Sort each bucket
  buckets.each do |bucket|
    # Use the built-in sort; any other sorting algorithm would also work
    bucket.sort!
  end

  # 3. Traverse the buckets and merge the results
  i = 0
  buckets.each do |bucket|
    bucket.each do |num|
      nums[i] = num
      i += 1
    end
  end
end

### Driver Code ###
if __FILE__ == $0
  # Assume the input data are floats in the range [0, 1)
  nums = [0.49, 0.96, 0.82, 0.09, 0.57, 0.43, 0.91, 0.75, 0.15, 0.37]
  bucket_sort(nums)
  puts "After bucket sort, nums = #{nums}"
end
zh-hant/codes/ruby/chapter_sorting/counting_sort.rb (new file)
@ -0,0 +1,62 @@
=begin
File: counting_sort.rb
Created Time: 2024-05-02
Author: Xuan Khoa Tu Nguyen (ngxktuzkai2000@gmail.com)
=end

### Counting sort ###
def counting_sort_naive(nums)
  # Simple implementation; cannot be used to sort objects
  # 1. Find the maximum element m in the array
  m = 0
  nums.each { |num| m = [m, num].max }
  # 2. Count the occurrences of each number
  # counter[num] is the number of occurrences of num
  counter = Array.new(m + 1, 0)
  nums.each { |num| counter[num] += 1 }
  # 3. Traverse counter and write each element back into nums
  i = 0
  for num in 0...(m + 1)
    (0...counter[num]).each do
      nums[i] = num
      i += 1
    end
  end
end

### Counting sort ###
def counting_sort(nums)
  # Complete implementation; can sort objects, and is a stable sort
  # 1. Find the maximum element m in the array
  m = nums.max
  # 2. Count the occurrences of each number
  # counter[num] is the number of occurrences of num
  counter = Array.new(m + 1, 0)
  nums.each { |num| counter[num] += 1 }
  # 3. Take the prefix sum of counter, turning "occurrence count" into "tail index"
  # i.e., counter[num]-1 is the last index where num appears in res
  (0...m).each { |i| counter[i + 1] += counter[i] }
  # 4. Traverse nums in reverse order and fill each element into the result array res
  # Initialize the array res to hold the result
  n = nums.length
  res = Array.new(n, 0)
  (n - 1).downto(0).each do |i|
    num = nums[i]
    res[counter[num] - 1] = num # place num at its corresponding index
    counter[num] -= 1 # decrement the prefix sum by 1, giving the next index for num
  end
  # Copy the result array res back over the original array nums
  (0...n).each { |i| nums[i] = res[i] }
end

### Driver Code ###
if __FILE__ == $0
  nums = [1, 0, 1, 2, 0, 4, 0, 2, 2, 4]

  counting_sort_naive(nums)
  puts "After counting sort (cannot sort objects), nums = #{nums}"

  nums1 = [1, 0, 1, 2, 0, 4, 0, 2, 2, 4]
  counting_sort(nums1)
  puts "After counting sort, nums1 = #{nums1}"
end
zh-hant/codes/ruby/chapter_sorting/heap_sort.rb (new file)
@ -0,0 +1,45 @@
=begin
File: heap_sort.rb
Created Time: 2024-04-10
Author: junminhong (junminhong1110@gmail.com)
=end

### The heap has length n; starting from node i, sift down from top to bottom ###
def sift_down(nums, n, i)
  while true
    # Determine which of nodes i, l, r has the largest value, denoted ma
    l = 2 * i + 1
    r = 2 * i + 2
    ma = i
    ma = l if l < n && nums[l] > nums[ma]
    ma = r if r < n && nums[r] > nums[ma]
    # If node i is the largest, or indices l and r are out of bounds, no further heapifying is needed; break
    break if ma == i
    # Swap the two nodes
    nums[i], nums[ma] = nums[ma], nums[i]
    # Continue sifting down
    i = ma
  end
end

### Heap sort ###
def heap_sort(nums)
  # Build the heap: heapify all nodes except the leaves
  (nums.length / 2 - 1).downto(0) do |i|
    sift_down(nums, nums.length, i)
  end
  # Extract the largest element from the heap, repeating for n-1 rounds
  (nums.length - 1).downto(1) do |i|
    # Swap the root with the rightmost leaf (swap the first and last elements)
    nums[0], nums[i] = nums[i], nums[0]
    # Sift down from the root, top to bottom
    sift_down(nums, i, 0)
  end
end

### Driver Code ###
if __FILE__ == $0
  nums = [4, 1, 3, 1, 5, 2]
  heap_sort(nums)
  puts "After heap sort, nums = #{nums.inspect}"
end
zh-hant/codes/ruby/chapter_sorting/merge_sort.rb (new file)
@ -0,0 +1,60 @@
=begin
File: merge_sort.rb
Created Time: 2024-04-10
Author: junminhong (junminhong1110@gmail.com)
=end

### Merge the left and right subarrays ###
def merge(nums, left, mid, right)
  # The left subarray spans [left, mid]; the right subarray spans [mid+1, right]
  # Create a temporary array tmp to hold the merged result
  tmp = Array.new(right - left + 1, 0)
  # Initialize the starting indices of the left and right subarrays
  i, j, k = left, mid + 1, 0
  # While both subarrays still have elements, compare and copy the smaller element into tmp
  while i <= mid && j <= right
    if nums[i] <= nums[j]
      tmp[k] = nums[i]
      i += 1
    else
      tmp[k] = nums[j]
      j += 1
    end
    k += 1
  end
  # Copy the remaining elements of the left and right subarrays into tmp
  while i <= mid
    tmp[k] = nums[i]
    i += 1
    k += 1
  end
  while j <= right
    tmp[k] = nums[j]
    j += 1
    k += 1
  end
  # Copy the elements of tmp back into the corresponding range of nums
  (0...tmp.length).each do |k|
    nums[left + k] = tmp[k]
  end
end

### Merge sort ###
def merge_sort(nums, left, right)
  # Termination condition
  # Stop recursing when the subarray length is 1
  return if left >= right
  # Partition stage
  mid = (left + right) / 2 # compute the midpoint
  merge_sort(nums, left, mid) # recurse on the left subarray
  merge_sort(nums, mid + 1, right) # recurse on the right subarray
  # Merge stage
  merge(nums, left, mid, right)
end

### Driver Code ###
if __FILE__ == $0
  nums = [7, 3, 2, 6, 0, 1, 5, 4]
  merge_sort(nums, 0, nums.length - 1)
  puts "After merge sort, nums = #{nums.inspect}"
end
@ -9,7 +9,6 @@ class QuickSort
class << self
### Sentinel partition ###
def partition(nums, left, right)

# Use nums[left] as the pivot
i, j = left, right
while i < j
@ -116,7 +115,7 @@ class QuickSortTailCall
i # return the pivot's index
end

### Quick sort (tail-recursion optimization)
### Quick sort (tail-recursion optimization) ###
def quick_sort(nums, left, right)
# Recurse while the subarray length is not 1
while left < right
zh-hant/codes/ruby/chapter_sorting/radix_sort.rb (new file)
@ -0,0 +1,70 @@
=begin
File: radix_sort.rb
Created Time: 2024-05-03
Author: Xuan Khoa Tu Nguyen (ngxktuzkai2000@gmail.com)
=end

### Get the k-th digit of element num, where exp = 10^(k-1) ###
def digit(num, exp)
  # Passing in exp instead of k avoids repeating the costly exponentiation here
  (num / exp) % 10
end

### Counting sort (sorts nums by the k-th digit) ###
def counting_sort_digit(nums, exp)
  # Decimal digits range over 0~9, so a bucket array of length 10 is needed
  counter = Array.new(10, 0)
  n = nums.length
  # Count the occurrences of digits 0~9
  for i in 0...n
    d = digit(nums[i], exp) # get the k-th digit of nums[i], denoted d
    counter[d] += 1 # count the occurrences of digit d
  end
  # Take the prefix sum, turning "occurrence count" into "array index"
  (1...10).each { |i| counter[i] += counter[i - 1] }
  # Traverse in reverse order and, using the bucket counts, fill each element into res
  res = Array.new(n, 0)
  for i in (n - 1).downto(0)
    d = digit(nums[i], exp)
    j = counter[d] - 1 # get the index j of d in the result array
    res[j] = nums[i] # place the current element at index j
    counter[d] -= 1 # decrement the count of d by 1
  end
  # Overwrite the original array nums with the result
  (0...n).each { |i| nums[i] = res[i] }
end

### Radix sort ###
def radix_sort(nums)
  # Get the maximum element of the array, used to determine the maximum number of digits
  m = nums.max
  # Traverse the digits from lowest to highest
  exp = 1
  while exp <= m
    # Run counting sort on the k-th digit of the array elements
    # k = 1 -> exp = 1
    # k = 2 -> exp = 10
    # i.e., exp = 10^(k-1)
    counting_sort_digit(nums, exp)
    exp *= 10
  end
end

### Driver Code ###
if __FILE__ == $0
  # Radix sort
  nums = [
    10546151,
    35663510,
    42865989,
    34862445,
    81883077,
    88906420,
    72429244,
    30524779,
    82060337,
    63832996,
  ]
  radix_sort(nums)
  puts "After radix sort, nums = #{nums}"
end
zh-hant/codes/ruby/chapter_sorting/selection_sort.rb (new file)
@ -0,0 +1,29 @@
=begin
File: selection_sort.rb
Created Time: 2024-05-03
Author: Xuan Khoa Tu Nguyen (ngxktuzkai2000@gmail.com)
=end

### Selection sort ###
def selection_sort(nums)
  n = nums.length
  # Outer loop: the unsorted range is [i, n-1]
  for i in 0...(n - 1)
    # Inner loop: find the smallest element in the unsorted range
    k = i
    for j in (i + 1)...n
      if nums[j] < nums[k]
        k = j # record the index of the smallest element
      end
    end
    # Swap the smallest element with the first element of the unsorted range
    nums[i], nums[k] = nums[k], nums[i]
  end
end

### Driver Code ###
if __FILE__ == $0
  nums = [4, 1, 3, 1, 5, 2]
  selection_sort(nums)
  puts "After selection sort, nums = #{nums}"
end
@ -18,7 +18,7 @@

In other words, we can adopt a row-by-row placement strategy: starting from the first row, place one queen in each row until the last row is reached.

The figure below shows the row-by-row placement process for the $4$ queens problem. Due to space limitations, the figure expands only one search branch from the first row, and prunes every placement that fails the column or diagonal constraints.
The figure below shows the row-by-row placement process for the 4 queens problem. Due to space limitations, the figure expands only one search branch from the first row, and prunes every placement that fails the column or diagonal constraints.

![Row-by-row placement strategy](n_queens_problem.assets/n_queens_placing.png)
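To make the row-by-row strategy and the pruning above concrete, a minimal Ruby sketch (an illustration under assumptions, not this chapter's code; `n_queens` and its local names are hypothetical):

### N queens: place one queen per row, pruning columns and diagonals ###
def n_queens(n)
  res = []
  state = Array.new(n) { Array.new(n, '#') }
  cols = Array.new(n, false)           # column constraint
  diags1 = Array.new(2 * n - 1, false) # main-diagonal constraint (row - col)
  diags2 = Array.new(2 * n - 1, false) # anti-diagonal constraint (row + col)
  backtrack = lambda do |row|
    if row == n
      res << state.map(&:join) # record a complete placement
      return
    end
    (0...n).each do |col|
      d1, d2 = row - col + n - 1, row + col
      next if cols[col] || diags1[d1] || diags2[d2] # prune invalid placements
      state[row][col] = 'Q' # make the choice
      cols[col] = diags1[d1] = diags2[d2] = true
      backtrack.call(row + 1)
      state[row][col] = '#' # undo the choice
      cols[col] = diags1[d1] = diags2[d2] = false
    end
  end
  backtrack.call(0)
  res
end

puts n_queens(4).length # => 2 solutions for the 4 queens problem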
(figure replaced: 31 KiB → 18 KiB)
(figure replaced: 48 KiB → 42 KiB)