From 2395804410233411ee98ab83ea4e619e29c4e4e3 Mon Sep 17 00:00:00 2001 From: krahets Date: Sat, 4 May 2024 19:57:03 +0800 Subject: [PATCH] build --- docs/chapter_backtracking/n_queens_problem.md | 1 + docs/chapter_sorting/bubble_sort.md | 35 +++++++- docs/chapter_sorting/counting_sort.md | 45 +++++++++- docs/chapter_sorting/quick_sort.md | 82 +++++++++++++++++-- docs/chapter_sorting/radix_sort.md | 46 ++++++++++- docs/chapter_sorting/selection_sort.md | 17 +++- .../chapter_backtracking/n_queens_problem.md | 1 + .../space_complexity.md | 4 +- .../time_complexity.md | 4 +- en/docs/chapter_graph/graph_traversal.md | 8 +- en/docs/chapter_hashing/hash_collision.md | 60 +++++++------- en/docs/chapter_sorting/bubble_sort.md | 35 +++++++- en/docs/chapter_sorting/counting_sort.md | 45 +++++++++- en/docs/chapter_sorting/quick_sort.md | 82 +++++++++++++++++-- en/docs/chapter_sorting/radix_sort.md | 46 ++++++++++- en/docs/chapter_sorting/selection_sort.md | 17 +++- en/docs/chapter_tree/binary_tree.md | 14 ++-- .../chapter_backtracking/n_queens_problem.md | 3 +- .../permutations_problem.md | 2 +- zh-Hant/docs/chapter_sorting/bubble_sort.md | 35 +++++++- zh-Hant/docs/chapter_sorting/bucket_sort.md | 30 ++++++- zh-Hant/docs/chapter_sorting/counting_sort.md | 45 +++++++++- zh-Hant/docs/chapter_sorting/heap_sort.md | 33 +++++++- zh-Hant/docs/chapter_sorting/merge_sort.md | 48 ++++++++++- zh-Hant/docs/chapter_sorting/quick_sort.md | 82 +++++++++++++++++-- zh-Hant/docs/chapter_sorting/radix_sort.md | 46 ++++++++++- .../docs/chapter_sorting/selection_sort.md | 17 +++- 27 files changed, 792 insertions(+), 91 deletions(-) diff --git a/docs/chapter_backtracking/n_queens_problem.md b/docs/chapter_backtracking/n_queens_problem.md index ac59322eb..afd3e80b7 100644 --- a/docs/chapter_backtracking/n_queens_problem.md +++ b/docs/chapter_backtracking/n_queens_problem.md @@ -269,6 +269,7 @@ comments: true } *res = append(*res, newState) + return } // 遍历所有列 for col := 0; col < n; col++ { diff 
--git a/docs/chapter_sorting/bubble_sort.md b/docs/chapter_sorting/bubble_sort.md index beb31f559..725378867 100755 --- a/docs/chapter_sorting/bubble_sort.md +++ b/docs/chapter_sorting/bubble_sort.md @@ -276,7 +276,20 @@ comments: true === "Ruby" ```ruby title="bubble_sort.rb" - [class]{}-[func]{bubble_sort} + ### 冒泡排序 ### + def bubble_sort(nums) + n = nums.length + # 外循环:未排序区间为 [0, i] + for i in (n - 1).downto(1) + # 内循环:将未排序区间 [0, i] 中的最大元素交换至该区间的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交换 nums[j] 与 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + end + end + end + end ``` === "Zig" @@ -587,7 +600,25 @@ comments: true === "Ruby" ```ruby title="bubble_sort.rb" - [class]{}-[func]{bubble_sort_with_flag} + ### 冒泡排序(标志优化)### + def bubble_sort_with_flag(nums) + n = nums.length + # 外循环:未排序区间为 [0, i] + for i in (n - 1).downto(1) + flag = false # 初始化标志位 + + # 内循环:将未排序区间 [0, i] 中的最大元素交换至该区间的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交换 nums[j] 与 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + flag = true # 记录交换元素 + end + end + + break unless flag # 此轮“冒泡”未交换任何元素,直接跳出 + end + end ``` === "Zig" diff --git a/docs/chapter_sorting/counting_sort.md b/docs/chapter_sorting/counting_sort.md index 344f6b73e..f11badfa9 100644 --- a/docs/chapter_sorting/counting_sort.md +++ b/docs/chapter_sorting/counting_sort.md @@ -348,7 +348,25 @@ comments: true === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort_naive} + ### 计数排序 ### + def counting_sort_naive(nums) + # 简单实现,无法用于排序对象 + # 1. 统计数组最大元素 m + m = 0 + nums.each { |num| m = [m, num].max } + # 2. 统计各数字的出现次数 + # counter[num] 代表 num 的出现次数 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 
遍历 counter ,将各元素填入原数组 nums + i = 0 + for num in 0...(m + 1) + (0...counter[num]).each do + nums[i] = num + i += 1 + end + end + end ``` === "Zig" @@ -854,7 +872,30 @@ $$ === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort} + ### 计数排序 ### + def counting_sort(nums) + # 完整实现,可排序对象,并且是稳定排序 + # 1. 统计数组最大元素 m + m = nums.max + # 2. 统计各数字的出现次数 + # counter[num] 代表 num 的出现次数 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 求 counter 的前缀和,将“出现次数”转换为“尾索引” + # 即 counter[num]-1 是 num 在 res 中最后一次出现的索引 + (0...m).each { |i| counter[i + 1] += counter[i] } + # 4. 倒序遍历 nums, 将各元素填入结果数组 res + # 初始化数组 res 用于记录结果 + n = nums.length + res = Array.new(n, 0) + (n - 1).downto(0).each do |i| + num = nums[i] + res[counter[num] - 1] = num # 将 num 放置到对应索引处 + counter[num] -= 1 # 令前缀和自减 1 ,得到下次放置 num 的索引 + end + # 使用结果数组 res 覆盖原数组 nums + (0...n).each { |i| nums[i] = res[i] } + end ``` === "Zig" diff --git a/docs/chapter_sorting/quick_sort.md b/docs/chapter_sorting/quick_sort.md index f3acd7b1a..98bd76015 100755 --- a/docs/chapter_sorting/quick_sort.md +++ b/docs/chapter_sorting/quick_sort.md @@ -353,7 +353,24 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{partition} + ### 哨兵划分 ### + def partition(nums, left, right) + # 以 nums[left] 为基准数 + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 从右向左找首个小于基准数的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 从左向右找首个大于基准数的元素 + end + # 元素交换 + nums[i], nums[j] = nums[j], nums[i] + end + # 将基准数交换至两子数组的分界线 + nums[i], nums[left] = nums[left], nums[i] + i # 返回基准数的索引 + end ``` === "Zig" @@ -594,7 +611,18 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{quick_sort} + ### 快速排序类 ### + def quick_sort(nums, left, right) + # 子数组长度不为 1 时递归 + if left < right + # 哨兵划分 + pivot = partition(nums, left, right) + # 递归左子数组、右子数组 + quick_sort(nums, left, pivot - 1) + quick_sort(nums, pivot + 1, right) + end + 
nums + end ``` === "Zig" @@ -1067,9 +1095,38 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortMedian}-[func]{median_three} + ### 选取三个候选元素的中位数 ### + def median_three(nums, left, mid, right) + # 选取三个候选元素的中位数 + _l, _m, _r = nums[left], nums[mid], nums[right] + # m 在 l 和 r 之间 + return mid if (_l <= _m && _m <= _r) || (_r <= _m && _m <= _l) + # l 在 m 和 r 之间 + return left if (_m <= _l && _l <= _r) || (_r <= _l && _l <= _m) + return right + end - [class]{QuickSortMedian}-[func]{partition} + ### 哨兵划分(三数取中值)### + def partition(nums, left, right) + ### 以 nums[left] 为基准数 + med = median_three(nums, left, (left + right) / 2, right) + # 将中位数交换至数组最左断 + nums[left], nums[med] = nums[med], nums[left] + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 从右向左找首个小于基准数的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 从左向右找首个大于基准数的元素 + end + # 元素交换 + nums[i], nums[j] = nums[j], nums[i] + end + # 将基准数交换至两子数组的分界线 + nums[i], nums[left] = nums[left], nums[i] + i # 返回基准数的索引 + end ``` === "Zig" @@ -1377,7 +1434,22 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortTailCall}-[func]{quick_sort} + ### 快速排序(尾递归优化)### + def quick_sort(nums, left, right) + # 子数组长度不为 1 时递归 + while left < right + # 哨兵划分 + pivot = partition(nums, left, right) + # 对两个子数组中较短的那个执行快速排序 + if pivot - left < right - pivot + quick_sort(nums, left, pivot - 1) + left = pivot + 1 # 剩余未排序区间为 [pivot + 1, right] + else + quick_sort(nums, pivot + 1, right) + right = pivot - 1 # 剩余未排序区间为 [left, pivot - 1] + end + end + end ``` === "Zig" diff --git a/docs/chapter_sorting/radix_sort.md b/docs/chapter_sorting/radix_sort.md index 18b592896..eb5067b5e 100644 --- a/docs/chapter_sorting/radix_sort.md +++ b/docs/chapter_sorting/radix_sort.md @@ -677,11 +677,51 @@ $$ === "Ruby" ```ruby title="radix_sort.rb" - [class]{}-[func]{digit} + ### 获取元素 num 的第 k 位,其中 exp = 10^(k-1) ### + def digit(num, exp) + # 转入 exp 而非 k 可以避免在此重复执行昂贵的次方计算 + (num / exp) % 
10 + end - [class]{}-[func]{counting_sort_digit} + ### 计数排序(根据 nums 第 k 位排序)### + def counting_sort_digit(nums, exp) + # 十进制的位范围为 0~9 ,因此需要长度为 10 的桶数组 + counter = Array.new(10, 0) + n = nums.length + # 统计 0~9 各数字的出现次数 + for i in 0...n + d = digit(nums[i], exp) # 获取 nums[i] 第 k 位,记为 d + counter[d] += 1 # 统计数字 d 的出现次数 + end + # 求前缀和,将“出现个数”转换为“数组索引” + (1...10).each { |i| counter[i] += counter[i - 1] } + # 倒序遍历,根据桶内统计结果,将各元素填入 res + res = Array.new(n, 0) + for i in (n - 1).downto(0) + d = digit(nums[i], exp) + j = counter[d] - 1 # 获取 d 在数组中的索引 j + res[j] = nums[i] # 将当前元素填入索引 j + counter[d] -= 1 # 将 d 的数量减 1 + end + # 使用结果覆盖原数组 nums + (0...n).each { |i| nums[i] = res[i] } + end - [class]{}-[func]{radix_sort} + ### 基数排序 ### + def radix_sort(nums) + # 获取数组的最大元素,用于判断最大位数 + m = nums.max + # 按照从低位到高位的顺序遍历 + exp = 1 + while exp <= m + # 对数组元素的第 k 位执行计数排序 + # k = 1 -> exp = 1 + # k = 2 -> exp = 10 + # 即 exp = 10^(k-1) + counting_sort_digit(nums, exp) + exp *= 10 + end + end ``` === "Zig" diff --git a/docs/chapter_sorting/selection_sort.md b/docs/chapter_sorting/selection_sort.md index 983e01343..680bc5809 100644 --- a/docs/chapter_sorting/selection_sort.md +++ b/docs/chapter_sorting/selection_sort.md @@ -306,7 +306,22 @@ comments: true === "Ruby" ```ruby title="selection_sort.rb" - [class]{}-[func]{selection_sort} + ### 选择排序 ### + def selection_sort(nums) + n = nums.length + # 外循环:未排序区间为 [i, n-1] + for i in 0...(n - 1) + # 内循环:找到未排序区间内的最小元素 + k = i + for j in (i + 1)...n + if nums[j] < nums[k] + k = j # 记录最小元素的索引 + end + end + # 将该最小元素与未排序区间的首个元素交换 + nums[i], nums[k] = nums[k], nums[i] + end + end ``` === "Zig" diff --git a/en/docs/chapter_backtracking/n_queens_problem.md b/en/docs/chapter_backtracking/n_queens_problem.md index 85bcc04f6..2659c1bb2 100644 --- a/en/docs/chapter_backtracking/n_queens_problem.md +++ b/en/docs/chapter_backtracking/n_queens_problem.md @@ -269,6 +269,7 @@ Please note, in an $n$-dimensional matrix, the range of $row - col$ is $[-n + 1, } *res = 
append(*res, newState) + return } // 遍历所有列 for col := 0; col < n; col++ { diff --git a/en/docs/chapter_computational_complexity/space_complexity.md b/en/docs/chapter_computational_complexity/space_complexity.md index 70e0710f2..5b1ae61d6 100644 --- a/en/docs/chapter_computational_complexity/space_complexity.md +++ b/en/docs/chapter_computational_complexity/space_complexity.md @@ -735,8 +735,8 @@ Let the size of the input data be $n$, the following chart displays common types $$ \begin{aligned} -O(1) < O(\log n) < O(n) < O(n^2) < O(2^n) \newline -\text{Constant Order} < \text{Logarithmic Order} < \text{Linear Order} < \text{Quadratic Order} < \text{Exponential Order} +& O(1) < O(\log n) < O(n) < O(n^2) < O(2^n) \newline +& \text{Constant} < \text{Logarithmic} < \text{Linear} < \text{Quadratic} < \text{Exponential} \end{aligned} $$ diff --git a/en/docs/chapter_computational_complexity/time_complexity.md b/en/docs/chapter_computational_complexity/time_complexity.md index 5155bbba5..614096f3e 100644 --- a/en/docs/chapter_computational_complexity/time_complexity.md +++ b/en/docs/chapter_computational_complexity/time_complexity.md @@ -967,8 +967,8 @@ Let's consider the input data size as $n$. The common types of time complexities $$ \begin{aligned} -O(1) < O(\log n) < O(n) < O(n \log n) < O(n^2) < O(2^n) < O(n!) \newline -\text{Constant Order} < \text{Logarithmic Order} < \text{Linear Order} < \text{Linear-Logarithmic Order} < \text{Quadratic Order} < \text{Exponential Order} < \text{Factorial Order} +& O(1) < O(\log n) < O(n) < O(n \log n) < O(n^2) < O(2^n) < O(n!) 
\newline +& \text{Constant} < \text{Log} < \text{Linear} < \text{Linear-Log} < \text{Quadratic} < \text{Exp} < \text{Factorial} \end{aligned} $$ diff --git a/en/docs/chapter_graph/graph_traversal.md b/en/docs/chapter_graph/graph_traversal.md index 8b08853a5..d352446aa 100644 --- a/en/docs/chapter_graph/graph_traversal.md +++ b/en/docs/chapter_graph/graph_traversal.md @@ -24,7 +24,7 @@ BFS is usually implemented with the help of a queue, as shown in the code below. 2. In each iteration of the loop, pop the vertex at the front of the queue and record it as visited, then add all adjacent vertices of that vertex to the back of the queue. 3. Repeat step `2.` until all vertices have been visited. -To prevent revisiting vertices, we use a hash table `visited` to record which nodes have been visited. +To prevent revisiting vertices, we use a hash set `visited` to record which nodes have been visited. === "Python" @@ -528,7 +528,7 @@ The code is relatively abstract, it is suggested to compare with Figure 9-10 to **Time complexity**: All vertices will be enqueued and dequeued once, using $O(|V|)$ time; in the process of traversing adjacent vertices, since it is an undirected graph, all edges will be visited $2$ times, using $O(2|E|)$ time; overall using $O(|V| + |E|)$ time. -**Space complexity**: The maximum number of vertices in list `res`, hash table `visited`, and queue `que` is $|V|$, using $O(|V|)$ space. +**Space complexity**: The maximum number of vertices in list `res`, hash set `visited`, and queue `que` is $|V|$, using $O(|V|)$ space. ## 9.3.2   Depth-first search @@ -540,7 +540,7 @@ The code is relatively abstract, it is suggested to compare with Figure 9-10 to ### 1.   Algorithm implementation -This "go as far as possible and then return" algorithm paradigm is usually implemented based on recursion. Similar to breadth-first search, in depth-first search, we also need the help of a hash table `visited` to record the visited vertices to avoid revisiting. 
+This "go as far as possible and then return" algorithm paradigm is usually implemented based on recursion. Similar to breadth-first search, in depth-first search, we also need the help of a hash set `visited` to record the visited vertices to avoid revisiting. === "Python" @@ -1003,4 +1003,4 @@ To deepen the understanding, it is suggested to combine Figure 9-12 with the cod **Time complexity**: All vertices will be visited once, using $O(|V|)$ time; all edges will be visited twice, using $O(2|E|)$ time; overall using $O(|V| + |E|)$ time. -**Space complexity**: The maximum number of vertices in list `res`, hash table `visited` is $|V|$, and the maximum recursion depth is $|V|$, therefore using $O(|V|)$ space. +**Space complexity**: The maximum number of vertices in list `res`, hash set `visited` is $|V|$, and the maximum recursion depth is $|V|$, therefore using $O(|V|)$ space. diff --git a/en/docs/chapter_hashing/hash_collision.md b/en/docs/chapter_hashing/hash_collision.md index a997e4cc0..fa5ad8ef5 100644 --- a/en/docs/chapter_hashing/hash_collision.md +++ b/en/docs/chapter_hashing/hash_collision.md @@ -4,18 +4,18 @@ comments: true # 6.2   Hash collision -As mentioned in the previous section, **usually the input space of a hash function is much larger than its output space**, making hash collisions theoretically inevitable. For example, if the input space consists of all integers and the output space is the size of the array capacity, multiple integers will inevitably map to the same bucket index. +The previous section mentioned that, **in most cases, the input space of a hash function is much larger than the output space**, so theoretically, hash collisions are inevitable. For example, if the input space is all integers and the output space is the size of the array capacity, then multiple integers will inevitably be mapped to the same bucket index. -Hash collisions can lead to incorrect query results, severely affecting the usability of hash tables. 
To solve this problem, we expand the hash table whenever a hash collision occurs, until the collision is resolved. This method is simple and effective but inefficient due to the extensive data transfer and hash value computation involved in resizing the hash table. To improve efficiency, we can adopt the following strategies: +Hash collisions can lead to incorrect query results, severely impacting the usability of the hash table. To address this issue, whenever a hash collision occurs, we perform hash table resizing until the collision disappears. This approach is simple, straightforward, and works well. However, it is quite inefficient, because expanding the table involves a lot of data migration and hash code recalculation, both of which are expensive. To improve efficiency, we can adopt the following strategies: -1. Improve the data structure of the hash table, **allowing it to function normally in the event of a hash collision**. -2. Only perform resizing when necessary, i.e., when hash collisions are severe. +1. Improve the hash table data structure **so that the target element can still be located correctly in the event of a hash collision**. +2. Treat expansion as a last resort, performed only when necessary, i.e., when severe collisions are observed. There are mainly two methods for improving the structure of hash tables: "Separate Chaining" and "Open Addressing". ## 6.2.1   Separate chaining -In the original hash table, each bucket can store only one key-value pair. Separate chaining transforms individual elements into a linked list, with key-value pairs as list nodes, storing all colliding key-value pairs in the same list. Figure 6-5 shows an example of a hash table with separate chaining. +In the original hash table, each bucket can store only one key-value pair. Separate chaining converts a single element into a linked list, treating key-value pairs as list nodes, storing all colliding key-value pairs in the same linked list. 
Figure 6-5 shows an example of a hash table with separate chaining. ![Separate chaining hash table](hash_collision.assets/hash_table_chaining.png){ class="animation-figure" } @@ -23,19 +23,19 @@ In the original hash table, each bucket can store only one key-value pair. Se The operations of a hash table implemented with separate chaining have changed as follows: -- **Querying elements**: Input `key`, pass through the hash function to obtain the bucket index, access the head node of the list, then traverse the list and compare `key` to find the target key-value pair. -- **Adding elements**: First access the list head node via the hash function, then add the node (key-value pair) to the list. -- **Deleting elements**: Access the list head based on the hash function's result, then traverse the list to find and remove the target node. +- **Querying Elements**: Input `key`, obtain the bucket index through the hash function, then access the head node of the linked list. Traverse the linked list and compare `key` to find the target key-value pair. +- **Adding Elements**: Access the head node of the linked list via the hash function, then append the node (key-value pair) to the list. +- **Deleting Elements**: Access the head of the linked list based on the result of the hash function, then traverse the linked list to find the target node and delete it. Separate chaining has the following limitations: -- **Increased space usage**: The linked list contains node pointers, which consume more memory space than arrays. -- **Reduced query efficiency**: Due to the need for linear traversal of the list to find the corresponding element. +- **Increased Space Usage**: The linked list contains node pointers, which consume more memory space than arrays. +- **Reduced Query Efficiency**: This is because linear traversal of the linked list is required to find the corresponding element. 
The code below provides a simple implementation of a separate chaining hash table, with two things to note: - Lists (dynamic arrays) are used instead of linked lists for simplicity. In this setup, the hash table (array) contains multiple buckets, each of which is a list. -- This implementation includes a method for resizing the hash table. When the load factor exceeds $\frac{2}{3}$, we resize the hash table to twice its original size. +- This implementation includes a hash table resizing method. When the load factor exceeds $\frac{2}{3}$, we expand the hash table to twice its original size. === "Python" @@ -1532,11 +1532,11 @@ The code below provides a simple implementation of a separate chaining hash tabl
Full Screen >
-It's worth noting that when the list is very long, the query efficiency $O(n)$ is poor. **At this point, the list can be converted to an "AVL tree" or "Red-Black tree"** to optimize the time complexity of the query operation to $O(\log n)$. +It's worth noting that when the linked list is very long, the query efficiency $O(n)$ is poor. **In this case, the list can be converted to an "AVL tree" or "Red-Black tree"** to optimize the time complexity of the query operation to $O(\log n)$. ## 6.2.2   Open addressing -Open addressing does not introduce additional data structures but uses "multiple probes" to handle hash collisions. The probing methods mainly include linear probing, quadratic probing, and double hashing. +Open addressing does not introduce additional data structures but instead handles hash collisions through "multiple probing". The probing methods mainly include linear probing, quadratic probing, and double hashing. Let's use linear probing as an example to introduce the mechanism of open addressing hash tables. @@ -1544,16 +1544,16 @@ Let's use linear probing as an example to introduce the mechanism of open addres Linear probing uses a fixed-step linear search for probing, differing from ordinary hash tables. -- **Inserting elements**: Calculate the bucket index using the hash function. If the bucket already contains an element, linearly traverse forward from the conflict position (usually with a step size of $1$) until an empty bucket is found, then insert the element. -- **Searching for elements**: If a hash collision is found, use the same step size to linearly traverse forward until the corresponding element is found and return `value`; if an empty bucket is encountered, it means the target element is not in the hash table, so return `None`. +- **Inserting Elements**: Calculate the bucket index using the hash function. 
If the bucket already contains an element, linearly traverse forward from the conflict position (usually with a step size of $1$) until an empty bucket is found, then insert the element. +- **Searching for Elements**: If a hash collision is encountered, use the same step size to linearly traverse forward until the corresponding element is found and return `value`; if an empty bucket is encountered, it means the target element is not in the hash table, so return `None`. -Figure 6-6 shows the distribution of key-value pairs in an open addressing (linear probing) hash table. According to this hash function, keys with the same last two digits will be mapped to the same bucket. Through linear probing, they are stored consecutively in that bucket and the buckets below it. +Figure 6-6 shows the distribution of key-value pairs in an open addressing (linear probing) hash table. According to this hash function, keys with the same last two digits will be mapped to the same bucket. Through linear probing, they are stored sequentially in that bucket and the buckets below it. ![Distribution of key-value pairs in open addressing (linear probing) hash table](hash_collision.assets/hash_table_linear_probing.png){ class="animation-figure" }

Figure 6-6   Distribution of key-value pairs in open addressing (linear probing) hash table

-However, **linear probing tends to create "clustering"**. Specifically, the longer a continuous position in the array is occupied, the more likely these positions are to encounter hash collisions, further promoting the growth of these clusters and eventually leading to deterioration in the efficiency of operations. +However, **linear probing is prone to creating "clustering"**. Specifically, the longer the continuously occupied positions in the array, the greater the probability of hash collisions occurring in these continuous positions, further promoting the growth of clustering at that position, forming a vicious cycle, and ultimately leading to degraded efficiency of insertion, deletion, query, and update operations. It's important to note that **we cannot directly delete elements in an open addressing hash table**. Deleting an element creates an empty bucket `None` in the array. When searching for elements, if linear probing encounters this empty bucket, it will return, making the elements below this bucket inaccessible. The program may incorrectly assume these elements do not exist, as shown in Figure 6-7. @@ -1561,13 +1561,13 @@ It's important to note that **we cannot directly delete elements in an open addr

Figure 6-7   Query issues caused by deletion in open addressing

-To solve this problem, we can use a lazy deletion mechanism: instead of directly removing elements from the hash table, **use a constant `TOMBSTONE` to mark the bucket**. In this mechanism, both `None` and `TOMBSTONE` represent empty buckets and can hold key-value pairs. However, when linear probing encounters `TOMBSTONE`, it should continue traversing since there may still be key-value pairs below it. +To solve this problem, we can adopt the lazy deletion mechanism: instead of directly removing elements from the hash table, **use a constant `TOMBSTONE` to mark the bucket**. In this mechanism, both `None` and `TOMBSTONE` represent empty buckets and can hold key-value pairs. However, when linear probing encounters `TOMBSTONE`, it should continue traversing since there may still be key-value pairs below it. -However, **lazy deletion may accelerate the degradation of hash table performance**. Every deletion operation produces a delete mark, and as `TOMBSTONE` increases, so does the search time, as linear probing may have to skip multiple `TOMBSTONE` to find the target element. +However, **lazy deletion may accelerate the performance degradation of the hash table**. Every deletion operation produces a delete mark, and as `TOMBSTONE` increases, the search time will also increase because linear probing may need to skip multiple `TOMBSTONE` to find the target element. -Therefore, consider recording the index of the first `TOMBSTONE` encountered during linear probing and swapping the target element found with this `TOMBSTONE`. The advantage of this is that each time a query or addition is performed, the element is moved to a bucket closer to the ideal position (starting point of probing), thereby optimizing the query efficiency. +To address this, consider recording the index of the first encountered `TOMBSTONE` during linear probing and swapping the positions of the searched target element with that `TOMBSTONE`. 
The benefit of doing this is that each time an element is queried or added, the element will be moved to a bucket closer to its ideal position (the starting point of probing), thereby optimizing query efficiency. -The code below implements an open addressing (linear probing) hash table with lazy deletion. To make fuller use of the hash table space, we treat the hash table as a "circular array," continuing to traverse from the beginning when the end of the array is passed. +The code below implements an open addressing (linear probing) hash table with lazy deletion. To make better use of the hash table space, we treat the hash table as a "circular array". When going beyond the end of the array, we return to the beginning and continue traversing. === "Python" @@ -3300,35 +3300,35 @@ The code below implements an open addressing (linear probing) hash table with la ### 2.   Quadratic probing -Quadratic probing is similar to linear probing and is one of the common strategies of open addressing. When a collision occurs, quadratic probing does not simply skip a fixed number of steps but skips "the square of the number of probes," i.e., $1, 4, 9, \dots$ steps. +Quadratic probing is similar to linear probing and is one of the common strategies of open addressing. When a collision occurs, quadratic probing does not simply skip a fixed number of steps but skips a number of steps equal to the "square of the number of probes", i.e., $1, 4, 9, \dots$ steps. Quadratic probing has the following advantages: - Quadratic probing attempts to alleviate the clustering effect of linear probing by skipping the distance of the square of the number of probes. -- Quadratic probing skips larger distances to find empty positions, helping to distribute data more evenly. +- Quadratic probing skips larger distances to find empty positions, which helps to distribute data more evenly. 
However, quadratic probing is not perfect: - Clustering still exists, i.e., some positions are more likely to be occupied than others. -- Due to the growth of squares, quadratic probing may not probe the entire hash table, meaning it might not access empty buckets even if they exist in the hash table. +- Due to the growth of squares, quadratic probing may not probe the entire hash table, meaning that even if there are empty buckets in the hash table, quadratic probing may not be able to access them. ### 3.   Double hashing As the name suggests, the double hashing method uses multiple hash functions $f_1(x)$, $f_2(x)$, $f_3(x)$, $\dots$ for probing. -- **Inserting elements**: If hash function $f_1(x)$ encounters a conflict, try $f_2(x)$, and so on, until an empty position is found and the element is inserted. -- **Searching for elements**: Search in the same order of hash functions until the target element is found and returned; if an empty position is encountered or all hash functions have been tried, it indicates the element is not in the hash table, then return `None`. +- **Inserting Elements**: If hash function $f_1(x)$ encounters a conflict, it tries $f_2(x)$, and so on, until an empty position is found and the element is inserted. +- **Searching for Elements**: Search in the same order of hash functions until the target element is found and returned; if an empty position is encountered or all hash functions have been tried, it indicates the element is not in the hash table, then return `None`. -Compared to linear probing, double hashing is less prone to clustering but involves additional computation for multiple hash functions. +Compared to linear probing, the double hashing method is less prone to clustering, but multiple hash functions introduce additional computational overhead. !!! tip - Please note that open addressing (linear probing, quadratic probing, and double hashing) hash tables all have the issue of "not being able to directly delete elements." 
+ Please note that open addressing (linear probing, quadratic probing, and double hashing) hash tables all have the problem that "elements cannot be deleted directly." ## 6.2.3   Choice of programming languages -Various programming languages have adopted different hash table implementation strategies, here are a few examples: +Different programming languages adopt different hash table implementation strategies. Here are a few examples: - Python uses open addressing. The `dict` dictionary uses pseudo-random numbers for probing. - Java uses separate chaining. Since JDK 1.8, when the array length in `HashMap` reaches 64 and the length of a linked list reaches 8, the linked list is converted to a red-black tree to improve search performance. -- Go uses separate chaining. Go stipulates that each bucket can store up to 8 key-value pairs, and if the capacity is exceeded, an overflow bucket is connected; when there are too many overflow buckets, a special equal-size expansion operation is performed to ensure performance. +- Go uses separate chaining. Go stipulates that each bucket can store up to 8 key-value pairs, and if the capacity is exceeded, an overflow bucket is linked; when there are too many overflow buckets, a special equal-capacity resizing operation is performed to ensure performance. 
diff --git a/en/docs/chapter_sorting/bubble_sort.md b/en/docs/chapter_sorting/bubble_sort.md index ec12bd7c3..3854212e0 100644 --- a/en/docs/chapter_sorting/bubble_sort.md +++ b/en/docs/chapter_sorting/bubble_sort.md @@ -276,7 +276,20 @@ Example code is as follows: === "Ruby" ```ruby title="bubble_sort.rb" - [class]{}-[func]{bubble_sort} + ### 冒泡排序 ### + def bubble_sort(nums) + n = nums.length + # 外循环:未排序区间为 [0, i] + for i in (n - 1).downto(1) + # 内循环:将未排序区间 [0, i] 中的最大元素交换至该区间的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交换 nums[j] 与 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + end + end + end + end ``` === "Zig" @@ -587,7 +600,25 @@ Even after optimization, the worst-case time complexity and average time complex === "Ruby" ```ruby title="bubble_sort.rb" - [class]{}-[func]{bubble_sort_with_flag} + ### 冒泡排序(标志优化)### + def bubble_sort_with_flag(nums) + n = nums.length + # 外循环:未排序区间为 [0, i] + for i in (n - 1).downto(1) + flag = false # 初始化标志位 + + # 内循环:将未排序区间 [0, i] 中的最大元素交换至该区间的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交换 nums[j] 与 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + flag = true # 记录交换元素 + end + end + + break unless flag # 此轮“冒泡”未交换任何元素,直接跳出 + end + end ``` === "Zig" diff --git a/en/docs/chapter_sorting/counting_sort.md b/en/docs/chapter_sorting/counting_sort.md index 1a2c371e4..6afd7e756 100644 --- a/en/docs/chapter_sorting/counting_sort.md +++ b/en/docs/chapter_sorting/counting_sort.md @@ -348,7 +348,25 @@ The code is shown below: === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort_naive} + ### 计数排序 ### + def counting_sort_naive(nums) + # 简单实现,无法用于排序对象 + # 1. 统计数组最大元素 m + m = 0 + nums.each { |num| m = [m, num].max } + # 2. 统计各数字的出现次数 + # counter[num] 代表 num 的出现次数 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 
遍历 counter ,将各元素填入原数组 nums + i = 0 + for num in 0...(m + 1) + (0...counter[num]).each do + nums[i] = num + i += 1 + end + end + end ``` === "Zig" @@ -854,7 +872,30 @@ The implementation code of counting sort is shown below: === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort} + ### 计数排序 ### + def counting_sort(nums) + # 完整实现,可排序对象,并且是稳定排序 + # 1. 统计数组最大元素 m + m = nums.max + # 2. 统计各数字的出现次数 + # counter[num] 代表 num 的出现次数 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 求 counter 的前缀和,将“出现次数”转换为“尾索引” + # 即 counter[num]-1 是 num 在 res 中最后一次出现的索引 + (0...m).each { |i| counter[i + 1] += counter[i] } + # 4. 倒序遍历 nums, 将各元素填入结果数组 res + # 初始化数组 res 用于记录结果 + n = nums.length + res = Array.new(n, 0) + (n - 1).downto(0).each do |i| + num = nums[i] + res[counter[num] - 1] = num # 将 num 放置到对应索引处 + counter[num] -= 1 # 令前缀和自减 1 ,得到下次放置 num 的索引 + end + # 使用结果数组 res 覆盖原数组 nums + (0...n).each { |i| nums[i] = res[i] } + end ``` === "Zig" diff --git a/en/docs/chapter_sorting/quick_sort.md b/en/docs/chapter_sorting/quick_sort.md index 4171fbc5d..becc87232 100644 --- a/en/docs/chapter_sorting/quick_sort.md +++ b/en/docs/chapter_sorting/quick_sort.md @@ -353,7 +353,24 @@ After the pivot partitioning, the original array is divided into three parts: le === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{partition} + ### 哨兵划分 ### + def partition(nums, left, right) + # 以 nums[left] 为基准数 + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 从右向左找首个小于基准数的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 从左向右找首个大于基准数的元素 + end + # 元素交换 + nums[i], nums[j] = nums[j], nums[i] + end + # 将基准数交换至两子数组的分界线 + nums[i], nums[left] = nums[left], nums[i] + i # 返回基准数的索引 + end ``` === "Zig" @@ -594,7 +611,18 @@ The overall process of quick sort is shown in Figure 11-9. 
=== "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{quick_sort} + ### 快速排序类 ### + def quick_sort(nums, left, right) + # 子数组长度不为 1 时递归 + if left < right + # 哨兵划分 + pivot = partition(nums, left, right) + # 递归左子数组、右子数组 + quick_sort(nums, left, pivot - 1) + quick_sort(nums, pivot + 1, right) + end + nums + end ``` === "Zig" @@ -1067,9 +1095,38 @@ Sample code is as follows: === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortMedian}-[func]{median_three} + ### 选取三个候选元素的中位数 ### + def median_three(nums, left, mid, right) + # 选取三个候选元素的中位数 + _l, _m, _r = nums[left], nums[mid], nums[right] + # m 在 l 和 r 之间 + return mid if (_l <= _m && _m <= _r) || (_r <= _m && _m <= _l) + # l 在 m 和 r 之间 + return left if (_m <= _l && _l <= _r) || (_r <= _l && _l <= _m) + return right + end - [class]{QuickSortMedian}-[func]{partition} + ### 哨兵划分(三数取中值)### + def partition(nums, left, right) + ### 以 nums[left] 为基准数 + med = median_three(nums, left, (left + right) / 2, right) + # 将中位数交换至数组最左断 + nums[left], nums[med] = nums[med], nums[left] + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 从右向左找首个小于基准数的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 从左向右找首个大于基准数的元素 + end + # 元素交换 + nums[i], nums[j] = nums[j], nums[i] + end + # 将基准数交换至两子数组的分界线 + nums[i], nums[left] = nums[left], nums[i] + i # 返回基准数的索引 + end ``` === "Zig" @@ -1377,7 +1434,22 @@ To prevent the accumulation of stack frame space, we can compare the lengths of === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortTailCall}-[func]{quick_sort} + ### 快速排序(尾递归优化)### + def quick_sort(nums, left, right) + # 子数组长度不为 1 时递归 + while left < right + # 哨兵划分 + pivot = partition(nums, left, right) + # 对两个子数组中较短的那个执行快速排序 + if pivot - left < right - pivot + quick_sort(nums, left, pivot - 1) + left = pivot + 1 # 剩余未排序区间为 [pivot + 1, right] + else + quick_sort(nums, pivot + 1, right) + right = pivot - 1 # 剩余未排序区间为 [left, pivot - 1] + end + end + end ``` === "Zig" diff --git 
a/en/docs/chapter_sorting/radix_sort.md b/en/docs/chapter_sorting/radix_sort.md index ab6702ca8..35b9dfd6e 100644 --- a/en/docs/chapter_sorting/radix_sort.md +++ b/en/docs/chapter_sorting/radix_sort.md @@ -677,11 +677,51 @@ Additionally, we need to slightly modify the counting sort code to allow sorting === "Ruby" ```ruby title="radix_sort.rb" - [class]{}-[func]{digit} + ### 获取元素 num 的第 k 位,其中 exp = 10^(k-1) ### + def digit(num, exp) + # 转入 exp 而非 k 可以避免在此重复执行昂贵的次方计算 + (num / exp) % 10 + end - [class]{}-[func]{counting_sort_digit} + ### 计数排序(根据 nums 第 k 位排序)### + def counting_sort_digit(nums, exp) + # 十进制的位范围为 0~9 ,因此需要长度为 10 的桶数组 + counter = Array.new(10, 0) + n = nums.length + # 统计 0~9 各数字的出现次数 + for i in 0...n + d = digit(nums[i], exp) # 获取 nums[i] 第 k 位,记为 d + counter[d] += 1 # 统计数字 d 的出现次数 + end + # 求前缀和,将“出现个数”转换为“数组索引” + (1...10).each { |i| counter[i] += counter[i - 1] } + # 倒序遍历,根据桶内统计结果,将各元素填入 res + res = Array.new(n, 0) + for i in (n - 1).downto(0) + d = digit(nums[i], exp) + j = counter[d] - 1 # 获取 d 在数组中的索引 j + res[j] = nums[i] # 将当前元素填入索引 j + counter[d] -= 1 # 将 d 的数量减 1 + end + # 使用结果覆盖原数组 nums + (0...n).each { |i| nums[i] = res[i] } + end - [class]{}-[func]{radix_sort} + ### 基数排序 ### + def radix_sort(nums) + # 获取数组的最大元素,用于判断最大位数 + m = nums.max + # 按照从低位到高位的顺序遍历 + exp = 1 + while exp <= m + # 对数组元素的第 k 位执行计数排序 + # k = 1 -> exp = 1 + # k = 2 -> exp = 10 + # 即 exp = 10^(k-1) + counting_sort_digit(nums, exp) + exp *= 10 + end + end ``` === "Zig" diff --git a/en/docs/chapter_sorting/selection_sort.md b/en/docs/chapter_sorting/selection_sort.md index 22e17363e..3af22ab6b 100644 --- a/en/docs/chapter_sorting/selection_sort.md +++ b/en/docs/chapter_sorting/selection_sort.md @@ -306,7 +306,22 @@ In the code, we use $k$ to record the smallest element within the unsorted inter === "Ruby" ```ruby title="selection_sort.rb" - [class]{}-[func]{selection_sort} + ### 选择排序 ### + def selection_sort(nums) + n = nums.length + # 外循环:未排序区间为 [i, n-1] + for i in 0...(n - 1) 
+ # 内循环:找到未排序区间内的最小元素 + k = i + for j in (i + 1)...n + if nums[j] < nums[k] + k = j # 记录最小元素的索引 + end + end + # 将该最小元素与未排序区间的首个元素交换 + nums[i], nums[k] = nums[k], nums[i] + end + end ``` === "Zig" diff --git a/en/docs/chapter_tree/binary_tree.md b/en/docs/chapter_tree/binary_tree.md index 432be5c8a..4e6083349 100644 --- a/en/docs/chapter_tree/binary_tree.md +++ b/en/docs/chapter_tree/binary_tree.md @@ -4,7 +4,7 @@ comments: true # 7.1   Binary tree -A binary tree is a non-linear data structure that represents the ancestral and descendent relationships, embodying the "divide and conquer" logic. Similar to a linked list, the basic unit of a binary tree is a node, each containing a value, a reference to the left child node, and a reference to the right child node. +A binary tree is a non-linear data structure that represents the hierarchical relationship between ancestors and descendants, embodying the divide-and-conquer logic of "splitting into two". Similar to a linked list, the basic unit of a binary tree is a node, each containing a value, a reference to the left child node, and a reference to the right child node. === "Python" @@ -218,7 +218,7 @@ The commonly used terminology of binary trees is shown in Figure 7-2. - Leaf node: A node with no children, both of its pointers point to `None`. - Edge: The line segment connecting two nodes, i.e., node reference (pointer). - The level of a node: Incrementing from top to bottom, with the root node's level being 1. -- The degree of a node: The number of a node's children. In a binary tree, the degree can be 0, 1, or 2. +- The degree of a node: The number of children a node has. In a binary tree, the degree can be 0, 1, or 2. - The height of a binary tree: The number of edges passed from the root node to the farthest leaf node. - The depth of a node: The number of edges passed from the root node to the node. - The height of a node: The number of edges from the farthest leaf node to the node. 
@@ -229,13 +229,13 @@ The commonly used terminology of binary trees is shown in Figure 7-2. !!! tip - Please note that we usually define "height" and "depth" as "the number of edges passed," but some problems or textbooks may define them as "the number of nodes passed." In this case, both height and depth need to be incremented by 1. + Please note that we typically define "height" and "depth" as "the number of edges traversed", but some problems or textbooks may define them as "the number of nodes traversed". In such cases, both height and depth need to be incremented by 1. ## 7.1.2   Basic operations of binary trees ### 1.   Initializing a binary tree -Similar to a linked list, initialize nodes first, then construct references (pointers). +Similar to a linked list, begin by initializing nodes, then construct references (pointers). === "Python" @@ -619,13 +619,13 @@ Similar to a linked list, inserting and removing nodes in a binary tree can be a !!! tip - It's important to note that inserting nodes may change the original logical structure of the binary tree, while removing nodes usually means removing the node and all its subtrees. Therefore, in a binary tree, insertion and removal are usually performed through a set of operations to achieve meaningful actions. + It's important to note that inserting nodes may change the original logical structure of the binary tree, while removing nodes typically involves removing the node and all its subtrees. Therefore, in a binary tree, insertion and removal are usually performed through a coordinated set of operations to achieve meaningful outcomes. ## 7.1.3   Common types of binary trees ### 1.   Perfect binary tree -As shown in Figure 7-4, in a perfect binary tree, all levels of nodes are fully filled. 
In a perfect binary tree, the degree of leaf nodes is $0$, and the degree of all other nodes is $2$; if the tree's height is $h$, then the total number of nodes is $2^{h+1} - 1$, showing a standard exponential relationship, reflecting the common phenomenon of cell division in nature. +As shown in Figure 7-4, in a perfect binary tree, all levels of nodes are fully filled. In a perfect binary tree, the degree of leaf nodes is $0$, while the degree of all other nodes is $2$; if the tree's height is $h$, then the total number of nodes is $2^{h+1} - 1$, showing a standard exponential relationship, reflecting the common phenomenon of cell division in nature. !!! tip @@ -661,7 +661,7 @@ As shown in Figure 7-7, in a balanced binary tree, the absolute differenc ## 7.1.4   Degeneration of binary trees -Figure 7-8 shows the ideal and degenerate structures of binary trees. When every level of a binary tree is filled, it reaches the "perfect binary tree"; when all nodes are biased towards one side, the binary tree degenerates into a "linked list". +Figure 7-8 shows the ideal and degenerate structures of binary trees. A binary tree becomes a "perfect binary tree" when every level is filled, whereas it degenerates into a "linked list" when all nodes are biased toward one side. - The perfect binary tree is the ideal situation, fully leveraging the "divide and conquer" advantage of binary trees. - A linked list is another extreme, where operations become linear, degrading the time complexity to $O(n)$. 
diff --git a/zh-Hant/docs/chapter_backtracking/n_queens_problem.md b/zh-Hant/docs/chapter_backtracking/n_queens_problem.md index 52112597e..566f4e3ad 100644 --- a/zh-Hant/docs/chapter_backtracking/n_queens_problem.md +++ b/zh-Hant/docs/chapter_backtracking/n_queens_problem.md @@ -26,7 +26,7 @@ comments: true 也就是說,我們可以採取逐行放置策略:從第一行開始,在每行放置一個皇后,直至最後一行結束。 -圖 13-17 所示為 $4$ 皇后問題的逐行放置過程。受畫幅限制,圖 13-17 僅展開了第一行的其中一個搜尋分支,並且將不滿足列約束和對角線約束的方案都進行了剪枝。 +圖 13-17 所示為 4 皇后問題的逐行放置過程。受畫幅限制,圖 13-17 僅展開了第一行的其中一個搜尋分支,並且將不滿足列約束和對角線約束的方案都進行了剪枝。 ![逐行放置策略](n_queens_problem.assets/n_queens_placing.png){ class="animation-figure" } @@ -269,6 +269,7 @@ comments: true } *res = append(*res, newState) + return } // 走訪所有列 for col := 0; col < n; col++ { diff --git a/zh-Hant/docs/chapter_backtracking/permutations_problem.md b/zh-Hant/docs/chapter_backtracking/permutations_problem.md index 612f110f7..ddb3a6459 100644 --- a/zh-Hant/docs/chapter_backtracking/permutations_problem.md +++ b/zh-Hant/docs/chapter_backtracking/permutations_problem.md @@ -724,7 +724,7 @@ comments: true (*selected)[i] = true *state = append(*state, choice) // 進行下一輪選擇 - backtrackI(state, choices, selected, res) + backtrackII(state, choices, selected, res) // 回退:撤銷選擇,恢復到之前的狀態 (*selected)[i] = false *state = (*state)[:len(*state)-1] diff --git a/zh-Hant/docs/chapter_sorting/bubble_sort.md b/zh-Hant/docs/chapter_sorting/bubble_sort.md index 64f9ff562..0e42119ff 100755 --- a/zh-Hant/docs/chapter_sorting/bubble_sort.md +++ b/zh-Hant/docs/chapter_sorting/bubble_sort.md @@ -276,7 +276,20 @@ comments: true === "Ruby" ```ruby title="bubble_sort.rb" - [class]{}-[func]{bubble_sort} + ### 泡沫排序 ### + def bubble_sort(nums) + n = nums.length + # 外迴圈:未排序區間為 [0, i] + for i in (n - 1).downto(1) + # 內迴圈:將未排序區間 [0, i] 中的最大元素交換至該區間的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交換 nums[j] 與 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + end + end + end + end ``` === "Zig" @@ -587,7 +600,25 @@ comments: true === "Ruby" ```ruby 
title="bubble_sort.rb" - [class]{}-[func]{bubble_sort_with_flag} + ### 泡沫排序(標誌最佳化)### + def bubble_sort_with_flag(nums) + n = nums.length + # 外迴圈:未排序區間為 [0, i] + for i in (n - 1).downto(1) + flag = false # 初始化標誌位 + + # 內迴圈:將未排序區間 [0, i] 中的最大元素交換至該區間的最右端 + for j in 0...i + if nums[j] > nums[j + 1] + # 交換 nums[j] 與 nums[j + 1] + nums[j], nums[j + 1] = nums[j + 1], nums[j] + flag = true # 記錄交換元素 + end + end + + break unless flag # 此輪“冒泡”未交換任何元素,直接跳出 + end + end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/bucket_sort.md b/zh-Hant/docs/chapter_sorting/bucket_sort.md index 7bea39b2a..38c0a7cfa 100644 --- a/zh-Hant/docs/chapter_sorting/bucket_sort.md +++ b/zh-Hant/docs/chapter_sorting/bucket_sort.md @@ -406,7 +406,35 @@ comments: true === "Ruby" ```ruby title="bucket_sort.rb" - [class]{}-[func]{bucket_sort} + ### 桶排序 ### + def bucket_sort(nums) + # 初始化 k = n/2 個桶,預期向每個桶分配 2 個元素 + k = nums.length / 2 + buckets = Array.new(k) { [] } + + # 1. 將陣列元素分配到各個桶中 + nums.each do |num| + # 輸入資料範圍為 [0, 1),使用 num * k 對映到索引範圍 [0, k-1] + i = (num * k).to_i + # 將 num 新增進桶 i + buckets[i] << num + end + + # 2. 對各個桶執行排序 + buckets.each do |bucket| + # 使用內建排序函式,也可以替換成其他排序演算法 + bucket.sort! + end + + # 3. 走訪桶合併結果 + i = 0 + buckets.each do |bucket| + bucket.each do |num| + nums[i] = num + i += 1 + end + end + end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/counting_sort.md b/zh-Hant/docs/chapter_sorting/counting_sort.md index f70d1e0d0..c7a7d7884 100644 --- a/zh-Hant/docs/chapter_sorting/counting_sort.md +++ b/zh-Hant/docs/chapter_sorting/counting_sort.md @@ -348,7 +348,25 @@ comments: true === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort_naive} + ### 計數排序 ### + def counting_sort_naive(nums) + # 簡單實現,無法用於排序物件 + # 1. 統計陣列最大元素 m + m = 0 + nums.each { |num| m = [m, num].max } + # 2. 統計各數字的出現次數 + # counter[num] 代表 num 的出現次數 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 
走訪 counter ,將各元素填入原陣列 nums + i = 0 + for num in 0...(m + 1) + (0...counter[num]).each do + nums[i] = num + i += 1 + end + end + end ``` === "Zig" @@ -854,7 +872,30 @@ $$ === "Ruby" ```ruby title="counting_sort.rb" - [class]{}-[func]{counting_sort} + ### 計數排序 ### + def counting_sort(nums) + # 完整實現,可排序物件,並且是穩定排序 + # 1. 統計陣列最大元素 m + m = nums.max + # 2. 統計各數字的出現次數 + # counter[num] 代表 num 的出現次數 + counter = Array.new(m + 1, 0) + nums.each { |num| counter[num] += 1 } + # 3. 求 counter 的前綴和,將“出現次數”轉換為“尾索引” + # 即 counter[num]-1 是 num 在 res 中最後一次出現的索引 + (0...m).each { |i| counter[i + 1] += counter[i] } + # 4. 倒序走訪 nums, 將各元素填入結果陣列 res + # 初始化陣列 res 用於記錄結果 + n = nums.length + res = Array.new(n, 0) + (n - 1).downto(0).each do |i| + num = nums[i] + res[counter[num] - 1] = num # 將 num 放置到對應索引處 + counter[num] -= 1 # 令前綴和自減 1 ,得到下次放置 num 的索引 + end + # 使用結果陣列 res 覆蓋原陣列 nums + (0...n).each { |i| nums[i] = res[i] } + end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/heap_sort.md b/zh-Hant/docs/chapter_sorting/heap_sort.md index fb1e953ab..5656885e0 100644 --- a/zh-Hant/docs/chapter_sorting/heap_sort.md +++ b/zh-Hant/docs/chapter_sorting/heap_sort.md @@ -582,9 +582,38 @@ comments: true === "Ruby" ```ruby title="heap_sort.rb" - [class]{}-[func]{sift_down} + ### 堆積的長度為 n ,從節點 i 開始,從頂至底堆積化 ### + def sift_down(nums, n, i) + while true + # 判斷節點 i, l, r 中值最大的節點,記為 ma + l = 2 * i + 1 + r = 2 * i + 2 + ma = i + ma = l if l < n && nums[l] > nums[ma] + ma = r if r < n && nums[r] > nums[ma] + # 若節點 i 最大或索引 l, r 越界,則無須繼續堆積化,跳出 + break if ma == i + # 交換兩節點 + nums[i], nums[ma] = nums[ma], nums[i] + # 迴圈向下堆積化 + i = ma + end + end - [class]{}-[func]{heap_sort} + ### 堆積排序 ### + def heap_sort(nums) + # 建堆積操作:堆積化除葉節點以外的其他所有節點 + (nums.length / 2 - 1).downto(0) do |i| + sift_down(nums, nums.length, i) + end + # 從堆積中提取最大元素,迴圈 n-1 輪 + (nums.length - 1).downto(1) do |i| + # 交換根節點與最右葉節點(交換首元素與尾元素) + nums[0], nums[i] = nums[i], nums[0] + # 以根節點為起點,從頂至底進行堆積化 + sift_down(nums, i, 0) + end + end ``` === 
"Zig" diff --git a/zh-Hant/docs/chapter_sorting/merge_sort.md b/zh-Hant/docs/chapter_sorting/merge_sort.md index 1f1e4d21a..4ee6c2381 100755 --- a/zh-Hant/docs/chapter_sorting/merge_sort.md +++ b/zh-Hant/docs/chapter_sorting/merge_sort.md @@ -630,9 +630,53 @@ comments: true === "Ruby" ```ruby title="merge_sort.rb" - [class]{}-[func]{merge} + ### 合併左子陣列和右子陣列 ### + def merge(nums, left, mid, right) + # 左子陣列區間為 [left, mid], 右子陣列區間為 [mid+1, right] + # 建立一個臨時陣列 tmp,用於存放合併後的結果 + tmp = Array.new(right - left + 1, 0) + # 初始化左子陣列和右子陣列的起始索引 + i, j, k = left, mid + 1, 0 + # 當左右子陣列都還有元素時,進行比較並將較小的元素複製到臨時陣列中 + while i <= mid && j <= right + if nums[i] <= nums[j] + tmp[k] = nums[i] + i += 1 + else + tmp[k] = nums[j] + j += 1 + end + k += 1 + end + # 將左子陣列和右子陣列的剩餘元素複製到臨時陣列中 + while i <= mid + tmp[k] = nums[i] + i += 1 + k += 1 + end + while j <= right + tmp[k] = nums[j] + j += 1 + k += 1 + end + # 將臨時陣列 tmp 中的元素複製回原陣列 nums 的對應區間 + (0...tmp.length).each do |k| + nums[left + k] = tmp[k] + end + end - [class]{}-[func]{merge_sort} + ### 合併排序 ### + def merge_sort(nums, left, right) + # 終止條件 + # 當子陣列長度為 1 時終止遞迴 + return if left >= right + # 劃分階段 + mid = (left + right) / 2 # 計算中點 + merge_sort(nums, left, mid) # 遞迴左子陣列 + merge_sort(nums, mid + 1, right) # 遞迴右子陣列 + # 合併階段 + merge(nums, left, mid, right) + end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/quick_sort.md b/zh-Hant/docs/chapter_sorting/quick_sort.md index 212ba8e7a..d6f964735 100755 --- a/zh-Hant/docs/chapter_sorting/quick_sort.md +++ b/zh-Hant/docs/chapter_sorting/quick_sort.md @@ -353,7 +353,24 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{partition} + ### 哨兵劃分 ### + def partition(nums, left, right) + # 以 nums[left] 為基準數 + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 從右向左找首個小於基準數的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 從左向右找首個大於基準數的元素 + end + # 元素交換 + nums[i], nums[j] = nums[j], nums[i] + end + # 將基準數交換至兩子陣列的分界線 + 
nums[i], nums[left] = nums[left], nums[i] + i # 返回基準數的索引 + end ``` === "Zig" @@ -594,7 +611,18 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSort}-[func]{quick_sort} + ### 快速排序類別 ### + def quick_sort(nums, left, right) + # 子陣列長度不為 1 時遞迴 + if left < right + # 哨兵劃分 + pivot = partition(nums, left, right) + # 遞迴左子陣列、右子陣列 + quick_sort(nums, left, pivot - 1) + quick_sort(nums, pivot + 1, right) + end + nums + end ``` === "Zig" @@ -1067,9 +1095,38 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortMedian}-[func]{median_three} + ### 選取三個候選元素的中位數 ### + def median_three(nums, left, mid, right) + # 選取三個候選元素的中位數 + _l, _m, _r = nums[left], nums[mid], nums[right] + # m 在 l 和 r 之間 + return mid if (_l <= _m && _m <= _r) || (_r <= _m && _m <= _l) + # l 在 m 和 r 之間 + return left if (_m <= _l && _l <= _r) || (_r <= _l && _l <= _m) + return right + end - [class]{QuickSortMedian}-[func]{partition} + ### 哨兵劃分(三數取中值)### + def partition(nums, left, right) + ### 以 nums[left] 為基準數 + med = median_three(nums, left, (left + right) / 2, right) + # 將中位數交換至陣列最左斷 + nums[left], nums[med] = nums[med], nums[left] + i, j = left, right + while i < j + while i < j && nums[j] >= nums[left] + j -= 1 # 從右向左找首個小於基準數的元素 + end + while i < j && nums[i] <= nums[left] + i += 1 # 從左向右找首個大於基準數的元素 + end + # 元素交換 + nums[i], nums[j] = nums[j], nums[i] + end + # 將基準數交換至兩子陣列的分界線 + nums[i], nums[left] = nums[left], nums[i] + i # 返回基準數的索引 + end ``` === "Zig" @@ -1377,7 +1434,22 @@ comments: true === "Ruby" ```ruby title="quick_sort.rb" - [class]{QuickSortTailCall}-[func]{quick_sort} + ### 快速排序(尾遞迴最佳化)### + def quick_sort(nums, left, right) + # 子陣列長度不為 1 時遞迴 + while left < right + # 哨兵劃分 + pivot = partition(nums, left, right) + # 對兩個子陣列中較短的那個執行快速排序 + if pivot - left < right - pivot + quick_sort(nums, left, pivot - 1) + left = pivot + 1 # 剩餘未排序區間為 [pivot + 1, right] + else + quick_sort(nums, pivot + 1, right) + right = pivot - 1 # 剩餘未排序區間為 [left, pivot - 1] + end + end 
+ end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/radix_sort.md b/zh-Hant/docs/chapter_sorting/radix_sort.md index 8ed828e50..9698590c3 100644 --- a/zh-Hant/docs/chapter_sorting/radix_sort.md +++ b/zh-Hant/docs/chapter_sorting/radix_sort.md @@ -677,11 +677,51 @@ $$ === "Ruby" ```ruby title="radix_sort.rb" - [class]{}-[func]{digit} + ### 獲取元素 num 的第 k 位,其中 exp = 10^(k-1) ### + def digit(num, exp) + # 轉入 exp 而非 k 可以避免在此重複執行昂貴的次方計算 + (num / exp) % 10 + end - [class]{}-[func]{counting_sort_digit} + ### 計數排序(根據 nums 第 k 位排序)### + def counting_sort_digit(nums, exp) + # 十進位制的位範圍為 0~9 ,因此需要長度為 10 的桶陣列 + counter = Array.new(10, 0) + n = nums.length + # 統計 0~9 各數字的出現次數 + for i in 0...n + d = digit(nums[i], exp) # 獲取 nums[i] 第 k 位,記為 d + counter[d] += 1 # 統計數字 d 的出現次數 + end + # 求前綴和,將“出現個數”轉換為“陣列索引” + (1...10).each { |i| counter[i] += counter[i - 1] } + # 倒序走訪,根據桶內統計結果,將各元素填入 res + res = Array.new(n, 0) + for i in (n - 1).downto(0) + d = digit(nums[i], exp) + j = counter[d] - 1 # 獲取 d 在陣列中的索引 j + res[j] = nums[i] # 將當前元素填入索引 j + counter[d] -= 1 # 將 d 的數量減 1 + end + # 使用結果覆蓋原陣列 nums + (0...n).each { |i| nums[i] = res[i] } + end - [class]{}-[func]{radix_sort} + ### 基數排序 ### + def radix_sort(nums) + # 獲取陣列的最大元素,用於判斷最大位數 + m = nums.max + # 按照從低位到高位的順序走訪 + exp = 1 + while exp <= m + # 對陣列元素的第 k 位執行計數排序 + # k = 1 -> exp = 1 + # k = 2 -> exp = 10 + # 即 exp = 10^(k-1) + counting_sort_digit(nums, exp) + exp *= 10 + end + end ``` === "Zig" diff --git a/zh-Hant/docs/chapter_sorting/selection_sort.md b/zh-Hant/docs/chapter_sorting/selection_sort.md index f614cfcc8..6dcc79db9 100644 --- a/zh-Hant/docs/chapter_sorting/selection_sort.md +++ b/zh-Hant/docs/chapter_sorting/selection_sort.md @@ -306,7 +306,22 @@ comments: true === "Ruby" ```ruby title="selection_sort.rb" - [class]{}-[func]{selection_sort} + ### 選擇排序 ### + def selection_sort(nums) + n = nums.length + # 外迴圈:未排序區間為 [i, n-1] + for i in 0...(n - 1) + # 內迴圈:找到未排序區間內的最小元素 + k = i + for j in (i + 1)...n + if nums[j] < 
nums[k] + k = j # 記錄最小元素的索引 + end + end + # 將該最小元素與未排序區間的首個元素交換 + nums[i], nums[k] = nums[k], nums[i] + end + end ``` === "Zig"