Implement search functionality

parent 9d75014e58
commit fd3bd8656e

6 changed files with 368 additions and 235 deletions

.gitignore (vendored, 4 changes)
@@ -161,6 +161,4 @@ cython_debug/
 .pdm-python
 /test_get_txt.py
 /test_tmp.py
-/深空彼岸/
-/深空彼岸.epub
-/斗破苍穹.epub
+test_search.py
@@ -1,14 +1,14 @@
 import io
+import json
 import os
 import random
+import re
 import shutil
 import threading
 import time
 
-import requests
 from bs4 import BeautifulSoup
-import re
+import requests
 
 from ebooklib import epub
 
-
@@ -37,63 +37,110 @@ def get_user_agent():
     return {'User-Agent': user_agent}
 
 
-# Request interval in seconds
-interval = 2
-
-# Set request headers to mimic a browser
-
-# Home page URL of the novel to crawl
-url = 'https://www.bqg221.com/xs/17931/'
-
-# Send the request and get the response
-response = requests.get(url, headers=get_user_agent())
-
-# Parse the response into a BeautifulSoup object
-soup = BeautifulSoup(response.text, 'html.parser')
-
-# Get the novel title
-title = soup.select_one('.book h1').get_text(strip=True)
-print(title)
-
-# Get the novel synopsis
-# print(soup.select('.small')[0])
-div_tag = soup.find('div', {'class': 'small'})
-# print(div_tag)
-all_span_tags = div_tag.find_all('span')
-# print(all_span_tags)
-author = all_span_tags[0].text.strip()[3:]
-status = all_span_tags[1].text.strip()
-update_time = all_span_tags[2].text.strip()
-latest_update = all_span_tags[3].text.strip()
-# for i in all_span_tags:
-#     print(i.text.strip())
-intro = soup.select_one('.intro').get_text(strip=True)[:-6]
-print(intro)
-
-cover = soup.select_one('.cover img')['src']
-# print(cover)
-# Collect all chapter links
-chapter_urls = [url + i.get('href').split('/')[-1] for i in soup.select('.listmain a') if
-                i.get('href').split('/')[-1] != 'javascript:dd_show()']
-# print(chapter_urls)
-
-print('开始下载。。。')
-# Pause for two seconds
-time.sleep(interval)
-
-
-# Download txt with multiple threads
-def download_url(chapter_url, file_name):
-    # Limit the number of download threads
-    with semaphore:
-        file_path = './' + title
-        file_name = file_path + '/' + file_name
-
-        if not os.path.exists(file_path):
-            os.makedirs(file_path)
-            print('文件夹不存在,创建文件夹')
-
-        if os.path.exists(file_name) and os.path.getsize(file_name) > 0:
-            print(file_name + ' 已存在,跳过...\n')
-            # success_account =+ 1
-        else:
+class Download_Novel:
+
+    def search_novel(self):
+        hm_url = f'https://user.bqgso.cc/hm.html?&q={self.name}'
+        result = requests.get(hm_url, headers=get_user_agent()).text
+        # print(result)
+        hm = result[2:-2]
+        # print(hm)
+        # Send the request and get the response
+        url = f'https://user.bqgso.cc/search.html?&q={self.name}&hm={hm}'
+
+        response = json.loads(requests.get(url, headers=get_user_agent()).text[1:-1])
+        # print(type(response))
+        for i, book in enumerate(response):
+            # i['url_list'][:9] = 'https://www'
+            trans_url = book['url_list'].replace('https://m', 'https://www')
+            response[i]['url_list'] = trans_url
+
+        # Return a JSON object
+        return response
+
+    def get_novel_info(self, response):
+
+        # Request interval in seconds
+        interval = 2
+
+        # Set request headers to mimic a browser
+
+        # Home page URL of the novel to crawl
+        url = response['url_list']
+
+        # Send the request and get the response
+        url_response = requests.get(url, headers=get_user_agent())
+
+        # Parse the response into a BeautifulSoup object
+        soup = BeautifulSoup(url_response.text, 'html.parser')
+
+        # Get the novel title
+        # title = soup.select_one('.book h1').get_text(strip=True)
+        self.title = response['articlename']
+        print(self.title)
+
+        # Get the novel synopsis
+        # print(soup.select('.small')[0])
+        div_tag = soup.find('div', {'class': 'small'})
+        # print(div_tag)
+        all_span_tags = div_tag.find_all('span')
+        # print(all_span_tags)
+        # author = all_span_tags[0].text.strip()[3:]
+        self.author = response['author']
+        self.status = all_span_tags[1].text.strip()
+        self.update_time = all_span_tags[2].text.strip()
+        self.latest_update = all_span_tags[3].text.strip()
+        # for i in all_span_tags:
+        #     print(i.text.strip())
+        self.intro = soup.select_one('.intro').get_text(strip=True)[:-6]
+        print(self.intro)
+
+        # cover = soup.select_one('.cover img')['src']
+        self.cover = response['url_img']
+        # print(cover)
+        # Collect all chapter links
+        self.chapter_urls = [url + i.get('href').split('/')[-1] for i in soup.select('.listmain a') if
+                             i.get('href').split('/')[-1] != 'javascript:dd_show()']
+        # print(chapter_urls)
+
+        print('开始下载。。。')
+
+        # Directory containing this script
+        dir_path = os.path.dirname(os.path.abspath(__file__))
+
+        self.download_path = dir_path + '/downloads/'
+        self.file_path = self.download_path + self.title + '/'
+        if not os.path.exists(self.file_path):
+            os.makedirs(self.file_path)
+
+        # Pause for two seconds
+        time.sleep(self.interval)
+
+    def get_multi_txt_file_status(self, file_name):
+
+        file_name = self.file_path + file_name
+
+        if os.path.exists(file_name) and os.path.getsize(file_name) > 0:
+            print(file_name + ' 已存在,跳过...\n')
+            return file_name, True
+        else:
+            return file_name, False
+
+    def download_url(self, chapter_url, file_name):
+        # Limit the number of download threads
+        with self.semaphore:
+            # Directory containing this script
+            # dir_path = os.path.dirname(os.path.abspath(__file__))
+            #
+            # file_path = dir_path + '/downloads/' + self.title
+            # file_name = file_path + '/' + file_name
+            #
+            # if not os.path.exists(file_path):
+            #     os.makedirs(file_path)
+            #     # print('文件夹不存在,创建文件夹')
+            file_name, status = self.get_multi_txt_file_status(file_name=file_name)
+
+            if status:
+                print(file_name + ' 已存在,跳过...\n')
+                # success_account =+ 1
+            else:
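The search added here is a two-step handshake: the script first fetches an hm token from user.bqgso.cc/hm.html, strips the wrapper characters around the body, then calls search.html with both the query and the token and trims one wrapper character from each end before parsing the JSON. A minimal standalone sketch of that flow, assuming the endpoints and response shapes shown in this diff:

import json

import requests

HEADERS = {'User-Agent': 'Mozilla/5.0'}

def search_books(keyword):
    # Step 1: fetch the hm token; the body arrives wrapped, so strip two characters from each end
    hm_url = f'https://user.bqgso.cc/hm.html?&q={keyword}'
    hm = requests.get(hm_url, headers=HEADERS).text[2:-2]

    # Step 2: search with the token; the JSON body is wrapped in one extra character on each side
    search_url = f'https://user.bqgso.cc/search.html?&q={keyword}&hm={hm}'
    books = json.loads(requests.get(search_url, headers=HEADERS).text[1:-1])

    # Rewrite mobile links to the desktop host, as search_novel does
    for book in books:
        book['url_list'] = book['url_list'].replace('https://m', 'https://www')
    return books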
@@ -112,7 +159,8 @@ def download_url(chapter_url, file_name):
                     # # Replace all <br> tags with newline \n
                     chapter_content = chapter_content.replace(' ', '\n ')
                     # chapter_content = chapter_content.replace('<br>', '\n')
-                    content = re.sub(r'(第\d+章|请收藏本站|『点此报错).*$', '', chapter_content, flags=re.MULTILINE)
+                    content = re.sub(r'(第\d+章|请收藏本站|『点此报错).*$', '', chapter_content,
+                                     flags=re.MULTILINE)
                     # print(content)
                     # Write the processed result to the txt file
                     f.write(chapter_title + '\n' + content + '\n\n')
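This hunk only rewraps the re.sub call; the behavior is unchanged. With re.MULTILINE, $ matches at the end of every line, so the pattern clears any line tail that starts with a chapter heading, the 请收藏本站 plug, or the 『点此报错 link. A small illustration (the sample text and URL are made up):

import re

text = '第12章 某章标题\n正文第一行\n请收藏本站:https://example.com\n'
cleaned = re.sub(r'(第\d+章|请收藏本站|『点此报错).*$', '', text, flags=re.MULTILINE)
print(repr(cleaned))  # '\n正文第一行\n\n' -- only the body line survives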
@@ -121,13 +169,103 @@ def download_url(chapter_url, file_name):
                 # return True
             except Exception as e:
                 print(e, '\n retry...')
-                time.sleep(interval)
+                time.sleep(self.interval)
                 retry -= 1
         # return False
 
+    def multi_thread_download(self):
+        self.threads = []
+        for file_name, chapter_url in enumerate(self.chapter_urls):
+            args = (chapter_url, str(file_name) + '.txt')
+            thread = threading.Thread(target=self.download_url, args=args)
+            self.threads.append(thread)
+            thread.start()
 
-# Convert txt to epub
-def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction=''):
+        for thread in self.threads:
+            thread.join()
+
+    def download_process(self):
+
+        # Limit to 4 concurrent threads; more threads tend to cause more errors
+        max_concurrent_threads = 4
+
+        # Create a Semaphore initialized to max_concurrent_threads
+        self.semaphore = threading.Semaphore(max_concurrent_threads)
+
+        self.multi_thread_download()
+
+        time.sleep(self.interval)
+
+        file = 0
+        # Check whether every chapter downloaded successfully
+        for i in range(0, len(self.chapter_urls)):
+            status = self.get_multi_txt_file_status(str(i) + '.txt')[1]
+            if not status:
+                file += 1
+                break
+        if not file:
+            convert_type = int(input('下载成功!\n请输入要合并的格式:\n0 TxT文件\n1 Epub文件\n'))
+            convert_status = True
+            if convert_type == 0:
+                print(self.file_path, self.download_path + self.title + '.txt')
+                convert_status = self.merge_txt_file(self.download_path + self.title + '.txt')
+
+            elif convert_type == 1:
+                txt_files = []
+                for n in range(0, len(self.chapter_urls)):
+                    txt_files.append(self.file_path + str(n) + '.txt')
+                # print('txt_files:',txt_files)
+                convert_status = self.merge_txt_to_epub(txt_files, self.download_path + self.title + '.epub')
+
+            if convert_status:
+                print('合并成功!')
+            else:
+                print('合并失败!请删除downloads下面目录后重新运行程序!')
+                exit(1)
+
+        else:
+            print('部分文件下载失败,限制线程数可以提高下载成功率,是否重新下载个别文件?')
+            download = input('0 退出\n1 重试\n')
+            if download == 0:
+                exit(0)
+            else:
+                self.download_process()
+
+    # Merge into a single txt file
+    def merge_txt_file(self, merged_file_name=''):
+        """
+
+        :param merged_file_name: path where the merged file is saved
+        :returns bool: merge success or failure status
+        """
+        # os.chdir(file_path)
+        if os.path.exists(merged_file_name):
+            os.remove(merged_file_name)
+        print('merge file : ', sorted(os.listdir(self.file_path), key=lambda x: int(x.split('.')[0])))
+        time.sleep(self.interval)
+
+        with open(merged_file_name, 'wb') as outfile:
+            try:
+                for filename in sorted(os.listdir(self.file_path), key=lambda x: int(x.split('.')[0])):
+                    print(filename)
+                    if filename.endswith('.txt'):
+                        # Check whether the file is empty
+                        if os.path.exists(self.file_path + '/' + filename) and os.path.getsize(
+                                self.file_path + '/' + filename) > 0:
+                            # print(filename + ' 已存在,跳过...\n')
+                            with open(self.file_path + '/' + filename, 'rb') as infile:
+                                shutil.copyfileobj(infile, outfile)
+
+                        else:
+                            return False
+            except Exception as e:
+                os.remove(merged_file_name)
+                print(e)
+                return False
+        return True
+
+    # Convert txt to epub
+    def merge_txt_to_epub(self, txt_files, epub_file):
     """
-    将txt转换为epub
+    Convert txt files to epub
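download_process starts one thread per chapter but bounds actual concurrency with threading.Semaphore(4): every download_url call enters the shared semaphore before doing network I/O, so at most four downloads run at once. The pattern in isolation (illustrative names, not the commit's code):

import threading
import time

semaphore = threading.Semaphore(4)  # at most four bodies execute concurrently

def worker(task_id):
    with semaphore:  # blocks while four other workers hold a slot
        time.sleep(0.1)  # stand-in for downloading one chapter
        print(f'task {task_id} done')

threads = [threading.Thread(target=worker, args=(i,)) for i in range(20)]
for t in threads:
    t.start()
for t in threads:
    t.join()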
@@ -135,14 +273,8 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
 
     epub_file (str): path and name of the converted epub file
 
-    author (str): author
-
-    cover (str): cover image URL
-
-    direction (str): book synopsis
-
     """
 
     # Create the EPUB book object
     book = epub.EpubBook()
 
@@ -150,23 +282,24 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
         os.remove(epub_file)
 
     # Set metadata (adjust as needed)
-    book.set_title(title)
+    book.set_title(self.title)
     book.set_language('zh')
-    book.add_author(author)
-    book.set_direction(direction)
+    book.add_author(self.author)
+    book.set_direction(self.intro)
     # Add the cover
     # Fetch the image and convert it to a byte stream
-    response = requests.get(cover)
+    response = requests.get(self.cover)
     stream = io.BytesIO(response.content)
 
     book.set_cover('cover.jpg', stream.getvalue(), 'image/jpeg')
 
     print('合并中。。。。。。')
+    # print(txt_files)
 
     # Book table of contents
     book_spine = []
     # Iterate over all txt files
-    os.chdir(title)
+    # os.chdir(title)
     for i, txt_file in enumerate(txt_files):
         # Read the txt file content
         with open(txt_file, 'r', encoding='utf-8') as file:
@@ -184,7 +317,7 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
             content[0] = f""" <div class="calibre2" id="calibre_pb_0"></div><h1 class="kindle-cn-heading" id="calibre_pb_1">
             {content[0]} </h1> """
             for j, line in enumerate(content[1:]):
-                content[j + 1] = '<p class="calibre3">' + line + '</p>'
+                content[j + 1] = '<p class="calibre3">' + line + '</p>\n'
             # content.append('</body></html>')
         except IndexError as e:
             print(e)
@@ -194,9 +327,9 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
         chapter = epub.EpubHtml(title=chapter_title, file_name='text/' + str(i) + '.xhtml')
         chapter.content = ''.join(content)  # Use the whole file content as the chapter content
         # Reference the css files from each individual chapter
-        page_style = open('../css/page_styles.css', 'r', encoding='utf-8').read()
-        page_style1 = open('../css/page_styles1.css', 'r', encoding='utf-8').read()
-        style = open('../css/stylesheet.css', 'r', encoding='utf-8').read()
+        page_style = open('./css/page_styles.css', 'r', encoding='utf-8').read()
+        page_style1 = open('./css/page_styles1.css', 'r', encoding='utf-8').read()
+        style = open('./css/stylesheet.css', 'r', encoding='utf-8').read()
         chapter.add_item(
             epub.EpubItem(uid="page_style", file_name="../style/page_styles.css", media_type="text/css",
                           content=page_style))
@@ -219,13 +352,13 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
     book.toc = book_spine
 
     # Set the book's style sheets
-    os.chdir('../')
     page_style = open('./css/page_styles.css', 'r', encoding='utf-8').read()
     page_style1 = open('./css/page_styles1.css', 'r', encoding='utf-8').read()
 
     style = open('./css/stylesheet.css', 'r', encoding='utf-8').read()
     book.add_item(
-        epub.EpubItem(uid="page_style", file_name="style/page_styles.css", media_type="text/css", content=page_style))
+        epub.EpubItem(uid="page_style", file_name="style/page_styles.css", media_type="text/css",
+                      content=page_style))
     book.add_item(
         epub.EpubItem(uid="page_style1", file_name="style/page_styles1.css", media_type="text/css",
                       content=page_style1))
@@ -233,72 +366,46 @@ def merge_txt_to_epub(txt_files=[], epub_file='', author='', cover='', direction
         epub.EpubItem(uid="style_default", file_name="style/stylesheet.css", media_type="text/css", content=style))
 
     # Package the EPUB file
-    epub.write_epub('./' + epub_file, book, {})
+    epub.write_epub(epub_file, book, {})
     return True
 
-
-# Merge into a single txt file
-def merge_txt_file(file_path='', merged_file_name=''):
-    """
-
-    :param file_path: directory where the txt files are saved
-    :param merged_file_name: path where the merged file is saved
-    :returns bool: merge success or failure status
-    """
-    os.chdir(file_path)
-    if os.path.exists(merged_file_name):
-        os.rmdir(merged_file_name)
-    print('merge file : ', sorted(os.listdir('.'), key=lambda x: int(x.split('.')[0])))
-    with open(merged_file_name, 'wb') as outfile:
-        for filename in sorted(os.listdir('.'), key=lambda x: int(x.split('.')[0])):
-            print(filename)
-            if filename.endswith('.txt'):
-                # Check whether the file is empty
-                if os.path.exists(filename) and os.path.getsize(filename) > 0:
-                    # print(filename + ' 已存在,跳过...\n')
-                    with open(filename, 'rb') as infile:
-                        shutil.copyfileobj(infile, outfile)
-                else:
-                    return False
-    return True
-
-
-def multi_thread_download():
-    threads = []
-    for file_name, chapter_url in enumerate(chapter_urls):
-        args = (chapter_url, str(file_name) + '.txt')
-        thread = threading.Thread(target=download_url, args=args)
-        threads.append(thread)
-        thread.start()
-
-    for thread in threads:
-        thread.join()
-
-
-# Limit to 4 concurrent threads; more threads tend to cause more errors
-max_concurrent_threads = 4
-# Create a Semaphore initialized to max_concurrent_threads
-semaphore = threading.Semaphore(max_concurrent_threads)
-
-multi_thread_download()
-
-time.sleep(interval)
-
-while True:
-    # merge_txt_file('./' + title, '../' + title + '.txt')
-    # Call the merge function
-    txt_files = sorted(os.listdir(title), key=lambda x: int(x.split('.')[0]))
-    epub_file_path = title + '.epub'
-    result = merge_txt_to_epub(txt_files, epub_file_path, author, cover, intro)
-    if not result:
-        print('下载失败:', result, '\t是否重试?')
-        num = int(input('0 重试\n1 退出\n'))
-        if num == 0:
-            multi_thread_download()
-            merge_txt_to_epub(txt_files, epub_file_path, author, cover, intro)
-        else:
-            break
-    else:
-        print('合并成功!')
-        break
+    def __init__(self, name):
+        self.file_path = None
+        self.chapter_urls = None
+        self.cover = None
+        self.intro = None
+        self.status = None
+        self.author = None
+        self.title = None
+        self.name = name
+
+        # Request interval in seconds
+        self.interval = 2
+
+        # Home page URL of the novel to crawl
+        # self.novel_url = 'https://www.bqg221.com/xs/'
+
+
+if __name__ == '__main__':
+    search_name = input('请输入要搜索的书籍名称: ')
+    if search_name:
+        download_novel = Download_Novel(search_name)
+        response = download_novel.search_novel()
+        print(response)
+        print('搜索到 ' + str(len(response)) + ' 个结果\n')
+        print('---------------------------------------\n')
+        for i, book in enumerate(response):
+            print(str(i) + ' 书籍名称:' + book['articlename'] + '\n作者:' + book['author'] + '\n简介:' + book[
+                'intro'] + '...\n')
+            print('---------------------------------------')
+        print('---------------------------------------\n')
+        select_book = int(input(f'选择要下载的书籍序号(从0-{str(len(response) - 1)}中选择):'))
+        # Validate the input
+        if isinstance(select_book, int) and 0 <= select_book <= len(response):
+            download_novel.get_novel_info(response[select_book])
+            download_novel.download_process()
+        else:
+            print('输入内容不合法!')
+
+    else:
+        exit(0)
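The epub side of this file follows the usual ebooklib sequence: build an EpubBook, set metadata and cover, add one EpubHtml per chapter plus the NCX and nav items, then fill toc and spine and write the file. A self-contained sketch of that sequence, reduced to one chapter with placeholder metadata:

from ebooklib import epub

book = epub.EpubBook()
book.set_title('Sample')
book.set_language('zh')
book.add_author('Unknown')

# One chapter whose content is a plain XHTML fragment
chapter = epub.EpubHtml(title='Chapter 1', file_name='text/0.xhtml')
chapter.content = '<h1>Chapter 1</h1><p>hello</p>'
book.add_item(chapter)

# Navigation documents readers expect, then the reading order
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
book.toc = [chapter]
book.spine = ['nav', chapter]

epub.write_epub('sample.epub', book, {})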
README.md (new file, 9 lines)

@@ -0,0 +1,9 @@
+## Novel_Download
+Fetches novel content from `https://www.bqg221.com` and merges the crawled chapters into txt or epub format.
+
+- Implemented features
+  - [x] Download chapters as txt files
+  - [x] Merge the txt files
+  - [x] Merge into an epub
+  - [x] Search for books
+  - [ ] Switch between multiple book sources
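Per the new __main__ block, a run is interactive: the script prompts for a title, prints the numbered search results, and downloads the selected book into downloads/. The same flow can be driven without the prompts (a sketch; it assumes the Download_Novel class from this commit is importable and the site is reachable):

# Hypothetical driver built on the class added in this commit
novel = Download_Novel('深空彼岸')
results = novel.search_novel()      # list of dicts: articlename, author, intro, url_list, url_img
novel.get_novel_info(results[0])    # scrape metadata and chapter links for the first hit
novel.download_process()            # threaded download, then merge to txt or epub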
@@ -2,7 +2,15 @@
     margin-bottom: 5pt;
     margin-top: 5pt
 }
-@font-face {
-    font-family: yinbiao;
-    src: url(styles/XXXXXXXXXXXXXXXX)
-}
+html {
+    overflow: hidden;
+}
+
+body {
+    height: 100%;
+    overflow: auto;
+    -webkit-column-width: 100%;
+    -moz-column-width: 100%;
+    column-width: 100%;
+    page-break-inside: avoid;
+}
pdm.lock (12 changes)

@@ -46,6 +46,12 @@ dependencies = [
     "urllib3<3,>=1.21.1",
 ]
 
+[[package]]
+name = "setuptools"
+version = "68.0.0"
+requires_python = ">=3.7"
+summary = "Easily download, build, install, upgrade, and uninstall Python packages"
+
 [[package]]
 name = "six"
 version = "1.16.0"

@@ -62,7 +68,7 @@ summary = "HTTP library with thread-safe connection pooling, file post, and more
 lock_version = "4.2"
 cross_platform = true
 groups = ["default"]
-content_hash = "sha256:2fff54024aa639561573351b1841dd1f0084ca345b52d35859bfae33744350e8"
+content_hash = "sha256:095da8eef1987e0630ebddf4a4513ad826ad8630af33554248065ff297544cb6"
 
 [metadata.files]
 "certifi 2023.5.7" = [

@@ -236,6 +242,10 @@ content_hash = "sha256:2fff54024aa639561573351b1841dd1f0084ca345b52d35859bfae337
     {url = "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
     {url = "https://files.pythonhosted.org/packages/9d/be/10918a2eac4ae9f02f6cfe6414b7a155ccd8f7f9d4380d62fd5b955065c3/requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
 ]
+"setuptools 68.0.0" = [
+    {url = "https://files.pythonhosted.org/packages/c7/42/be1c7bbdd83e1bfb160c94b9cafd8e25efc7400346cf7ccdbdb452c467fa/setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"},
+    {url = "https://files.pythonhosted.org/packages/dc/98/5f896af066c128669229ff1aa81553ac14cfb3e5e74b6b44594132b8540e/setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"},
+]
 "six 1.16.0" = [
     {url = "https://files.pythonhosted.org/packages/71/39/171f1c67cd00715f190ba0b100d606d440a28c93c7714febeca8b79af85e/six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
     {url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
@ -9,6 +9,7 @@ authors = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"ebooklib>=0.18",
|
"ebooklib>=0.18",
|
||||||
|
"setuptools>=68.0.0",
|
||||||
]
|
]
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
license = {text = "MIT"}
|
license = {text = "MIT"}
|
||||||
|
|