import json
import random
import time

import requests
from bs4 import BeautifulSoup


class Download_Novel:
    def get_user_agent(self):
        # Several pools of User-Agent strings, each holding a few variants
        # of the same browser family
        user_agents = [
            [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.3',
                'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
            ],
            [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0',
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
                'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
            ],
            [
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38',
            ],
        ]

        # Pick one User-Agent pool at random
        user_agent_list = random.choice(user_agents)

        # Pick one User-Agent string from the chosen pool
        user_agent = random.choice(user_agent_list)
        return {'User-Agent': user_agent}
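
    # A hypothetical helper (not in the original script): the same UA rotation
    # wrapped in a simple retry with exponential backoff, in case the site
    # intermittently drops connections. The name and retry policy are
    # assumptions, shown only as a sketch.
    def get_with_retry(self, url, retries=3):
        for attempt in range(retries):
            try:
                return requests.get(url, headers=self.get_user_agent(), timeout=10)
            except requests.RequestException:
                if attempt == retries - 1:
                    raise
                time.sleep(2 ** attempt)  # back off 1s, 2s, ... between tries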

    def search_novel(self):
        # Delay between requests, in seconds
        interval = 2
        # hm_url = f'https://user.bqgso.cc/hm.html?&q={self.name}'
        # result = requests.get(hm_url, headers=self.get_user_agent()).text
        # print(result)
        # hm = result[2:-2]
        # print(hm)

        # Throttle, then send the search request
        time.sleep(interval)
        url = f'https://www.bqg221.com/user/search.html?q={self.name}'
        print(url)
        raw = requests.get(url, headers=self.get_user_agent()).text
        print('Raw response:', raw)
        # The endpoint wraps its JSON payload in one extra leading and one
        # trailing character, so strip them before parsing
        response = json.loads(raw[1:-1])
        print(response)
        # print(type(response))
        for i, book in enumerate(response):
            # i['url_list'][:9] = 'https://www'
            # trans_url = book['url_list'].replace('https://m', 'https://www')
            # print(type(book['url_list']))
            # Search results carry site-relative paths; prepend the site root
            trans_url = 'https://www.bqg221.com' + str(book['url_list'])
            response[i]['url_list'] = trans_url

        # Return the parsed list of search results
        return response

    def get_novel_info(self, response):
        # todo:
        # Delay between requests, in seconds
        interval = 2

        # Home page of the novel to scrape (hardcoded for now; per the todo,
        # it should eventually come from the selected search result)
        url = 'https://www.bqg221.com/xs/17931/'

        # Send the request with browser-like headers
        page = requests.get(url, headers=self.get_user_agent())

        # Build a BeautifulSoup tree from the response body
        soup = BeautifulSoup(page.text, 'html.parser')

        # Novel title
        title = soup.select_one('.book h1').get_text(strip=True)
        print(title)

        # Novel metadata: the .small block lists author, status and update
        # info in consecutive <span> tags
        # print(soup.select('.small')[0])
        div_tag = soup.find('div', {'class': 'small'})
        # print(div_tag)
        all_span_tags = div_tag.find_all('span')
        # print(all_span_tags)
        author = all_span_tags[0].text.strip()[3:]  # drop the leading '作者:' (author) label
        status = all_span_tags[1].text.strip()
        update_time = all_span_tags[2].text.strip()
        latest_update = all_span_tags[3].text.strip()
        # for i in all_span_tags:
        #     print(i.text.strip())

        # Novel synopsis (the trailing six characters are site boilerplate)
        intro = soup.select_one('.intro').get_text(strip=True)[:-6]
        print(intro)

        # Cover image URL
        cover = soup.select_one('.cover img')['src']
        # print(cover)

        # Collect all chapter links, skipping the 'javascript:dd_show()'
        # pseudo-link that merely expands the hidden part of the list
        chapter_urls = [url + i.get('href').split('/')[-1] for i in soup.select('.listmain a')
                        if i.get('href').split('/')[-1] != 'javascript:dd_show()']
        # print(chapter_urls)

        print('Starting download...')
        # Pause for two seconds
        time.sleep(interval)
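        # A minimal sketch (assumption, not in the original): hand the scraped
        # metadata and chapter list back to the caller so the todo'd download
        # step in __main__ has something to work with.
        return {
            'title': title,
            'author': author,
            'status': status,
            'update_time': update_time,
            'latest_update': latest_update,
            'intro': intro,
            'cover': cover,
            'chapter_urls': chapter_urls,
        }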

    def __init__(self, name):
        self.name = name
        # Home page of the novel site to scrape
        # self.novel_url = 'https://www.bqg221.com/xs/'
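
    # A hypothetical helper (not part of the original script) sketching how a
    # single chapter page could be fetched with the same UA rotation; the
    # '#chaptercontent' selector is an assumption about the site's markup.
    def download_chapter(self, chapter_url):
        resp = requests.get(chapter_url, headers=self.get_user_agent())
        soup = BeautifulSoup(resp.text, 'html.parser')
        content = soup.select_one('#chaptercontent')
        # Fall back to an empty string if the selector does not match
        return content.get_text('\n', strip=True) if content else ''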


if __name__ == '__main__':
    search_name = input('Enter the title of the book to search for: ')
    if search_name:
        my_object = Download_Novel(search_name)
        response = my_object.search_novel()
        print(response)
        print('Found ' + str(len(response)) + ' result(s)\n')
        print('---------------------------------------\n')
        for i, book in enumerate(response):
            print(str(i) + ' Title: ' + book['articlename'] + '\nAuthor: ' + book['author'] +
                  '\nSynopsis: ' + book['intro'] + '...\n')
            print('---------------------------------------')
        print('---------------------------------------\n')
        select_book = int(input(f'Enter the number of the book to download (0-{len(response) - 1}): '))
        # todo: add book download handling
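        # A minimal sketch of the todo'd wiring (assumption, not in the
        # original): validate the chosen index, then fetch that book's detail
        # page via get_novel_info, which for now ignores its argument and
        # scrapes a hardcoded URL.
        if 0 <= select_book < len(response):
            my_object.get_novel_info(response[select_book])
        else:
            print('Invalid selection: ' + str(select_book))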
    else:
        exit(0)