Novel_download/test_search.py

import json
import random
import time

import requests
from bs4 import BeautifulSoup


class Download_Novel:
    def get_user_agent(self):
        # Several User-Agent pools; each pool holds a few UA strings for one
        # browser family (Chrome, Firefox, Safari).
        user_agents = [
            [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.3',
                'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'],
            [
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0',
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
                'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'],
            [
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38']
        ]
        # Pick a random pool, then a random User-Agent string from that pool.
        user_agent_list = random.choice(user_agents)
        user_agent = random.choice(user_agent_list)
        return {'User-Agent': user_agent}
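
    # A minimal sketch of a hardened GET helper (a hypothetical addition, not
    # called by the original flow): the bare requests.get calls below have no
    # error handling, so this retries a few times with the rotating
    # User-Agent and a short pause between attempts.
    def http_get(self, url, retries=3, pause=2):
        for attempt in range(retries):
            try:
                resp = requests.get(url, headers=self.get_user_agent(), timeout=10)
                resp.raise_for_status()
                return resp
            except requests.RequestException:
                if attempt == retries - 1:
                    raise
                time.sleep(pause)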

    def search_novel(self):
        # Interval between requests (seconds)
        interval = 2
        # First fetch the 'hm' token that the search endpoint requires.
        hm_url = f'https://user.bqgso.cc/hm.html?&q={self.name}'
        result = requests.get(hm_url, headers=self.get_user_agent()).text
        # The token comes back wrapped; strip the surrounding characters.
        hm = result[2:-2]
        # Pause between the two requests.
        time.sleep(interval)
        # Issue the actual search request; the payload is JSON wrapped in one
        # extra character on each side, hence the [1:-1] slice.
        url = f'https://user.bqgso.cc/search.html?&q={self.name}&hm={hm}'
        response = json.loads(requests.get(url, headers=self.get_user_agent()).text[1:-1])
        # Rewrite each result's mobile link to the desktop domain.
        for i, book in enumerate(response):
            trans_url = book['url_list'].replace('https://m', 'https://www')
            response[i]['url_list'] = trans_url
        # Return the parsed JSON: a list of result dicts.
        return response
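
    # Each element of the list returned above is expected to look roughly
    # like the following (field names taken from how __main__ uses them; the
    # exact shape belongs to the remote API and is not verified here):
    # {'articlename': '...', 'author': '...', 'intro': '...', 'url_list': 'https://www...'}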

    def get_novel_info(self, url=None):
        # todo: pass in the selected search result's url_list; the hard-coded
        # address below is only a placeholder for testing.
        if url is None:
            url = 'https://www.bqg221.com/xs/17931/'
        # Interval between requests (seconds)
        interval = 2
        # Fetch the novel's index page (the headers mimic a real browser).
        response = requests.get(url, headers=self.get_user_agent())
        # Parse the response into a BeautifulSoup tree.
        soup = BeautifulSoup(response.text, 'html.parser')
        # Novel title
        title = soup.select_one('.book h1').get_text(strip=True)
        print(title)
        # The metadata sits in the <span> tags of the 'small' div:
        # author, status, update time, latest chapter.
        div_tag = soup.find('div', {'class': 'small'})
        all_span_tags = div_tag.find_all('span')
        author = all_span_tags[0].text.strip()[3:]  # [3:] drops the '作者:' (author) prefix
        status = all_span_tags[1].text.strip()
        update_time = all_span_tags[2].text.strip()
        latest_update = all_span_tags[3].text.strip()
        # Synopsis; [:-6] drops the trailing 'expand' label.
        intro = soup.select_one('.intro').get_text(strip=True)[:-6]
        print(intro)
        cover = soup.select_one('.cover img')['src']
        # Collect all chapter links, skipping the javascript 'show more' link.
        chapter_urls = [url + i.get('href').split('/')[-1] for i in soup.select('.listmain a')
                        if i.get('href').split('/')[-1] != 'javascript:dd_show()']
        print('Starting download...')
        # Pause before the next request.
        time.sleep(interval)
        return chapter_urls
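
    # A minimal sketch of the missing per-chapter download step (the todo
    # above). The 'h1' and '#chaptercontent' selectors are assumptions about
    # the chapter page markup, not the author's confirmed implementation.
    def download_chapter(self, chapter_url):
        resp = requests.get(chapter_url, headers=self.get_user_agent())
        soup = BeautifulSoup(resp.text, 'html.parser')
        title_tag = soup.select_one('h1')
        title = title_tag.get_text(strip=True) if title_tag else chapter_url
        # Assumed content container; adjust the selector to the real page.
        content = soup.select_one('#chaptercontent')
        text = content.get_text('\n', strip=True) if content else ''
        return title, text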

    def __init__(self, name):
        self.name = name
        # Homepage link of the novel to crawl
        # self.novel_url = 'https://www.bqg221.com/xs/'


if __name__ == '__main__':
    search_name = input('Enter the title of the book to search for: ')
    if search_name:
        my_object = Download_Novel(search_name)
        response = my_object.search_novel()
        print(response)
        print('Found ' + str(len(response)) + ' results\n')
        print('---------------------------------------\n')
        for i, book in enumerate(response):
            print(str(i) + ' Title: ' + book['articlename'] + '\nAuthor: ' + book['author']
                  + '\nIntro: ' + book['intro'] + '...\n')
            print('---------------------------------------')
        print('---------------------------------------\n')
        select_book = int(input(f'Select the index of the book to download (0-{len(response) - 1}): '))
        # todo: add the actual book download handling
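        # A hedged sketch of what that download handling might look like
        # (commented out because it relies on the hypothetical
        # download_chapter method above): feed the chosen result's url_list
        # into get_novel_info, then fetch each chapter it returns.
        # chapter_urls = my_object.get_novel_info(response[select_book]['url_list'])
        # for chapter_url in chapter_urls:
        #     chapter_title, text = my_object.download_chapter(chapter_url)
        #     print(chapter_title)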
    else:
        exit(0)