import json
import random
import time

import requests
from bs4 import BeautifulSoup


class Download_Novel:
    """Search for and (eventually) download novels from www.bqg221.com."""

    def __init__(self, name):
        # Book title to search for on the site.
        self.name = name

    def get_user_agent(self):
        """Return a headers dict with a randomly chosen User-Agent string.

        A random browser family (Chrome/Win, Firefox/Win, macOS) is picked
        first, then a random version string within that family, so the
        request fingerprint varies between calls.
        """
        user_agents = [
            [  # Chrome on Windows
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.3',
                'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
            ],
            [  # Firefox on Windows
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0',
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
                'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
            ],
            [  # Safari / Chrome on macOS
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38',
            ],
        ]
        # Pick a random family, then a random UA within that family.
        return {'User-Agent': random.choice(random.choice(user_agents))}

    def search_novel(self):
        """Query the site's search endpoint for ``self.name``.

        Returns the parsed JSON list of result dicts; each entry's
        ``url_list`` is rewritten into an absolute ``https://www`` URL.
        """
        url = f'https://www.bqg221.com/user/search.html?q={self.name}'
        print(url)
        # Fetch ONCE and reuse the body.  The original issued the same GET
        # twice (once for the debug print, once for json.loads), doubling
        # network traffic and risking two different responses.
        text = requests.get(url, headers=self.get_user_agent()).text
        print('响应内容:', text)
        # The endpoint wraps its JSON payload in one extra character on
        # each side; strip them before parsing.  TODO(review): confirm the
        # wrapper format against a live response.
        response = json.loads(text[1:-1])
        print(response)
        for i, book in enumerate(response):
            print(type(book['url_list']))
            # Rewrite the relative path into an absolute www URL.
            response[i]['url_list'] = 'https://www.bqg221.com' + str(book['url_list'])
        return response

    def get_novel_info(self, response):
        """Fetch a book's index page and print its metadata and chapters.

        ``response`` is expected to be one search-result dict (with an
        absolute ``url_list`` as produced by ``search_novel``).  The
        original ignored this parameter entirely and always scraped a
        hard-coded demo book; that URL is kept as a fallback so existing
        callers keep working.  TODO: actually download the chapters.
        """
        # Politeness delay between requests, in seconds.
        interval = 2
        if isinstance(response, dict) and response.get('url_list'):
            url = str(response['url_list'])
        else:
            # Backward-compatible fallback: the previously hard-coded book.
            url = 'https://www.bqg221.com/xs/17931/'
        if not url.endswith('/'):
            # Chapter links below are built by appending to this prefix.
            url += '/'

        page = requests.get(url, headers=self.get_user_agent())
        soup = BeautifulSoup(page.text, 'html.parser')

        # Book title.
        title = soup.select_one('.book h1').get_text(strip=True)
        print(title)

        # Metadata lives in <span> tags inside <div class="small">:
        # author / status / update time / latest chapter, in that order.
        div_tag = soup.find('div', {'class': 'small'})
        all_span_tags = div_tag.find_all('span')
        author = all_span_tags[0].text.strip()[3:]  # drop the "作者:" label
        status = all_span_tags[1].text.strip()
        update_time = all_span_tags[2].text.strip()
        latest_update = all_span_tags[3].text.strip()

        # Blurb; the last 6 characters are a site-injected suffix —
        # TODO(review): confirm against live markup.
        intro = soup.select_one('.intro').get_text(strip=True)[:-6]
        print(intro)

        cover = soup.select_one('.cover img')['src']

        # Collect every chapter link, skipping the "expand" javascript stub.
        chapter_urls = [
            url + a.get('href').split('/')[-1]
            for a in soup.select('.listmain a')
            if a.get('href').split('/')[-1] != 'javascript:dd_show()'
        ]

        print('开始下载。。。')
        time.sleep(interval)


if __name__ == '__main__':
    search_name = input('请输入要搜索的书籍名称: ')
    if search_name:
        my_object = Download_Novel(search_name)
        response = my_object.search_novel()
        print(response)
        print('搜索到 ' + str(len(response)) + ' 个结果\n')
        print('---------------------------------------\n')
        for i, book in enumerate(response):
            print(str(i) + ' 书籍名称:' + book['articlename'] + '\n作者:' + book['author'] + '\n简介:' + book['intro'] + '...\n')
            print('---------------------------------------')
        print('---------------------------------------\n')
        select_book = int(input(f'选择要下载的书籍序号(从0-{str(len(response)-1)}中选择):'))
        # TODO: add book download handling for the selected index.
    else:
        exit(0)