The scraping here is done mainly with the bs4 module.
I am entirely self-taught, so there are plenty of shortcomings; corrections are welcome.
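The core pattern the whole script relies on is: fetch a page with requests, parse it with BeautifulSoup, then pull tags out with find_all. Here is a minimal sketch of that pattern; the URL and the img/loading attributes are placeholders for illustration, not the real site's markup:

```python
import requests
from bs4 import BeautifulSoup

# Minimal requests + bs4 pattern that the full script below builds on.
# 'https://example.com/page/1' is a placeholder URL, not the target site.
html = requests.get('https://example.com/page/1', timeout=10).text
soup = BeautifulSoup(html, 'lxml')           # parse with the lxml parser
for img in soup.find_all('img', attrs={'loading': 'lazy'}):
    print(img['src'])                        # each <img>'s source URL
```

The full script follows.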
```python
# -*- coding:utf-8 -*-
# Scrape galleries from the meizitu site
# url: /mianfei/meizitu/page/1
import os

import requests
from bs4 import BeautifulSoup


def geturl(url):
    """Fetch a page and return its HTML text, or None if the request fails."""
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                             "Chrome/78.0.3904.108 Safari/537.36"}
    r = requests.get(url=url, headers=headers, timeout=10)
    r.encoding = 'utf-8'
    # print(r.status_code)  # inspect the response code when debugging
    if r.status_code == 200:
        return r.text
    else:
        print('Request failed. Status code: {}'.format(r.status_code))


def save_jpg(main_title, main_url):
    """Create a folder named after the gallery title and save every image into it."""
    path = "F:\\妹子图\\{}".format(main_title)
    if not os.path.exists(path):
        os.makedirs(path)
        print('Created folder %s' % path)
    else:
        print('Folder already exists')
    soup = BeautifulSoup(geturl(main_url), 'lxml')
    # the lazily-loaded <img> tags hold the actual picture URLs
    img_url_list = soup.find_all('img', attrs={'loading': 'lazy'})
    count = 1
    for img_url in img_url_list:
        img_src = img_url['src']
        p = requests.get(img_src)
        # the with statement closes the file automatically, so no explicit close() is needed
        with open(path + '\\{}.jpg'.format(count), 'wb') as f:
            f.write(p.content)
        count = count + 1
    print('Images saved.')


def allurl():
    """Walk the listing pages, extract each gallery's title and link, then save it."""
    page = int(input('How many pages do you want to scrape?\n'))
    # listing pages start at 1, so iterate 1..page rather than 0..page-1
    for i in range(1, page + 1):
        url = '/mianfei/meizitu/page/{}'.format(i)
        s = BeautifulSoup(geturl(url), 'lxml')
        i_list = s.find_all('a', attrs={'class': 'meta-title'})
        for a in i_list:
            main_url = a['href']           # link to the gallery page
            main_title = a.string[5:-8]    # strip the boilerplate around the title text
            save_jpg(main_title, main_url)


if __name__ == '__main__':
    allurl()
```
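Two things worth noting about the script: geturl() returns None when the status code is not 200 (so BeautifulSoup would then raise), and the image requests inside save_jpg() are sent without the browser User-Agent header, which some sites reject. Below is a hedged sketch of how the download loop could be hardened by reusing one requests.Session with the same headers; the helper name download_images is my own invention for illustration, not part of the original script:

```python
import os
import requests

def download_images(img_urls, folder, headers):
    """Hypothetical helper: save each URL in img_urls into folder,
    reusing a single requests.Session so headers and connections are shared."""
    os.makedirs(folder, exist_ok=True)           # no need for a separate exists() check
    with requests.Session() as session:
        session.headers.update(headers)          # send the same User-Agent as geturl()
        for count, src in enumerate(img_urls, start=1):
            resp = session.get(src, timeout=10)
            if resp.status_code != 200:          # skip failed downloads instead of crashing
                continue
            with open(os.path.join(folder, '{}.jpg'.format(count)), 'wb') as f:
                f.write(resp.content)
```

The original per-gallery loop could then collect the img['src'] values into a list and hand them to this helper, keeping the parsing and the downloading separate.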