# coding: utf-8
import os

import requests
import parsel

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/89.0.4389.90 Safari/537.36"
}


def get_oneclass(url_class):
    """Fetch one category listing page and crawl every album (dl block) on it."""
    response_page_class = requests.get(url=url_class, headers=headers)
    selector_class = parsel.Selector(response_page_class.text)
    dls = selector_class.css('.chanpin_list dl')
    for dl in dls:
        dl_url = dl.css('dt a ::attr(href)').get()
        dl_name = dl.css('dt a ::attr(alt)').get()  # album name, used as the folder name
        # create the target folder if it does not exist yet
        folder = os.path.join('img', dl_name)
        if not os.path.exists(folder):
            os.makedirs(folder)
        get_onepeple(dl_url, dl_name)


def get_onepeple(url, dl_name):
    """Download every image on one album page into img/<dl_name>/."""
    response_page = requests.get(url=url, headers=headers)
    selector = parsel.Selector(response_page.text)
    imgs = selector.css('.neirong_body p strong img')
    for img in imgs:
        img_url = img.css('::attr(src)').get()
        img_name = img_url.split("/")[-1]
        img_data = requests.get(url=img_url, headers=headers).content
        with open(os.path.join('img', dl_name, img_name), mode='wb') as f:
            f.write(img_data)
        print("saved:", img_name)


# extra listing pages that are not linked from the top navigation
url_aepnu = "http://www.xunfangimg.com/aepnu/list_1.html"
url_z7qnv_1 = 'http://www.xunfangimg.com/z7qnv/list_1.html'
url_z7qnv_2 = 'http://www.xunfangimg.com/z7qnv/list_2.html'
url_zp7sg_1 = 'http://www.xunfangimg.com/zp7sg/list_1.html'
url_full_1 = "http://www.xunfangimg.com/"
url_list_2 = [url_aepnu, url_z7qnv_1, url_z7qnv_2, url_zp7sg_1, url_full_1]

# collect the category links from the homepage top navigation
url_list_1 = []
response_full = requests.get(url=url_full_1, headers=headers)
selector_full = parsel.Selector(response_full.text)
lis_full = selector_full.css('.top_nav ul li')
for li_full in lis_full:
    url_class_1 = li_full.css('a ::attr(href)').get()
    url_list_1.append(url_class_1)

url_list = url_list_1 + url_list_2

for url in url_list:
    print(url + " start crawling")
    try:
        get_oneclass(url)
    except Exception as e:
        print("crawl failed:", e)