From ca8a652b38c4492506af4c709871ac7f7c82cb60 Mon Sep 17 00:00:00 2001 From: kirinjewelry Date: Wed, 28 Feb 2024 07:47:02 +0100 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20''?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Embrace affordable luxury with our [Gold Plated Jewelry Wholesale](https://www.kirinjewelrywholesale.com/collections/plating-plating). Immerse yourself in the radiance of meticulously designed pieces that bring a touch of glamour to your look, celebrating elegance without compromise. --- 宅男女神网.py | 56 ++++++++++++++++++++++++++++++++++++++ 美丽女人网.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 宅男女神网.py create mode 100644 美丽女人网.py diff --git a/宅男女神网.py b/宅男女神网.py new file mode 100644 index 0000000..ee8eda4 --- /dev/null +++ b/宅男女神网.py @@ -0,0 +1,56 @@ +#coding = utf-8 + +import requests +import parsel +import os + +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36" +} + +def get_onepage(url): + response = requests.get(url=url,headers=headers) + selector = parsel.Selector(response.text) + lis = selector.css('.listdiv ul li') + for li in lis: + fail_title = li.css('.galleryli_title a ::text').get() # 文件名 + href = li.css('.galleryli_title a ::attr(href)').get() # 详情页短地址 + # global href_full + href_full = "https://www.fnvshen.com/" + href # 进入详情页的完整地址 + print("正在准备下载" + href_full) + # print('正在下载相册:', fail_title) + + # 创建相册文件夹 + if not os.path.exists('img\\' + fail_title): # 如果该路径下没有该文件夹 + os.mkdir('img\\' + fail_title) + + try: + for page_url_num in range(1,101): + page_url = href_full + "{}.html".format(page_url_num) + response_page = requests.get(url=page_url,headers=headers) + selector_page = parsel.Selector(response_page.text) + imgs = selector_page.css(".gallery_wrapper ul#hgallery img") #获取详情页中单页的对象 + for img in imgs: + img_name = img.css('img ::attr(alt)').get() #获得图片储存名称 + img_href = img.css('img ::attr(src)').get() #图片下载地址 + # print(img_name,img_href) + suffix = img_href.split('/')[-1] #获取图片下载后缀 + response_page_1 = requests.get(url=img_href,headers=headers).content + with open(f'img\\{fail_title}\\{img_name}' + suffix, mode='wb') as f: + f.write(response_page_1) + print("保存完成:", img_name) + except: + print("该页已经是最后一页") + continue + + +# https://www.fnvshen.com/gallery/4.html +# https://www.fnvshen.com/gallery/3.html + +for page in range(1,101): + print("正在下载第{}页".format(page)) + url = "https://www.fnvshen.com/gallery/{}.html".format(page) + get_onepage(url) + + + diff --git a/美丽女人网.py b/美丽女人网.py new file mode 100644 index 0000000..a6bd381 --- /dev/null +++ b/美丽女人网.py @@ -0,0 +1,67 @@ +#coding = utf-8 + +import requests +import parsel +import os + + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36" +} + +def get_oneclass(url_class): + response_page_class = requests.get(url=url_class,headers=headers) + selector_class = parsel.Selector(response_page_class.text) + # url_class_sorts = selector_class.css('.top_nav ul li ::attr(href)').getall() + # for url_class_sort in url_class_sorts: + dls = selector_class.css('.chanpin_list dl') + for dl in dls: + dl_url = dl.css('dt a ::attr(href)').get() + global dl_name + dl_name = dl.css('dt a ::attr(alt)').get() #创建文件夹名 + if not os.path.exists('img\\' + dl_name): # 如果该路径下没有该文件夹 + os.mkdir('img\\' + dl_name) + get_onepeple(dl_url) + +def get_onepeple(url): + response_page = requests.get(url=url,headers=headers) + selector = parsel.Selector(response_page.text) + imgs = selector.css('.neirong_body p strong img') + for img in imgs: + img_url = img.css('img ::attr(src)').get() + img_name = img_url.split("/")[-1] + # suffix = img_url.split(".")[-1] + # print(img_url) + + response_page_1 = requests.get(url=img_url,headers=headers).content + with open(f"img\\{dl_name}\\{img_name}", mode='wb') as f: + f.write(response_page_1) + print("保存完成:", img_name) + + +url_list_1 = [] + +url_aepnu = "http://www.xunfangimg.com/aepnu/list_1.html" +url_z7qnv_1 = 'http://www.xunfangimg.com/z7qnv/list_1.html' +url_z7qnv_2 = 'http://www.xunfangimg.com/z7qnv/list_2.html' +url_zp7sg_1 = 'http://www.xunfangimg.com/zp7sg/list_1.html' +url_full_1 = "http://www.xunfangimg.com/" + +url_list_2 = [url_aepnu,url_z7qnv_1,url_z7qnv_2,url_zp7sg_1,url_full_1] + +response_full = requests.get(url=url_full_1,headers=headers) +selector_full = parsel.Selector(response_full.text) +lis_full = selector_full.css('.top_nav ul li ') +for li_full in lis_full: + url_class_1 = li_full.css('a ::attr(href)').get() + # print(url_class_1) + url_list_1.append(url_class_1) + +url_list = url_list_1 + url_list_2 +# print(url_list) +for url in url_list: + print(url + "开始爬取" ) + try: + get_oneclass(url) + except: + print("爬取失败") \ No newline at end of file