"""Download product videos from www.kirinjewelry2006.com.

Running this file walks product list pages 2 through 86, follows every
product link found on each page, and hands the resulting detail-page URL to
:func:`get_vidio`, which saves the embedded video under ``vidio/`` and logs
its title to ``vidio.csv``.
"""
import csv
import os
import time
from urllib.parse import urljoin

import parsel
import requests
from tqdm import tqdm

# Browser-like user agent sent with every request.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36"
    )
}

BASE_URL = "https://www.kirinjewelry2006.com"
VIDEO_DIR = "vidio"
# Seconds before a stalled connection/read is abandoned; without a timeout
# requests waits indefinitely and a single dead socket freezes the crawl.
REQUEST_TIMEOUT = 30
# Characters that cannot appear in a Windows file name ("/" is also the POSIX
# separator); they are replaced with "_" when building the .mp4 file name.
_UNSAFE_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', "_"))


def get_vidio(pageurl: str) -> None:
    """Download the video embedded in one product-detail page.

    The page title is read from the ``<h1>`` and the video address from the
    ``video-src`` attribute of the ``<img>`` inside the element with id
    ``v620a34694dad7``.  The text after the last ``-`` in the title becomes
    the video name.  The video is written to ``vidio/<name>.mp4`` and the
    name is appended as one row to ``vidio.csv``.  After a successful save
    the function sleeps for five seconds before returning.

    If the page or the video does not answer with HTTP 200, or the page
    lacks the expected title/video elements, a notice is printed and
    nothing is written.

    Args:
        pageurl: Absolute URL of a product-detail page, for example
            ``https://www.kirinjewelry2006.com/video/products-detail-967094``.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the video file or the CSV file cannot be written.
    """
    response = requests.get(url=pageurl, headers=header, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print("无法获取视频")
        return

    select = parsel.Selector(response.text)
    title = select.xpath('//*[@id="v620a34694dad7"]//h1/text()').get()
    vidio_src = select.xpath('//*[@id="v620a34694dad7"]//img/@video-src').get()
    # .get() returns None when the XPath matches nothing (e.g. a product
    # page without a video); bail out instead of failing on None.
    if title is None or not vidio_src:
        print("无法获取视频")
        return
    vidio_name = title.split("-")[-1]

    # urljoin leaves absolute URLs untouched and resolves relative or
    # protocol-relative ones against the page they came from.
    vidio_url = urljoin(pageurl, vidio_src)
    vidio_res = requests.get(url=vidio_url, headers=header, timeout=REQUEST_TIMEOUT)
    if vidio_res.status_code != 200:
        # Do not store an HTTP error body as if it were a video.
        print("无法获取视频")
        return

    # Create the output directory on first use and build the path with the
    # platform's separator rather than a hard-coded backslash.
    os.makedirs(VIDEO_DIR, exist_ok=True)
    file_name = "{}.mp4".format(vidio_name.translate(_UNSAFE_FILENAME_CHARS))
    with open(os.path.join(VIDEO_DIR, file_name), "wb") as file:
        file.write(vidio_res.content)
    print("{}视频已保存".format(vidio_name))

    with open("vidio.csv", encoding="utf-8", mode="a", newline="") as f:
        csv.writer(f).writerow([vidio_name])
    # Pause between downloads so consecutive requests are spaced out.
    time.sleep(5)


# Crawl list pages 2..86 (range end is exclusive) and process every product.
for page in tqdm(range(2, 87)):
    print("正在爬取第{}页".format(page))
    url = "https://www.kirinjewelry2006.com/products-list-{}".format(page)
    try:
        listing_html = requests.get(
            url=url, headers=header, timeout=REQUEST_TIMEOUT
        ).text
    except requests.RequestException as exc:
        # One unreachable list page should not abort the remaining pages.
        print("第{}页获取失败: {}".format(page, exc))
        continue

    select = parsel.Selector(listing_html)
    lis = select.xpath(
        '//*[@id="v6204665765bf2"]/div/div[2]/div[2]//div[1]/ul/li'
    )
    for li in lis:
        href = li.xpath('div/a/@href').get()
        if not href:
            # List item without a link: nothing to follow.
            continue
        pageurl = urljoin(BASE_URL, href)
        try:
            get_vidio(pageurl)
        except Exception as exc:
            # Best-effort crawl: report the failure and move on.  Catching
            # Exception (not a bare except) keeps Ctrl-C working.
            print("跳过 {}: {}".format(pageurl, exc))
            continue