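# NOTE: the following lines are web-viewer residue captured with this file;
# they are not part of the script.
#   You cannot select more than 25 topics.
#   Topics must start with a letter or number, can include dashes ('-')
#   and can be up to 35 characters long.
#   File info: 49 lines, 1.7 KiB, Python.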
import csv
import re
import time
from pathlib import Path
from urllib.parse import urljoin

import parsel
import requests
from tqdm import tqdm
# Request headers passed as ``headers=`` to the ``requests.get`` calls in this
# script. The only entry is a User-Agent string identifying the client as
# desktop Chrome 109 on Windows 10.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36"
    ),
}
def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    return re.sub(r'[\\/:*?"<>|]', "_", name)


def get_vidio(pageurl, *, timeout=30, out_dir="vidio", delay=5):
    """Download the video embedded in a product-detail page.

    Fetches ``pageurl``, reads the ``<h1>`` title and the ``video-src``
    attribute inside the ``v620a34694dad7`` container, saves the video as
    ``<out_dir>/<name>.mp4`` (``name`` is the part of the title after the last
    ``-``), appends ``name`` as one row to ``vidio.csv`` and finally sleeps
    ``delay`` seconds.

    If the page or the video does not answer with HTTP 200, or the page
    carries no title / video URL, the failure message is printed and nothing
    is written.

    Args:
        pageurl: Absolute URL of the product-detail page.
        timeout: Seconds to wait for each HTTP request before giving up.
        out_dir: Directory the ``.mp4`` file is written to; created on demand.
        delay: Seconds to sleep after a successful download.

    Raises:
        requests.RequestException: On network errors or timeouts.
        OSError: If the video file or ``vidio.csv`` cannot be written.
    """
    # Example: pageurl = "https://www.kirinjewelry2006.com/video/products-detail-967094"
    response = requests.get(url=pageurl, headers=header, timeout=timeout)
    if response.status_code != 200:
        print("无法获取视频")
        return

    select = parsel.Selector(response.text)
    # .get() returns None when the XPath matches nothing, so normalise before use.
    title = (select.xpath('//*[@id="v620a34694dad7"]//h1/text()').get() or "").strip()
    vidio_src = select.xpath('//*[@id="v620a34694dad7"]//img/@video-src').get()
    if not title or not vidio_src:
        print("无法获取视频")
        return

    # Keep only the text after the last dash; fall back to the whole title
    # when the title ends with a dash and that piece would be empty.
    vidio_name = title.split("-")[-1].strip() or title

    # urljoin leaves absolute URLs untouched and resolves relative ones
    # against the page they were found on.
    vidio_res = requests.get(
        url=urljoin(pageurl, vidio_src), headers=header, timeout=timeout
    )
    if vidio_res.status_code != 200:
        # Do not store an error page under an .mp4 name.
        print("无法获取视频")
        return

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / "{}.mp4".format(_safe_filename(vidio_name))
    with open(target, "wb") as file:
        file.write(vidio_res.content)
    print("{}视频已保存".format(vidio_name))

    with open("vidio.csv", encoding="utf-8", mode="a", newline="") as f:
        csv.writer(f).writerow([vidio_name])

    # Pause between downloads, as the original script did.
    time.sleep(delay)
# Crawl listing pages 2..86, and for every product linked from a listing page
# try to download its video with get_vidio(). Failures are reported and
# skipped so one bad page or product does not stop the crawl.
BASE_URL = "https://www.kirinjewelry2006.com"

for page in tqdm(range(2, 87)):
    print("正在爬取第{}页".format(page))
    url = "{}/products-list-{}".format(BASE_URL, page)
    try:
        response = requests.get(url=url, headers=header, timeout=30)
    except requests.RequestException as exc:
        print("第{}页获取失败: {!r}".format(page, exc))
        continue
    if response.status_code != 200:
        print("第{}页获取失败: HTTP {}".format(page, response.status_code))
        continue

    select = parsel.Selector(response.text)
    lis = select.xpath('//*[@id="v6204665765bf2"]/div/div[2]/div[2]//div[1]/ul/li')
    for li in lis:
        href = li.xpath('div/a/@href').get()
        if not href:
            # List item without a product link; nothing to download.
            continue
        pageurl = urljoin(BASE_URL, href)
        try:
            get_vidio(pageurl)
        except Exception as exc:
            # Catch Exception rather than using a bare except so Ctrl-C
            # (KeyboardInterrupt) still stops the crawl, and say what was skipped.
            print("跳过 {}: {!r}".format(pageurl, exc))
            continue