一、虚拟浏览器方式(测试代码)
前置工作:
# 1. 退出已经打开的 Chrome 浏览器
# 2. 右击桌面上的 Chrome 浏览器图标,在"属性"中将"目标"修改为 "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222
# 3. 访问任意一个列表页,例如 https://xxxx/en/category/collection/back-to-school/apparel.html
# coding:utf-8
# 1. 退出已经打开的 Chrome 浏览器
# 2. 右击桌面上的 Chrome 浏览器图标,在"属性"中将"目标"修改为 "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222
# 3. 访问任意一个列表页,例如 https://www.champssports.com/en/category/collection/back-to-school/apparel.html
from playwright.async_api import Playwright, async_playwright, expect
from urllib.parse import urlparse
# CSS selectors used for both listing passes.
_CARD_SELECTOR = 'div[class="SearchResults"]>ul>li'
_LINK_SELECTOR = (
    'div[class="ProductCard ProductCard--flexDirection ProductCardV3"] > a'
)
_IMAGE_SELECTOR = "img"
_NAME_SELECTOR = 'span[class="ProductName-primary"]'
_PRICE_SELECTOR = 'span[class="ProductPrice-final"]'
_PRICE_FALLBACK_SELECTOR = 'span[class="ProductPrice"] > span'


async def _attribute_or_none(node, selector, attribute):
    """Return ``attribute`` of the first ``selector`` match under ``node``, or None."""
    element = await node.query_selector(selector)
    if element is None:
        return None
    return await element.get_attribute(attribute)


async def _text_or_none(node, selector):
    """Return the text content of the first ``selector`` match under ``node``, or None."""
    element = await node.query_selector(selector)
    if element is None:
        return None
    return await element.text_content()


async def _price_text(good):
    """Return the price text of a card, trying the fallback selector if needed."""
    span = await good.query_selector(_PRICE_SELECTOR)
    if span is None:
        span = await good.query_selector(_PRICE_FALLBACK_SELECTOR)
    if span is None:
        return None
    return await span.text_content()


def _sku_from_url(url):
    """Insert a hyphen before the last three characters of the URL path.

    Returns None when ``url`` is empty or None, since there is nothing to parse.
    """
    if not url:
        return None
    path = urlparse(url).path
    return f"{path[:-3]}-{path[-3:]}"


async def _print_products(goods, *, with_sku):
    """Print the raw HTML, link, (optional) SKU, image, name and price of each card."""
    for good in goods:
        print(await good.inner_html())
        # Detail-page link of the product.
        url = await _attribute_or_none(good, _LINK_SELECTOR, 'href')
        print(url)
        if with_sku:
            print(_sku_from_url(url))
        # Image address.
        print(await _attribute_or_none(good, _IMAGE_SELECTOR, 'src'))
        # Product name.
        print(await _text_or_none(good, _NAME_SELECTOR))
        # Product price.
        print(await _price_text(good))


async def xxx():
    """Print product data from a Chrome tab attached over CDP.

    Connects to the browser listening on ``http://localhost:9222``, uses the
    first page of its first context, and prints every product card found in
    the search results. It then clicks the second toolbar entry and the
    second link of the ``MegaMenu-0`` list to switch listing, and prints the
    cards of that listing too, this time including a SKU derived from each
    detail-page link.

    A field whose element is missing from a card is printed as ``None``
    instead of aborting the run with ``AttributeError``.

    Raises:
        IndexError: if the browser has no context/page, or the toolbar or
            menu has fewer than two matching entries.
    """
    async with async_playwright() as driver:
        browser = await driver.chromium.connect_over_cdp('http://localhost:9222')
        context = browser.contexts[0]
        page = context.pages[0]
        print(page.url)

        good_list = await page.query_selector_all(_CARD_SELECTOR)
        print("裤子")
        print(page.url)
        print(len(good_list))
        await _print_products(good_list, with_sku=False)

        # Switch product category via the toolbar and its mega menu; the
        # fixed waits give the menu and the new listing time to render.
        toolbar_items = await page.query_selector_all('div[role="toolbar"]>div')
        await toolbar_items[1].click()
        await page.wait_for_timeout(1000)
        menu_links = await page.query_selector_all(
            'ul[aria-labelledby="MegaMenu-0"]>li>a')
        await menu_links[1].click()
        await page.wait_for_timeout(2000)

        print("鞋子")
        print(page.url)
        good_list = await page.query_selector_all(_CARD_SELECTOR)
        await _print_products(good_list, with_sku=True)
if __name__ == '__main__':
    import asyncio

    # asyncio.run() creates a fresh event loop, runs the coroutine to
    # completion and closes the loop afterwards. Calling
    # asyncio.get_event_loop() with no running loop is deprecated in
    # recent Python versions and leaves the loop unclosed.
    asyncio.run(xxx())
运行效果
原文地址:https://blog.csdn.net/u010136741/article/details/140734741
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!