自学内容网 自学内容网

一、虚拟浏览器方式(测试代码)

前置工作:

# 1.退出已经打开的chrome浏览器
# 2.桌面chrome浏览器图标右击属性修改目标为 "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222
# 3.访问随意一个列表页https://xxxx/en/category/collection/back-to-school/apparel.html
 

# coding:utf-8
# 1.退出已经打开的chrome浏览器
# 2.桌面chrome浏览器图标右击属性修改目标为 "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222
# 3.访问随意一个列表页https://www.champssports.com/en/category/collection/back-to-school/apparel.html

from playwright.async_api import Playwright, async_playwright, expect
from urllib.parse import urlparse


# CSS selectors shared by both listing passes.
_GOODS_SELECTOR = 'div[class="SearchResults"]>ul>li'
_LINK_SELECTOR = 'div[class="ProductCard ProductCard--flexDirection ProductCardV3"] > a'
_NAME_SELECTOR = 'span[class="ProductName-primary"]'
# Price selectors, tried in order; the second is the fallback.
_PRICE_SELECTORS = (
    'span[class="ProductPrice-final"]',
    'span[class="ProductPrice"] > span',
)


async def _attribute_of(scope, selector, attribute):
    """Return *attribute* of the first match of *selector* under *scope*, or None."""
    element = await scope.query_selector(selector)
    if element is None:
        # query_selector yields None when nothing matches; do not dereference it.
        return None
    return await element.get_attribute(attribute)


async def _first_text(scope, selectors):
    """Return the text content of the first selector that matches, or None."""
    for selector in selectors:
        element = await scope.query_selector(selector)
        if element is not None:
            return await element.text_content()
    return None


def _sku_from_url(url):
    """Return the URL path with a hyphen inserted before its last three characters.

    Returns None when *url* is None (the link or its href was missing).
    """
    if url is None:
        return None
    path = urlparse(url).path
    return f"{path[:-3]}-{path[-3:]}"


async def _print_goods(good_list, with_sku=False):
    """Print inner HTML, detail URL, optional SKU, image URL, name and price per item."""
    for good in good_list:
        print(await good.inner_html())
        # Product detail page URL.
        url = await _attribute_of(good, _LINK_SELECTOR, 'href')
        print(url)
        if with_sku:
            print(_sku_from_url(url))
        # Image URL.
        print(await _attribute_of(good, "img", 'src'))
        # Product name.
        print(await _first_text(good, (_NAME_SELECTOR,)))
        # Product price: final price first, then the generic price span.
        print(await _first_text(good, _PRICE_SELECTORS))


async def xxx():
    """Dump product listings from an already-open Chrome tab via CDP.

    Attaches to the browser listening on ``http://localhost:9222``, takes the
    first page of the first context, and prints every product found in the
    search-result list. It then clicks the second toolbar entry and the second
    link of the ``MegaMenu-0`` menu, waits, and prints the new listing, this
    time also printing a SKU derived from each detail URL.

    A product tile lacking the link, image, name or price element prints
    ``None`` for that field instead of aborting the whole run.

    Raises:
        IndexError: if the browser has no context/page, or the toolbar or
            menu has fewer than two entries.
    """
    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp('http://localhost:9222')
        context = browser.contexts[0]
        page = context.pages[0]
        print(page.url)
        good_list = await page.query_selector_all(_GOODS_SELECTOR)
        print("裤子")
        print(page.url)
        print(len(good_list))
        await _print_goods(good_list)

        # Switch product category: second toolbar entry, then second menu link.
        toolbar_items = await page.query_selector_all('div[role="toolbar"]>div')
        await toolbar_items[1].click()
        await page.wait_for_timeout(1000)
        menu_links = await page.query_selector_all('ul[aria-labelledby="MegaMenu-0"]>li>a')
        await menu_links[1].click()
        # Fixed pause to let the new listing render before scraping it.
        await page.wait_for_timeout(2000)

        print("鞋子")
        print(page.url)
        good_list = await page.query_selector_all(_GOODS_SELECTOR)
        await _print_goods(good_list, with_sku=True)


if __name__ == '__main__':
    import asyncio

    # asyncio.run creates, runs and closes a fresh event loop; calling
    # get_event_loop() with no running loop is deprecated and fails on
    # recent Python versions.
    asyncio.run(xxx())

运行效果


原文地址:https://blog.csdn.net/u010136741/article/details/140734741

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!