自学内容网 自学内容网

爬虫学习:毛毛组案例

pip install

  • pip install requests
  • base64 是 Python 标准库,无需 pip 安装
  • pip install pycryptodome

URL

目标网站:https://www.maomaozu.com/#/build
工具网站:https://curlconverter.com/ 可将 cURL 命令转换为 requests 请求代码,方便发送请求,使用方法不做过多说明
我在浏览器中复制请求时使用的是 cURL (bash) 格式

import requests
import base64
import json
import time
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
# 目标网站:https://www.maomaozu.com/#/build

# Cookies sent with every request via the ``cookies=`` argument of
# ``requests.post``. NOTE(review): these look like values captured from one
# browser session and will presumably expire -- replace with fresh ones.
cookies = dict(
    PHPSESSID='up6gke39b9s01slbsio4fqj9en',
    Hm_lvt_6cd598ca665714ffcd8aca3aafc5e0dc='1713688780',
    Hm_lpvt_6cd598ca665714ffcd8aca3aafc5e0dc='1713689090',
    SECKEY_ABVK='5kQXirwIrjHWtO1RWF8YhXpwJvuVbZjj3OIK3M8kzGw%3D',
    BMAP_SECKEY=(
        'jCbK4eShwM10v4L40RjDhvps7WmJvpqniC9qByzukM7Is2MnYn8yaE5PF_eFjSR8'
        'pcAnvaN1BGQGURFfyo0ENeRtnz_sJx1dJhRhzlxORuxDtFIjYKzdI288lJTjkfIF'
        'mbZNz8Pk4KBTj2BVK1AX-bBtqdT_eFiwysUElAgn7Ol3AQ9_Jkm9YoKeIyutC68x'
        'tI2fiWU8rbvDBQRDMs9NxA'
    ),
)

# Request headers, built in groups; insertion order matches the order in
# which the keys are listed below. No 'Cookie' header is set here because the
# cookies are passed separately through the ``cookies=`` argument.

# Content negotiation and connection handling.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json; charset=UTF-8',
}

# Where the request claims to come from.
headers.update({
    'Origin': 'https://www.maomaozu.com',
    'Referer': 'https://www.maomaozu.com/',
})

# Fetch metadata headers.
headers.update({
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
})

# Browser identification (user agent string and client hints).
headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
})

# Endpoint that receives the encrypted request body.
BUILD_URL = 'https://www.maomaozu.com/index/build.json'

# AES-CBC secrets. In each direction the same 16-byte value is used as both
# key and IV; the response secret is the request secret reversed.
REQUEST_SECRET = b'55b3b62613aef1a0'
RESPONSE_SECRET = b'0a1fea31626b3b55'

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block indefinitely.
REQUEST_TIMEOUT = 10


def encrypt_payload(payload):
    """Turn *payload* into the base64 text that is posted as the request body.

    Steps: JSON-serialize and encode to bytes, pad to the AES block size,
    AES-CBC encrypt with ``REQUEST_SECRET`` (key and IV), base64-encode.

    Args:
        payload: JSON-serializable object (here a dict of query parameters).

    Returns:
        The base64-encoded ciphertext as ``str``.
    """
    plaintext = json.dumps(payload).encode()
    # A fresh cipher object is created for every message, as the original
    # script did on each loop iteration.
    cipher = AES.new(REQUEST_SECRET, AES.MODE_CBC, REQUEST_SECRET)
    ciphertext = cipher.encrypt(pad(plaintext, AES.block_size))
    return base64.b64encode(ciphertext).decode()


def decrypt_body(body):
    """Reverse the server-side encryption of a response body.

    Steps: base64-decode, AES-CBC decrypt with ``RESPONSE_SECRET`` (key and
    IV), strip the block padding, decode to text.

    Args:
        body: Base64 text as returned by the endpoint.

    Returns:
        The decrypted response text.

    Raises:
        ValueError: If *body* is not valid base64 or the padding is wrong
            after decryption (e.g. the server returned a plain error page).
    """
    ciphertext = base64.b64decode(body)
    cipher = AES.new(RESPONSE_SECRET, AES.MODE_CBC, RESPONSE_SECRET)
    return unpad(cipher.decrypt(ciphertext), AES.block_size).decode()


# Millisecond timestamp sent as the "expire" field of every request.
# NOTE(review): it is taken once for the whole run, as in the original; if the
# server enforces freshness it may need to be refreshed per page -- confirm.
timeStamp = int(time.time() * 1000)

for page in range(1, 10):
    # 1. Build and encrypt the request body.
    data = encrypt_payload({"Type": 0, "page": page, "expire": timeStamp})

    # 2. Send the request; fail fast on timeouts and HTTP error statuses
    #    instead of trying to decrypt an error page.
    response = requests.post(
        BUILD_URL,
        cookies=cookies,
        headers=headers,
        data=data,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    # 3. Decrypt and show the response.
    data = decrypt_body(response.text)
    print(data)

    # Pause between pages to avoid hammering the server.
    time.sleep(1)

学习前提

  • python基础
  • requests模块
  • js基础的了解
  • base64编码,了解一下原理
  • AES算法,会用代码也可以,原理了解一下

还有很多知识,不做补充


原文地址:https://blog.csdn.net/unravel_tom/article/details/138047650

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!