# 自如网租房价格反爬:CSS 偏移
import re
import requests
from io import BytesIO
import ddddocr
from lxml import etree
def get_yellow_price(img_url, *, timeout=10):
    """Download the yellow price sprite image and OCR it into a string.

    The returned string is later indexed by sprite position, so it is
    expected to contain the digits in the order they appear in the image.

    Args:
        img_url: Absolute URL of the sprite image.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text recognised by ddddocr in the downloaded image.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    response = requests.get(img_url, timeout=timeout)
    # Fail loudly instead of running OCR on an HTML error page.
    response.raise_for_status()
    ocr = ddddocr.DdddOcr(beta=True, show_ad=False)
    # classification() takes the raw image bytes directly.
    return ocr.classification(response.content)
# Digits in the order they appear in the fixed red sprite (red.png).
_RED_DIGITS = '8652039147'
# Horizontal pitch, in px, between neighbouring digits of each sprite.
_RED_DIGIT_WIDTH = 20
_YELLOW_DIGIT_WIDTH = 21.4

# Protocol-relative URL of the yellow price sprite embedded in the page.
_YELLOW_SPRITE_RE = re.compile(
    r'//static8\.ziroom\.com/phoenix/pc/images/price/new-list/.*?\.png'
)
# Horizontal background offset of one digit span (sign is optional).
_POSITION_RE = re.compile(r'background-position:\s*-?([\d.]+)px')


def _decode_price(styles, yellow_price):
    """Turn the inline styles of a listing's digit spans into a price string.

    Each style selects one digit by shifting a sprite image horizontally;
    the offset divided by the digit pitch is the index into the sprite's
    digit string.

    Args:
        styles: Iterable of ``style`` attribute values, one per digit span.
        yellow_price: Digit string recognised from the yellow sprite.

    Returns:
        The decoded digits concatenated in span order.

    Raises:
        ValueError: If a style carries no offset, or the offset points
            past the end of the sprite's digit string.
    """
    digits = []
    for style in styles:
        match = _POSITION_RE.search(style)
        if match is None:
            raise ValueError(f'no background-position offset in {style!r}')
        offset = float(match.group(1))
        # The red sprite is static; the yellow one is whatever OCR returned.
        if 'red.png' in style:
            sprite, width = _RED_DIGITS, _RED_DIGIT_WIDTH
        else:
            sprite, width = yellow_price, _YELLOW_DIGIT_WIDTH
        index = int(offset / width)
        if index >= len(sprite):
            raise ValueError(
                f'offset {offset}px is outside the sprite digits {sprite!r}'
            )
        digits.append(sprite[index])
    return ''.join(digits)


def main():
    """Fetch the Ziroom listing page and print each listing's title and price.

    Prices on the page are rendered as sprite offsets, so they are decoded
    with ``_decode_price`` using the OCR result of the yellow sprite.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
        RuntimeError: If the yellow sprite URL is not present in the page.
        ValueError: If a price span cannot be decoded.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    }
    response = requests.get('https://www.ziroom.com/z/', headers=headers, timeout=10)
    response.raise_for_status()
    page = response.text

    # Yellow price sprite image.
    sprite_match = _YELLOW_SPRITE_RE.search(page)
    if sprite_match is None:
        raise RuntimeError('yellow price sprite URL not found in the listing page')
    yellow_price = get_yellow_price('https:' + sprite_match.group())

    html = etree.HTML(page)
    div_list = html.xpath('//div[@class="Z_list-box"]/div')
    # The 5th card is an advertisement; drop it only when it exists.
    if len(div_list) > 4:
        del div_list[4]

    for div in div_list:
        titles = div.xpath('./div[3]/h5/a/text()')
        if not titles:
            # Not a regular listing card (no title link); nothing to print.
            continue
        styles = div.xpath('.//div[3]/div[2]/div/span[position()>1]/@style')
        price = _decode_price(styles, yellow_price)
        print(f"{titles[0]} ¥{price}/月")
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 原文地址:https://blog.csdn.net/qq_44990881/article/details/142753033
# 免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!