学习最佳实践G4F中的编程技术:cookie读取和管理
GPT4Free项目源码地址:GitHub - xtekky/gpt4free: The official gpt4free repository | various collection of powerful language models
阅读这段代码,代码在:gpt4free/g4f/cookies.py at main · xtekky/gpt4free · GitHub
from __future__ import annotations
import os
import time
import json
try:
from platformdirs import user_config_dir
has_platformdirs = True
except ImportError:
has_platformdirs = False
try:
from browser_cookie3 import (
chrome, chromium, opera, opera_gx,
brave, edge, vivaldi, firefox,
_LinuxPasswordManager, BrowserCookieError
)
has_browser_cookie3 = True
except ImportError:
has_browser_cookie3 = False
from .typing import Dict, Cookies
from .errors import MissingRequirementsError
from . import debug
class CookiesConfig:
    """Module-wide holder for the cookie cache and the cookie/HAR directory."""

    # In-memory cache: domain name -> cookies for that domain.
    cookies: Dict[str, Cookies] = {}
    # Directory that read_cookie_files() scans when no path is passed.
    cookies_dir: str = "./har_and_cookies"
# Domain fragments that read_cookie_files() looks for (by substring match)
# in the Host / :authority header of each HAR entry.
DOMAINS = [
    ".bing.com",
    ".meta.ai",
    ".google.com",
    "www.whiterabbitneo.com",
    "huggingface.co",
    "chat.reka.ai",
]

if has_browser_cookie3:
    # When the D-Bus session address is "/dev/null", replace the Linux
    # password lookup with a fixed value.
    # NOTE(review): presumably this avoids querying a keyring when no D-Bus
    # session is available -- confirm against browser_cookie3.
    if os.environ.get('DBUS_SESSION_BUS_ADDRESS') == "/dev/null":
        _LinuxPasswordManager.get_password = lambda a, b: b"secret"
def get_cookies(domain_name: str = '', raise_requirements_error: bool = True, single_browser: bool = False) -> Dict[str, str]:
    """
    Return the cookies for a domain, loading them from the browsers on a cache miss.

    A domain already present in ``CookiesConfig.cookies`` is answered from the
    cache; otherwise the result of ``load_cookies_from_browsers`` is stored
    in the cache and returned.

    Args:
        domain_name (str): The domain for which to load cookies.
        raise_requirements_error (bool): Forwarded to ``load_cookies_from_browsers``.
        single_browser (bool): Forwarded to ``load_cookies_from_browsers``.

    Returns:
        Dict[str, str]: A dictionary of cookie names and values.
    """
    if domain_name not in CookiesConfig.cookies:
        CookiesConfig.cookies[domain_name] = load_cookies_from_browsers(
            domain_name, raise_requirements_error, single_browser
        )
    return CookiesConfig.cookies[domain_name]
def set_cookies(domain_name: str, cookies: Cookies = None) -> None:
    """
    Store cookies for a domain in the cache, or drop the cached entry.

    Args:
        domain_name (str): The domain whose cache entry is updated.
        cookies (Cookies): Cookies to cache. A falsy value (``None`` or an
            empty mapping) removes any cached entry for the domain instead.
    """
    if not cookies:
        # Nothing to store: forget the domain if it was cached.
        CookiesConfig.cookies.pop(domain_name, None)
        return
    CookiesConfig.cookies[domain_name] = cookies
def load_cookies_from_browsers(domain_name: str, raise_requirements_error: bool = True, single_browser: bool = False) -> Cookies:
    """
    Collect the cookies for a domain from every supported browser loader.

    Loaders are tried in a fixed order; the first loader that provides a given
    cookie name wins, and cookies whose expiry time has passed are skipped.
    ``BrowserCookieError`` from a loader is ignored; any other exception is
    reported when ``debug.logging`` is enabled and the next loader is tried.

    Args:
        domain_name (str): The domain for which to load cookies.
        raise_requirements_error (bool): When "browser_cookie3" is missing,
            raise instead of returning an empty dict.
        single_browser (bool): Stop after the first loader that returned a
            non-empty cookie jar.

    Returns:
        Dict[str, str]: A dictionary of cookie names and values.

    Raises:
        MissingRequirementsError: If "browser_cookie3" is not installed and
            ``raise_requirements_error`` is true.
    """
    if not has_browser_cookie3:
        if raise_requirements_error:
            raise MissingRequirementsError('Install "browser_cookie3" package')
        return {}

    collected = {}
    loaders = (_g4f, chrome, chromium, opera, opera_gx, brave, edge, vivaldi, firefox)
    for loader in loaders:
        try:
            jar = loader(domain_name=domain_name)
            jar_has_cookies = len(jar) > 0
            if jar_has_cookies and debug.logging:
                print(f"Read cookies from {loader.__name__} for {domain_name}")
            for cookie in jar:
                if cookie.name in collected:
                    # An earlier loader already supplied this cookie.
                    continue
                if cookie.expires and cookie.expires <= time.time():
                    # Expired cookie.
                    continue
                collected[cookie.name] = cookie.value
            if single_browser and jar_has_cookies:
                break
        except BrowserCookieError:
            continue
        except Exception as e:
            if debug.logging:
                print(f"Error reading cookies from {loader.__name__} for {domain_name}: {e}")
    return collected
def set_cookies_dir(dir: str) -> None:
    """
    Set the directory that holds the .har and cookie files.

    Args:
        dir (str): New value for ``CookiesConfig.cookies_dir``.
    """
    CookiesConfig.cookies_dir = dir
def get_cookies_dir() -> str:
    """
    Return the directory that holds the .har and cookie files.

    Returns:
        str: The current value of ``CookiesConfig.cookies_dir``.
    """
    return CookiesConfig.cookies_dir
def read_cookie_files(dirPath: str = None) -> None:
    """
    Rebuild the cookie cache from the .har and .json files under a directory.

    The directory tree is walked recursively. ``CookiesConfig.cookies`` is
    reset to an empty dict and then filled, first from the HAR captures and
    afterwards from the JSON cookie exports, so a JSON file overrides a HAR
    file that produced the same domain key.

    The scan is best-effort: files that cannot be decoded as JSON, files
    whose JSON does not have the expected structure, and individual malformed
    entries are skipped instead of aborting the whole scan.

    Args:
        dirPath (str): Directory to scan. Defaults to
            ``CookiesConfig.cookies_dir`` when ``None``.
    """
    def load_json(path: str):
        """Return the parsed JSON content of *path*, or None if it cannot be decoded."""
        with open(path, 'rb') as file:
            try:
                return json.load(file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # json.load decodes the raw bytes itself, so a binary file
                # fails with UnicodeDecodeError rather than JSONDecodeError.
                return None

    def get_domain(v: dict) -> str:
        """Return the DOMAINS entry contained in the entry's Host/:authority header, or None."""
        hosts = [
            h["value"] for h in v['request']['headers']
            if h["name"].lower() in ("host", ":authority")
        ]
        if not hosts:
            return None
        host = hosts.pop()
        for d in DOMAINS:
            if d in host:
                return d
        return None

    har_files = []
    cookie_files = []
    scan_root = CookiesConfig.cookies_dir if dirPath is None else dirPath
    for root, _dirs, files in os.walk(scan_root):
        for file_name in files:
            if file_name.endswith(".har"):
                har_files.append(os.path.join(root, file_name))
            elif file_name.endswith(".json"):
                cookie_files.append(os.path.join(root, file_name))

    # Start from an empty cache; everything below repopulates it.
    CookiesConfig.cookies = {}

    for path in har_files:
        har = load_json(path)
        try:
            entries = har['log']['entries']
        except (KeyError, TypeError):
            # Undecodable file, or valid JSON that is not HAR-shaped.
            continue
        if not isinstance(entries, list):
            continue
        if debug.logging:
            print("Read .har file:", path)
        new_cookies = {}
        for v in entries:
            try:
                domain = get_domain(v)
                if domain is None:
                    continue
                v_cookies = {c['name']: c['value'] for c in v['request']['cookies']}
            except (AttributeError, KeyError, TypeError):
                # Entry lacks the request/headers/cookies structure: skip it.
                continue
            if v_cookies:
                # A later entry for the same domain replaces an earlier one.
                CookiesConfig.cookies[domain] = v_cookies
                new_cookies[domain] = len(v_cookies)
        if debug.logging:
            for domain, count in new_cookies.items():
                print(f"Cookies added: {count} from {domain}")

    for path in cookie_files:
        cookie_file = load_json(path)
        if not isinstance(cookie_file, list):
            continue
        if debug.logging:
            print("Read cookie file:", path)
        new_cookies = {}
        for c in cookie_file:
            if not isinstance(c, dict):
                continue
            try:
                domain, name, value = c["domain"], c["name"], c["value"]
            except KeyError:
                # Record without domain/name/value: not a usable cookie.
                continue
            new_cookies.setdefault(domain, {})[name] = value
        for domain, new_values in new_cookies.items():
            if debug.logging:
                print(f"Cookies added: {len(new_values)} from {domain}")
            CookiesConfig.cookies[domain] = new_values
def _g4f(domain_name: str) -> list:
    """
    Load cookies from the 'g4f' browser profile, if one exists.

    The cookie database is looked up at ``Default/Cookies`` inside the "g4f"
    user config directory and read with the ``chrome`` loader.

    Args:
        domain_name (str): The domain for which to load cookies.

    Returns:
        list: List of cookies; empty when "platformdirs" is unavailable or
        the cookie file does not exist.
    """
    if not has_platformdirs:
        return []
    cookie_file = os.path.join(user_config_dir("g4f"), "Default", "Cookies")
    if os.path.exists(cookie_file):
        return chrome(cookie_file, domain_name)
    return []
代码解读
这段代码是一个用于加载和管理浏览器cookies的Python模块。它支持从多种浏览器(如Chrome、Chromium、Firefox等)中读取cookies,并能够将它们存储在内存中以便于后续使用。同时,它还支持从HAR(HTTP Archive)文件和JSON格式的cookie文件中读取cookies。以下是代码的详细解读:
导入模块
- 导入 `os`、`time`、`json` 等标准库模块。
- 尝试导入 `platformdirs` 和 `browser_cookie3` 库,如果导入失败,则设置相应的标志变量为 `False`。
- 从当前包的其他模块导入类型定义、错误类和调试功能。
类和全局变量
CookiesConfig
类:用于存储和管理cookies的配置,包括一个字典cookies
用于缓存加载的cookies,以及一个字符串cookies_dir
用于指定存储HAR和cookie文件的目录。DOMAINS
列表:包含了一系列特定的域名,用于在处理HAR和cookie文件时识别相关cookies。
函数
get_cookies
:根据域名加载cookies,如果已缓存则直接返回,否则从浏览器中加载并缓存。set_cookies
:设置或删除指定域名的cookies。load_cookies_from_browsers
:辅助函数,用于从各种支持的浏览器中加载cookies。set_cookies_dir
和get_cookies_dir
:设置和获取cookies目录。read_cookie_files
:从指定的目录(或默认目录)中读取HAR文件和JSON格式的cookie文件,并更新CookiesConfig.cookies
缓存。_g4f
:一个特定于“g4f”浏览器的cookies加载函数,如果platformdirs
库可用,则尝试从g4f浏览器的配置目录中加载cookies。
关键点
- 兼容性:代码通过检查
platformdirs
和browser_cookie3
库的存在性来处理兼容性问题,如果缺少这些库,则通过设置标志变量和抛出异常来通知用户。 - 缓存机制:使用
CookiesConfig.cookies
字典来缓存已加载的cookies,以避免重复加载。 - 灵活性:支持从多种浏览器和文件格式中读取cookies,提供了灵活的cookies管理方式。
- 错误处理:在加载cookies时,通过捕获异常来处理可能出现的错误,并提供了调试日志功能以便于问题排查。
使用场景
这段代码可以用于需要跨浏览器读取和管理cookies的场景,例如自动化测试、网络爬虫等。通过读取和设置浏览器的cookies,可以模拟用户的登录状态或绕过一些基于cookies的访问限制。
知识点解析
尝试导入某个模块时用异常处理(Exception Handling)
-
尝试导入:
try:
    from platformdirs import user_config_dir
    has_platformdirs = True
这部分代码尝试从
platformdirs
包中导入user_config_dir
函数。如果导入成功,那么user_config_dir
函数将被引入当前命名空间中,并且变量has_platformdirs
将被设置为True
,表示platformdirs
包是可用的。 -
异常处理:
except ImportError:
    has_platformdirs = False
如果在尝试导入user_config_dir
时发生了ImportError
(这通常意味着platformdirs
包没有安装,或者Python无法找到它),那么except
块中的代码将被执行。这里,它将变量has_platformdirs
设置为False
,表示platformdirs
包不可用。
这种写法的优点在于它允许程序在缺少某些依赖时仍然能够继续运行,而不是因为缺少依赖而完全崩溃。这对于编写需要兼容不同环境或配置的库和应用程序特别有用。
比如在一台Mac上执行上面的导入语句,成功导入user_config_dir之后,再执行user_config_dir(),就可以拿到应用配置目录:
>>> user_config_dir()
'/Users/xxxxuser/Library/Application Support'
目录里面就存放了Cache、Cookie等各种信息。
在一台FreeBSD系统,执行user_config_dir()拿到的存盘目录则是.config
>>> user_config_dir()
'/home/xxuser/.config'
同样的,browser_cookie3 也使用了这种导入技术:
try:
from browser_cookie3 import (
chrome, chromium, opera, opera_gx,
brave, edge, vivaldi, firefox,
_LinuxPasswordManager, BrowserCookieError
)
has_browser_cookie3 = True
except ImportError:
has_browser_cookie3 = False
调试
在咨询文心一言的时候报错
将以上源码咨询文心一言的时候,会报错:http://www.whiterabbitneo.com 看起来您上传了一个空页面,请检查网址
原来文心一言使用了阅读助手,阅读助手在浏览http://www.whiterabbitneo.com 的时候拿到的似乎是一个空页面。
但是其他域名就没有问题,代码里涉及的网址:
DOMAINS = [
".bing.com",
".meta.ai",
".google.com",
"www.whiterabbitneo.com",
"huggingface.co",
"chat.reka.ai",
]
原文地址:https://blog.csdn.net/skywalk8163/article/details/142531975
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!