Files
Buffteks-Website/venv/lib/python3.12/site-packages/curl_cffi/requests/session.py
2025-05-08 21:10:14 -05:00

1067 lines
40 KiB
Python

from __future__ import annotations
import asyncio
import queue
import sys
import threading
import warnings
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager, contextmanager, suppress
from io import BytesIO
from typing import (
TYPE_CHECKING,
Callable,
Generic,
Literal,
Optional,
TypedDict,
TypeVar,
Union,
cast,
)
from urllib.parse import urlparse
from ..aio import AsyncCurl
from ..const import CurlHttpVersion, CurlInfo, CurlOpt
from ..curl import Curl, CurlError, CurlMime
from ..utils import CurlCffiWarning
from .cookies import Cookies, CookieTypes, CurlMorsel
from .exceptions import RequestException, SessionClosed, code2error
from .headers import Headers, HeaderTypes
from .impersonate import BrowserTypeLiteral, ExtraFingerprints, ExtraFpDict
from .models import STREAM_END, Response
from .utils import not_set, set_curl_options
from .websockets import AsyncWebSocket, WebSocket
with suppress(ImportError):
import gevent
with suppress(ImportError):
import eventlet.tpool
# Added in 3.13: https://docs.python.org/3/library/typing.html#typing.TypeVar.__default__
if sys.version_info >= (3, 13):
    # On 3.13+, TypeVar defaults are native: `Session()` without an explicit
    # type argument resolves R to Response.
    R = TypeVar("R", bound=Response, default=Response)
else:
    R = TypeVar("R", bound=Response)
if TYPE_CHECKING:
    from typing_extensions import Unpack

    class ProxySpec(TypedDict, total=False):
        """Mapping of url scheme (or "all") to proxy url."""

        all: str
        http: str
        https: str
        ws: str
        wss: str

    class BaseSessionParams(Generic[R], TypedDict, total=False):
        """Keyword arguments accepted by ``BaseSession.__init__`` (for Unpack)."""

        headers: Optional[HeaderTypes]
        cookies: Optional[CookieTypes]
        auth: Optional[tuple[str, str]]
        proxies: Optional[ProxySpec]
        proxy: Optional[str]
        proxy_auth: Optional[tuple[str, str]]
        base_url: Optional[str]
        params: Optional[dict]
        verify: bool
        timeout: Union[float, tuple[float, float]]
        trust_env: bool
        allow_redirects: bool
        max_redirects: int
        impersonate: Optional[BrowserTypeLiteral]
        ja3: Optional[str]
        akamai: Optional[str]
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]]
        default_headers: bool
        default_encoding: Union[str, Callable[[bytes], str]]
        curl_options: Optional[dict]
        curl_infos: Optional[list]
        http_version: Optional[CurlHttpVersion]
        debug: bool
        interface: Optional[str]
        cert: Optional[Union[str, tuple[str, str]]]
        response_class: Optional[type[R]]

    class StreamRequestParams(TypedDict, total=False):
        """Keyword arguments accepted by ``Session.stream`` / per-request options."""

        params: Optional[Union[dict, list, tuple]]
        data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]]
        json: Optional[dict | list]
        headers: Optional[HeaderTypes]
        cookies: Optional[CookieTypes]
        files: Optional[dict]
        auth: Optional[tuple[str, str]]
        timeout: Optional[Union[float, tuple[float, float], object]]
        allow_redirects: Optional[bool]
        max_redirects: Optional[int]
        proxies: Optional[ProxySpec]
        proxy: Optional[str]
        proxy_auth: Optional[tuple[str, str]]
        verify: Optional[bool]
        referer: Optional[str]
        accept_encoding: Optional[str]
        content_callback: Optional[Callable]
        impersonate: Optional[BrowserTypeLiteral]
        ja3: Optional[str]
        akamai: Optional[str]
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]]
        default_headers: Optional[bool]
        default_encoding: Union[str, Callable[[bytes], str]]
        quote: Union[str, Literal[False]]
        http_version: Optional[CurlHttpVersion]
        interface: Optional[str]
        cert: Optional[Union[str, tuple[str, str]]]
        max_recv_speed: int
        multipart: Optional[CurlMime]

    class RequestParams(StreamRequestParams, total=False):
        """StreamRequestParams plus the ``stream`` switch, for ``.request()``."""

        stream: Optional[bool]
else:
    # Runtime stand-ins: the TypedDict/Unpack machinery above is only needed by
    # static type checkers, so cheap placeholders are installed at runtime.
    class _Unpack:
        @staticmethod
        def __getitem__(*args, **kwargs):
            pass

    Unpack = _Unpack()
    ProxySpec = dict[str, str]
    BaseSessionParams = TypedDict
    StreamRequestParams, RequestParams = TypedDict, TypedDict
# Green-thread engine used to offload blocking curl.perform() calls.
ThreadType = Literal["eventlet", "gevent"]
# HTTP verbs accepted by the ``method`` parameter of request methods.
HttpMethod = Literal[
    "GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"
]
def _is_absolute_url(url: str) -> bool:
"""Check if the provided url is an absolute url"""
parsed_url = urlparse(url)
return bool(parsed_url.scheme and parsed_url.hostname)
def _peek_queue(q: queue.Queue, default=None):
try:
return q.queue[0]
except IndexError:
return default
def _peek_aio_queue(q: asyncio.Queue, default=None):
try:
return q._queue[0] # type: ignore
except IndexError:
return default
class BaseSession(Generic[R]):
    """Provide common methods for setting curl options and reading info in sessions."""

    def __init__(
        self,
        *,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        auth: Optional[tuple[str, str]] = None,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        base_url: Optional[str] = None,
        params: Optional[dict] = None,
        verify: bool = True,
        timeout: Union[float, tuple[float, float]] = 30,
        trust_env: bool = True,
        allow_redirects: bool = True,
        max_redirects: int = 30,
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: bool = True,
        default_encoding: Union[str, Callable[[bytes], str]] = "utf-8",
        curl_options: Optional[dict] = None,
        curl_infos: Optional[list] = None,
        http_version: Optional[CurlHttpVersion] = None,
        debug: bool = False,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        response_class: Optional[type[R]] = None,
    ):
        """Store session-wide defaults; see subclasses for parameter docs."""
        self.headers = Headers(headers)
        self._cookies = Cookies(cookies)  # guarded by @property
        self.auth = auth
        self.base_url = base_url
        self.params = params
        self.verify = verify
        self.timeout = timeout
        self.trust_env = trust_env
        self.allow_redirects = allow_redirects
        self.max_redirects = max_redirects
        self.impersonate = impersonate
        self.ja3 = ja3
        self.akamai = akamai
        self.extra_fp = extra_fp
        self.default_headers = default_headers
        self.default_encoding = default_encoding
        self.curl_options = curl_options or {}
        self.curl_infos = curl_infos or []
        self.http_version = http_version
        self.debug = debug
        self.interface = interface
        self.cert = cert

        if response_class is not None and issubclass(response_class, Response) is False:
            raise TypeError(
                "`response_class` must be a subclass of "
                "`curl_cffi.requests.models.Response`, "
                f"not of type `{response_class}`"
            )
        self.response_class = response_class or Response

        # `proxy` is shorthand for proxies={"all": proxy}; giving both is ambiguous.
        if proxy and proxies:
            raise TypeError("Cannot specify both 'proxy' and 'proxies'")
        if proxy:
            proxies = {"all": proxy}
        self.proxies: ProxySpec = proxies or {}
        self.proxy_auth = proxy_auth

        if self.base_url and not _is_absolute_url(self.base_url):
            raise ValueError("You need to provide an absolute url for 'base_url'")

        self._closed = False

    def _parse_response(self, curl, buffer, header_buffer, default_encoding) -> R:
        """Build a Response from the curl handle and the captured body/header buffers.

        Also merges cookies received by curl back into the session cookie jar.
        """
        c = curl
        rsp = cast(R, self.response_class(c))
        rsp.url = cast(bytes, c.getinfo(CurlInfo.EFFECTIVE_URL)).decode()
        if buffer:
            rsp.content = buffer.getvalue()
        rsp.http_version = cast(int, c.getinfo(CurlInfo.HTTP_VERSION))
        rsp.status_code = cast(int, c.getinfo(CurlInfo.RESPONSE_CODE))
        rsp.ok = 200 <= rsp.status_code < 400

        header_lines = header_buffer.getvalue().splitlines()

        # TODO: history urls
        header_list: list[bytes] = []
        for header_line in header_lines:
            if not header_line.strip():
                continue
            if header_line.startswith(b"HTTP/"):
                # read header from last response
                rsp.reason = c.get_reason_phrase(header_line).decode()
                # empty header list for new redirected response
                header_list = []
                continue
            # Folded (continuation) header lines extend the previous header.
            if header_line.startswith(b" ") or header_line.startswith(b"\t"):
                header_list[-1] += header_line
                continue
            header_list.append(header_line)
        rsp.headers = Headers(header_list)

        # cookies
        # NOTE(review): the comprehension variable `c` shadows the curl handle,
        # but only inside the comprehension's own scope; the iterable
        # `c.getinfo(...)` is evaluated with the outer `c` first, so this works.
        morsels = [
            CurlMorsel.from_curl_format(c) for c in c.getinfo(CurlInfo.COOKIELIST)
        ]
        # for l in c.getinfo(CurlInfo.COOKIELIST):
        #     print("Curl Cookies", l.decode())
        self._cookies.update_cookies_from_curl(morsels)
        rsp.cookies = self._cookies
        # print("Cookies after extraction", self._cookies)

        rsp.primary_ip = cast(bytes, c.getinfo(CurlInfo.PRIMARY_IP)).decode()
        rsp.primary_port = cast(int, c.getinfo(CurlInfo.PRIMARY_PORT))
        rsp.local_ip = cast(bytes, c.getinfo(CurlInfo.LOCAL_IP)).decode()
        rsp.local_port = cast(int, c.getinfo(CurlInfo.LOCAL_PORT))
        rsp.default_encoding = default_encoding
        rsp.elapsed = cast(float, c.getinfo(CurlInfo.TOTAL_TIME))
        rsp.redirect_count = cast(int, c.getinfo(CurlInfo.REDIRECT_COUNT))
        rsp.redirect_url = cast(bytes, c.getinfo(CurlInfo.REDIRECT_URL)).decode()

        # custom info options
        for info in self.curl_infos:
            rsp.infos[info] = c.getinfo(info)

        return rsp

    def _check_session_closed(self):
        """Raise ``SessionClosed`` if this session has already been closed."""
        if self._closed:
            raise SessionClosed("Session is closed, cannot send request.")

    @property
    def cookies(self) -> Cookies:
        return self._cookies

    @cookies.setter
    def cookies(self, cookies: CookieTypes) -> None:
        # This ensures that the cookies property is always converted to Cookies.
        self._cookies = Cookies(cookies)
class Session(BaseSession[R]):
    """A request session, cookies and connections will be reused. This object is
    thread-safe, but it's recommended to use a separate session for each thread."""

    def __init__(
        self,
        curl: Optional[Curl] = None,
        thread: Optional[ThreadType] = None,
        use_thread_local_curl: bool = True,
        **kwargs: Unpack[BaseSessionParams[R]],
    ):
        """
        Parameters set in the ``__init__`` method will be overriden by the same
        parameter in request method.

        Args:
            curl: curl object to use in the session. If not provided, a new one will be
                created. Also, a fresh curl object will always be created when accessed
                from another thread.
            thread: thread engine to use for working with other thread implementations.
                choices: eventlet, gevent.
            headers: headers to use in the session.
            cookies: cookies to add in the session.
            auth: HTTP basic auth, a tuple of (username, password), only basic auth is
                supported.
            proxies: dict of proxies to use, prefer to use proxy if they are the same.
                format: ``{"http": proxy_url, "https": proxy_url}``.
            proxy: proxy to use, format: "http://proxy_url".
                Cannot be used with the above parameter.
            proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
            base_url: absolute url to use as base for relative urls.
            params: query string for the session.
            verify: whether to verify https certs.
            timeout: how many seconds to wait before giving up.
            trust_env: use http_proxy/https_proxy and other environments, default True.
            allow_redirects: whether to allow redirection.
            max_redirects: max redirect counts, default 30, use -1 for unlimited.
            impersonate: which browser version to impersonate in the session.
            ja3: ja3 string to impersonate in the session.
            akamai: akamai string to impersonate in the session.
            extra_fp: extra fingerprints options, in complement to ja3 and akamai str.
            interface: which interface use.
            default_encoding: encoding for decoding response content if charset is not
                found in headers. Defaults to "utf-8". Can be set to a callable for
                automatic detection.
            cert: a tuple of (cert, key) filenames for client cert.
            response_class: A customized subtype of ``Response`` to use.

        Notes:
            This class can be used as a context manager.

            .. code-block:: python

                from curl_cffi.requests import Session

                with Session() as s:
                    r = s.get("https://example.com")
        """
        super().__init__(**kwargs)
        self._thread = thread
        self._use_thread_local_curl = use_thread_local_curl
        self._queue = None
        self._executor = None
        if use_thread_local_curl:
            # Each thread gets its own curl handle via threading.local().
            self._local = threading.local()
            if curl:
                self._is_customized_curl = True
                self._local.curl = curl
            else:
                self._is_customized_curl = False
                self._local.curl = Curl(debug=self.debug)
        else:
            self._curl = curl if curl else Curl(debug=self.debug)

    @property
    def curl(self):
        """Return the curl handle for the current thread (or the shared one)."""
        if self._use_thread_local_curl:
            # A user-supplied handle cannot be shared with other threads; a
            # fresh one is created per thread, hence the warning below.
            if self._is_customized_curl:
                warnings.warn(
                    "Creating fresh curl handle in different thread.",
                    CurlCffiWarning,
                    stacklevel=2,
                )
            if not getattr(self._local, "curl", None):
                self._local.curl = Curl(debug=self.debug)
            return self._local.curl
        else:
            return self._curl

    @property
    def executor(self):
        # Lazily created; only used to run perform() in the background for
        # streaming requests.
        if self._executor is None:
            self._executor = ThreadPoolExecutor()
        return self._executor

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def close(self) -> None:
        """Close the session."""
        self._closed = True
        self.curl.close()

    @contextmanager
    def stream(
        self,
        method: HttpMethod,
        url: str,
        **kwargs: Unpack[StreamRequestParams],
    ):
        """Equivalent to ``with request(..., stream=True) as r:``"""
        rsp = self.request(method=method, url=url, **kwargs, stream=True)
        try:
            yield rsp
        finally:
            rsp.close()

    def ws_connect(
        self, url, on_message=None, on_error=None, on_open=None, on_close=None, **kwargs
    ) -> WebSocket:
        """Connects to a websocket url.

        Note: This method is deprecated, use WebSocket instead.

        Args:
            url: the ws url to connect.
            on_message: message callback, ``def on_message(ws, str)``
            on_error: error callback, ``def on_error(ws, error)``
            on_open: open callback, ``def on_open(ws)``
            on_close: close callback, ``def on_close(ws)``

        Other parameters are the same as ``.request``

        Returns:
            a WebSocket instance to communicate with the server.
        """
        self._check_session_closed()

        # Duplicate the handle so the session's own handle stays reusable.
        curl = self.curl.duphandle()
        self.curl.reset()

        ws = WebSocket(
            curl=curl,
            on_message=on_message,
            on_error=on_error,
            on_open=on_open,
            on_close=on_close,
        )
        ws.connect(url, **kwargs)
        return ws

    def request(
        self,
        method: HttpMethod,
        url: str,
        params: Optional[Union[dict, list, tuple]] = None,
        data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]] = None,
        json: Optional[dict | list] = None,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        files: Optional[dict] = None,
        auth: Optional[tuple[str, str]] = None,
        timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
        allow_redirects: Optional[bool] = None,
        max_redirects: Optional[int] = None,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        verify: Optional[bool] = None,
        referer: Optional[str] = None,
        accept_encoding: Optional[str] = "gzip, deflate, br",
        content_callback: Optional[Callable] = None,
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: Optional[bool] = None,
        default_encoding: Union[str, Callable[[bytes], str]] = "utf-8",
        quote: Union[str, Literal[False]] = "",
        http_version: Optional[CurlHttpVersion] = None,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        stream: Optional[bool] = None,
        max_recv_speed: int = 0,
        multipart: Optional[CurlMime] = None,
    ):
        """Send the request, see ``requests.request`` for details on parameters."""
        self._check_session_closed()

        # clone a new curl instance for streaming response
        if stream:
            c = self.curl.duphandle()
            self.curl.reset()
        else:
            c = self.curl

        # Per-request values take precedence over session-level defaults;
        # the *_list parameters are merged inside set_curl_options.
        req, buffer, header_buffer, q, header_recved, quit_now = set_curl_options(
            c,
            method=method,
            url=url,
            params_list=[self.params, params],
            base_url=self.base_url,
            data=data,
            json=json,
            headers_list=[self.headers, headers],
            cookies_list=[self._cookies, cookies],
            files=files,
            auth=auth or self.auth,
            timeout=self.timeout if timeout is not_set else timeout,
            allow_redirects=self.allow_redirects
            if allow_redirects is None
            else allow_redirects,
            max_redirects=self.max_redirects
            if max_redirects is None
            else max_redirects,
            proxies_list=[self.proxies, proxies],
            proxy=proxy,
            proxy_auth=proxy_auth or self.proxy_auth,
            verify_list=[self.verify, verify],
            referer=referer,
            accept_encoding=accept_encoding,
            content_callback=content_callback,
            impersonate=impersonate or self.impersonate,
            ja3=ja3 or self.ja3,
            akamai=akamai or self.akamai,
            extra_fp=extra_fp or self.extra_fp,
            default_headers=self.default_headers
            if default_headers is None
            else default_headers,
            quote=quote,
            http_version=http_version or self.http_version,
            interface=interface or self.interface,
            stream=stream,
            max_recv_speed=max_recv_speed,
            multipart=multipart,
            cert=cert or self.cert,
            curl_options=self.curl_options,
            queue_class=queue.Queue,
            event_class=threading.Event,
        )

        if stream:
            # The transfer runs in a worker thread; the foreground thread
            # parses headers as soon as `header_recved` is set.
            header_parsed = threading.Event()

            def perform():
                try:
                    c.perform()
                except CurlError as e:
                    rsp = self._parse_response(
                        c, buffer, header_buffer, default_encoding
                    )
                    rsp.request = req
                    q.put_nowait(RequestException(str(e), e.code, rsp))  # type: ignore
                finally:
                    # Always unblock the foreground thread and terminate the
                    # consumer queue, even on failure.
                    if not cast(threading.Event, header_recved).is_set():
                        cast(threading.Event, header_recved).set()
                    q.put(STREAM_END)  # type: ignore

            def cleanup(fut):
                # Do not reset the handle until the foreground thread has
                # finished reading headers/info from it.
                header_parsed.wait()
                c.reset()

            stream_task = self.executor.submit(perform)
            stream_task.add_done_callback(cleanup)

            # Wait for the first chunk
            header_recved.wait()  # type: ignore
            rsp = self._parse_response(c, buffer, header_buffer, default_encoding)
            header_parsed.set()

            # Raise the exception if something wrong happens when receiving the header.
            first_element = _peek_queue(q)  # type: ignore
            if isinstance(first_element, RequestException):
                c.reset()
                raise first_element

            rsp.request = req
            rsp.stream_task = stream_task
            rsp.quit_now = quit_now
            rsp.queue = q
            return rsp
        else:
            try:
                if self._thread == "eventlet":
                    # see: https://eventlet.net/doc/threading.html
                    eventlet.tpool.execute(c.perform)  # type: ignore
                elif self._thread == "gevent":
                    # see: https://www.gevent.org/api/gevent.threadpool.html
                    gevent.get_hub().threadpool.spawn(c.perform).get()  # type: ignore
                else:
                    c.perform()
            except CurlError as e:
                rsp = self._parse_response(c, buffer, header_buffer, default_encoding)
                rsp.request = req
                error = code2error(e.code, str(e))
                raise error(str(e), e.code, rsp) from e
            else:
                rsp = self._parse_response(c, buffer, header_buffer, default_encoding)
                rsp.request = req
                return rsp
            finally:
                # Reset so the handle can be reused for the next request.
                c.reset()

    def head(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="HEAD", url=url, **kwargs)

    def get(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="GET", url=url, **kwargs)

    def post(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="POST", url=url, **kwargs)

    def put(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="PUT", url=url, **kwargs)

    def patch(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="PATCH", url=url, **kwargs)

    def delete(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="DELETE", url=url, **kwargs)

    def options(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="OPTIONS", url=url, **kwargs)

    def trace(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="TRACE", url=url, **kwargs)

    def query(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="QUERY", url=url, **kwargs)
class AsyncSession(BaseSession[R]):
    """An async request session, cookies and connections will be reused."""

    def __init__(
        self,
        *,
        loop=None,
        async_curl: Optional[AsyncCurl] = None,
        max_clients: int = 10,
        **kwargs: Unpack[BaseSessionParams[R]],
    ):
        """
        Parameters set in the ``__init__`` method will be override by the same parameter
        in request method.

        Parameters:
            loop: loop to use, if not provided, the running loop will be used.
            async_curl: [AsyncCurl](/api/curl_cffi#curl_cffi.AsyncCurl) object to use.
            max_clients: maxmium curl handle to use in the session,
                this will affect the concurrency ratio.
            headers: headers to use in the session.
            cookies: cookies to add in the session.
            auth: HTTP basic auth, a tuple of (username, password), only basic auth is
                supported.
            proxies: dict of proxies to use, prefer to use ``proxy`` if they are the
                same. format: ``{"http": proxy_url, "https": proxy_url}``.
            proxy: proxy to use, format: "http://proxy_url".
                Cannot be used with the above parameter.
            proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
            base_url: absolute url to use for relative urls.
            params: query string for the session.
            verify: whether to verify https certs.
            timeout: how many seconds to wait before giving up.
            trust_env: use http_proxy/https_proxy and other environments, default True.
            allow_redirects: whether to allow redirection.
            max_redirects: max redirect counts, default 30, use -1 for unlimited.
            impersonate: which browser version to impersonate in the session.
            ja3: ja3 string to impersonate in the session.
            akamai: akamai string to impersonate in the session.
            extra_fp: extra fingerprints options, in complement to ja3 and akamai str.
            default_encoding: encoding for decoding response content if charset is not
                found in headers. Defaults to "utf-8". Can be set to a callable for
                automatic detection.
            cert: a tuple of (cert, key) filenames for client cert.
            response_class: A customized subtype of ``Response`` to use.

        Notes:
            This class can be used as a context manager, and it's recommended to use via
            ``async with``.
            However, unlike aiohttp, it is not required to use ``with``.

            .. code-block:: python

                from curl_cffi.requests import AsyncSession

                # recommended.
                async with AsyncSession() as s:
                    r = await s.get("https://example.com")

                s = AsyncSession()  # it also works.
        """
        super().__init__(**kwargs)
        self._loop = loop
        self._acurl = async_curl
        self.max_clients = max_clients
        self.init_pool()

    @property
    def loop(self):
        # Resolved lazily so the session can be constructed outside a running loop.
        if self._loop is None:
            self._loop = asyncio.get_running_loop()
        return self._loop

    @property
    def acurl(self):
        if self._acurl is None:
            self._acurl = AsyncCurl(loop=self.loop)
        return self._acurl

    def init_pool(self):
        """Fill the handle pool with ``max_clients`` placeholders (None)."""
        # LIFO: the most recently released handle (warm connection) is reused first.
        self.pool = asyncio.LifoQueue(self.max_clients)
        while True:
            try:
                self.pool.put_nowait(None)
            except asyncio.QueueFull:
                break

    async def pop_curl(self):
        """Take a handle from the pool, creating one on first use of a slot."""
        curl = await self.pool.get()
        if curl is None:
            curl = Curl(debug=self.debug)
            # XXX: This may be related to proxy rotation
            # curl.setopt(CurlOpt.FRESH_CONNECT, 1)
            # curl.setopt(CurlOpt.FORBID_REUSE, 1)
        return curl

    def push_curl(self, curl):
        # Best-effort return to the pool; a full pool simply drops the handle.
        with suppress(asyncio.QueueFull):
            self.pool.put_nowait(curl)

    async def __aenter__(self):
        return self

    async def __aexit__(self, *args):
        await self.close()
        return None

    async def close(self) -> None:
        """Close the session."""
        await self.acurl.close()
        self._closed = True
        # Drain the pool and close every real handle (None placeholders are skipped).
        while True:
            try:
                curl = self.pool.get_nowait()
                if curl:
                    curl.close()
            except asyncio.QueueEmpty:
                break

    def release_curl(self, curl):
        """Return a handle to the pool, or close it if the session is closed."""
        curl.clean_after_perform()
        if not self._closed:
            self.acurl.remove_handle(curl)
            curl.reset()
            # curl.setopt(CurlOpt.PIPEWAIT, 1)
            self.push_curl(curl)
        else:
            curl.close()

    @asynccontextmanager
    async def stream(
        self,
        method: HttpMethod,
        url: str,
        **kwargs: Unpack[StreamRequestParams],
    ):
        """Equivalent to ``async with request(..., stream=True) as r:``"""
        rsp = await self.request(method=method, url=url, **kwargs, stream=True)
        try:
            yield rsp
        finally:
            await rsp.aclose()

    async def ws_connect(
        self,
        url: str,
        autoclose: bool = True,
        params: Optional[Union[dict, list, tuple]] = None,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        auth: Optional[tuple[str, str]] = None,
        timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
        allow_redirects: Optional[bool] = None,
        max_redirects: Optional[int] = None,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        verify: Optional[bool] = None,
        referer: Optional[str] = None,
        accept_encoding: Optional[str] = "gzip, deflate, br",
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: Optional[bool] = None,
        quote: Union[str, Literal[False]] = "",
        http_version: Optional[CurlHttpVersion] = None,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        max_recv_speed: int = 0,
    ) -> AsyncWebSocket:
        """Connects to a WebSocket.

        Args:
            url: url for the requests.
            autoclose: whether to close the WebSocket after receiving a close frame.
            params: query string for the requests.
            headers: headers to send.
            cookies: cookies to use.
            auth: HTTP basic auth, a tuple of (username, password), only basic auth is
                supported.
            timeout: how many seconds to wait before giving up.
            allow_redirects: whether to allow redirection.
            max_redirects: max redirect counts, default 30, use -1 for unlimited.
            proxies: dict of proxies to use, prefer to use ``proxy`` if they are the
                same. format: ``{"http": proxy_url, "https": proxy_url}``.
            proxy: proxy to use, format: "http://user@pass:proxy_url".
                Can't be used with `proxies` parameter.
            proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
            verify: whether to verify https certs.
            referer: shortcut for setting referer header.
            accept_encoding: shortcut for setting accept-encoding header.
            impersonate: which browser version to impersonate.
            ja3: ja3 string to impersonate.
            akamai: akamai string to impersonate.
            extra_fp: extra fingerprints options, in complement to ja3 and akamai str.
            default_headers: whether to set default browser headers.
            quote: Set characters to be quoted, i.e. percent-encoded. Default safe
                string is ``!#$%&'()*+,/:;=?@[]~``. If set to a sting, the character
                will be removed from the safe string, thus quoted. If set to False, the
                url will be kept as is, without any automatic percent-encoding, you must
                encode the URL yourself.
            curl_options: extra curl options to use.
            http_version: limiting http version, defaults to http2.
            interface: which interface to use.
            cert: a tuple of (cert, key) filenames for client cert.
            max_recv_speed: maximum receive speed, bytes per second.
        """
        self._check_session_closed()

        curl = await self.pop_curl()
        set_curl_options(
            curl=curl,
            method="GET",
            url=url,
            base_url=self.base_url,
            params_list=[self.params, params],
            headers_list=[self.headers, headers],
            cookies_list=[self.cookies, cookies],
            auth=auth or self.auth,
            timeout=self.timeout if timeout is not_set else timeout,
            allow_redirects=self.allow_redirects
            if allow_redirects is None
            else allow_redirects,
            max_redirects=self.max_redirects
            if max_redirects is None
            else max_redirects,
            proxies_list=[self.proxies, proxies],
            proxy=proxy,
            proxy_auth=proxy_auth or self.proxy_auth,
            verify_list=[self.verify, verify],
            referer=referer,
            accept_encoding=accept_encoding,
            impersonate=impersonate or self.impersonate,
            ja3=ja3 or self.ja3,
            akamai=akamai or self.akamai,
            extra_fp=extra_fp or self.extra_fp,
            default_headers=self.default_headers
            if default_headers is None
            else default_headers,
            quote=quote,
            http_version=http_version or self.http_version,
            interface=interface or self.interface,
            max_recv_speed=max_recv_speed,
            cert=cert or self.cert,
            queue_class=asyncio.Queue,
            event_class=asyncio.Event,
        )
        curl.setopt(CurlOpt.CONNECT_ONLY, 2)  # https://curl.se/docs/websocket.html
        # perform() blocks for the handshake, so run it in the default executor.
        await self.loop.run_in_executor(None, curl.perform)
        return AsyncWebSocket(
            cast(AsyncSession[Response], self),
            curl,
            autoclose=autoclose,
        )

    async def request(
        self,
        method: HttpMethod,
        url: str,
        params: Optional[Union[dict, list, tuple]] = None,
        data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]] = None,
        json: Optional[dict | list] = None,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        files: Optional[dict] = None,
        auth: Optional[tuple[str, str]] = None,
        timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
        allow_redirects: Optional[bool] = None,
        max_redirects: Optional[int] = None,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        verify: Optional[bool] = None,
        referer: Optional[str] = None,
        accept_encoding: Optional[str] = "gzip, deflate, br",
        content_callback: Optional[Callable] = None,
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: Optional[bool] = None,
        default_encoding: Union[str, Callable[[bytes], str]] = "utf-8",
        quote: Union[str, Literal[False]] = "",
        http_version: Optional[CurlHttpVersion] = None,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        stream: Optional[bool] = None,
        max_recv_speed: int = 0,
        multipart: Optional[CurlMime] = None,
    ):
        """Send the request, see ``curl_cffi.requests.request`` for details on args."""
        self._check_session_closed()

        curl = await self.pop_curl()
        # Per-request values take precedence over session-level defaults;
        # the *_list parameters are merged inside set_curl_options.
        req, buffer, header_buffer, q, header_recved, quit_now = set_curl_options(
            curl=curl,
            method=method,
            url=url,
            params_list=[self.params, params],
            base_url=self.base_url,
            data=data,
            json=json,
            headers_list=[self.headers, headers],
            cookies_list=[self.cookies, cookies],
            files=files,
            auth=auth or self.auth,
            timeout=self.timeout if timeout is not_set else timeout,
            allow_redirects=self.allow_redirects
            if allow_redirects is None
            else allow_redirects,
            max_redirects=self.max_redirects
            if max_redirects is None
            else max_redirects,
            proxies_list=[self.proxies, proxies],
            proxy=proxy,
            proxy_auth=proxy_auth or self.proxy_auth,
            verify_list=[self.verify, verify],
            referer=referer,
            accept_encoding=accept_encoding,
            content_callback=content_callback,
            impersonate=impersonate or self.impersonate,
            ja3=ja3 or self.ja3,
            akamai=akamai or self.akamai,
            extra_fp=extra_fp or self.extra_fp,
            default_headers=self.default_headers
            if default_headers is None
            else default_headers,
            quote=quote,
            http_version=http_version or self.http_version,
            interface=interface or self.interface,
            stream=stream,
            max_recv_speed=max_recv_speed,
            multipart=multipart,
            cert=cert or self.cert,
            curl_options=self.curl_options,
            queue_class=asyncio.Queue,
            event_class=asyncio.Event,
        )
        if stream:
            task = self.acurl.add_handle(curl)

            async def perform():
                try:
                    await task
                except CurlError as e:
                    rsp = self._parse_response(
                        curl, buffer, header_buffer, default_encoding
                    )
                    rsp.request = req
                    q.put_nowait(RequestException(str(e), e.code, rsp))  # type: ignore
                finally:
                    # Always wake the awaiting coroutine and terminate the queue.
                    if not cast(asyncio.Event, header_recved).is_set():
                        cast(asyncio.Event, header_recved).set()
                    await q.put(STREAM_END)  # type: ignore

            def cleanup(fut):
                self.release_curl(curl)

            stream_task = asyncio.create_task(perform())
            stream_task.add_done_callback(cleanup)

            await cast(asyncio.Event, header_recved).wait()

            # Unlike threads, coroutines does not use preemptive scheduling.
            # For asyncio, there is no need for a header_parsed event, the
            # _parse_response will execute in the foreground, no background tasks
            # running.
            rsp = self._parse_response(curl, buffer, header_buffer, default_encoding)

            first_element = _peek_aio_queue(q)  # type: ignore
            if isinstance(first_element, RequestException):
                self.release_curl(curl)
                raise first_element

            rsp.request = req
            rsp.astream_task = stream_task
            rsp.quit_now = quit_now
            rsp.queue = q
            return rsp
        else:
            try:
                task = self.acurl.add_handle(curl)
                await task
            except CurlError as e:
                rsp = self._parse_response(
                    curl, buffer, header_buffer, default_encoding
                )
                rsp.request = req
                error = code2error(e.code, str(e))
                raise error(str(e), e.code, rsp) from e
            else:
                rsp = self._parse_response(
                    curl, buffer, header_buffer, default_encoding
                )
                rsp.request = req
                return rsp
            finally:
                self.release_curl(curl)

    def head(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="HEAD", url=url, **kwargs)

    def get(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="GET", url=url, **kwargs)

    def post(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="POST", url=url, **kwargs)

    def put(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="PUT", url=url, **kwargs)

    def patch(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="PATCH", url=url, **kwargs)

    def delete(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="DELETE", url=url, **kwargs)

    def options(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="OPTIONS", url=url, **kwargs)

    def trace(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="TRACE", url=url, **kwargs)

    def query(self, url: str, **kwargs: Unpack[RequestParams]):
        return self.request(method="QUERY", url=url, **kwargs)