from __future__ import annotations

__all__ = ["set_curl_options", "not_set"]

import asyncio
import math
import queue
import warnings
from collections import Counter
from io import BytesIO
from json import dumps
from typing import TYPE_CHECKING, Any, Callable, Final, Literal, Optional, Union, cast
from urllib.parse import ParseResult, parse_qsl, quote, urlencode, urljoin, urlparse

from ..const import CurlHttpVersion, CurlOpt, CurlSslVersion
from ..curl import CURL_WRITEFUNC_ERROR, CurlMime
from ..utils import CurlCffiWarning
from .cookies import Cookies
from .exceptions import ImpersonateError, InvalidURL
from .headers import Headers
from .impersonate import (
    TLS_CIPHER_NAME_MAP,
    TLS_EC_CURVES_MAP,
    TLS_VERSION_MAP,
    ExtraFingerprints,
    normalize_browser_type,
    toggle_extension,
)
from .models import Request

if TYPE_CHECKING:
    from ..curl import Curl
    from .cookies import CookieTypes
    from .headers import HeaderTypes
    from .impersonate import BrowserTypeLiteral, ExtraFpDict
    from .session import ProxySpec


HttpMethod = Literal[
    "GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"
]

SAFE_CHARS = set("!#$%&'()*+,/:;=?@[]~")

not_set: Final[Any] = object()


def is_absolute_url(url: str) -> bool:
    """Check if the provided url is an absolute url."""
    parsed_url = urlparse(url)
    return bool(parsed_url.scheme and parsed_url.hostname)
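

# For illustration: is_absolute_url("https://example.com/path") is True, while
# is_absolute_url("/path/only") and is_absolute_url("example.com") are both
# False, since the latter two parse without a scheme.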


def quote_path_and_params(url: str, quote_str: str = ""):
    safe = "".join(SAFE_CHARS - set(quote_str))
    parsed_url = urlparse(url)
    parsed_get_args = parse_qsl(parsed_url.query, keep_blank_values=True)
    encoded_get_args = urlencode(parsed_get_args, doseq=True, safe=safe)
    return ParseResult(
        parsed_url.scheme,
        parsed_url.netloc,
        quote(parsed_url.path, safe=safe),
        parsed_url.params,
        encoded_get_args,
        parsed_url.fragment,
    ).geturl()
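

# A quick sketch of the effect: characters listed in quote_str are removed from
# the safe set and therefore get percent-encoded, e.g.
#   quote_path_and_params("https://example.com/a[1]?b=[2]", quote_str="[]")
#   -> "https://example.com/a%5B1%5D?b=%5B2%5D"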


def update_url_params(url: str, params: Union[dict, list, tuple]) -> str:
    """Add URL query params to the provided URL, taking existing params into account.

    Args:
        url: string of target URL
        params: dict containing requested params to be added

    Returns:
        string with updated URL

    >>> url = 'http://stackoverflow.com/test?answers=true'
    >>> new_params = {'answers': False, 'data': ['some','values']}
    >>> update_url_params(url, new_params)
    'http://stackoverflow.com/test?answers=false&data=some&data=values'
    """
    # No need to unquote, since requote_uri will be called later.
    parsed_url = urlparse(url)

    # Extract URL arguments from the parsed URL. NOTE: the result is a list, not a dict.
    parsed_get_args = parse_qsl(parsed_url.query, keep_blank_values=True)

    # Merge the existing URL arguments with the new params.
    old_args_counter = Counter(x[0] for x in parsed_get_args)
    if isinstance(params, dict):
        params = list(params.items())
    new_args_counter = Counter(x[0] for x in params)
    for key, value in params:
        # bool and dict values should be converted to json-friendly values
        if isinstance(value, (bool, dict)):
            value = dumps(value)
        # 1-to-1 mapping: the key appears exactly once on each side, so update it in place.
        if old_args_counter.get(key) == 1 and new_args_counter.get(key) == 1:
            parsed_get_args = [
                (x if x[0] != key else (key, value)) for x in parsed_get_args
            ]
        else:
            parsed_get_args.append((key, value))

    # Convert URL arguments to a proper query string.
    encoded_get_args = urlencode(parsed_get_args, doseq=True)

    # Create a new ParseResult based on the original one, carrying the new
    # URL arguments. The same thing happens inside urlparse.
    new_url = ParseResult(
        parsed_url.scheme,
        parsed_url.netloc,
        parsed_url.path,
        parsed_url.params,
        encoded_get_args,
        parsed_url.fragment,
    ).geturl()

    return new_url


# Adapted from: https://github.com/psf/requests/blob/1ae6fc3137a11e11565ed22436aa1e77277ac98c/src%2Frequests%2Futils.py#L633-L682
# License: Apache 2.0

# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~"
)


def unquote_unreserved(uri: str) -> str:
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
    parts = uri.split("%")
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError as e:
                raise InvalidURL(f"Invalid percent-escape sequence: '{h}'") from e

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = f"%{parts[i]}"
        else:
            parts[i] = f"%{parts[i]}"
    return "".join(parts)


def requote_uri(uri: str) -> str:
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~|"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~|"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
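

# Two hand-checkable illustrations of the round trip:
#   requote_uri("http://example.com/a b?q=%zz") -> "http://example.com/a%20b?q=%25zz"
#     ("%zz" is not a valid escape, so the fallback branch quotes the bare '%')
#   requote_uri("http://example.com/%41?q=1") -> "http://example.com/A?q=1"
#     ("%41" decodes to the unreserved character 'A', so it is unquoted)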


# TODO: should we move this function to headers.py?
def update_header_line(
    header_lines: list[str], key: str, value: str, replace: bool = False
):
    """Update header line list by key value pair."""
    found = False
    for idx, line in enumerate(header_lines):
        if line.lower().startswith(key.lower() + ":"):
            found = True
            if replace:
                header_lines[idx] = f"{key}: {value}"
            break
    if not found:
        header_lines.append(f"{key}: {value}")


def peek_queue(q: queue.Queue, default=None):
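    # q.queue is the underlying deque of queue.Queue; indexing it peeks at the
    # head without removing the item.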
    try:
        return q.queue[0]
    except IndexError:
        return default


def peek_aio_queue(q: asyncio.Queue, default=None):
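    # asyncio.Queue exposes no public peek API, so this reaches into the
    # private _queue deque (hence the type: ignore below).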
    try:
        return q._queue[0]  # type: ignore
    except IndexError:
        return default


def toggle_extensions_by_ids(curl: Curl, extension_ids):
    # TODO: find a better representation, rather than magic numbers
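    # These values are TLS extension IDs from the IANA registry: 0 server_name,
    # 51 key_share, 13 signature_algorithms, 43 supported_versions,
    # 65281 renegotiation_info, 23 extended_master_secret, 10 supported_groups,
    # 45 psk_key_exchange_modes, 35 session_ticket, 11 ec_point_formats, 16 ALPN.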
    default_enabled = {0, 51, 13, 43, 65281, 23, 10, 45, 35, 11, 16}

    to_enable_ids = extension_ids - default_enabled
    for ext_id in to_enable_ids:
        toggle_extension(curl, ext_id, enable=True)

    # print("to_enable: ", to_enable_ids)

    to_disable_ids = default_enabled - extension_ids
    for ext_id in to_disable_ids:
        toggle_extension(curl, ext_id, enable=False)

    # print("to_disable: ", to_disable_ids)


def set_ja3_options(curl: Curl, ja3: str, permute: bool = False):
    """
    Detailed explanation: https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
    """
    tls_version, ciphers, extensions, curves, curve_formats = ja3.split(",")

    curl_tls_version = TLS_VERSION_MAP[int(tls_version)]
    curl.setopt(CurlOpt.SSLVERSION, curl_tls_version | CurlSslVersion.MAX_DEFAULT)
    assert curl_tls_version == CurlSslVersion.TLSv1_2, "Only TLS v1.2 works for now."

    cipher_names = []
    for cipher in ciphers.split("-"):
        cipher_id = int(cipher)
        cipher_name = TLS_CIPHER_NAME_MAP[cipher_id]
        cipher_names.append(cipher_name)

    curl.setopt(CurlOpt.SSL_CIPHER_LIST, ":".join(cipher_names))

    if extensions.endswith("-21"):
        extensions = extensions[:-3]
        warnings.warn(
            "Padding(21) extension found in ja3 string; whether to add it should "
            "be managed by the SSL engine. The TLS client hello packet may or may "
            "not contain this extension, and either is correct.",
            CurlCffiWarning,
            stacklevel=1,
        )
    extension_ids = set(int(e) for e in extensions.split("-"))
    toggle_extensions_by_ids(curl, extension_ids)

    if not permute:
        curl.setopt(CurlOpt.TLS_EXTENSION_ORDER, extensions)

    curve_names = []
    for curve in curves.split("-"):
        curve_id = int(curve)
        curve_name = TLS_EC_CURVES_MAP[curve_id]
        curve_names.append(curve_name)

    curl.setopt(CurlOpt.SSL_EC_CURVES, ":".join(curve_names))

    assert int(curve_formats) == 0, "Only curve_formats == 0 is supported."


def set_akamai_options(curl: Curl, akamai: str):
    """
    Detailed explanation: https://www.blackhat.com/docs/eu-17/materials/eu-17-Shuster-Passive-Fingerprinting-Of-HTTP2-Clients-wp.pdf
    """
    settings, window_update, streams, header_order = akamai.split("|")

    # For compatibility with tls.peet.ws
    settings = settings.replace(",", ";")

    curl.setopt(CurlOpt.HTTP_VERSION, CurlHttpVersion.V2_0)

    curl.setopt(CurlOpt.HTTP2_SETTINGS, settings)
    curl.setopt(CurlOpt.HTTP2_WINDOW_UPDATE, int(window_update))

    if streams != "0":
        curl.setopt(CurlOpt.HTTP2_STREAMS, streams)

    # m,a,s,p -> masp
    # curl-impersonate only accepts the masp format, without commas.
    curl.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, header_order.replace(",", ""))


def set_extra_fp(curl: Curl, fp: ExtraFingerprints):
    if fp.tls_signature_algorithms:
        curl.setopt(CurlOpt.SSL_SIG_HASH_ALGS, ",".join(fp.tls_signature_algorithms))

    curl.setopt(CurlOpt.SSLVERSION, fp.tls_min_version | CurlSslVersion.MAX_DEFAULT)
    curl.setopt(CurlOpt.TLS_GREASE, int(fp.tls_grease))
    curl.setopt(CurlOpt.SSL_PERMUTE_EXTENSIONS, int(fp.tls_permute_extensions))
    curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, fp.tls_cert_compression)
    curl.setopt(CurlOpt.STREAM_WEIGHT, fp.http2_stream_weight)
    curl.setopt(CurlOpt.STREAM_EXCLUSIVE, fp.http2_stream_exclusive)


def set_curl_options(
    curl: Curl,
    method: HttpMethod,
    url: str,
    *,
    params_list: list[Union[dict, list, tuple, None]] = [],  # noqa: B006
    base_url: Optional[str] = None,
    data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]] = None,
    json: Optional[dict | list] = None,
    headers_list: list[Optional[HeaderTypes]] = [],  # noqa: B006
    cookies_list: list[Optional[CookieTypes]] = [],  # noqa: B006
    files: Optional[dict] = None,
    auth: Optional[tuple[str, str]] = None,
    timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
    allow_redirects: Optional[bool] = True,
    max_redirects: Optional[int] = 30,
    proxies_list: list[Optional[ProxySpec]] = [],  # noqa: B006
    proxy: Optional[str] = None,
    proxy_auth: Optional[tuple[str, str]] = None,
    verify_list: list[Union[bool, str, None]] = [],  # noqa: B006
    referer: Optional[str] = None,
    accept_encoding: Optional[str] = "gzip, deflate, br, zstd",
    content_callback: Optional[Callable] = None,
    impersonate: Optional[Union[BrowserTypeLiteral, str]] = None,
    ja3: Optional[str] = None,
    akamai: Optional[str] = None,
    extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
    default_headers: bool = True,
    quote: Union[str, Literal[False]] = "",
    http_version: Optional[CurlHttpVersion] = None,
    interface: Optional[str] = None,
    cert: Optional[Union[str, tuple[str, str]]] = None,
    stream: Optional[bool] = None,
    max_recv_speed: int = 0,
    multipart: Optional[CurlMime] = None,
    queue_class: Any = None,
    event_class: Any = None,
    curl_options: Optional[dict[CurlOpt, str]] = None,
):
    c = curl

    method = method.upper()  # type: ignore

    # method
    if method == "POST":
        c.setopt(CurlOpt.POST, 1)
    elif method != "GET":
        c.setopt(CurlOpt.CUSTOMREQUEST, method.encode())
    if method == "HEAD":
        c.setopt(CurlOpt.NOBODY, 1)

    # url
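    # Each *_list argument carries a [session_value, per_request_value] pair
    # (hence the two-element unpacking below); the per-request value wins.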
    base_params, params = params_list
    if base_params:
        url = update_url_params(url, base_params)
    if params:
        url = update_url_params(url, params)
    if base_url:
        url = urljoin(base_url, url)
    if quote:
        url = quote_path_and_params(url, quote_str=quote)
    if quote is not False:
        url = requote_uri(url)
    c.setopt(CurlOpt.URL, url.encode())

    # data/body/json
    if isinstance(data, (dict, list, tuple)):
        body = urlencode(data).encode()
    elif isinstance(data, str):
        body = data.encode()
    elif isinstance(data, BytesIO):
        body = data.read()
    elif isinstance(data, bytes):
        body = data
    elif data is None:
        body = b""
    else:
        raise TypeError("data must be dict/list/tuple, str, BytesIO or bytes")
    if json is not None:
        body = dumps(json, separators=(",", ":")).encode()

    # Tell libcurl to be aware of bodies and related headers when:
    # 1. POST/PUT/PATCH, even if the body is empty; it's up to curl to decide what to do.
    # 2. GET/DELETE with a body; although it's against the RFC, some applications,
    #    e.g. Elasticsearch, use this.
    if body or method in ("POST", "PUT", "PATCH"):
        c.setopt(CurlOpt.POSTFIELDS, body)
        # necessary if body contains '\0'
        c.setopt(CurlOpt.POSTFIELDSIZE, len(body))
        if method == "GET":
            c.setopt(CurlOpt.CUSTOMREQUEST, method)

    # headers
    base_headers, headers = headers_list
    h = Headers(base_headers)
    h.update(headers)

    # Remove the Host header if it's unnecessary, otherwise curl may get confused.
    # The Host header will be automatically added by curl if it's not present.
    # https://github.com/lexiforest/curl_cffi/issues/119
    host_header = h.get("Host")
    if host_header is not None:
        u = urlparse(url)
        if host_header == u.netloc or host_header == u.hostname:
            h.pop("Host", None)

    # Make curl always include empty headers.
    # See: https://stackoverflow.com/a/32911474/1061155
    header_lines = []
    for k, v in h.multi_items():
        if v is None:
            header_lines.append(f"{k}:")  # Explicitly disable this header
        elif v == "":
            header_lines.append(f"{k};")  # Add an empty-valued header
        else:
            header_lines.append(f"{k}: {v}")

    # Add content-type if missing
    if json is not None:
        update_header_line(header_lines, "Content-Type", "application/json")
    if isinstance(data, dict) and method != "POST":
        update_header_line(
            header_lines, "Content-Type", "application/x-www-form-urlencoded"
        )
    if isinstance(data, (str, bytes)):
        update_header_line(header_lines, "Content-Type", "application/octet-stream")

    # Never send the `Expect` header.
    update_header_line(header_lines, "Expect", "", replace=True)

    c.setopt(CurlOpt.HTTPHEADER, [h.encode() for h in header_lines])

    req = Request(url, h, method)

    # cookies
    c.setopt(CurlOpt.COOKIEFILE, b"")  # always enable the curl cookie engine first
    c.setopt(CurlOpt.COOKIELIST, "ALL")  # remove all the old cookies first

    base_cookies, cookies = cookies_list

    if base_cookies:
        for morsel in base_cookies.get_cookies_for_curl(req):  # type: ignore
            curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())
    if cookies:
        temp_cookies = Cookies(cookies)
        for morsel in temp_cookies.get_cookies_for_curl(req):
            curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())

    # files
    if files:
        raise NotImplementedError(
            "files is not supported, use `multipart`. See examples here: "
            "https://github.com/lexiforest/curl_cffi/blob/main/examples/upload.py"
        )

    # multipart
    if multipart:
        # multipart overrides postfields
        for k, v in cast(dict, data or {}).items():
            multipart.addpart(name=k, data=v.encode() if isinstance(v, str) else v)
        c.setopt(CurlOpt.MIMEPOST, multipart._form)

    # auth
    if auth:
        username, password = auth
        c.setopt(CurlOpt.USERNAME, username.encode())  # pyright: ignore [reportPossiblyUnboundVariable=none]
        c.setopt(CurlOpt.PASSWORD, password.encode())  # pyright: ignore [reportPossiblyUnboundVariable=none]

    # timeout
    if timeout is None:
        timeout = 0  # wait indefinitely

    if isinstance(timeout, tuple):
        connect_timeout, read_timeout = timeout
        all_timeout = connect_timeout + read_timeout
        c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(connect_timeout * 1000))
        if not stream:
            c.setopt(CurlOpt.TIMEOUT_MS, int(all_timeout * 1000))
        else:
            # trick from: https://github.com/lexiforest/curl_cffi/issues/156
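            # (a transfer slower than LOW_SPEED_LIMIT bytes/s for LOW_SPEED_TIME
            # seconds is aborted, which emulates a read timeout without capping
            # the total duration of a streamed response)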
            c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
            c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(all_timeout))

    elif isinstance(timeout, (int, float)):
        if not stream:
            c.setopt(CurlOpt.TIMEOUT_MS, int(timeout * 1000))
        else:
            c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(timeout * 1000))
            c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
            c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))

    # allow_redirects
    c.setopt(CurlOpt.FOLLOWLOCATION, int(allow_redirects))  # type: ignore

    # max_redirects
    c.setopt(CurlOpt.MAXREDIRS, max_redirects)

    # proxies
    base_proxies, proxies = proxies_list
    if proxy and proxies:
        raise TypeError("Cannot specify both 'proxy' and 'proxies'")
    if proxy:
        proxies = {"all": proxy}
    if proxies is None:
        proxies = base_proxies

    if proxies:
        parts = urlparse(url)
        proxy = cast(Optional[str], proxies.get(parts.scheme, proxies.get("all")))
        if parts.hostname:
            proxy = (
                proxies.get(  # type: ignore
                    f"{parts.scheme}://{parts.hostname}",
                    proxies.get(f"all://{parts.hostname}"),
                )
                or proxy
            )

        if proxy is not None:
            c.setopt(CurlOpt.PROXY, proxy)

            if parts.scheme == "https":
                if proxy.startswith("https://"):
                    warnings.warn(
                        "Make sure you are using https over https proxy, otherwise, "
                        "the proxy prefix should be 'http://' not 'https://', "
                        "see: https://github.com/lexiforest/curl_cffi/issues/6",
                        CurlCffiWarning,
                        stacklevel=2,
                    )
                # For an https site behind an http tunnel proxy, tell curl to enable tunneling
                if not proxy.startswith("socks"):
                    c.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)

    # proxy_auth
    if proxy_auth:
        username, password = proxy_auth
        c.setopt(CurlOpt.PROXYUSERNAME, username.encode())
        c.setopt(CurlOpt.PROXYPASSWORD, password.encode())

    # verify
    base_verify, verify = verify_list
    if verify is False or (not base_verify and verify is None):
        c.setopt(CurlOpt.SSL_VERIFYPEER, 0)
        c.setopt(CurlOpt.SSL_VERIFYHOST, 0)

    # cert for this single request
    if isinstance(verify, str):
        c.setopt(CurlOpt.CAINFO, verify)

    # cert for the session
    if verify in (None, True) and isinstance(base_verify, str):
        c.setopt(CurlOpt.CAINFO, base_verify)

    # referer
    if referer:
        c.setopt(CurlOpt.REFERER, referer.encode())

    # accept_encoding
    if accept_encoding is not None:
        c.setopt(CurlOpt.ACCEPT_ENCODING, accept_encoding.encode())

    # cert
    if cert:
        if isinstance(cert, str):
            c.setopt(CurlOpt.SSLCERT, cert)
        else:
            cert, key = cert
            c.setopt(CurlOpt.SSLCERT, cert)
            c.setopt(CurlOpt.SSLKEY, key)

    # impersonate
    if impersonate:
        impersonate = normalize_browser_type(impersonate)
        ret = c.impersonate(impersonate, default_headers=default_headers)  # type: ignore
        if ret != 0:
            raise ImpersonateError(f"Impersonating {impersonate} is not supported")

    # ja3 string
    if ja3:
        if impersonate:
            warnings.warn(
                "JA3 was altered after the browser version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        permute = False
        if isinstance(extra_fp, ExtraFingerprints) and extra_fp.tls_permute_extensions:
            permute = True
        if isinstance(extra_fp, dict) and extra_fp.get("tls_permute_extensions"):
            permute = True
        set_ja3_options(c, ja3, permute=permute)

    # akamai string
    if akamai:
        if impersonate:
            warnings.warn(
                "Akamai was altered after the browser version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        set_akamai_options(c, akamai)

    # extra_fp options
    if extra_fp:
        if isinstance(extra_fp, dict):
            extra_fp = ExtraFingerprints(**extra_fp)
        if impersonate:
            warnings.warn(
                "Extra fingerprints were altered after the browser version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        set_extra_fp(c, extra_fp)

    # http_version, after impersonate, which will change this to http2
    if http_version:
        c.setopt(CurlOpt.HTTP_VERSION, http_version)

    # Set extra curl options. This must come after impersonate, because
    # impersonate will alter some options.
    if curl_options:
        for option, setting in curl_options.items():
            c.setopt(option, setting)

    buffer = None
    q = None
    header_recved = None
    quit_now = None
    if stream:
        q = queue_class()
        header_recved = event_class()
        quit_now = event_class()

        def qput(chunk):
            if not header_recved.is_set():
                header_recved.set()
            if quit_now.is_set():
                return CURL_WRITEFUNC_ERROR
            q.put_nowait(chunk)
            return len(chunk)

        c.setopt(CurlOpt.WRITEFUNCTION, qput)
    elif content_callback is not None:
        c.setopt(CurlOpt.WRITEFUNCTION, content_callback)
    else:
        buffer = BytesIO()
        c.setopt(CurlOpt.WRITEDATA, buffer)
    header_buffer = BytesIO()
    c.setopt(CurlOpt.HEADERDATA, header_buffer)

    # interface
    if interface:
        c.setopt(CurlOpt.INTERFACE, interface.encode())

    # max_recv_speed
    # no validation needed, since 0 is a valid value meaning "disabled"
    c.setopt(CurlOpt.MAX_RECV_SPEED_LARGE, max_recv_speed)

    return req, buffer, header_buffer, q, header_recved, quit_now
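

# A minimal sketch of how set_curl_options is typically driven (hypothetical
# values; the session layer normally builds these arguments). Note that each
# *_list argument must be a two-element [session_value, request_value] list:
#
#   from curl_cffi import Curl
#
#   curl = Curl()
#   req, buffer, header_buffer, q, header_recved, quit_now = set_curl_options(
#       curl,
#       "GET",
#       "https://example.com/search",
#       params_list=[None, {"q": "hello"}],
#       headers_list=[None, {"Accept": "application/json"}],
#       cookies_list=[None, None],
#       proxies_list=[None, None],
#       verify_list=[True, None],
#       timeout=10,
#   )
#   curl.perform()
#   print(buffer.getvalue())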