Buffteks-Website/venv/lib/python3.12/site-packages/yfinance/data.py

import functools
from functools import lru_cache
from curl_cffi import requests
from bs4 import BeautifulSoup
import datetime
from frozendict import frozendict
from . import utils, cache
import threading
from .exceptions import YFRateLimitError

cache_maxsize = 64


def lru_cache_freezeargs(func):
    """
    Decorator that transforms mutable dictionary and list arguments into immutable types.
    Needed so lru_cache can cache method calls that have dict or list arguments.
    """

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args])
        kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
        args = tuple([tuple(arg) if isinstance(arg, list) else arg for arg in args])
        kwargs = {k: tuple(v) if isinstance(v, list) else v for k, v in kwargs.items()}
        return func(*args, **kwargs)

    # copy over the lru_cache extra methods to this wrapper to be able to access them
    # after this decorator has been applied
    wrapped.cache_info = func.cache_info
    wrapped.cache_clear = func.cache_clear
    return wrapped
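
# Illustrative sketch (not part of the original module): the decorator pairs with
# functools.lru_cache so that calls carrying dict or list arguments become hashable.
#
#     @lru_cache_freezeargs
#     @lru_cache(maxsize=4)
#     def describe(params):
#         return sorted(params)            # params arrives as a frozendict
#
#     describe({'symbols': 'MSFT,AAPL'})   # dict frozen -> cacheable
#     describe({'symbols': 'MSFT,AAPL'})   # served from lru_cache
#     describe.cache_info()                # works thanks to the copied-over attributes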


class SingletonMeta(type):
    """
    Metaclass that creates a Singleton instance.
    """
    _instances = {}
    _lock = threading.Lock()

    def __call__(cls, *args, **kwargs):
        with cls._lock:
            if cls not in cls._instances:
                instance = super().__call__(*args, **kwargs)
                cls._instances[cls] = instance
            else:
                # Update the existing instance
                if 'session' in kwargs or (args and len(args) > 0):
                    session = kwargs.get('session') if 'session' in kwargs else args[0]
                    cls._instances[cls]._set_session(session)
                if 'proxy' in kwargs or (args and len(args) > 1):
                    proxy = kwargs.get('proxy') if 'proxy' in kwargs else args[1]
                    cls._instances[cls]._set_proxy(proxy)
            return cls._instances[cls]
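
# Illustrative sketch: any class using SingletonMeta hands back the same instance
# on every call; later constructor arguments only update that instance.
#
#     d1 = YfData()
#     d2 = YfData(proxy='http://127.0.0.1:8080')   # hypothetical proxy address
#     assert d1 is d2                              # same object, proxy updated in place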


class YfData(metaclass=SingletonMeta):
    """
    Single place to retrieve data from the Yahoo API, to ease caching and speed up operations.
    Singleton, so one session and one cookie are shared by all threads.
    """

    def __init__(self, session=None, proxy=None):
        self._crumb = None
        self._cookie = None

        # Default to using 'basic' strategy
        self._cookie_strategy = 'basic'
        # If it fails, then fallback method is 'csrf'
        # self._cookie_strategy = 'csrf'

        self._cookie_lock = threading.Lock()

        self._session, self._proxy = None, None
        self._set_session(session or requests.Session(impersonate="chrome"))
        self._set_proxy(proxy)
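
    # Illustrative sketch: a caller may supply its own curl_cffi session, which the
    # singleton then shares (cookies included) across all threads.
    #
    #     from curl_cffi import requests as curl_requests
    #     data = YfData(session=curl_requests.Session(impersonate="chrome"))
    #     assert data is YfData()   # later lookups return the same instance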

    def _set_session(self, session):
        if session is None:
            return
        with self._cookie_lock:
            self._session = session
            if self._proxy is not None:
                self._session.proxies = self._proxy

            try:
                self._session.cache
            except AttributeError:
                # Not caching
                self._session_is_caching = False
            else:
                # Is caching. This is annoying.
                # Can't simply use a non-caching session to fetch cookie & crumb,
                # because then the caching-session won't have cookie.
                self._session_is_caching = True
                from requests_cache import DO_NOT_CACHE
                self._expire_after = DO_NOT_CACHE

    def _set_proxy(self, proxy=None):
        with self._cookie_lock:
            if proxy is not None:
                proxy = {'http': proxy, 'https': proxy} if isinstance(proxy, str) else proxy
            else:
                proxy = {}
            self._proxy = proxy
            self._session.proxies = proxy
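
    # Illustrative sketch: a bare proxy string is normalised to the mapping form that
    # the session expects, so these two calls are equivalent (address is hypothetical).
    #
    #     YfData()._set_proxy('http://127.0.0.1:8080')
    #     YfData()._set_proxy({'http': 'http://127.0.0.1:8080',
    #                          'https': 'http://127.0.0.1:8080'})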

    def _set_cookie_strategy(self, strategy, have_lock=False):
        if strategy == self._cookie_strategy:
            return
        if not have_lock:
            self._cookie_lock.acquire()

        try:
            if self._cookie_strategy == 'csrf':
                utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> basic')
                self._session.cookies.clear()
                self._cookie_strategy = 'basic'
            else:
                utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> csrf')
                self._cookie_strategy = 'csrf'
            self._cookie = None
            self._crumb = None
        except Exception:
            self._cookie_lock.release()
            raise

        if not have_lock:
            self._cookie_lock.release()

    def _save_session_cookies(self):
        try:
            cache.get_cookie_cache().store('csrf', self._session.cookies)
        except Exception:
            return False
        return True

    def _load_session_cookies(self):
        cookie_dict = cache.get_cookie_cache().lookup('csrf')
        if cookie_dict is None:
            return False
        # Periodically refresh, 24 hours seems fair.
        if cookie_dict['age'] > datetime.timedelta(days=1):
            return False
        self._session.cookies.update(cookie_dict['cookie'])
        utils.get_yf_logger().debug('loaded persistent cookie')
        return True

    def _save_cookie_basic(self, cookie):
        try:
            cache.get_cookie_cache().store('basic', cookie)
        except Exception:
            return False
        return True

    def _load_cookie_basic(self):
        cookie_dict = cache.get_cookie_cache().lookup('basic')
        if cookie_dict is None:
            return None
        # Periodically refresh, 24 hours seems fair.
        if cookie_dict['age'] > datetime.timedelta(days=1):
            return None
        utils.get_yf_logger().debug('loaded persistent cookie')
        return cookie_dict['cookie']
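
    # Illustrative sketch: the 'basic' cookie round-trips through the persistent
    # cookie cache and is treated as stale once it is more than a day old.
    #
    #     self._save_cookie_basic(cookie)      # -> cache.get_cookie_cache().store('basic', ...)
    #     cookie = self._load_cookie_basic()   # None if missing or older than 1 day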

    def _get_cookie_basic(self, timeout=30):
        # To avoid infinite recursion, do NOT use self.get()
        # - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
        self._session.get(
            url='https://fc.yahoo.com',
            timeout=timeout,
            allow_redirects=True)

    def _get_crumb_basic(self, timeout=30):
        if self._crumb is not None:
            utils.get_yf_logger().debug('reusing crumb')
            return self._crumb

        self._get_cookie_basic()

        # - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
        get_args = {
            'url': "https://query1.finance.yahoo.com/v1/test/getcrumb",
            'timeout': timeout,
            'allow_redirects': True
        }
        if self._session_is_caching:
            get_args['expire_after'] = self._expire_after
            crumb_response = self._session.get(**get_args)
        else:
            crumb_response = self._session.get(**get_args)
        self._crumb = crumb_response.text

        if self._crumb is None or '<html>' in self._crumb:
            utils.get_yf_logger().debug("Didn't receive crumb")
            return None

        utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
        return self._crumb

    @utils.log_indent_decorator
    def _get_cookie_and_crumb_basic(self, timeout):
        self._get_cookie_basic(timeout)
        crumb = self._get_crumb_basic(timeout)
        return crumb

    def _get_cookie_csrf(self, timeout):
        if self._cookie is not None:
            utils.get_yf_logger().debug('reusing cookie')
            return True

        elif self._load_session_cookies():
            utils.get_yf_logger().debug('reusing persistent cookie')
            self._cookie = True
            return True

        base_args = {
            'timeout': timeout}

        get_args = {**base_args, 'url': 'https://guce.yahoo.com/consent'}
        try:
            if self._session_is_caching:
                get_args['expire_after'] = self._expire_after
                response = self._session.get(**get_args)
            else:
                response = self._session.get(**get_args)
        except requests.exceptions.ChunkedEncodingError:
            # No idea why happens, but handle nicely so can switch to other cookie method.
            utils.get_yf_logger().debug('_get_cookie_csrf() encountering requests.exceptions.ChunkedEncodingError, aborting')
            return False

        soup = BeautifulSoup(response.content, 'html.parser')
        csrfTokenInput = soup.find('input', attrs={'name': 'csrfToken'})
        if csrfTokenInput is None:
            utils.get_yf_logger().debug('Failed to find "csrfToken" in response')
            return False
        csrfToken = csrfTokenInput['value']
        utils.get_yf_logger().debug(f'csrfToken = {csrfToken}')
        sessionIdInput = soup.find('input', attrs={'name': 'sessionId'})
        sessionId = sessionIdInput['value']
        utils.get_yf_logger().debug(f"sessionId='{sessionId}'")

        originalDoneUrl = 'https://finance.yahoo.com/'
        namespace = 'yahoo'
        data = {
            'agree': ['agree', 'agree'],
            'consentUUID': 'default',
            'sessionId': sessionId,
            'csrfToken': csrfToken,
            'originalDoneUrl': originalDoneUrl,
            'namespace': namespace,
        }
        post_args = {**base_args,
                     'url': f'https://consent.yahoo.com/v2/collectConsent?sessionId={sessionId}',
                     'data': data}
        get_args = {**base_args,
                    'url': f'https://guce.yahoo.com/copyConsent?sessionId={sessionId}',
                    'data': data}
        try:
            if self._session_is_caching:
                post_args['expire_after'] = self._expire_after
                get_args['expire_after'] = self._expire_after
                self._session.post(**post_args)
                self._session.get(**get_args)
            else:
                self._session.post(**post_args)
                self._session.get(**get_args)
        except requests.exceptions.ChunkedEncodingError:
            # No idea why happens, but handle nicely so can switch to other cookie method.
            utils.get_yf_logger().debug('_get_cookie_csrf() encountering requests.exceptions.ChunkedEncodingError, aborting')
            # Bail out so the caller can fall back to the 'basic' cookie strategy.
            return False

        self._cookie = True
        self._save_session_cookies()
        return True

    @utils.log_indent_decorator
    def _get_crumb_csrf(self, timeout=30):
        # Credit goes to @bot-unit #1729
        if self._crumb is not None:
            utils.get_yf_logger().debug('reusing crumb')
            return self._crumb

        if not self._get_cookie_csrf(timeout):
            # This cookie stored in session
            return None

        get_args = {
            'url': 'https://query2.finance.yahoo.com/v1/test/getcrumb',
            'timeout': timeout}
        if self._session_is_caching:
            get_args['expire_after'] = self._expire_after
            r = self._session.get(**get_args)
        else:
            r = self._session.get(**get_args)
        self._crumb = r.text

        if self._crumb is None or '<html>' in self._crumb or self._crumb == '':
            utils.get_yf_logger().debug("Didn't receive crumb")
            return None

        utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
        return self._crumb

    @utils.log_indent_decorator
    def _get_cookie_and_crumb(self, timeout=30):
        crumb, strategy = None, None

        utils.get_yf_logger().debug(f"cookie_mode = '{self._cookie_strategy}'")

        with self._cookie_lock:
            if self._cookie_strategy == 'csrf':
                crumb = self._get_crumb_csrf()
                if crumb is None:
                    # Fail
                    self._set_cookie_strategy('basic', have_lock=True)
                    crumb = self._get_cookie_and_crumb_basic(timeout)
            else:
                # Fallback strategy
                crumb = self._get_cookie_and_crumb_basic(timeout)
                if crumb is None:
                    # Fail
                    self._set_cookie_strategy('csrf', have_lock=True)
                    crumb = self._get_crumb_csrf()
            strategy = self._cookie_strategy

        return crumb, strategy
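
    # Illustrative sketch of the fallback above: the active strategy is tried first,
    # and a failed crumb fetch flips to the other strategy once before giving up.
    #
    #     crumb, strategy = YfData()._get_cookie_and_crumb()
    #     # strategy is 'basic' or 'csrf', whichever path produced (or last tried) the crumb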

    @utils.log_indent_decorator
    def get(self, url, params=None, timeout=30):
        return self._make_request(url, request_method=self._session.get, params=params, timeout=timeout)

    @utils.log_indent_decorator
    def post(self, url, body, params=None, timeout=30):
        return self._make_request(url, request_method=self._session.post, body=body, params=params, timeout=timeout)

    @utils.log_indent_decorator
    def _make_request(self, url, request_method, body=None, params=None, timeout=30):
        # Important: treat input arguments as immutable.
        if len(url) > 200:
            utils.get_yf_logger().debug(f'url={url[:200]}...')
        else:
            utils.get_yf_logger().debug(f'url={url}')
        utils.get_yf_logger().debug(f'params={params}')

        if params is None:
            params = {}
        if 'crumb' in params:
            raise Exception("Don't manually add 'crumb' to params dict, let data.py handle it")

        crumb, strategy = self._get_cookie_and_crumb()
        if crumb is not None:
            crumbs = {'crumb': crumb}
        else:
            crumbs = {}

        request_args = {
            'url': url,
            'params': {**params, **crumbs},
            'timeout': timeout
        }
        if body:
            request_args['json'] = body

        response = request_method(**request_args)
        utils.get_yf_logger().debug(f'response code={response.status_code}')
        if response.status_code >= 400:
            # Retry with other cookie strategy
            if strategy == 'basic':
                self._set_cookie_strategy('csrf')
            else:
                self._set_cookie_strategy('basic')
            crumb, strategy = self._get_cookie_and_crumb(timeout)
            request_args['params']['crumb'] = crumb
            response = request_method(**request_args)
            utils.get_yf_logger().debug(f'response code={response.status_code}')
        # Raise exception if rate limited
        if response.status_code == 429:
            raise YFRateLimitError()

        return response

    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def cache_get(self, url, params=None, timeout=30):
        return self.get(url, params, timeout)

    def get_raw_json(self, url, params=None, timeout=30):
        utils.get_yf_logger().debug(f'get_raw_json(): {url}')
        response = self.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
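

# Illustrative usage sketch (the quoteSummary endpoint below is an assumption,
# not taken from this file; the crumb parameter is appended automatically):
#
#     data = YfData()
#     quote = data.get_raw_json(
#         'https://query1.finance.yahoo.com/v10/finance/quoteSummary/MSFT',
#         params={'modules': 'price'})
#     cached = data.cache_get('https://query1.finance.yahoo.com/v1/test/getcrumb')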