Files
Buffteks-Website/buffteks/lib/python3.12/site-packages/narwhals/utils.py
2025-05-08 21:10:14 -05:00

1756 lines
58 KiB
Python

from __future__ import annotations
import os
import re
from datetime import timezone
from enum import Enum
from enum import auto
from functools import wraps
from importlib.util import find_spec
from inspect import getattr_static
from secrets import token_hex
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Protocol
from typing import Sequence
from typing import TypeVar
from typing import Union
from typing import cast
from typing import overload
from warnings import warn
from narwhals.dependencies import get_cudf
from narwhals.dependencies import get_dask_dataframe
from narwhals.dependencies import get_duckdb
from narwhals.dependencies import get_ibis
from narwhals.dependencies import get_modin
from narwhals.dependencies import get_pandas
from narwhals.dependencies import get_polars
from narwhals.dependencies import get_pyarrow
from narwhals.dependencies import get_pyspark_sql
from narwhals.dependencies import get_sqlframe
from narwhals.dependencies import is_cudf_series
from narwhals.dependencies import is_modin_series
from narwhals.dependencies import is_pandas_dataframe
from narwhals.dependencies import is_pandas_like_dataframe
from narwhals.dependencies import is_pandas_like_series
from narwhals.dependencies import is_pandas_series
from narwhals.dependencies import is_polars_series
from narwhals.dependencies import is_pyarrow_chunked_array
from narwhals.exceptions import ColumnNotFoundError
from narwhals.exceptions import DuplicateError
from narwhals.exceptions import InvalidOperationError
if TYPE_CHECKING:
from types import ModuleType
from typing import AbstractSet as Set
import pandas as pd
import pyarrow as pa
from typing_extensions import LiteralString
from typing_extensions import ParamSpec
from typing_extensions import Self
from typing_extensions import TypeAlias
from typing_extensions import TypeIs
from narwhals._arrow.namespace import ArrowNamespace
from narwhals._compliant import CompliantExpr
from narwhals._compliant import CompliantExprT
from narwhals._compliant import CompliantFrameT
from narwhals._compliant import CompliantNamespace
from narwhals._compliant import CompliantSeriesOrNativeExprT_co
from narwhals._compliant import CompliantSeriesT
from narwhals._compliant import NativeFrameT_co
from narwhals._compliant import NativeSeriesT_co
from narwhals._dask.namespace import DaskNamespace
from narwhals._duckdb.namespace import DuckDBNamespace
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals._polars.namespace import PolarsNamespace
from narwhals._spark_like.namespace import SparkLikeNamespace
from narwhals._translate import ArrowStreamExportable
from narwhals._translate import IntoArrowTable
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.dtypes import DType
from narwhals.series import Series
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
from narwhals.typing import CompliantSeries
from narwhals.typing import DataFrameLike
from narwhals.typing import DTypes
from narwhals.typing import IntoSeriesT
from narwhals.typing import SizeUnit
from narwhals.typing import SupportsNativeNamespace
from narwhals.typing import TimeUnit
# NOTE(review): `ParamSpec` and `TypeAlias` are imported after the
# `if TYPE_CHECKING:` line above; indentation is not visible here, so confirm
# whether these declarations are meant to live under that guard.

# Type variable bounded to the public Narwhals frame and series classes.
FrameOrSeriesT = TypeVar(
    "FrameOrSeriesT", bound=Union[LazyFrame[Any], DataFrame[Any], Series[Any]]
)
# Unbounded helper type variables (used by the `isinstance_or_issubclass` overloads).
_T = TypeVar("_T")
_T1 = TypeVar("_T1")
_T2 = TypeVar("_T2")
_T3 = TypeVar("_T3")
# Type variable for an arbitrary callable, plus parameter/return placeholders.
_Fn = TypeVar("_Fn", bound="Callable[..., Any]")
P = ParamSpec("P")
R = TypeVar("R")
# String aliases grouping `Implementation` members by backend family.
_PandasLike: TypeAlias = (
    "Literal[Implementation.PANDAS, Implementation.CUDF, Implementation.MODIN]"
)
_Arrow: TypeAlias = "Literal[Implementation.PYARROW]"
_Polars: TypeAlias = "Literal[Implementation.POLARS]"
_SparkLike: TypeAlias = "Literal[Implementation.PYSPARK, Implementation.SQLFRAME]"
_Dask: TypeAlias = "Literal[Implementation.DASK]"
_DuckDB: TypeAlias = "Literal[Implementation.DUCKDB]"
# Unions of the families above, split by eager/lazy support (Polars is in both).
_EagerOnly: TypeAlias = "_PandasLike | _Arrow"
_EagerAllowed: TypeAlias = "_Polars | _EagerOnly"
_LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB"
_LazyAllowed: TypeAlias = "_Polars | _LazyOnly"
class _SupportsVersion(Protocol):
    """Structural type for any object exposing a `__version__` string."""

    __version__: str
class _SupportsGet(Protocol):  # noqa: PYI046
    """Structural type for objects implementing the descriptor `__get__` method."""

    def __get__(self, instance: Any, owner: Any | None = None, /) -> Any: ...
class _StoresImplementation(Protocol):
    """Structural type for objects carrying an `_implementation` attribute."""

    _implementation: Implementation
    """Implementation of native object (pandas, Polars, PyArrow, ...)."""
class _StoresBackendVersion(Protocol):
    """Structural type for objects carrying a `_backend_version` attribute."""

    _backend_version: tuple[int, ...]
    """Version tuple for a native package."""
class _StoresVersion(Protocol):
    """Structural type for objects carrying a `_version` attribute."""

    _version: Version
    """Narwhals API version (V1 or MAIN)."""
class _LimitedContext(_StoresBackendVersion, _StoresVersion, Protocol):
    """Provides 2 attributes.

    Combines `_StoresBackendVersion` and `_StoresVersion`:

    - `_backend_version`
    - `_version`
    """
class _FullContext(_StoresImplementation, _LimitedContext, Protocol):
    """Provides 3 attributes.

    Combines `_StoresImplementation` with `_LimitedContext`:

    - `_implementation`
    - `_backend_version`
    - `_version`
    """
class _StoresColumns(Protocol):
    """Structural type for objects exposing a read-only `columns` property."""

    @property
    def columns(self) -> Sequence[str]: ...
# Covariant type variables parametrising `_StoresNative` and `_StoresCompliant`.
NativeT_co = TypeVar("NativeT_co", covariant=True)
CompliantT_co = TypeVar("CompliantT_co", covariant=True)
class _StoresNative(Protocol[NativeT_co]):  # noqa: PYI046
    """Provides access to a native object.

    The native object is exposed through the read-only `native` property.

    Native objects have types like:

    >>> from pandas import Series
    >>> from pyarrow import Table
    """

    @property
    def native(self) -> NativeT_co:
        """Return the native object."""
        ...
class _StoresCompliant(Protocol[CompliantT_co]):  # noqa: PYI046
    """Provides access to a compliant object.

    The compliant object is exposed through the read-only `compliant` property.

    Compliant objects have types like:

    >>> from narwhals._pandas_like.series import PandasLikeSeries
    >>> from narwhals._arrow.dataframe import ArrowDataFrame
    """

    @property
    def compliant(self) -> CompliantT_co:
        """Return the compliant object."""
        ...
class Version(Enum):
    """Narwhals API version (V1 or MAIN)."""

    # `import_dtypes_module` resolves this member to `narwhals.stable.v1.dtypes`.
    V1 = auto()
    # `import_dtypes_module` resolves this member to `narwhals.dtypes`.
    MAIN = auto()
class Implementation(Enum):
    """Implementation of native object (pandas, Polars, PyArrow, ...)."""

    PANDAS = auto()
    """Pandas implementation."""
    MODIN = auto()
    """Modin implementation."""
    CUDF = auto()
    """cuDF implementation."""
    PYARROW = auto()
    """PyArrow implementation."""
    PYSPARK = auto()
    """PySpark implementation."""
    POLARS = auto()
    """Polars implementation."""
    DASK = auto()
    """Dask implementation."""
    DUCKDB = auto()
    """DuckDB implementation."""
    IBIS = auto()
    """Ibis implementation."""
    SQLFRAME = auto()
    """SQLFrame implementation."""
    UNKNOWN = auto()
    """Unknown implementation."""

    @classmethod
    def from_native_namespace(
        cls: type[Self], native_namespace: ModuleType
    ) -> Implementation:  # pragma: no cover
        """Instantiate Implementation object from a native namespace module.

        Arguments:
            native_namespace: Native namespace.

        Returns:
            Implementation. `Implementation.UNKNOWN` if the module does not match
            any known backend.
        """
        # A dependency getter can return `None` (this module checks
        # `get_polars()` against `None`, for instance). Using the getter results
        # as dict keys would collapse every such backend onto a single `None`
        # key, so compare by identity and skip the `None` entries instead.
        candidates = (
            (get_pandas(), Implementation.PANDAS),
            (get_modin(), Implementation.MODIN),
            (get_cudf(), Implementation.CUDF),
            (get_pyarrow(), Implementation.PYARROW),
            (get_pyspark_sql(), Implementation.PYSPARK),
            (get_polars(), Implementation.POLARS),
            (get_dask_dataframe(), Implementation.DASK),
            (get_duckdb(), Implementation.DUCKDB),
            (get_ibis(), Implementation.IBIS),
            (get_sqlframe(), Implementation.SQLFRAME),
        )
        for module, implementation in candidates:
            if module is not None and module is native_namespace:
                return implementation
        return Implementation.UNKNOWN

    @classmethod
    def from_string(
        cls: type[Self], backend_name: str
    ) -> Implementation:  # pragma: no cover
        """Instantiate Implementation object from a backend name.

        Arguments:
            backend_name: Name of backend, expressed as string.

        Returns:
            Implementation. `Implementation.UNKNOWN` if the name is not recognised
            (the lookup is case-sensitive).
        """
        mapping = {
            "pandas": Implementation.PANDAS,
            "modin": Implementation.MODIN,
            "cudf": Implementation.CUDF,
            "pyarrow": Implementation.PYARROW,
            "pyspark": Implementation.PYSPARK,
            "polars": Implementation.POLARS,
            "dask": Implementation.DASK,
            "duckdb": Implementation.DUCKDB,
            "ibis": Implementation.IBIS,
            "sqlframe": Implementation.SQLFRAME,
        }
        return mapping.get(backend_name, Implementation.UNKNOWN)

    @classmethod
    def from_backend(
        cls: type[Self], backend: str | Implementation | ModuleType
    ) -> Implementation:
        """Instantiate from native namespace module, string, or Implementation.

        Arguments:
            backend: Backend to instantiate Implementation from.

        Returns:
            Implementation.
        """
        return (
            cls.from_string(backend)
            if isinstance(backend, str)
            else backend
            if isinstance(backend, Implementation)
            else cls.from_native_namespace(backend)
        )

    def to_native_namespace(self: Self) -> ModuleType:
        """Return the native namespace module corresponding to Implementation.

        The backend package is imported lazily, so only the requested backend
        needs to be installed.

        Returns:
            Native module.

        Raises:
            AssertionError: If the implementation has no native namespace
                (`Implementation.UNKNOWN`).
        """
        if self is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            return pd
        if self is Implementation.MODIN:
            import modin.pandas

            return modin.pandas
        if self is Implementation.CUDF:  # pragma: no cover
            import cudf  # ignore-banned-import

            return cudf
        if self is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            return pa
        if self is Implementation.PYSPARK:  # pragma: no cover
            import pyspark.sql

            return pyspark.sql
        if self is Implementation.POLARS:
            import polars as pl  # ignore-banned-import

            return pl
        if self is Implementation.DASK:
            import dask.dataframe  # ignore-banned-import

            return dask.dataframe
        if self is Implementation.DUCKDB:
            import duckdb  # ignore-banned-import

            return duckdb
        if self is Implementation.IBIS:  # pragma: no cover
            # Kept in step with `from_native_namespace`/`from_string`, which
            # both recognise Ibis.
            import ibis  # ignore-banned-import

            return ibis
        if self is Implementation.SQLFRAME:
            import sqlframe  # ignore-banned-import

            return sqlframe
        msg = "Not supported Implementation"  # pragma: no cover
        raise AssertionError(msg)

    def is_pandas(self: Self) -> bool:
        """Return whether implementation is pandas.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas()
            True
        """
        return self is Implementation.PANDAS

    def is_pandas_like(self: Self) -> bool:
        """Return whether implementation is pandas, Modin, or cuDF.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas_like()
            True
        """
        return self in {
            Implementation.PANDAS,
            Implementation.MODIN,
            Implementation.CUDF,
        }

    def is_spark_like(self: Self) -> bool:
        """Return whether implementation is pyspark or sqlframe.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_spark_like()
            False
        """
        return self in {Implementation.PYSPARK, Implementation.SQLFRAME}

    def is_polars(self: Self) -> bool:
        """Return whether implementation is Polars.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_polars()
            True
        """
        return self is Implementation.POLARS

    def is_cudf(self: Self) -> bool:
        """Return whether implementation is cuDF.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_cudf()
            False
        """
        return self is Implementation.CUDF  # pragma: no cover

    def is_modin(self: Self) -> bool:
        """Return whether implementation is Modin.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_modin()
            False
        """
        return self is Implementation.MODIN  # pragma: no cover

    def is_pyspark(self: Self) -> bool:
        """Return whether implementation is PySpark.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyspark()
            False
        """
        return self is Implementation.PYSPARK  # pragma: no cover

    def is_pyarrow(self: Self) -> bool:
        """Return whether implementation is PyArrow.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyarrow()
            False
        """
        return self is Implementation.PYARROW  # pragma: no cover

    def is_dask(self: Self) -> bool:
        """Return whether implementation is Dask.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_dask()
            False
        """
        return self is Implementation.DASK  # pragma: no cover

    def is_duckdb(self: Self) -> bool:
        """Return whether implementation is DuckDB.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_duckdb()
            False
        """
        return self is Implementation.DUCKDB  # pragma: no cover

    def is_ibis(self: Self) -> bool:
        """Return whether implementation is Ibis.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_ibis()
            False
        """
        return self is Implementation.IBIS  # pragma: no cover

    def is_sqlframe(self: Self) -> bool:
        """Return whether implementation is SQLFrame.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_sqlframe()
            False
        """
        return self is Implementation.SQLFRAME  # pragma: no cover
# Minimum backend version, as a version tuple, for each implementation.
# `validate_backend_version` compares a backend's version tuple against these.
# `Implementation.UNKNOWN` has no entry.
MIN_VERSIONS: dict[Implementation, tuple[int, ...]] = {
    Implementation.PANDAS: (0, 25, 3),
    Implementation.MODIN: (0, 25, 3),
    Implementation.CUDF: (24, 10),
    Implementation.PYARROW: (11,),
    Implementation.PYSPARK: (3, 5),
    Implementation.POLARS: (0, 20, 3),
    Implementation.DASK: (2024, 8),
    Implementation.DUCKDB: (1,),
    Implementation.IBIS: (6,),
    Implementation.SQLFRAME: (3, 22, 0),
}
@overload
def _into_compliant_namespace(
    impl: _PandasLike, version: Version, /
) -> PandasLikeNamespace: ...
@overload
def _into_compliant_namespace(impl: _Polars, version: Version, /) -> PolarsNamespace: ...
@overload
def _into_compliant_namespace(impl: _Arrow, version: Version, /) -> ArrowNamespace: ...
@overload
def _into_compliant_namespace(
    impl: _SparkLike, version: Version, /
) -> SparkLikeNamespace: ...
@overload
def _into_compliant_namespace(impl: _DuckDB, version: Version, /) -> DuckDBNamespace: ...
@overload
def _into_compliant_namespace(impl: _Dask, version: Version, /) -> DaskNamespace: ...
@overload
def _into_compliant_namespace(
    impl: _EagerAllowed, version: Version, /
) -> PandasLikeNamespace | PolarsNamespace | ArrowNamespace: ...
def _into_compliant_namespace(
    impl: Implementation, version: Version, /
) -> CompliantNamespace[Any, Any]:
    """Build the compliant namespace object for a backend.

    Arguments:
        impl: Backend to build the namespace for.
        version: Narwhals API version forwarded to the namespace.

    Returns:
        The namespace class matching `impl`, instantiated with the parsed
        backend version.

    Raises:
        AssertionError: If `impl` has no namespace handled here.
    """
    native_module = impl.to_native_namespace()
    # For SQLFrame the version is read from the module's `_version` attribute
    # rather than from the module itself.
    version_source = native_module._version if impl.is_sqlframe() else native_module
    backend_version = parse_version(version_source)

    # Namespace classes are imported lazily, one backend per guard clause.
    if impl.is_pandas_like():
        from narwhals._pandas_like.namespace import PandasLikeNamespace

        return PandasLikeNamespace(
            implementation=impl, backend_version=backend_version, version=version
        )
    if impl.is_polars():
        from narwhals._polars.namespace import PolarsNamespace

        return PolarsNamespace(backend_version=backend_version, version=version)
    if impl.is_pyarrow():
        from narwhals._arrow.namespace import ArrowNamespace

        return ArrowNamespace(backend_version=backend_version, version=version)
    if impl.is_spark_like():  # pragma: no cover
        from narwhals._spark_like.namespace import SparkLikeNamespace

        return SparkLikeNamespace(
            implementation=impl, backend_version=backend_version, version=version
        )
    if impl.is_duckdb():  # pragma: no cover
        from narwhals._duckdb.namespace import DuckDBNamespace

        return DuckDBNamespace(backend_version=backend_version, version=version)
    if impl.is_dask():  # pragma: no cover
        from narwhals._dask.namespace import DaskNamespace

        return DaskNamespace(backend_version=backend_version, version=version)
    msg = "Not supported Implementation"  # pragma: no cover
    raise AssertionError(msg)
def validate_backend_version(
    implementation: Implementation, backend_version: tuple[int, ...]
) -> None:
    """Check that a backend meets the minimum version listed in `MIN_VERSIONS`.

    Arguments:
        implementation: Backend whose minimum version is looked up.
        backend_version: Version tuple of the installed backend.

    Raises:
        ValueError: If `backend_version` is lower than the minimum.
    """
    min_version = MIN_VERSIONS[implementation]
    if backend_version < min_version:
        msg = f"Minimum version of {implementation} supported by Narwhals is {min_version}, found: {backend_version}"
        raise ValueError(msg)
def import_dtypes_module(version: Version) -> DTypes:
    """Import and return the dtypes module for a Narwhals API version.

    Arguments:
        version: Narwhals API version.

    Returns:
        `narwhals.stable.v1.dtypes` for `Version.V1`, `narwhals.dtypes` for
        `Version.MAIN`.

    Raises:
        AssertionError: If `version` is neither of the two members.
    """
    if version is Version.V1:
        from narwhals.stable.v1 import dtypes as v1_dtypes

        return v1_dtypes  # type: ignore[return-value]
    if version is Version.MAIN:
        from narwhals import dtypes as main_dtypes

        return main_dtypes  # type: ignore[return-value]
    # pragma: no cover
    msg = (
        "Congratulations, you have entered unreachable code.\n"
        "Please report an issue at https://github.com/narwhals-dev/narwhals/issues.\n"
        f"Version: {version}"
    )
    raise AssertionError(msg)
def remove_prefix(text: str, prefix: str) -> str:  # pragma: no cover
    """Return `text` without a leading `prefix`, or unchanged if it is absent.

    Arguments:
        text: String to strip.
        prefix: Prefix to remove.

    Returns:
        The stripped string.
    """
    return text[len(prefix) :] if text.startswith(prefix) else text
def remove_suffix(text: str, suffix: str) -> str:  # pragma: no cover
    """Return `text` without a trailing `suffix`, or unchanged if it is absent.

    Arguments:
        text: String to strip.
        suffix: Suffix to remove.

    Returns:
        The stripped string.
    """
    # An empty suffix always matches `endswith`, and `text[:-0]` is `text[:0]`,
    # which would wipe the whole string; require a non-empty suffix.
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text  # pragma: no cover
def flatten(args: Any) -> list[Any]:
    """Return the arguments as a list, unwrapping a single iterable argument.

    Arguments:
        args: Sequence of arguments.

    Returns:
        `list(args[0])` when `args` holds exactly one element that
        `_is_iterable` accepts, otherwise `list(args)`.
    """
    if len(args) == 1 and _is_iterable(args[0]):
        return list(args[0])
    return list(args)
def tupleify(arg: Any) -> Any:
    """Wrap `arg` in a 1-tuple unless it is already a list or tuple.

    Arguments:
        arg: Any object.

    Returns:
        `arg` itself for lists and tuples, otherwise `(arg,)`.
    """
    is_sequence = isinstance(arg, (list, tuple))
    return arg if is_sequence else (arg,)  # pragma: no cover
def _is_iterable(arg: Any | Iterable[Any]) -> bool:
    """Return whether `arg` should be treated as a collection of items.

    Strings, bytes and Narwhals `Series` are not treated as iterables.

    Arguments:
        arg: Object to inspect.

    Returns:
        Boolean.

    Raises:
        TypeError: If `arg` is a pandas DataFrame/Series or a Polars
            Series/Expr/DataFrame/LazyFrame, i.e. a native object passed where
            a Narwhals one was expected.
    """
    from narwhals.series import Series

    if is_pandas_dataframe(arg) or is_pandas_series(arg):
        msg = f"Expected Narwhals class or scalar, got: {type(arg)}. Perhaps you forgot a `nw.from_native` somewhere?"
        raise TypeError(msg)

    # Polars is only inspected when `get_polars()` returns a module.
    polars = get_polars()
    if polars is not None:
        native_polars_types = (
            polars.Series,
            polars.Expr,
            polars.DataFrame,
            polars.LazyFrame,
        )
        if isinstance(arg, native_polars_types):
            msg = (
                f"Expected Narwhals class or scalar, got: {type(arg)}.\n\n"
                "Hint: Perhaps you\n"
                "- forgot a `nw.from_native` somewhere?\n"
                "- used `pl.col` instead of `nw.col`?"
            )
            raise TypeError(msg)

    if isinstance(arg, (str, bytes, Series)):
        return False
    return isinstance(arg, Iterable)
def parse_version(version: str | ModuleType | _SupportsVersion) -> tuple[int, ...]:
    """Simple version parser; split into a tuple of ints for comparison.

    Arguments:
        version: Version string, or object with one, to parse.

    Returns:
        Parsed version number.
    """
    # lifted from Polars
    raw = version if isinstance(version, str) else version.__version__
    # [marco]: Take care of DuckDB pre-releases which end with e.g. `-dev4108`
    # and pandas pre-releases which end with e.g. .dev0+618.gb552dc95c9
    release = re.sub(r"(\D?dev.*$)", "", raw)
    components: list[int] = []
    for piece in release.split("."):
        # Only the digits of each dot-separated piece are kept.
        digits = re.sub(r"\D", "", piece)
        components.append(int(digits))
    return tuple(components)
@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: type[_T]
) -> TypeIs[type[_T]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: type[_T]
) -> TypeIs[_T | type[_T]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[type[_T1 | _T2]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[_T1 | _T2 | type[_T1 | _T2]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[type[_T1 | _T2 | _T3]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[_T1 | _T2 | _T3 | type[_T1 | _T2 | _T3]]: ...
@overload
def isinstance_or_issubclass(
    obj_or_cls: Any, cls_or_tuple: tuple[type, ...]
) -> TypeIs[Any]: ...
def isinstance_or_issubclass(obj_or_cls: Any, cls_or_tuple: Any) -> bool:
    """Return whether `obj_or_cls` is an instance or a subclass of `cls_or_tuple`.

    Arguments:
        obj_or_cls: Object or class to test.
        cls_or_tuple: Class, or tuple of classes, to test against.

    Returns:
        Boolean. A `DType` instance is only ever tested with `isinstance`.
    """
    from narwhals.dtypes import DType

    if isinstance(obj_or_cls, cls_or_tuple):
        return True
    if isinstance(obj_or_cls, DType):
        # `DType` instances never fall through to the subclass check.
        return False
    return isinstance(obj_or_cls, type) and issubclass(obj_or_cls, cls_or_tuple)
def validate_laziness(items: Iterable[Any]) -> None:
    """Check that all items are eager `DataFrame`s, or all are `LazyFrame`s.

    Arguments:
        items: Objects to check. One-shot iterables (e.g. generators) are
            accepted.

    Raises:
        TypeError: If the items are neither all `DataFrame` nor all `LazyFrame`.
    """
    from narwhals.dataframe import DataFrame
    from narwhals.dataframe import LazyFrame

    # Materialise once: `items` is walked up to three times below, and a
    # one-shot iterator would be exhausted after the first pass, which makes
    # the second `all(...)` vacuously true and empties the error message.
    materialized = list(items)
    if all(isinstance(item, DataFrame) for item in materialized) or (
        all(isinstance(item, LazyFrame) for item in materialized)
    ):
        return
    msg = f"The items to concatenate should either all be eager, or all lazy, got: {[type(item) for item in materialized]}"
    raise TypeError(msg)
def maybe_align_index(
    lhs: FrameOrSeriesT, rhs: Series[Any] | DataFrame[Any] | LazyFrame[Any]
) -> FrameOrSeriesT:
    """Align `lhs` to the Index of `rhs`, if they're both pandas-like.

    Arguments:
        lhs: Dataframe or Series.
        rhs: Dataframe or Series to align with.

    Returns:
        Same type as input.

    Raises:
        ValueError: If both inputs are pandas-like and either index has
            duplicates, or if they are not both pandas-like and their lengths
            differ.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this only checks that `lhs` and `rhs`
        are the same length.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
        >>> s_pd = pd.Series([6, 7], index=[4, 3])
        >>> df = nw.from_native(df_pd)
        >>> s = nw.from_native(s_pd, series_only=True)
        >>> nw.to_native(nw.maybe_align_index(df, s))
           a
        4  2
        3  1
    """
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
    from narwhals._pandas_like.series import PandasLikeSeries

    def _pandas_like_compliant(narwhals_obj: Any) -> Any | None:
        # Frame is tried before series, for both operands.
        frame = getattr(narwhals_obj, "_compliant_frame", None)
        if isinstance(frame, PandasLikeDataFrame):
            return frame
        series = getattr(narwhals_obj, "_compliant_series", None)
        if isinstance(series, PandasLikeSeries):
            return series
        return None

    def _validate_index(index: Any) -> None:
        if not index.is_unique:
            msg = "given index doesn't have a unique index"
            raise ValueError(msg)

    lhs_any = cast("Any", lhs)
    rhs_any = cast("Any", rhs)
    lhs_compliant = _pandas_like_compliant(lhs_any)
    rhs_compliant = _pandas_like_compliant(rhs_any)

    if lhs_compliant is not None and rhs_compliant is not None:
        _validate_index(lhs_compliant.native.index)
        _validate_index(rhs_compliant.native.index)
        # Reorder `lhs` rows by label so they follow the order of `rhs`'s index.
        realigned = lhs_compliant.native.loc[rhs_compliant.native.index]
        return lhs_any._with_compliant(lhs_compliant._with_native(realigned))

    if len(lhs_any) != len(rhs_any):
        msg = f"Expected `lhs` and `rhs` to have the same length, got {len(lhs_any)} and {len(rhs_any)}"
        raise ValueError(msg)
    return lhs
def maybe_get_index(obj: DataFrame[Any] | LazyFrame[Any] | Series[Any]) -> Any | None:
    """Get the index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: Dataframe or Series.

    Returns:
        The index of the native object if it is pandas-like, otherwise `None`.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this returns `None`.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.maybe_get_index(df)
        RangeIndex(start=0, stop=2, step=1)
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    """
    native = cast("Any", obj).to_native()
    pandas_like = is_pandas_like_dataframe(native) or is_pandas_like_series(native)
    return native.index if pandas_like else None
def maybe_set_index(
    obj: FrameOrSeriesT,
    column_names: str | list[str] | None = None,
    *,
    index: Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None = None,
) -> FrameOrSeriesT:
    """Set the index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: object for which maybe set the index (can be either a Narwhals `DataFrame`
            or `Series`).
        column_names: name or list of names of the columns to set as index.
            For dataframes, only one of `column_names` and `index` can be specified but
            not both. If `column_names` is passed and `obj` is a Series, then a
            `ValueError` is raised.
        index: series or list of series to set as index.

    Returns:
        Same type as input.

    Raises:
        ValueError: If one of the following condition happens:

            - none of `column_names` and `index` are provided
            - both `column_names` and `index` are provided
            - `column_names` is provided and `obj` is a Series

    Notes:
        This is only really intended for backwards-compatibility purposes, for example if
        your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_set_index(df, "b"))  # doctest: +NORMALIZE_WHITESPACE
           a
        b
        4  1
        5  2
    """
    from narwhals.translate import to_native

    obj_any = cast("Any", obj)
    native = obj_any.to_native()

    # Argument validation runs for every backend, before any dispatch.
    if column_names is not None and index is not None:
        msg = "Only one of `column_names` or `index` should be provided"
        raise ValueError(msg)
    if not column_names and index is None:
        msg = "Either `column_names` or `index` should be provided"
        raise ValueError(msg)

    if index is None:
        keys = column_names
    elif _is_iterable(index):
        keys = [to_native(idx, pass_through=True) for idx in index]
    else:
        keys = to_native(index, pass_through=True)

    if is_pandas_like_dataframe(native):
        reindexed = native.set_index(keys)
        return obj_any._with_compliant(obj_any._compliant_frame._with_native(reindexed))

    if is_pandas_like_series(native):
        from narwhals._pandas_like.utils import set_index

        if column_names:
            msg = "Cannot set index using column names on a Series"
            raise ValueError(msg)
        compliant_series = obj_any._compliant_series
        native = set_index(
            native,
            keys,
            implementation=compliant_series._implementation,
            backend_version=compliant_series._backend_version,
        )
        return obj_any._with_compliant(obj_any._compliant_series._with_native(native))

    return obj_any
def maybe_reset_index(obj: FrameOrSeriesT) -> FrameOrSeriesT:
    """Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: Dataframe or Series.

    Returns:
        Same type as input. The input object itself is returned when it is not
        pandas-like or already has the default index.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already resets the index for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_reset_index(df))
           a  b
        0  1  4
        1  2  5
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    """
    obj_any = cast("Any", obj)
    native = obj_any.to_native()

    # Pick which compliant wrapper holds the native object; bail out otherwise.
    if is_pandas_like_dataframe(native):
        compliant_attr = "_compliant_frame"
    elif is_pandas_like_series(native):
        compliant_attr = "_compliant_series"
    else:
        return obj_any

    namespace = obj_any.__native_namespace__()
    if _has_default_index(native, namespace):
        return obj_any
    compliant = getattr(obj_any, compliant_attr)
    return obj_any._with_compliant(
        compliant._with_native(native.reset_index(drop=True))
    )
def _is_range_index(obj: Any, native_namespace: Any) -> TypeIs[pd.RangeIndex]:
return isinstance(obj, native_namespace.RangeIndex)
# NOTE: Remove ignore(s) after release w/ (https://github.com/pandas-dev/pandas-stubs/pull/1115)
def _has_default_index(
    native_frame_or_series: pd.Series[Any] | pd.DataFrame, native_namespace: Any
) -> bool:
    """Return whether the index is a `RangeIndex` running 0, 1, ..., len - 1.

    Arguments:
        native_frame_or_series: Object whose `index` attribute is inspected.
        native_namespace: Namespace providing the `RangeIndex` class.

    Returns:
        Boolean.
    """
    index = native_frame_or_series.index
    if not _is_range_index(index, native_namespace):
        return False
    return index.start == 0 and index.stop == len(index) and index.step == 1
def maybe_convert_dtypes(
    obj: FrameOrSeriesT, *args: bool, **kwargs: bool | str
) -> FrameOrSeriesT:
    """Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.

    Arguments:
        obj: DataFrame or Series.
        *args: Additional arguments which gets passed through.
        **kwargs: Additional arguments which gets passed through.

    Returns:
        Same type as input.

    Notes:
        For non-pandas-like inputs, this is a no-op.
        Also, `args` and `kwargs` just get passed down to the underlying library as-is.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> import numpy as np
        >>> df_pd = pd.DataFrame(
        ...     {
        ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        ...         "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
        ...     }
        ... )
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(
        ...     nw.maybe_convert_dtypes(df)
        ... ).dtypes  # doctest: +NORMALIZE_WHITESPACE
        a             Int32
        b           boolean
        dtype: object
    """
    obj_any = cast("Any", obj)
    native = obj_any.to_native()

    # Pick which compliant wrapper holds the native object; bail out otherwise.
    if is_pandas_like_dataframe(native):
        compliant_attr = "_compliant_frame"
    elif is_pandas_like_series(native):
        compliant_attr = "_compliant_series"
    else:
        return obj_any

    compliant = getattr(obj_any, compliant_attr)
    converted = native.convert_dtypes(*args, **kwargs)
    return obj_any._with_compliant(compliant._with_native(converted))
def scale_bytes(sz: int, unit: SizeUnit) -> int | float:
    """Scale size in bytes to other size units (eg: "kb", "mb", "gb", "tb").

    Arguments:
        sz: original size in bytes
        unit: size unit to convert into

    Returns:
        Integer or float. `sz` is returned untouched for bytes; every other
        unit divides by a power of 1024.

    Raises:
        ValueError: If `unit` is not a recognised size unit.
    """
    # Power of 1024 that each accepted unit name divides by.
    powers_of_1024 = {
        "b": 0,
        "bytes": 0,
        "kb": 1,
        "kilobytes": 1,
        "mb": 2,
        "megabytes": 2,
        "gb": 3,
        "gigabytes": 3,
        "tb": 4,
        "terabytes": 4,
    }
    power = powers_of_1024.get(unit)
    if power is None:
        msg = f"`unit` must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}"
        raise ValueError(msg)
    if power == 0:
        return sz
    return sz / 1024**power
def is_ordered_categorical(series: Series[Any]) -> bool:
    """Return whether indices of categories are semantically meaningful.

    This is a convenience function to accessing what would otherwise be
    the `is_ordered` property from the DataFrame Interchange Protocol,
    see https://data-apis.org/dataframe-protocol/latest/API.html.

    - For Polars:
      - Enums are always ordered.
      - Categoricals are ordered if `dtype.ordering == "physical"`.
    - For pandas-like APIs:
      - Categoricals are ordered if `dtype.cat.ordered == True`.
    - For PyArrow table:
      - Categoricals are ordered if `dtype.type.ordered == True`.

    Arguments:
        series: Input Series.

    Returns:
        Whether the Series is an ordered categorical.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = ["x", "y"]
        >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
        >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))

        Let's define a library-agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return nw.is_ordered_categorical(s)

        Then, we can pass any supported library to `func`:

        >>> func(s_pd)
        True
        >>> func(s_pl)
        True
    """
    from narwhals._interchange.series import InterchangeSeries

    compliant = series._compliant_series
    dtypes = import_dtypes_module(compliant._version)
    dtype = series.dtype

    # Interchange-level series report orderedness via `describe_categorical`.
    if isinstance(compliant, InterchangeSeries) and dtype == dtypes.Categorical:
        return compliant.native.describe_categorical["is_ordered"]
    if dtype == dtypes.Enum:
        return True
    if dtype != dtypes.Categorical:
        return False

    native = series.to_native()
    if is_polars_series(native):
        return native.dtype.ordering == "physical"  # type: ignore[attr-defined]
    if is_pandas_series(native):
        return bool(native.cat.ordered)
    if is_modin_series(native) or is_cudf_series(native):  # pragma: no cover
        return native.cat.ordered
    if is_pyarrow_chunked_array(native):
        from narwhals._arrow.utils import is_dictionary

        return is_dictionary(native.type) and native.type.ordered
    # If it doesn't match any of the above, let's just play it safe and return False.
    return False  # pragma: no cover
def generate_unique_token(
    n_bytes: int, columns: Sequence[str]
) -> str:  # pragma: no cover
    """Deprecated alias of `generate_temporary_column_name`.

    Emits a deprecation warning and then forwards both arguments unchanged.

    Arguments:
        n_bytes: The number of bytes to generate for the token.
        columns: The column names the token must not collide with.

    Returns:
        Whatever `generate_temporary_column_name` returns for the same arguments.
    """
    issue_deprecation_warning(
        "Use `generate_temporary_column_name` instead. `generate_unique_token` is "
        "deprecated and it will be removed in future versions",
        _version="1.13.0",
    )
    return generate_temporary_column_name(n_bytes=n_bytes, columns=columns)
def generate_temporary_column_name(n_bytes: int, columns: Sequence[str]) -> str:
    """Generate a column name that does not appear in `columns`.

    Candidates come from [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex),
    which returns a hexadecimal string built from `n_bytes` random bytes.

    Arguments:
        n_bytes: The number of bytes to generate for the token.
        columns: The list of columns to check for uniqueness.

    Returns:
        A unique token that is not present in the given list of columns.

    Raises:
        AssertionError: If none of the 101 generated candidates is free.

    Examples:
        >>> import narwhals as nw
        >>> columns = ["abc", "xyz"]
        >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns
        True
    """
    max_candidates = 101
    for _ in range(max_candidates):
        candidate = token_hex(n_bytes)
        if candidate not in columns:
            return candidate

    msg = (
        "Internal Error: Narwhals was not able to generate a column name with "
        f"{n_bytes=} and not in {columns}"
    )
    raise AssertionError(msg)
def parse_columns_to_drop(
    compliant_frame: Any,
    columns: Iterable[str],
    strict: bool,  # noqa: FBT001
) -> list[str]:
    """Resolve which of the requested columns should actually be dropped.

    Arguments:
        compliant_frame: Object exposing the available column names via `.columns`.
        columns: Names requested to be dropped.
        strict: When true, every requested name must exist in the frame; when
            false, requested names that are absent from the frame are ignored.

    Returns:
        In strict mode, the requested names in the order given. Otherwise the
        requested names that exist in the frame, each once, in frame order.

    Raises:
        ColumnNotFoundError: In strict mode, if any requested name is missing.
    """
    cols = compliant_frame.columns
    to_drop = list(columns)

    if strict:
        available = set(cols)
        missing_columns = [x for x in to_drop if x not in available]
        if missing_columns:
            raise ColumnNotFoundError.from_missing_and_available_column_names(
                missing_columns=missing_columns, available_columns=cols
            )
        return to_drop

    # A bare set intersection has hash-dependent ordering for strings, which
    # changes between interpreter runs. Walking the frame's columns instead
    # gives a reproducible result; `dict.fromkeys` removes duplicates while
    # keeping first-seen order.
    requested = set(to_drop)
    return [name for name in dict.fromkeys(cols) if name in requested]
def is_sequence_but_not_str(sequence: Any | Sequence[_T]) -> TypeIs[Sequence[_T]]:
    """Return whether `sequence` is a `Sequence` other than a `str`."""
    if isinstance(sequence, str):
        return False
    return isinstance(sequence, Sequence)
def find_stacklevel() -> int:
    """Count the leading stack frames that belong to narwhals.

    Starting from this function's own frame, frames are skipped while their
    file lives under the narwhals package directory or their code object's
    qualified name starts with `"singledispatch."`.

    Returns:
        Stacklevel.

    Taken from:
    https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
    """
    import inspect
    from pathlib import Path

    import narwhals as nw

    package_root = str(Path(nw.__file__).parent)
    depth = 0

    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame = inspect.currentframe()
    try:
        while frame:
            if not inspect.getfile(frame).startswith(package_root):
                # ignore @singledispatch wrappers
                qualname = getattr(frame.f_code, "co_qualname", None)
                if not (qualname and qualname.startswith("singledispatch.")):
                    break  # pragma: no cover
            frame = frame.f_back
            depth += 1
    finally:
        # https://docs.python.org/3/library/inspect.html
        # > Though the cycle detector will catch these, destruction of the frames
        # > (and local variables) can be made deterministic by removing the cycle
        # > in a finally clause.
        del frame
    return depth
def issue_deprecation_warning(message: str, _version: str) -> None:
    """Emit a `DeprecationWarning` at the stack level given by `find_stacklevel`.

    Arguments:
        message: The message associated with the warning.
        _version: Narwhals version when the warning was introduced. Just used for internal
            bookkeeping.
    """
    stacklevel = find_stacklevel()
    warn(message=message, category=DeprecationWarning, stacklevel=stacklevel)
def validate_strict_and_pass_though(
    strict: bool | None,  # noqa: FBT001
    pass_through: bool | None,  # noqa: FBT001
    *,
    pass_through_default: bool,
    emit_deprecation_warning: bool,
) -> bool:
    """Reconcile the legacy `strict` flag with its replacement `pass_through`.

    Arguments:
        strict: Legacy flag; when it is the only one given, the result is `not strict`.
        pass_through: New flag; returned as-is when it is the only one given.
        pass_through_default: Result when neither flag is given.
        emit_deprecation_warning: Whether using `strict` alone issues a
            deprecation warning.

    Returns:
        The effective `pass_through` value.

    Raises:
        ValueError: If both `strict` and `pass_through` are given.
    """
    if strict is not None and pass_through is not None:
        msg = "Cannot pass both `strict` and `pass_through`"
        raise ValueError(msg)
    if pass_through is not None:
        return pass_through
    if strict is None:
        return pass_through_default

    # Only the legacy flag was supplied.
    if emit_deprecation_warning:
        msg = (
            "`strict` in `from_native` is deprecated, please use `pass_through` instead.\n\n"
            "Note: `strict` will remain available in `narwhals.stable.v1`.\n"
            "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
        )
        issue_deprecation_warning(msg, _version="1.13.0")
    return not strict
def deprecate_native_namespace(
    *, warn_version: str = "", required: bool = False
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Decorator easing the move from the `native_namespace` argument to `backend`.

    The wrapped function always receives a `backend` keyword and never a
    `native_namespace` one.

    Arguments:
        warn_version: Emit a deprecation warning from this version.
        required: Raise when both `native_namespace`, `backend` are `None`.

    Returns:
        Wrapped function, with `native_namespace` **removed**.
    """

    def decorate(fn: Callable[P, R], /) -> Callable[P, R]:
        @wraps(fn)
        def wrapper(*args: P.args, **kwds: P.kwargs) -> R:
            backend = kwds.pop("backend", None)
            native_namespace = kwds.pop("native_namespace", None)
            has_backend = backend is not None
            has_native_namespace = native_namespace is not None

            if has_native_namespace and has_backend:
                msg = "Can't pass both `native_namespace` and `backend`"
                raise ValueError(msg)
            if has_native_namespace:
                if warn_version:
                    msg = (
                        "`native_namespace` is deprecated, please use `backend` instead.\n\n"
                        "Note: `native_namespace` will remain available in `narwhals.stable.v1`.\n"
                        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
                    )
                    issue_deprecation_warning(msg, _version=warn_version)
                # The legacy argument simply takes the place of `backend`.
                backend = native_namespace
            elif required and not has_backend:
                msg = f"`backend` must be specified in `{fn.__name__}`."
                raise ValueError(msg)

            kwds["backend"] = backend
            return fn(*args, **kwds)

        return wrapper

    return decorate
def _validate_rolling_arguments(
    window_size: int, min_samples: int | None
) -> tuple[int, int]:
    """Validate rolling-window parameters and fill in the `min_samples` default.

    Type checks run before range checks, so a non-integer argument is always
    reported with the dedicated "cannot be interpreted as an integer" message
    rather than failing inside (or being misjudged by) a numeric comparison.

    Arguments:
        window_size: Size of the rolling window; must be an integer >= 1.
        min_samples: Minimum number of observations; must be an integer between
            1 and `window_size`, or `None` to default to `window_size`.

    Returns:
        The validated `(window_size, min_samples)` pair.

    Raises:
        TypeError: If `window_size` or a non-`None` `min_samples` is not an `int`.
        ValueError: If `window_size` or `min_samples` is smaller than 1.
        InvalidOperationError: If `min_samples` is larger than `window_size`.
    """
    if not isinstance(window_size, int):
        _type = window_size.__class__.__name__
        msg = (
            f"argument 'window_size': '{_type}' object cannot be "
            "interpreted as an integer"
        )
        raise TypeError(msg)
    if window_size < 1:
        msg = "window_size must be greater or equal than 1"
        raise ValueError(msg)

    if min_samples is None:
        return window_size, window_size

    if not isinstance(min_samples, int):
        _type = min_samples.__class__.__name__
        msg = (
            f"argument 'min_samples': '{_type}' object cannot be "
            "interpreted as an integer"
        )
        raise TypeError(msg)
    if min_samples < 1:
        msg = "min_samples must be greater or equal than 1"
        raise ValueError(msg)
    if min_samples > window_size:
        msg = "`min_samples` must be less or equal than `window_size`"
        raise InvalidOperationError(msg)
    return window_size, min_samples
def generate_repr(header: str, native_repr: str) -> str:
    """Render `native_repr` inside a text box titled with `header`.

    The available width is taken from `os.get_terminal_size()`, falling back to
    the `COLUMNS` environment variable (default 80) when no terminal is attached.
    If the widest line of `native_repr` plus the two side borders fits, the full
    repr is boxed with the header centred above it; otherwise a fixed-width box
    pointing the user to `.to_native` is returned.

    Arguments:
        header: Title shown in the first row of the box.
        native_repr: Multi-line string representation of the native object.

    Returns:
        The boxed representation, without a trailing newline.
    """
    try:
        terminal_width = os.get_terminal_size().columns
    except OSError:
        terminal_width = int(os.getenv("COLUMNS", 80))  # noqa: PLW1508

    native_lines = native_repr.splitlines()
    # `default=0` keeps an empty repr from crashing `max` on an empty sequence.
    max_native_width = max((len(line) for line in native_lines), default=0)

    if max_native_width + 2 <= terminal_width:
        length = max(max_native_width, len(header))
        header_extra = length - len(header)
        body_extra = length - max_native_width
        # Odd leftover padding goes to the right-hand side.
        start_extra = body_extra // 2
        end_extra = body_extra // 2 + body_extra % 2

        rows = [
            f"┌{'─' * length}┐",
            f"|{' ' * (header_extra // 2)}{header}{' ' * (header_extra // 2 + header_extra % 2)}|",
            f"|{'-' * length}|",
        ]
        rows.extend(
            f"|{' ' * start_extra}{line}{' ' * (end_extra + max_native_width - len(line))}|"
            for line in native_lines
        )
        rows.append(f"└{'─' * length}┘")
        return "\n".join(rows)

    # Too wide for the terminal: fixed 39-character-wide placeholder box.
    diff = 39 - len(header)
    return (
        f"┌{'─' * 39}┐\n"
        f"|{' ' * (diff // 2)}{header}{' ' * (diff // 2 + diff % 2)}|\n"
        "| Use `.to_native` to see native output |\n"
        f"└{'─' * 39}┘"
    )
def check_column_exists(columns: Sequence[str], subset: Sequence[str] | None) -> None:
    """Ensure every name in `subset` is present in `columns`.

    Arguments:
        columns: Available column names.
        subset: Names that must be present; `None` skips the check.

    Raises:
        ColumnNotFoundError: If at least one name of `subset` is not in `columns`.
    """
    if subset is None:
        return
    missing = set(subset).difference(columns)
    if missing:
        msg = f"Column(s) {sorted(missing)} not found in {columns}"
        raise ColumnNotFoundError(msg)
def check_column_names_are_unique(columns: Sequence[str]) -> None:
    """Ensure no column name occurs more than once.

    Arguments:
        columns: Column names to inspect.

    Raises:
        DuplicateError: If any name is repeated; the message lists each repeated
            name together with its number of occurrences.
    """
    if len(set(columns)) == len(columns):
        return

    from collections import Counter

    repeated = [(name, count) for name, count in Counter(columns).items() if count > 1]
    details = "".join(f"\n- '{name}' {count} times" for name, count in repeated)
    msg = f"Expected unique column names, got:{details}"
    raise DuplicateError(msg)
def _parse_time_unit_and_time_zone(
    time_unit: TimeUnit | Iterable[TimeUnit] | None,
    time_zone: str | timezone | Iterable[str | timezone | None] | None,
) -> tuple[Set[TimeUnit], Set[str | None]]:
    """Normalise time-unit and time-zone selectors into sets.

    Arguments:
        time_unit: A single unit, an iterable of units, or `None` for all of
            "ms", "us", "ns" and "s".
        time_zone: A single zone (string or `datetime.timezone`), an iterable of
            zones that may include `None`, or `None` for "no time zone" only.

    Returns:
        The set of time units and the set of time zones, with every non-`None`
        zone converted through `str`.
    """
    time_units: Set[TimeUnit]
    if time_unit is None:
        time_units = {"ms", "us", "ns", "s"}
    elif isinstance(time_unit, str):
        time_units = {time_unit}
    else:
        time_units = set(time_unit)

    time_zones: Set[str | None]
    if time_zone is None:
        time_zones = {None}
    elif isinstance(time_zone, (str, timezone)):
        time_zones = {str(time_zone)}
    else:
        time_zones = {None if tz is None else str(tz) for tz in time_zone}

    return time_units, time_zones
def dtype_matches_time_unit_and_time_zone(
    dtype: DType, dtypes: DTypes, time_units: Set[TimeUnit], time_zones: Set[str | None]
) -> bool:
    """Check whether `dtype` is a Datetime matching the given units and zones.

    Arguments:
        dtype: Data type to test.
        dtypes: Namespace providing the `Datetime` class to test against.
        time_units: Accepted time units.
        time_zones: Accepted time zones; "*" accepts any time zone that is not `None`.

    Returns:
        `True` only if `dtype` is a `dtypes.Datetime` whose unit and zone are accepted.
    """
    if not isinstance(dtype, dtypes.Datetime):
        return False
    if dtype.time_unit not in time_units:
        return False
    zone = dtype.time_zone
    if zone in time_zones:
        return True
    # Wildcard: any zone-aware datetime qualifies.
    return "*" in time_zones and zone is not None
def get_column_names(frame: _StoresColumns, /) -> Sequence[str]:
    """Return the value of `frame.columns`."""
    names = frame.columns
    return names
def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequence[str]:
    """Return the entries of `frame.columns` not contained in `names`, in frame order."""
    kept = [column for column in frame.columns if column not in names]
    return kept
def passthrough_column_names(names: Sequence[str], /) -> Callable[[Any], Sequence[str]]:
    """Build a function that ignores its argument and returns `names`.

    Arguments:
        names: Column names the returned function hands back.

    Returns:
        A one-argument callable that returns `names` whatever it is given.
    """

    # The argument is accepted positionally only and never inspected.
    def fn(_ignored: Any, /) -> Sequence[str]:
        return names

    return fn
def _hasattr_static(obj: Any, attr: str) -> bool:
    """Return whether `inspect.getattr_static` can find `attr` on `obj`."""
    try:
        getattr_static(obj, attr)
    except AttributeError:
        return False
    return True
def is_compliant_dataframe(
    obj: CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co] | Any,
) -> TypeIs[CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co]]:
    """Return whether `obj` has a statically resolvable `__narwhals_dataframe__` attribute."""
    marker = "__narwhals_dataframe__"
    return _hasattr_static(obj, marker)
def is_compliant_lazyframe(
    obj: CompliantLazyFrame[CompliantExprT, NativeFrameT_co] | Any,
) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeFrameT_co]]:
    """Return whether `obj` has a statically resolvable `__narwhals_lazyframe__` attribute."""
    marker = "__narwhals_lazyframe__"
    return _hasattr_static(obj, marker)
def is_compliant_series(
    obj: CompliantSeries[NativeSeriesT_co] | Any,
) -> TypeIs[CompliantSeries[NativeSeriesT_co]]:
    """Return whether `obj` has a statically resolvable `__narwhals_series__` attribute."""
    marker = "__narwhals_series__"
    return _hasattr_static(obj, marker)
def is_compliant_expr(
    obj: CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | Any,
) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]]:
    """Return whether `obj` exposes a `__narwhals_expr__` attribute (plain `hasattr` lookup)."""
    marker = "__narwhals_expr__"
    return hasattr(obj, marker)
def is_eager_allowed(obj: Implementation) -> TypeIs[_EagerAllowed]:
    """Return whether `obj` is one of PANDAS, MODIN, CUDF, POLARS or PYARROW."""
    eager_implementations = {
        Implementation.PANDAS,
        Implementation.MODIN,
        Implementation.CUDF,
        Implementation.POLARS,
        Implementation.PYARROW,
    }
    return obj in eager_implementations
def is_lazy_allowed(obj: Implementation) -> TypeIs[_LazyAllowed]:  # pragma: no cover
    """Return whether `obj` is one of POLARS, PYSPARK, SQLFRAME, DASK or DUCKDB."""
    lazy_implementations = {
        Implementation.POLARS,
        Implementation.PYSPARK,
        Implementation.SQLFRAME,
        Implementation.DASK,
        Implementation.DUCKDB,
    }
    return obj in lazy_implementations
def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]:
    """Return whether `obj` exposes a `__native_namespace__` attribute."""
    marker = "__native_namespace__"
    return hasattr(obj, marker)
def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]:
    """Return whether `obj` exposes a `__dataframe__` attribute."""
    marker = "__dataframe__"
    return hasattr(obj, marker)
def supports_arrow_c_stream(obj: Any) -> TypeIs[ArrowStreamExportable]:
    """Return whether `obj` has a statically resolvable `__arrow_c_stream__` attribute."""
    marker = "__arrow_c_stream__"
    return _hasattr_static(obj, marker)
def _remap_full_join_keys(
    left_on: Sequence[str], right_on: Sequence[str], suffix: str
) -> dict[str, str]:
    """Build a renaming map for right join keys that clash with left join keys.

    A right key that also appears in `left_on` is mapped to itself with
    `suffix` appended; any other right key is mapped to itself.

    Arguments:
        left_on: Left keys.
        right_on: Right keys.
        suffix: Suffix to append to right keys.

    Returns:
        A map of old to new right keys.
    """
    return {
        key: (f"{key}{suffix}" if key in left_on else key)
        for key in right_on
    }
def _into_arrow_table(data: IntoArrowTable, context: _FullContext, /) -> pa.Table:
    """Convert `data` through `ArrowDataFrame.from_arrow`, importing PyArrow lazily.

    Arguments:
        data: Object which implements `__arrow_c_stream__`.
        context: Initialized compliant object.

    Returns:
        A PyArrow Table.

    Raises:
        ModuleNotFoundError: If `pyarrow` cannot be located.
    """
    if not find_spec("pyarrow"):  # pragma: no cover
        msg = f"PyArrow>=14.0.0 is required for `from_arrow` for object of type {type(data).__name__!r}."
        raise ModuleNotFoundError(msg)

    import pyarrow as pa  # ignore-banned-import

    from narwhals._arrow.namespace import ArrowNamespace

    narwhals_version = context._version
    namespace = ArrowNamespace(
        backend_version=parse_version(pa), version=narwhals_version
    )
    return namespace._dataframe.from_arrow(data, context=namespace).native
# TODO @dangotbanned: Extend with runtime behavior for `v1.*`
# See `narwhals.exceptions.NarwhalsUnstableWarning`
def unstable(fn: _Fn, /) -> _Fn:
    """Flag a function as unstable; the marker has no runtime effect.

    Arguments:
        fn: Function to decorate.

    Returns:
        The very same function object, untouched.

    Examples:
        >>> from narwhals.utils import unstable
        >>> @unstable
        ... def a_work_in_progress_feature(*args):
        ...     return args
        >>>
        >>> a_work_in_progress_feature.__name__
        'a_work_in_progress_feature'
        >>> a_work_in_progress_feature(1, 2, 3)
        (1, 2, 3)
    """
    decorated = fn
    return decorated
# `deprecated` is resolved differently for static analysis and at runtime:
# type checkers see the real decorator, while the runtime definition below
# hands the decorated object back unchanged.
if TYPE_CHECKING:
    import sys

    if sys.version_info >= (3, 13):
        # NOTE: avoids `mypy`
        # error: Module "narwhals.utils" does not explicitly export attribute "deprecated" [attr-defined]
        from warnings import deprecated as deprecated  # noqa: PLC0414
    else:
        from typing_extensions import deprecated as deprecated  # noqa: PLC0414
else:

    def deprecated(message: str, /) -> Callable[[_Fn], _Fn]:  # noqa: ARG001
        """Runtime stand-in for `deprecated`: `message` is ignored.

        Arguments:
            message: Deprecation message; unused by this definition.

        Returns:
            A decorator that returns its argument unchanged.
        """

        def wrapper(func: _Fn, /) -> _Fn:
            # Identity: nothing is wrapped and nothing is emitted.
            return func

        return wrapper
class not_implemented:  # noqa: N801
    """Descriptor marking an attribute or method as unsupported.

    Looking the attribute up on the owning class returns the descriptor itself;
    looking it up on an instance (or calling the descriptor) raises
    `NotImplementedError`.

    Arguments:
        alias: optional name used instead of the data model hook [`__set_name__`].

    Returns:
        An exception-raising [descriptor].

    Notes:
        - Attribute/method name *doesn't* need to be declared twice
        - Allows different behavior when looked up on the class vs instance
        - Allows us to use `isinstance(...)` instead of monkeypatching an attribute to the function

    Examples:
        >>> from narwhals.utils import not_implemented
        >>> class Thing:
        ...     def totally_ready(self) -> str:
        ...         return "I'm ready!"
        ...
        ...     not_ready_yet = not_implemented()
        >>>
        >>> thing = Thing()
        >>> thing.totally_ready()
        "I'm ready!"
        >>> thing.not_ready_yet()
        Traceback (most recent call last):
            ...
        NotImplementedError: 'not_ready_yet' is not implemented for: 'Thing'.
        ...
        >>> isinstance(Thing.not_ready_yet, not_implemented)
        True

    [`__set_name__`]: https://docs.python.org/3/reference/datamodel.html#object.__set_name__
    [descriptor]: https://docs.python.org/3/howto/descriptor.html
    """

    def __init__(self, alias: str | None = None, /) -> None:
        # NOTE: Don't like this
        # Trying to workaround `mypy` requiring `@property` everywhere
        self._alias: str | None = alias

    def __repr__(self) -> str:
        # `_name_owner` / `_name` are assigned in `__set_name__`.
        kind = type(self).__name__
        return f"<{kind}>: {self._name_owner}.{self._name}"

    def __set_name__(self, owner: type[_T], name: str) -> None:
        # https://docs.python.org/3/howto/descriptor.html#customized-names
        self._name_owner: str = owner.__name__
        # An explicit alias takes precedence over the attribute name.
        self._name: str = self._alias or name

    def __get__(
        self, instance: _T | Literal["raise"] | None, owner: type[_T] | None = None, /
    ) -> Any:
        if instance is not None:
            # NOTE: Prefer not exposing the actual class we're defining in
            # `_implementation` may not be available everywhere
            who = getattr(instance, "_implementation", self._name_owner)
            raise _not_implemented_error(self._name, who)
        # NOTE: Branch for `cls._name`
        # We can check that to see if an instance of `type(self)` for
        # https://narwhals-dev.github.io/narwhals/api-completeness/expr/
        return self

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        # NOTE: Purely to duck-type as assignable to **any** instance method
        # Wouldn't be reachable through *regular* attribute access
        return self.__get__("raise")

    @classmethod
    def deprecated(cls, message: LiteralString, /) -> Self:
        """Alternative constructor that passes the new descriptor through `@deprecated`.

        Arguments:
            message: **Static-only** deprecation message, emitted in an IDE.

        Returns:
            An exception-raising [descriptor].

        [descriptor]: https://docs.python.org/3/howto/descriptor.html
        """
        descriptor = cls()
        mark_deprecated = deprecated(message)
        return mark_deprecated(descriptor)
def _not_implemented_error(what: str, who: str, /) -> NotImplementedError:
    """Build (without raising) the error reporting that `what` is unsupported for `who`.

    Arguments:
        what: Name of the missing functionality.
        who: Name of the class or implementation lacking it.

    Returns:
        A `NotImplementedError` whose message also invites opening an issue.
    """
    headline = f"{what!r} is not implemented for: {who!r}."
    call_to_action = (
        "If you would like to see this functionality in `narwhals`, "
        "please open an issue at: https://github.com/narwhals-dev/narwhals/issues"
    )
    return NotImplementedError(f"{headline}\n\n{call_to_action}")