Files
Buffteks-Website/streamlit-venv/lib/python3.10/site-packages/narwhals/translate.py
2025-01-10 21:40:35 +00:00

905 lines
27 KiB
Python
Executable File

from __future__ import annotations
from datetime import datetime
from datetime import timedelta
from functools import wraps
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Literal
from typing import TypeVar
from typing import overload
from narwhals.dependencies import get_cudf
from narwhals.dependencies import get_cupy
from narwhals.dependencies import get_dask
from narwhals.dependencies import get_dask_expr
from narwhals.dependencies import get_modin
from narwhals.dependencies import get_numpy
from narwhals.dependencies import get_pandas
from narwhals.dependencies import get_polars
from narwhals.dependencies import get_pyarrow
from narwhals.dependencies import is_cudf_dataframe
from narwhals.dependencies import is_cudf_series
from narwhals.dependencies import is_dask_dataframe
from narwhals.dependencies import is_duckdb_relation
from narwhals.dependencies import is_ibis_table
from narwhals.dependencies import is_modin_dataframe
from narwhals.dependencies import is_modin_series
from narwhals.dependencies import is_pandas_dataframe
from narwhals.dependencies import is_pandas_series
from narwhals.dependencies import is_polars_dataframe
from narwhals.dependencies import is_polars_lazyframe
from narwhals.dependencies import is_polars_series
from narwhals.dependencies import is_pyarrow_chunked_array
from narwhals.dependencies import is_pyarrow_table
if TYPE_CHECKING:
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.series import Series
from narwhals.typing import DTypes
from narwhals.typing import IntoDataFrameT
from narwhals.typing import IntoFrameT
from narwhals.typing import IntoSeriesT
# Generic placeholder used by the `from_native` overloads that are typed as
# returning their input with its type unchanged.
T = TypeVar("T")

# Built-in, non-temporal Python scalar types. `to_py_scalar` returns instances
# of these unchanged.
NON_TEMPORAL_SCALAR_TYPES = (
    bool,
    bytes,
    str,
    int,
    float,
    complex,
)
@overload
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT], *, strict: Literal[True] = ...
) -> IntoDataFrameT: ...


@overload
def to_native(
    narwhals_object: LazyFrame[IntoFrameT], *, strict: Literal[True] = ...
) -> IntoFrameT: ...


@overload
def to_native(narwhals_object: Series, *, strict: Literal[True] = ...) -> Any: ...


@overload
def to_native(narwhals_object: Any, *, strict: bool) -> Any: ...


def to_native(
    narwhals_object: DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series,
    *,
    strict: bool = True,
) -> IntoFrameT | Any:
    """
    Unwrap a Narwhals object, giving back the native object it holds.

    Arguments:
        narwhals_object: Narwhals DataFrame, LazyFrame or Series.
        strict: If True (default), raise when the input is not a Narwhals
            object; if False, hand such input back unchanged.

    Returns:
        The native frame or series held by `narwhals_object`, or
        `narwhals_object` itself when it is not a Narwhals object and
        `strict=False`.

    Raises:
        TypeError: If `narwhals_object` is not a Narwhals object and
            `strict=True`.
    """
    # Imported lazily, inside the function, as in the rest of this module.
    from narwhals.dataframe import BaseFrame
    from narwhals.series import Series

    if isinstance(narwhals_object, BaseFrame):
        compliant_frame = narwhals_object._compliant_frame
        return compliant_frame._native_frame
    if isinstance(narwhals_object, Series):
        compliant_series = narwhals_object._compliant_series
        return compliant_series._native_series
    if not strict:
        # Non-strict mode: anything that is not a Narwhals object passes through.
        return narwhals_object
    msg = f"Expected Narwhals object, got {type(narwhals_object)}."
    raise TypeError(msg)
# Typing-only overloads for `from_native`. Each one pins a combination of the
# keyword flags to the return type a type checker should infer for it.

# from_native(obj, strict=False, eager_or_interchange_only=True, allow_series=True)
# NOTE(review): series inputs are accepted here (`IntoSeriesT`), and the
# implementation returns a `Series` for them when `allow_series=True`, yet the
# declared return type omits `Series` (unlike the `eager_only=True` overload
# just below) — confirm whether this is intentional.
@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: Literal[True],
    series_only: None = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT]: ...


# from_native(obj, strict=False, eager_only=True, allow_series=True)
@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series: ...


# from_native(df, strict=False, eager_or_interchange_only=True)
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: Literal[True],
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# Same flags as the overload above, for any other input: typed as coming back
# with its type unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: Literal[True],
    series_only: None = ...,
    allow_series: None = ...,
) -> T: ...


# from_native(df, strict=False, eager_only=True)
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# Same flags as the overload above, for any other input: typed as coming back
# with its type unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> T: ...


# from_native(obj, strict=False, allow_series=True)
@overload
def from_native(
    native_object: IntoFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series: ...


# from_native(series, strict=False, series_only=True)
@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series: ...


# from_native(df, strict=False)
@overload
def from_native(
    native_object: IntoFrameT,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# Same flags as the overload above, for any other input: typed as coming back
# with its type unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> T: ...


# The overloads below cover strict mode, either passed explicitly or left at
# its default; each docstring lists the call forms the overload matches.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: None = ...,
    eager_or_interchange_only: Literal[True],
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]:
    """
    from_native(df, strict=True, eager_or_interchange_only=True)
    from_native(df, eager_or_interchange_only=True)
    """


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[True],
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]:
    """
    from_native(df, strict=True, eager_only=True)
    from_native(df, eager_only=True)
    """


@overload
def from_native(
    native_object: IntoFrameT | IntoSeriesT,
    *,
    strict: Literal[True] = ...,
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series:
    """
    from_native(df, strict=True, allow_series=True)
    from_native(df, allow_series=True)
    """


@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    strict: Literal[True] = ...,
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series:
    """
    from_native(df, strict=True, series_only=True)
    from_native(df, series_only=True)
    """


@overload
def from_native(
    native_object: IntoFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: None = ...,
    eager_or_interchange_only: None = ...,
    series_only: None = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]:
    """
    from_native(df, strict=True)
    from_native(df)
    """


# All params passed in as variables
# (the flags are not literals here, so the result is typed as `Any`).
@overload
def from_native(
    native_object: Any,
    *,
    strict: bool,
    eager_only: bool | None,
    eager_or_interchange_only: bool | None = None,
    series_only: bool | None,
    allow_series: bool | None,
) -> Any: ...
def from_native(
    native_object: Any,
    *,
    strict: bool = True,
    eager_only: bool | None = None,
    eager_or_interchange_only: bool | None = None,
    series_only: bool | None = None,
    allow_series: bool | None = None,
) -> Any:
    """
    Wrap a native dataframe or series in a Narwhals DataFrame, LazyFrame, or Series.

    Every argument is forwarded unchanged to `_from_native_impl`, together
    with the `narwhals.dtypes` module as the `dtypes` argument.

    Arguments:
        native_object: Raw object from user.
            Depending on the other arguments, input object can be:

            - pandas.DataFrame
            - polars.DataFrame
            - polars.LazyFrame
            - anything with a `__narwhals_dataframe__` or `__narwhals_lazyframe__` method
            - pandas.Series
            - polars.Series
            - anything with a `__narwhals_series__` method
        strict: Whether to raise if the object can't be converted (default) or
            to just leave it as-is.
        eager_only: Whether to only allow eager objects.
        eager_or_interchange_only: Whether to only allow eager objects or objects
            which implement the Dataframe Interchange Protocol.
        series_only: Whether to only allow series.
        allow_series: Whether to allow series (default is only dataframe / lazyframe).

    Returns:
        narwhals.DataFrame or narwhals.LazyFrame or narwhals.Series
    """
    # Lazy import, resolved at call time like the other in-function imports
    # of this module.
    from narwhals import dtypes as narwhals_dtypes

    return _from_native_impl(
        native_object,
        dtypes=narwhals_dtypes,  # type: ignore[arg-type]
        strict=strict,
        eager_only=eager_only,
        eager_or_interchange_only=eager_or_interchange_only,
        series_only=series_only,
        allow_series=allow_series,
    )
def _from_native_impl(  # noqa: PLR0915
    native_object: Any,
    *,
    strict: bool = True,
    eager_only: bool | None = None,
    eager_or_interchange_only: bool | None = None,
    series_only: bool | None = None,
    allow_series: bool | None = None,
    dtypes: DTypes,
) -> Any:
    """
    Wrap `native_object` in the matching Narwhals DataFrame, LazyFrame or Series.

    The checks run in a fixed order; the first one that matches decides the
    outcome:

    1. Objects that are already Narwhals frames/series are returned as-is.
    2. The flag combination is validated.
    3. Objects exposing `__narwhals_dataframe__`, `__narwhals_lazyframe__` or
       `__narwhals_series__`.
    4. Polars, pandas, Modin, cuDF, PyArrow, Dask, DuckDB and Ibis objects.
    5. Anything exposing `__dataframe__` (wrapped at "interchange" level).

    Whenever the matched object conflicts with the requested flags (e.g. a
    dataframe with `series_only=True`), a `TypeError` is raised if `strict`
    is true; otherwise `native_object` is returned unchanged.

    Arguments:
        native_object: Object to convert.
        strict: Whether to raise if the object can't be converted, or to
            return it unchanged.
        eager_only: Whether to only allow eager objects.
        eager_or_interchange_only: Whether to only allow eager objects or
            objects wrapped at "interchange" level.
        series_only: Whether to only allow series. Implies `allow_series`.
        allow_series: Whether to allow series in addition to frames.
        dtypes: Passed through to every backend wrapper this function builds.

    Returns:
        A Narwhals DataFrame, LazyFrame or Series, or `native_object` itself
        when it cannot be converted and `strict` is false.

    Raises:
        ValueError: If `series_only=True` is combined with
            `allow_series=False`, or `eager_only=True` with
            `eager_or_interchange_only=True`.
        TypeError: If `strict` is true and `native_object` is unsupported or
            conflicts with the requested flags.
        ImportError: If a Dask dataframe is passed and `get_dask_expr()`
            returns `None`.
    """
    from narwhals._arrow.dataframe import ArrowDataFrame
    from narwhals._arrow.series import ArrowSeries
    from narwhals._dask.dataframe import DaskLazyFrame
    from narwhals._duckdb.dataframe import DuckDBInterchangeFrame
    from narwhals._ibis.dataframe import IbisInterchangeFrame
    from narwhals._interchange.dataframe import InterchangeFrame
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
    from narwhals._pandas_like.series import PandasLikeSeries
    from narwhals._polars.dataframe import PolarsDataFrame
    from narwhals._polars.dataframe import PolarsLazyFrame
    from narwhals._polars.series import PolarsSeries
    from narwhals.dataframe import DataFrame
    from narwhals.dataframe import LazyFrame
    from narwhals.series import Series
    from narwhals.utils import Implementation
    from narwhals.utils import parse_version

    # Early returns
    if isinstance(native_object, (DataFrame, LazyFrame)) and not series_only:
        return native_object
    if isinstance(native_object, Series) and (series_only or allow_series):
        return native_object

    if series_only:
        if allow_series is False:
            msg = "Invalid parameter combination: `series_only=True` and `allow_series=False`"
            raise ValueError(msg)
        # `series_only` implies that series are allowed.
        allow_series = True
    if eager_only and eager_or_interchange_only:
        msg = "Invalid parameter combination: `eager_only=True` and `eager_or_interchange_only=True`"
        raise ValueError(msg)

    # Extensions
    if hasattr(native_object, "__narwhals_dataframe__"):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with dataframe"
                raise TypeError(msg)
            return native_object
        return DataFrame(
            native_object.__narwhals_dataframe__(),
            level="full",
        )
    elif hasattr(native_object, "__narwhals_lazyframe__"):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with lazyframe"
                raise TypeError(msg)
            return native_object
        if eager_only or eager_or_interchange_only:
            if strict:
                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe"
                raise TypeError(msg)
            return native_object
        return LazyFrame(
            native_object.__narwhals_lazyframe__(),
            level="full",
        )
    elif hasattr(native_object, "__narwhals_series__"):
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        return Series(
            native_object.__narwhals_series__(),
            level="full",
        )

    # Polars
    elif is_polars_dataframe(native_object):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with polars.DataFrame"
                raise TypeError(msg)
            return native_object
        pl = get_polars()
        return DataFrame(
            PolarsDataFrame(
                native_object,
                backend_version=parse_version(pl.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_polars_lazyframe(native_object):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with polars.LazyFrame"
                raise TypeError(msg)
            return native_object
        if eager_only or eager_or_interchange_only:
            if strict:
                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with polars.LazyFrame"
                raise TypeError(msg)
            return native_object
        pl = get_polars()
        return LazyFrame(
            PolarsLazyFrame(
                native_object,
                backend_version=parse_version(pl.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_polars_series(native_object):
        pl = get_polars()
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        return Series(
            PolarsSeries(
                native_object,
                backend_version=parse_version(pl.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )

    # pandas
    elif is_pandas_dataframe(native_object):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with dataframe"
                raise TypeError(msg)
            return native_object
        pd = get_pandas()
        return DataFrame(
            PandasLikeDataFrame(
                native_object,
                backend_version=parse_version(pd.__version__),
                implementation=Implementation.PANDAS,
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_pandas_series(native_object):
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        pd = get_pandas()
        return Series(
            PandasLikeSeries(
                native_object,
                implementation=Implementation.PANDAS,
                backend_version=parse_version(pd.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )

    # Modin
    elif is_modin_dataframe(native_object):  # pragma: no cover
        mpd = get_modin()
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with modin.DataFrame"
                raise TypeError(msg)
            return native_object
        return DataFrame(
            PandasLikeDataFrame(
                native_object,
                implementation=Implementation.MODIN,
                backend_version=parse_version(mpd.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_modin_series(native_object):  # pragma: no cover
        mpd = get_modin()
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        return Series(
            PandasLikeSeries(
                native_object,
                implementation=Implementation.MODIN,
                backend_version=parse_version(mpd.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )

    # cuDF
    elif is_cudf_dataframe(native_object):  # pragma: no cover
        cudf = get_cudf()
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with cudf.DataFrame"
                raise TypeError(msg)
            return native_object
        return DataFrame(
            PandasLikeDataFrame(
                native_object,
                implementation=Implementation.CUDF,
                backend_version=parse_version(cudf.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_cudf_series(native_object):  # pragma: no cover
        cudf = get_cudf()
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        return Series(
            PandasLikeSeries(
                native_object,
                implementation=Implementation.CUDF,
                backend_version=parse_version(cudf.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )

    # PyArrow
    elif is_pyarrow_table(native_object):
        pa = get_pyarrow()
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with arrow table"
                raise TypeError(msg)
            return native_object
        return DataFrame(
            ArrowDataFrame(
                native_object,
                backend_version=parse_version(pa.__version__),
                dtypes=dtypes,
            ),
            level="full",
        )
    elif is_pyarrow_chunked_array(native_object):
        pa = get_pyarrow()
        if not allow_series:
            if strict:
                msg = "Please set `allow_series=True` or `series_only=True`"
                raise TypeError(msg)
            return native_object
        return Series(
            ArrowSeries(
                native_object,
                backend_version=parse_version(pa.__version__),
                name="",
                dtypes=dtypes,
            ),
            level="full",
        )

    # Dask
    elif is_dask_dataframe(native_object):
        if series_only:
            if strict:
                msg = "Cannot only use `series_only` with dask DataFrame"
                raise TypeError(msg)
            return native_object
        if eager_only or eager_or_interchange_only:
            if strict:
                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with dask DataFrame"
                raise TypeError(msg)
            return native_object
        if get_dask_expr() is None:  # pragma: no cover
            msg = "Please install dask-expr"
            raise ImportError(msg)
        return LazyFrame(
            DaskLazyFrame(
                native_object,
                backend_version=parse_version(get_dask().__version__),
                dtypes=dtypes,
            ),
            level="full",
        )

    # DuckDB
    elif is_duckdb_relation(native_object):
        if eager_only or series_only:  # pragma: no cover
            if strict:
                # The guard fires when `eager_only` is truthy, so the message
                # names `eager_only=True`.
                msg = (
                    "Cannot only use `series_only=True` or `eager_only=True` "
                    "with DuckDB Relation"
                )
                raise TypeError(msg)
            return native_object
        return DataFrame(
            DuckDBInterchangeFrame(native_object, dtypes=dtypes),
            level="interchange",
        )

    # Ibis
    elif is_ibis_table(native_object):  # pragma: no cover
        if eager_only or series_only:
            if strict:
                msg = (
                    "Cannot only use `series_only=True` or `eager_only=True` "
                    "with Ibis table"
                )
                raise TypeError(msg)
            return native_object
        return DataFrame(
            IbisInterchangeFrame(native_object, dtypes=dtypes),
            level="interchange",
        )

    # Interchange protocol
    elif hasattr(native_object, "__dataframe__"):
        if eager_only or series_only:
            if strict:
                msg = (
                    "Cannot only use `series_only=True` or `eager_only=True` "
                    "with object which only implements __dataframe__"
                )
                raise TypeError(msg)
            return native_object
        return DataFrame(
            InterchangeFrame(native_object, dtypes=dtypes),
            level="interchange",
        )

    elif strict:
        msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(native_object)}"
        raise TypeError(msg)
    # Non-strict mode: unsupported objects are handed back untouched.
    return native_object
def get_native_namespace(obj: Any) -> Any:
    """
    Return the native namespace reported by `obj`.

    Arguments:
        obj: Object exposing a `__native_namespace__` method.

    Returns:
        Whatever `obj.__native_namespace__()` returns.

    Examples:
        >>> import polars as pl
        >>> import pandas as pd
        >>> import narwhals as nw
        >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
        >>> nw.get_native_namespace(df)
        <module 'pandas'...>
        >>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
        >>> nw.get_native_namespace(df)
        <module 'polars'...>
    """
    resolve_namespace = obj.__native_namespace__
    return resolve_namespace()
def narwhalify(
func: Callable[..., Any] | None = None,
*,
strict: bool = False,
eager_only: bool | None = False,
eager_or_interchange_only: bool | None = False,
series_only: bool | None = False,
allow_series: bool | None = True,
) -> Callable[..., Any]:
"""
Decorate function so it becomes dataframe-agnostic.
`narwhalify` will try to convert any dataframe/series-like object into the narwhal
respective DataFrame/Series, while leaving the other parameters as they are.
Similarly, if the output of the function is a narwhals DataFrame or Series, it will be
converted back to the original dataframe/series type, while if the output is another
type it will be left as is.
By setting `strict=True`, then every input and every output will be required to be a
dataframe/series-like object.
Instead of writing
```python
import narwhals as nw
def func(df):
df = nw.from_native(df, strict=False)
df = df.group_by("a").agg(nw.col("b").sum())
return nw.to_native(df)
```
you can just write
```python
import narwhals as nw
@nw.narwhalify
def func(df):
return df.group_by("a").agg(nw.col("b").sum())
```
You can also pass in extra arguments, e.g.
```python
@nw.narwhalify(eager_only=True)
```
that will get passed down to `nw.from_native`.
Arguments:
func: Function to wrap in a `from_native`-`to_native` block.
strict: Whether to raise if object can't be converted or to just leave it as-is
(default).
eager_only: Whether to only allow eager objects.
eager_or_interchange_only: Whether to only allow eager objects or objects which
implement the Dataframe Interchange Protocol.
series_only: Whether to only allow series.
allow_series: Whether to allow series (default is only dataframe / lazyframe).
"""
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
args = [
from_native(
arg,
strict=strict,
eager_only=eager_only,
eager_or_interchange_only=eager_or_interchange_only,
series_only=series_only,
allow_series=allow_series,
)
for arg in args
] # type: ignore[assignment]
kwargs = {
name: from_native(
value,
strict=strict,
eager_only=eager_only,
eager_or_interchange_only=eager_or_interchange_only,
series_only=series_only,
allow_series=allow_series,
)
for name, value in kwargs.items()
}
backends = {
b()
for v in (*args, *kwargs.values())
if (b := getattr(v, "__native_namespace__", None))
}
if len(backends) > 1:
msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend."
raise ValueError(msg)
result = func(*args, **kwargs)
return to_native(result, strict=strict)
return wrapper
if func is None:
return decorator
else:
# If func is not None, it means the decorator is used without arguments
return decorator(func)
def to_py_scalar(scalar_like: Any) -> Any:
    """If a scalar is not Python native, converts it to Python native.

    Arguments:
        scalar_like: Scalar-like object to convert.

    Returns:
        `None` for `None` and for values pandas reports as missing;
        the input itself when it is already a Python scalar (`bool`, `bytes`,
        `str`, `int`, `float`, `complex`, `datetime`, `date`, `time`,
        `timedelta`); otherwise the result of the backend's own conversion
        (`.item()`, `.to_pydatetime()`, `.to_pytimedelta()` or `.as_py()`).

    Raises:
        ValueError: If the object is not convertible to a scalar.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
        >>> nw.to_py_scalar(df["a"].item(0))
        1
        >>> import pyarrow as pa
        >>> df = nw.from_native(pa.table({"a": [1, 2, 3]}))
        >>> nw.to_py_scalar(df["a"].item(0))
        1
        >>> nw.to_py_scalar(1)
        1
    """
    # Imported locally so this block does not depend on edits to the
    # file-level import section.
    from datetime import date
    from datetime import time

    if scalar_like is None:
        return None

    if isinstance(scalar_like, NON_TEMPORAL_SCALAR_TYPES):
        return scalar_like

    np = get_numpy()
    if np and np.isscalar(scalar_like) and hasattr(scalar_like, "item"):
        return scalar_like.item()

    pd = get_pandas()
    if pd and isinstance(scalar_like, pd.Timestamp):
        return scalar_like.to_pydatetime()
    if pd and isinstance(scalar_like, pd.Timedelta):
        return scalar_like.to_pytimedelta()
    if pd and pd.api.types.is_scalar(scalar_like):
        # Best-effort missing-value detection: if `pd.isna` cannot handle the
        # object, fall through to the remaining checks instead of failing.
        try:
            is_na = pd.isna(scalar_like)
        except Exception:  # pragma: no cover # noqa: BLE001, S110
            pass
        else:
            if is_na:
                return None

    # pd.Timestamp and pd.Timedelta subclass datetime and timedelta,
    # so we need to check this separately (after the pandas conversions).
    # Plain `date` and `time` values are Python-native scalars as well and are
    # returned unchanged instead of falling through to the error below.
    if isinstance(scalar_like, (datetime, date, time, timedelta)):
        return scalar_like

    pa = get_pyarrow()
    if pa and isinstance(scalar_like, pa.Scalar):
        return scalar_like.as_py()

    cupy = get_cupy()
    if (  # pragma: no cover
        cupy and isinstance(scalar_like, cupy.ndarray) and scalar_like.size == 1
    ):
        return scalar_like.item()

    msg = (
        f"Expected object convertible to a scalar, found {type(scalar_like)}. "
        "Please report a bug to https://github.com/narwhals-dev/narwhals/issues"
    )
    raise ValueError(msg)
# Names exported by a star-import of this module. `from_native` is a public
# function defined in this file and is listed alongside the others.
__all__ = [
    "from_native",
    "get_native_namespace",
    "to_native",
    "narwhalify",
    "to_py_scalar",
]