Files
Buffteks-Website/venv/lib/python3.12/site-packages/narwhals/_polars/series.py
2025-05-08 21:10:14 -05:00

788 lines
25 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Iterator
from typing import Mapping
from typing import Sequence
from typing import cast
from typing import overload
import polars as pl
from narwhals._polars.utils import catch_polars_exception
from narwhals._polars.utils import extract_args_kwargs
from narwhals._polars.utils import extract_native
from narwhals._polars.utils import narwhals_to_native_dtype
from narwhals._polars.utils import native_to_narwhals_dtype
from narwhals.dependencies import is_numpy_array_1d
from narwhals.utils import Implementation
from narwhals.utils import requires
from narwhals.utils import validate_backend_version
if TYPE_CHECKING:
from types import ModuleType
from typing import TypeVar
import pandas as pd
import pyarrow as pa
from typing_extensions import Self
from typing_extensions import TypeIs
from narwhals._polars.dataframe import Method
from narwhals._polars.dataframe import PolarsDataFrame
from narwhals._polars.expr import PolarsExpr
from narwhals._polars.namespace import PolarsNamespace
from narwhals.dtypes import DType
from narwhals.series import Series
from narwhals.typing import Into1DArray
from narwhals.typing import MultiIndexSelector
from narwhals.typing import _1DArray
from narwhals.utils import Version
from narwhals.utils import _FullContext
T = TypeVar("T")
# Names of `pl.Series` methods that `PolarsSeries` does not reimplement.
# `PolarsSeries.__getattr__` checks attribute names against this set: a listed
# name is forwarded to the wrapped native series, any other name raises
# `AttributeError`.
INHERITED_METHODS = frozenset(
[
"__add__",
"__and__",
"__floordiv__",
"__invert__",
"__iter__",
"__mod__",
"__mul__",
"__or__",
"__pow__",
"__radd__",
"__rand__",
"__rfloordiv__",
"__rmod__",
"__rmul__",
"__ror__",
"__rsub__",
"__rtruediv__",
"__sub__",
"__truediv__",
"abs",
"all",
"any",
"arg_max",
"arg_min",
"arg_true",
"clip",
"count",
"cum_max",
"cum_min",
"cum_prod",
"cum_sum",
"diff",
"drop_nulls",
"fill_null",
"filter",
"gather_every",
"head",
"is_between",
"is_finite",
"is_first_distinct",
"is_in",
"is_last_distinct",
"is_null",
"is_sorted",
"is_unique",
"item",
"len",
"max",
"mean",
"min",
"mode",
"n_unique",
"null_count",
"quantile",
"rank",
"round",
"sample",
"shift",
"skew",
"std",
"sum",
"tail",
"to_arrow",
"to_frame",
"to_list",
"to_pandas",
"unique",
"var",
"zip_with",
]
)
class PolarsSeries:
def __init__(
    self,
    series: pl.Series,
    *,
    backend_version: tuple[int, ...],
    version: Version,
) -> None:
    """Wrap a native Polars series.

    Args:
        series: The native `pl.Series` to wrap.
        backend_version: Version tuple of the Polars backend in use.
        version: Narwhals API version associated with this wrapper.
    """
    self._implementation = Implementation.POLARS
    self._version = version
    self._backend_version = backend_version
    self._native_series: pl.Series = series
    # Run the shared backend-version validation for this implementation.
    validate_backend_version(self._implementation, self._backend_version)
def __repr__(self) -> str:  # pragma: no cover
    """Return a fixed label; the wrapped data is not included."""
    label = "PolarsSeries"
    return label
def __narwhals_namespace__(self) -> PolarsNamespace:
    """Build a `PolarsNamespace` carrying this series' backend and API versions."""
    # Runtime import: at module level this name is only imported under TYPE_CHECKING.
    from narwhals._polars.namespace import PolarsNamespace

    backend_version, version = self._backend_version, self._version
    return PolarsNamespace(backend_version=backend_version, version=version)
def __narwhals_series__(self) -> Self:
    """Return this object unchanged."""
    compliant = self
    return compliant
def __native_namespace__(self) -> ModuleType:
    """Return the module given by `Implementation.to_native_namespace()`.

    Raises:
        AssertionError: If the stored implementation is not `Implementation.POLARS`.
    """
    if self._implementation is Implementation.POLARS:
        return self._implementation.to_native_namespace()

    # Report the implementation value itself: `type(self._implementation)` would
    # always render as the enum class and hide which implementation was found.
    msg = f"Expected polars, got: {self._implementation}"  # pragma: no cover
    raise AssertionError(msg)
def _with_version(self, version: Version) -> Self:
    """Return a new wrapper around the same native series, using `version`."""
    native = self.native
    backend_version = self._backend_version
    return self.__class__(native, backend_version=backend_version, version=version)
@classmethod
def from_iterable(
    cls,
    data: Iterable[Any],
    *,
    context: _FullContext,
    name: str = "",
    dtype: DType | type[DType] | None = None,
) -> Self:
    """Create a series from an iterable of values.

    Args:
        data: Values for the new series.
        context: Object supplying `_version` and `_backend_version`.
        name: Name given to the native series.
        dtype: Optional Narwhals dtype; translated to a Polars dtype when truthy.
    """
    version, backend_version = context._version, context._backend_version
    if dtype:
        native_dtype = narwhals_to_native_dtype(dtype, version, backend_version)
    else:
        native_dtype = None
    # NOTE: `Iterable` is fine, annotation is overly narrow
    # https://github.com/pola-rs/polars/blob/82d57a4ee41f87c11ca1b1af15488459727efdd7/py-polars/polars/series/series.py#L332-L333
    values = cast("Sequence[Any]", data)
    return cls.from_native(
        pl.Series(name=name, values=values, dtype=native_dtype), context=context
    )
@staticmethod
def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]:
    """Tell whether `obj` is a `pl.Series` instance."""
    native_type = pl.Series
    return isinstance(obj, native_type)
@classmethod
def from_native(cls, data: pl.Series, /, *, context: _FullContext) -> Self:
    """Wrap `data`, taking the backend and API versions from `context`."""
    backend_version = context._backend_version
    version = context._version
    return cls(data, backend_version=backend_version, version=version)
@classmethod
def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
    """Create a series from `data`.

    Input that `is_numpy_array_1d` does not accept is wrapped in a
    one-element list before being handed to `pl.Series`.
    """
    if is_numpy_array_1d(data):
        values = data
    else:
        values = [data]
    return cls.from_native(pl.Series(values), context=context)
def to_narwhals(self) -> Series[pl.Series]:
    """Wrap this object with `self._version.series`, using `level="full"`."""
    make_series = self._version.series
    return make_series(self, level="full")
def _with_native(self, series: pl.Series) -> Self:
    """Return a new wrapper for `series`, keeping this object's versions."""
    backend_version, version = self._backend_version, self._version
    return self.__class__(series, backend_version=backend_version, version=version)
@overload
def _from_native_object(self, series: pl.Series) -> Self: ...
@overload
def _from_native_object(self, series: pl.DataFrame) -> PolarsDataFrame: ...
@overload
def _from_native_object(self, series: T) -> T: ...
def _from_native_object(
    self, series: pl.Series | pl.DataFrame | T
) -> Self | PolarsDataFrame | T:
    """Wrap a native result according to its type.

    A `pl.Series` becomes a new wrapper of this class, a `pl.DataFrame`
    becomes a `PolarsDataFrame`, and anything else is returned as-is.
    """
    if self._is_native(series):
        wrapped = self._with_native(series)
    elif isinstance(series, pl.DataFrame):
        from narwhals._polars.dataframe import PolarsDataFrame

        wrapped = PolarsDataFrame.from_native(series, context=self)
    else:
        # scalar
        wrapped = series
    return wrapped
def _to_expr(self) -> PolarsExpr:
    """Convert this series to an expression via the namespace's `_expr._from_series`."""
    namespace = self.__narwhals_namespace__()
    return namespace._expr._from_series(self)
def __getattr__(self, attr: str) -> Any:
    """Forward method names listed in `INHERITED_METHODS` to the native series.

    Args:
        attr: Attribute name that normal lookup did not find.

    Returns:
        A callable that converts its arguments with `extract_args_kwargs`,
        calls the same-named method on the native series, and wraps the
        result with `_from_native_object`.

    Raises:
        AttributeError: If `attr` is not in `INHERITED_METHODS`.
    """
    if attr not in INHERITED_METHODS:
        # Message previously read "has not attribute"; fixed the typo.
        msg = f"{self.__class__.__name__} has no attribute '{attr}'."
        raise AttributeError(msg)

    def func(*args: Any, **kwargs: Any) -> Any:
        pos, kwds = extract_args_kwargs(args, kwargs)
        return self._from_native_object(getattr(self.native, attr)(*pos, **kwds))

    return func
def __len__(self) -> int:
    """Return `len()` of the native series."""
    native = self.native
    return len(native)
@property
def name(self) -> str:
    """Name of the native series."""
    native = self.native
    return native.name
@property
def dtype(self) -> DType:
    """Narwhals dtype translated from the native series' dtype."""
    native_dtype = self.native.dtype
    return native_to_narwhals_dtype(native_dtype, self._version, self._backend_version)
@property
def native(self) -> pl.Series:
    """The wrapped `pl.Series`."""
    wrapped = self._native_series
    return wrapped
def alias(self, name: str) -> Self:
    """Call the native `alias(name)` and wrap the result."""
    renamed = self.native.alias(name)
    return self._from_native_object(renamed)
def __getitem__(self, item: MultiIndexSelector[Self]) -> Any | Self:
    """Index the native series and wrap whatever comes back.

    A `PolarsSeries` selector is unwrapped to its native series first.
    """
    key = item.native if isinstance(item, PolarsSeries) else item
    selected = self.native.__getitem__(key)
    return self._from_native_object(selected)
def cast(self, dtype: DType | type[DType]) -> Self:
    """Cast to `dtype` after translating it to the native Polars dtype."""
    native_dtype = narwhals_to_native_dtype(dtype, self._version, self._backend_version)
    converted = self.native.cast(native_dtype)
    return self._with_native(converted)
@requires.backend_version((1,))
def replace_strict(
    self,
    old: Sequence[Any] | Mapping[Any, Any],
    new: Sequence[Any],
    *,
    return_dtype: DType | type[DType] | None,
) -> Self:
    """Forward to the native `replace_strict`.

    Args:
        old: Values to replace, or a mapping, passed through unchanged.
        new: Replacement values, passed through unchanged.
        return_dtype: Narwhals dtype for the result; translated to a Polars
            dtype when truthy, otherwise `None` is passed.
    """
    native = self.native
    if return_dtype:
        native_dtype = narwhals_to_native_dtype(
            return_dtype, self._version, self._backend_version
        )
    else:
        native_dtype = None
    replaced = native.replace_strict(old, new, return_dtype=native_dtype)
    return self._with_native(replaced)
def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
    """Convert to a NumPy array by delegating to `__array__`."""
    array = self.__array__(dtype, copy=copy)
    return array
def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray:
    """Call the native `__array__`.

    `copy` is only forwarded when the backend version is at least 0.20.29.
    """
    options: dict[str, Any] = {"dtype": dtype}
    if self._backend_version >= (0, 20, 29):
        options["copy"] = copy
    return self.native.__array__(**options)
def __eq__(self, other: object) -> Self:  # type: ignore[override]
    """Apply the native `__eq__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__eq__(rhs))
def __ne__(self, other: object) -> Self:  # type: ignore[override]
    """Apply the native `__ne__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__ne__(rhs))
# NOTE: `pyright` is being reasonable here
def __ge__(self, other: Any) -> Self:
    """Apply the native `__ge__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__ge__(rhs))  # pyright: ignore[reportArgumentType]
def __gt__(self, other: Any) -> Self:
    """Apply the native `__gt__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__gt__(rhs))  # pyright: ignore[reportArgumentType]
def __le__(self, other: Any) -> Self:
    """Apply the native `__le__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__le__(rhs))  # pyright: ignore[reportArgumentType]
def __lt__(self, other: Any) -> Self:
    """Apply the native `__lt__` to the unwrapped `other` and wrap the result."""
    rhs = extract_native(other)
    return self._with_native(self.native.__lt__(rhs))  # pyright: ignore[reportArgumentType]
def __rpow__(self, other: PolarsSeries | Any) -> Self:
    """Apply the native `__rpow__` to the unwrapped `other` and wrap the result.

    On backends older than 1.16.1 the result is re-aliased to this series' name.
    """
    powered = self.native.__rpow__(extract_native(other))
    needs_alias = self._backend_version < (1, 16, 1)
    if needs_alias:
        # Explicitly set alias to work around https://github.com/pola-rs/polars/issues/20071
        powered = powered.alias(self.name)
    return self._with_native(powered)
def is_nan(self) -> Self:
    """Return the native `is_nan()` result, wrapped.

    Any exception from the native call is converted by
    `catch_polars_exception` and raised without the original context.
    On backends older than 1.18 the result is additionally masked with
    `is_not_null()` through a `when/then` selection.
    """
    try:
        nan_mask = self.native.is_nan()
    except Exception as e:  # noqa: BLE001
        raise catch_polars_exception(e, self._backend_version) from None
    if self._backend_version < (1, 18):  # pragma: no cover
        masked = pl.when(self.native.is_not_null()).then(nan_mask)
        selected = pl.select(masked)
        return self._with_native(selected[self.name])
    return self._with_native(nan_mask)
def median(self) -> Any:
    """Return the native median.

    Raises:
        InvalidOperationError: If this series' dtype is not numeric.
    """
    from narwhals.exceptions import InvalidOperationError

    if self.dtype.is_numeric():
        return self.native.median()

    msg = "`median` operation not supported for non-numeric input type."
    raise InvalidOperationError(msg)
def to_dummies(self, *, separator: str, drop_first: bool) -> PolarsDataFrame:
    """Build dummy columns with the native `to_dummies` and cast them to `Int8`.

    Backends older than 0.20.15 are called without `drop_first`; when it is
    requested, one column is removed by position instead (index 1 if the
    series contains nulls, otherwise index 0).
    """
    from narwhals._polars.dataframe import PolarsDataFrame

    native = self.native
    if self._backend_version >= (0, 20, 15):
        dummies = native.to_dummies(separator=separator, drop_first=drop_first)
    else:
        has_nulls = native.is_null().any()
        dummies = native.to_dummies(separator=separator)
        kept_columns = dummies.columns
        if drop_first:
            del kept_columns[int(has_nulls)]
        dummies = dummies.select(kept_columns)
    as_int8 = dummies.with_columns(pl.all().cast(pl.Int8))
    return PolarsDataFrame.from_native(as_int8, context=self)
def ewm_mean(
    self,
    *,
    com: float | None,
    span: float | None,
    half_life: float | None,
    alpha: float | None,
    adjust: bool,
    min_samples: int,
    ignore_nulls: bool,
) -> Self:
    """Forward to the native `ewm_mean`.

    `min_samples` is passed as `min_periods` on backends older than 1.21.0
    and as `min_samples` otherwise. On backends older than 1.0 the result is
    set to null wherever the input is null.
    """
    native = self.native
    min_samples_key = (
        "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
    )
    smoothed = native.ewm_mean(
        com=com,
        span=span,
        half_life=half_life,
        alpha=alpha,
        adjust=adjust,
        ignore_nulls=ignore_nulls,
        **{min_samples_key: min_samples},
    )
    if self._backend_version < (1,):  # pragma: no cover
        null_preserving = pl.when(~native.is_null()).then(smoothed).otherwise(None)
        return self._with_native(pl.select(null_preserving)[native.name])
    return self._with_native(smoothed)
@requires.backend_version((1,))
def rolling_var(
    self,
    window_size: int,
    *,
    min_samples: int,
    center: bool,
    ddof: int,
) -> Self:
    """Forward to the native `rolling_var`.

    `min_samples` is passed as `min_periods` on backends older than 1.21.0
    and as `min_samples` otherwise.
    """
    min_samples_key = (
        "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
    )
    options: dict[str, Any] = {
        "window_size": window_size,
        "center": center,
        "ddof": ddof,
        min_samples_key: min_samples,
    }
    return self._with_native(self.native.rolling_var(**options))
@requires.backend_version((1,))
def rolling_std(
    self,
    window_size: int,
    *,
    min_samples: int,
    center: bool,
    ddof: int,
) -> Self:
    """Forward to the native `rolling_std`.

    `min_samples` is passed as `min_periods` on backends older than 1.21.0
    and as `min_samples` otherwise.
    """
    min_samples_key = (
        "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
    )
    options: dict[str, Any] = {
        "window_size": window_size,
        "center": center,
        "ddof": ddof,
        min_samples_key: min_samples,
    }
    return self._with_native(self.native.rolling_std(**options))
def rolling_sum(
    self,
    window_size: int,
    *,
    min_samples: int,
    center: bool,
) -> Self:
    """Forward to the native `rolling_sum`.

    `min_samples` is passed as `min_periods` on backends older than 1.21.0
    and as `min_samples` otherwise.
    """
    min_samples_key = (
        "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
    )
    options: dict[str, Any] = {
        "window_size": window_size,
        "center": center,
        min_samples_key: min_samples,
    }
    return self._with_native(self.native.rolling_sum(**options))
def rolling_mean(
    self,
    window_size: int,
    *,
    min_samples: int,
    center: bool,
) -> Self:
    """Forward to the native `rolling_mean`.

    `min_samples` is passed as `min_periods` on backends older than 1.21.0
    and as `min_samples` otherwise.
    """
    min_samples_key = (
        "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
    )
    options: dict[str, Any] = {
        "window_size": window_size,
        "center": center,
        min_samples_key: min_samples,
    }
    return self._with_native(self.native.rolling_mean(**options))
def sort(self, *, descending: bool, nulls_last: bool) -> Self:
    """Sort the series.

    Backends from 0.20.6 on receive `nulls_last` directly. Older ones are
    sorted without it, and when `nulls_last` is set the sorted result is
    rebuilt as its non-null rows followed by its null rows.
    """
    native = self.native
    if self._backend_version >= (0, 20, 6):
        return self._with_native(
            native.sort(descending=descending, nulls_last=nulls_last)
        )

    ordered = native.sort(descending=descending)
    if nulls_last:
        null_mask = ordered.is_null()
        ordered = pl.concat([ordered.filter(~null_mask), ordered.filter(null_mask)])
    return self._with_native(ordered)
def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
    """Call `scatter` on a clone of the native series and wrap the result.

    The wrapped series itself is not passed to `scatter`; a `clone()` is.
    """
    duplicate = self.native.clone()
    new_values = extract_native(values)
    return self._with_native(duplicate.scatter(indices, new_values))
def value_counts(
    self,
    *,
    sort: bool,
    parallel: bool,
    name: str | None,
    normalize: bool,
) -> PolarsDataFrame:
    """Count values using the native `value_counts`.

    Backends from 1.0.0 on receive `name` and `normalize` directly. For
    older ones the output column is built here: it is named `name` (or
    "proportion"/"count" when `name` is falsy) and holds `count / sum(count)`
    when `normalize` is set, otherwise `count`.
    """
    from narwhals._polars.dataframe import PolarsDataFrame

    native = self.native
    if self._backend_version >= (1, 0, 0):
        counts = native.value_counts(
            sort=sort, parallel=parallel, name=name, normalize=normalize
        )
        return PolarsDataFrame.from_native(counts, context=self)

    value_name_ = name or ("proportion" if normalize else "count")
    raw_counts = native.value_counts(sort=sort, parallel=parallel)
    if normalize:
        count_expr = pl.col("count") / pl.sum("count")
    else:
        count_expr = pl.col("count")
    counts = raw_counts.select(
        **{(native.name): pl.col(native.name), value_name_: count_expr}
    )
    return PolarsDataFrame.from_native(counts, context=self)
def cum_count(self, *, reverse: bool) -> Self:
    """Return the cumulative count.

    Backends older than 0.20.4 compute it as the cumulative sum of the
    negated null mask; newer ones use the native `cum_count`.
    """
    native = self.native
    if self._backend_version >= (0, 20, 4):
        counted = native.cum_count(reverse=reverse)
    else:
        counted = (~native.is_null()).cum_sum(reverse=reverse)
    return self._with_native(counted)
def __contains__(self, other: Any) -> bool:
    """Delegate the membership test to the native series.

    Any exception is converted by `catch_polars_exception` and raised
    without the original context.
    """
    try:
        found = self.native.__contains__(other)
    except Exception as e:  # noqa: BLE001
        raise catch_polars_exception(e, self._backend_version) from None
    else:
        return found
def hist(  # noqa: C901, PLR0912
    self,
    bins: list[float | int] | None,
    *,
    bin_count: int | None,
    include_breakpoint: bool,
) -> PolarsDataFrame:
    """Compute a histogram of the series as a `PolarsDataFrame`.

    Args:
        bins: Explicit bin edges, or `None`.
        bin_count: Number of bins to use, or `None`.
        include_breakpoint: Whether the result keeps a "breakpoint" column
            next to "count".

    The body handles, in order: degenerate bin requests, a series whose
    `count()` is zero, and several backend-version compatibility shims
    around the native `hist` call.
    """
    from narwhals._polars.dataframe import PolarsDataFrame

    backend_version = self._backend_version
    is_pre_1_15 = backend_version < (1, 15)

    # At most one bin edge, or zero bins requested: return empty typed columns.
    if (bins is not None and len(bins) <= 1) or (bin_count == 0):  # pragma: no cover
        empty_columns: list[pl.Series] = (
            [pl.Series("breakpoint", [], dtype=pl.Float64)] if include_breakpoint else []
        )
        empty_columns.append(pl.Series("count", [], dtype=pl.UInt32))
        return PolarsDataFrame.from_native(pl.DataFrame(empty_columns), context=self)

    # `count()` is zero: emit the requested bins with zero counts.
    if self.native.count() < 1:
        zero_counts: dict[str, Sequence[Any] | pl.Series]
        if bins is not None:
            zero_counts = {
                "breakpoint": bins[1:],
                "count": pl.zeros(n=len(bins) - 1, dtype=pl.Int64, eager=True),
            }
        elif (bin_count is not None) and bin_count == 1:
            zero_counts = {"breakpoint": [1.0], "count": [0]}
        elif (bin_count is not None) and bin_count > 1:
            zero_counts = {
                "breakpoint": pl.int_range(1, bin_count + 1, eager=True) / bin_count,
                "count": pl.zeros(n=bin_count, dtype=pl.Int64, eager=True),
            }
        else:  # pragma: no cover
            msg = "congratulations, you entered unreachable code - please report a bug"
            raise AssertionError(msg)
        if not include_breakpoint:
            del zero_counts["breakpoint"]
        return PolarsDataFrame.from_native(pl.DataFrame(zero_counts), context=self)

    # polars <1.15 does not adjust the bins when they have equivalent min/max
    # polars <1.5 with bin_count=...
    #   returns bins that range from -inf to +inf and has bin_count + 1 bins.
    #   for compat: convert `bin_count=` call to `bins=`
    if is_pre_1_15 and (bin_count is not None):  # pragma: no cover
        lower = cast("float", self.native.min())
        upper = cast("float", self.native.max())
        if lower == upper:
            width = 1 / bin_count
            lower -= 0.5
            upper += 0.5
        else:
            width = (upper - lower) / bin_count
        bins = (pl.int_range(0, bin_count + 1, eager=True) * width + lower).to_list()
        bin_count = None

    # Polars inconsistently handles NaN values when computing histograms
    #   against predefined bins: https://github.com/pola-rs/polars/issues/21082
    series = self.native
    if is_pre_1_15 or bins is not None:
        series = series.set(series.is_nan(), None)

    frame = series.hist(
        bins,
        bin_count=bin_count,
        include_category=False,
        include_breakpoint=include_breakpoint,
    )
    if not include_breakpoint:
        frame.columns = ["count"]

    if backend_version < (1, 0) and include_breakpoint:
        frame = frame.rename({"break_point": "breakpoint"})

    # polars<1.15 implicitly adds -inf and inf to either end of bins
    if is_pre_1_15 and bins is not None:  # pragma: no cover
        row_index = pl.int_range(0, len(frame))
        frame = frame.filter((row_index > 0) & (row_index < len(frame) - 1))

    # polars<1.27 makes the lowest bin a left/right closed interval.
    if backend_version < (1, 27) and bins is not None:
        frame[0, "count"] += (series == bins[0]).sum()
    return PolarsDataFrame.from_native(frame, context=self)
def to_polars(self) -> pl.Series:
    """Return the wrapped native `pl.Series`."""
    native = self.native
    return native
@property
def dt(self) -> PolarsSeriesDateTimeNamespace:
    """Accessor object that forwards calls to the native `.dt` namespace."""
    accessor = PolarsSeriesDateTimeNamespace(self)
    return accessor
@property
def str(self) -> PolarsSeriesStringNamespace:
    """Accessor object that forwards calls to the native `.str` namespace."""
    accessor = PolarsSeriesStringNamespace(self)
    return accessor
@property
def cat(self) -> PolarsSeriesCatNamespace:
    """Accessor object that forwards calls to the native `.cat` namespace."""
    accessor = PolarsSeriesCatNamespace(self)
    return accessor
@property
def struct(self) -> PolarsSeriesStructNamespace:
    """Accessor object that forwards calls to the native `.struct` namespace."""
    accessor = PolarsSeriesStructNamespace(self)
    return accessor
# Declarations for the names listed in `INHERITED_METHODS`. These are bare
# annotations with no assigned value, so they create no class attributes:
# at runtime these names are still resolved by `__getattr__`, which forwards
# them to the native series. They exist to give type checkers a signature.
__add__: Method[Self]
__and__: Method[Self]
__floordiv__: Method[Self]
__invert__: Method[Self]
__iter__: Method[Iterator[Any]]
__mod__: Method[Self]
__mul__: Method[Self]
__or__: Method[Self]
__pow__: Method[Self]
__radd__: Method[Self]
__rand__: Method[Self]
__rfloordiv__: Method[Self]
__rmod__: Method[Self]
__rmul__: Method[Self]
__ror__: Method[Self]
__rsub__: Method[Self]
__rtruediv__: Method[Self]
__sub__: Method[Self]
__truediv__: Method[Self]
abs: Method[Self]
all: Method[bool]
any: Method[bool]
arg_max: Method[int]
arg_min: Method[int]
arg_true: Method[Self]
clip: Method[Self]
count: Method[int]
cum_max: Method[Self]
cum_min: Method[Self]
cum_prod: Method[Self]
cum_sum: Method[Self]
diff: Method[Self]
drop_nulls: Method[Self]
fill_null: Method[Self]
filter: Method[Self]
gather_every: Method[Self]
head: Method[Self]
is_between: Method[Self]
is_finite: Method[Self]
is_first_distinct: Method[Self]
is_in: Method[Self]
is_last_distinct: Method[Self]
is_null: Method[Self]
is_sorted: Method[bool]
is_unique: Method[Self]
item: Method[Any]
len: Method[int]
max: Method[Any]
mean: Method[float]
min: Method[Any]
mode: Method[Self]
n_unique: Method[int]
null_count: Method[int]
quantile: Method[float]
rank: Method[Self]
round: Method[Self]
sample: Method[Self]
shift: Method[Self]
skew: Method[float | None]
std: Method[float]
sum: Method[float]
tail: Method[Self]
to_arrow: Method[pa.Array[Any]]
to_frame: Method[PolarsDataFrame]
to_list: Method[list[Any]]
to_pandas: Method[pd.Series[Any]]
unique: Method[Self]
var: Method[float]
zip_with: Method[Self]
@property
def list(self) -> PolarsSeriesListNamespace:
    """Accessor object for the native `.list` namespace."""
    accessor = PolarsSeriesListNamespace(self)
    return accessor
class PolarsSeriesDateTimeNamespace:
    """Forwards every attribute access to the native series' `.dt` namespace."""

    def __init__(self, series: PolarsSeries) -> None:
        self._compliant_series = series

    def __getattr__(self, attr: str) -> Any:
        """Return a callable that invokes the native `.dt.<attr>` and wraps the result."""

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            # Read the wrapped series at call time, inside the closure.
            compliant = self._compliant_series
            native_method = getattr(compliant.native.dt, attr)
            return compliant._with_native(native_method(*pos, **kwds))

        return func
class PolarsSeriesStringNamespace:
    """Forwards every attribute access to the native series' `.str` namespace."""

    def __init__(self, series: PolarsSeries) -> None:
        self._compliant_series = series

    def __getattr__(self, attr: str) -> Any:
        """Return a callable that invokes the native `.str.<attr>` and wraps the result."""

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            # Read the wrapped series at call time, inside the closure.
            compliant = self._compliant_series
            native_method = getattr(compliant.native.str, attr)
            return compliant._with_native(native_method(*pos, **kwds))

        return func
class PolarsSeriesCatNamespace:
    """Forwards every attribute access to the native series' `.cat` namespace."""

    def __init__(self, series: PolarsSeries) -> None:
        self._compliant_series = series

    def __getattr__(self, attr: str) -> Any:
        """Return a callable that invokes the native `.cat.<attr>` and wraps the result."""

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            # Read the wrapped series at call time, inside the closure.
            compliant = self._compliant_series
            native_method = getattr(compliant.native.cat, attr)
            return compliant._with_native(native_method(*pos, **kwds))

        return func
class PolarsSeriesListNamespace:
    """`.list` accessor: `len` is implemented here, other names are forwarded."""

    def __init__(self, series: PolarsSeries) -> None:
        self._series = series

    def len(self) -> PolarsSeries:
        """Return the native `list.len()` result as a new `PolarsSeries`.

        Backend-specific handling:
            * backend < 1.16: rows where the input is null are set to null,
              and the result is cast to `UInt32`.
            * 1.16 <= backend < 1.17: the computed lengths are cast to `UInt32`.
        """
        native_series = self._series.native
        native_result = native_series.list.len()

        if self._series._backend_version < (1, 16):  # pragma: no cover
            native_result = pl.select(
                pl.when(~native_series.is_null()).then(native_result).otherwise(None)
            )[native_series.name].cast(pl.UInt32())
        elif self._series._backend_version < (1, 17):  # pragma: no cover
            # Cast the computed lengths. The previous code cast `native_series`,
            # i.e. the list-typed input, which discarded the `list.len()` result.
            native_result = native_result.cast(pl.UInt32())

        return self._series._with_native(native_result)

    # TODO(FBruzzesi): Remove `pragma: no cover` once other namespace methods are added
    def __getattr__(self, attr: str) -> Any:  # pragma: no cover
        """Return a callable that invokes the native `.list.<attr>` and wraps the result."""

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._series._with_native(
                getattr(self._series.native.list, attr)(*pos, **kwds)
            )

        return func
class PolarsSeriesStructNamespace:
    """Forwards every attribute access to the native series' `.struct` namespace."""

    def __init__(self, series: PolarsSeries) -> None:
        self._compliant_series = series

    def __getattr__(self, attr: str) -> Any:
        """Return a callable that invokes the native `.struct.<attr>` and wraps the result."""

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            # Read the wrapped series at call time, inside the closure.
            compliant = self._compliant_series
            native_method = getattr(compliant.native.struct, attr)
            return compliant._with_native(native_method(*pos, **kwds))

        return func