from __future__ import annotations
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import Mapping
from typing import Sequence
from typing import cast
from typing import overload
import numpy as np
from narwhals._compliant import EagerSeries
from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace
from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace
from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace
from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace
from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace
from narwhals._pandas_like.utils import align_and_extract_native
from narwhals._pandas_like.utils import get_dtype_backend
from narwhals._pandas_like.utils import narwhals_to_native_dtype
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype
from narwhals._pandas_like.utils import rename
from narwhals._pandas_like.utils import select_columns_by_name
from narwhals._pandas_like.utils import set_index
from narwhals.dependencies import is_numpy_array_1d
from narwhals.dependencies import is_numpy_scalar
from narwhals.exceptions import InvalidOperationError
from narwhals.utils import Implementation
from narwhals.utils import import_dtypes_module
from narwhals.utils import parse_version
from narwhals.utils import validate_backend_version
if TYPE_CHECKING:
from types import ModuleType
from typing import Hashable
import pandas as pd
import polars as pl
from typing_extensions import Self
from narwhals._arrow.typing import ArrowArray
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals.dtypes import DType
from narwhals.typing import Into1DArray
from narwhals.typing import _1DArray
from narwhals.typing import _AnyDArray
from narwhals.utils import Version
from narwhals.utils import _FullContext
PANDAS_TO_NUMPY_DTYPE_NO_MISSING = {
"Int64": "int64",
"int64[pyarrow]": "int64",
"Int32": "int32",
"int32[pyarrow]": "int32",
"Int16": "int16",
"int16[pyarrow]": "int16",
"Int8": "int8",
"int8[pyarrow]": "int8",
"UInt64": "uint64",
"uint64[pyarrow]": "uint64",
"UInt32": "uint32",
"uint32[pyarrow]": "uint32",
"UInt16": "uint16",
"uint16[pyarrow]": "uint16",
"UInt8": "uint8",
"uint8[pyarrow]": "uint8",
"Float64": "float64",
"float64[pyarrow]": "float64",
"Float32": "float32",
"float32[pyarrow]": "float32",
}
PANDAS_TO_NUMPY_DTYPE_MISSING = {
"Int64": "float64",
"int64[pyarrow]": "float64",
"Int32": "float64",
"int32[pyarrow]": "float64",
"Int16": "float64",
"int16[pyarrow]": "float64",
"Int8": "float64",
"int8[pyarrow]": "float64",
"UInt64": "float64",
"uint64[pyarrow]": "float64",
"UInt32": "float64",
"uint32[pyarrow]": "float64",
"UInt16": "float64",
"uint16[pyarrow]": "float64",
"UInt8": "float64",
"uint8[pyarrow]": "float64",
"Float64": "float64",
"float64[pyarrow]": "float64",
"Float32": "float32",
"float32[pyarrow]": "float32",
}
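# Illustrative sketch (not part of the library): how `to_numpy` further below consults
# these maps. A nullable "Int64" column with no missing values can round-trip losslessly
# to numpy "int64", but once a missing value is present it must fall back to "float64"
# so the gap can be represented as NaN. Assuming plain pandas is installed:
#
#     import pandas as pd
#     s = pd.Series([1, 2, None], dtype="Int64")
#     str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING          # True
#     s.to_numpy(dtype="float64", na_value=float("nan"))     # array([ 1.,  2., nan])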
class PandasLikeSeries(EagerSeries[Any]):
def __init__(
self: Self,
native_series: Any,
*,
implementation: Implementation,
backend_version: tuple[int, ...],
version: Version,
) -> None:
self._name = native_series.name
self._native_series = native_series
self._implementation = implementation
self._backend_version = backend_version
self._version = version
validate_backend_version(self._implementation, self._backend_version)
# Flag which indicates if, in the final step before applying an operation,
# the single value behind the PandasLikeSeries should be extracted and treated
# as a Scalar. For example, in `nw.col('a') - nw.lit(3)`, the latter would
# become a Series of length 1. Rather than doing a full broadcast so it matches
# the length of the whole dataframe, we just extract the scalar.
self._broadcast = False
@property
def native(self) -> Any:
return self._native_series
def __native_namespace__(self: Self) -> ModuleType:
if self._implementation.is_pandas_like():
return self._implementation.to_native_namespace()
msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover
raise AssertionError(msg)
def __narwhals_namespace__(self) -> PandasLikeNamespace:
from narwhals._pandas_like.namespace import PandasLikeNamespace
return PandasLikeNamespace(
self._implementation, self._backend_version, self._version
)
@overload
def __getitem__(self: Self, idx: int) -> Any: ...
@overload
def __getitem__(self: Self, idx: slice | Sequence[int]) -> Self: ...
def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self:
if isinstance(idx, int) or is_numpy_scalar(idx):
return self.native.iloc[idx]
return self._with_native(self.native.iloc[idx])
def _with_version(self: Self, version: Version) -> Self:
return self.__class__(
self.native,
implementation=self._implementation,
backend_version=self._backend_version,
version=version,
)
def _with_native(
self: Self, series: Any, *, preserve_broadcast: bool = False
) -> Self:
result = self.__class__(
series,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
)
if preserve_broadcast:
result._broadcast = self._broadcast
return result
@classmethod
def from_iterable(
cls,
data: Iterable[Any],
*,
context: _FullContext,
name: str = "",
dtype: DType | type[DType] | None = None,
index: Any = None,
) -> Self:
implementation = context._implementation
backend_version = context._backend_version
version = context._version
ns = implementation.to_native_namespace()
kwds: dict[str, Any] = {}
if dtype:
kwds["dtype"] = narwhals_to_native_dtype(
dtype, None, implementation, backend_version, version
)
else:
if implementation.is_pandas():
kwds["copy"] = False
if index is not None and len(index):
kwds["index"] = index
return cls(
ns.Series(data, name=name, **kwds),
implementation=implementation,
backend_version=backend_version,
version=version,
)
@classmethod
def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
implementation = context._implementation
arr = data if is_numpy_array_1d(data) else [data]
return cls(
implementation.to_native_namespace().Series(arr, name=""),
implementation=implementation,
backend_version=context._backend_version,
version=context._version,
)
@property
def name(self: Self) -> str:
return self._name
@property
def dtype(self: Self) -> DType:
native_dtype = self.native.dtype
return (
native_to_narwhals_dtype(native_dtype, self._version, self._implementation)
if native_dtype != "object"
else object_native_to_narwhals_dtype(
self.native, self._version, self._implementation
)
)
def ewm_mean(
self: Self,
*,
com: float | None,
span: float | None,
half_life: float | None,
alpha: float | None,
adjust: bool,
min_samples: int,
ignore_nulls: bool,
) -> PandasLikeSeries:
ser = self.native
mask_na = ser.isna()
if self._implementation is Implementation.CUDF:
if (min_samples == 0 and not ignore_nulls) or (not mask_na.any()):
result = ser.ewm(
com=com, span=span, halflife=half_life, alpha=alpha, adjust=adjust
).mean()
else:
msg = (
"cuDF only supports `ewm_mean` when there are no missing values "
"or when both `min_period=0` and `ignore_nulls=False`"
)
raise NotImplementedError(msg)
else:
result = ser.ewm(
com, span, half_life, alpha, min_samples, adjust, ignore_na=ignore_nulls
).mean()
result[mask_na] = None
return self._with_native(result)
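# Sketch of the null handling above (illustrative, assumes plain pandas): positions that
# are null on input stay null on output, instead of pandas' default of carrying the
# previous EWM state forward past a gap.
#
#     import pandas as pd
#     s = pd.Series([1.0, None, 3.0])
#     m = s.ewm(com=1, ignore_na=False).mean()
#     m[s.isna()] = None   # re-mask: null in, null out, as in `ewm_mean` above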
def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self:
if isinstance(values, self.__class__):
values = set_index(
values.native,
self.native.index[indices],
implementation=self._implementation,
backend_version=self._backend_version,
)
s = self.native.copy(deep=True)
s.iloc[indices] = values
s.name = self.name
return self._with_native(s)
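# Sketch of `scatter` semantics (illustrative, assumes plain pandas): the original
# Series is left untouched; a deep copy receives the new values at the given positions.
#
#     import pandas as pd
#     s = pd.Series([10, 20, 30], name="a")
#     out = s.copy(deep=True)
#     out.iloc[[0, 2]] = [99, 77]   # out -> [99, 20, 77]; `s` is unchanged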
def _scatter_in_place(self: Self, indices: Self, values: Self) -> None:
# Scatter, modifying original Series. Use with care!
values_native = set_index(
values.native,
self.native.index[indices.native],
implementation=self._implementation,
backend_version=self._backend_version,
)
if self._implementation is Implementation.PANDAS and parse_version(np) < (2,):
values_native = values_native.copy() # pragma: no cover
min_pd_version = (1, 2)
if (
self._implementation is Implementation.PANDAS
and self._backend_version < min_pd_version
):
self.native.iloc[indices.native.values] = values_native # noqa: PD011
else:
self.native.iloc[indices.native] = values_native
def cast(self: Self, dtype: DType | type[DType]) -> Self:
pd_dtype = narwhals_to_native_dtype(
dtype,
dtype_backend=get_dtype_backend(self.native.dtype, self._implementation),
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
)
return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True)
def item(self: Self, index: int | None) -> Any:
# cuDF doesn't have Series.item().
if index is None:
if len(self) != 1:
msg = (
"can only call '.item()' if the Series is of length 1,"
f" or an explicit index is provided (Series is of length {len(self)})"
)
raise ValueError(msg)
return self.native.iloc[0]
return self.native.iloc[index]
def to_frame(self: Self) -> PandasLikeDataFrame:
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
return PandasLikeDataFrame(
self.native.to_frame(),
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=False,
)
def to_list(self: Self) -> list[Any]:
is_cudf = self._implementation.is_cudf()
return self.native.to_arrow().to_pylist() if is_cudf else self.native.to_list()
def is_between(
self: Self,
lower_bound: Any,
upper_bound: Any,
closed: Literal["left", "right", "none", "both"],
) -> PandasLikeSeries:
ser = self.native
_, lower_bound = align_and_extract_native(self, lower_bound)
_, upper_bound = align_and_extract_native(self, upper_bound)
if closed == "left":
res = ser.ge(lower_bound) & ser.lt(upper_bound)
elif closed == "right":
res = ser.gt(lower_bound) & ser.le(upper_bound)
elif closed == "none":
res = ser.gt(lower_bound) & ser.lt(upper_bound)
elif closed == "both":
res = ser.ge(lower_bound) & ser.le(upper_bound)
else: # pragma: no cover
raise AssertionError
return self._with_native(res).alias(ser.name)
def is_in(self: Self, other: Any) -> PandasLikeSeries:
return self._with_native(self.native.isin(other))
def arg_true(self: Self) -> PandasLikeSeries:
ser = self.native
result = ser.__class__(range(len(ser)), name=ser.name, index=ser.index).loc[ser]
return self._with_native(result)
def arg_min(self: Self) -> int:
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return self.native.to_numpy().argmin()
return self.native.argmin()
def arg_max(self: Self) -> int:
ser = self.native
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.to_numpy().argmax()
return ser.argmax()
# Binary comparisons
def filter(self: Self, predicate: Any) -> PandasLikeSeries:
if not (
isinstance(predicate, list) and all(isinstance(x, bool) for x in predicate)
):
_, other_native = align_and_extract_native(self, predicate)
else:
other_native = predicate
return self._with_native(self.native.loc[other_native]).alias(self.name)
def __eq__(self: Self, other: object) -> PandasLikeSeries: # type: ignore[override]
ser, other = align_and_extract_native(self, other)
return self._with_native(ser == other).alias(self.name)
def __ne__(self: Self, other: object) -> PandasLikeSeries: # type: ignore[override]
ser, other = align_and_extract_native(self, other)
return self._with_native(ser != other).alias(self.name)
def __ge__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser >= other).alias(self.name)
def __gt__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser > other).alias(self.name)
def __le__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser <= other).alias(self.name)
def __lt__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser < other).alias(self.name)
def __and__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser & other).alias(self.name)
def __rand__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
ser = cast("pd.Series[Any]", ser)
return self._with_native(ser.__and__(other)).alias(self.name)
def __or__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser | other).alias(self.name)
def __ror__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
ser = cast("pd.Series[Any]", ser)
return self._with_native(ser.__or__(other)).alias(self.name)
def __add__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser + other).alias(self.name)
def __radd__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__radd__(other_native)).alias(self.name)
def __sub__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser - other).alias(self.name)
def __rsub__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rsub__(other_native)).alias(self.name)
def __mul__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser * other).alias(self.name)
def __rmul__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rmul__(other_native)).alias(self.name)
def __truediv__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser / other).alias(self.name)
def __rtruediv__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rtruediv__(other_native)).alias(self.name)
def __floordiv__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser // other).alias(self.name)
def __rfloordiv__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rfloordiv__(other_native)).alias(self.name)
def __pow__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser**other).alias(self.name)
def __rpow__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rpow__(other_native)).alias(self.name)
def __mod__(self: Self, other: Any) -> PandasLikeSeries:
ser, other = align_and_extract_native(self, other)
return self._with_native(ser % other).alias(self.name)
def __rmod__(self: Self, other: Any) -> PandasLikeSeries:
_, other_native = align_and_extract_native(self, other)
return self._with_native(self.native.__rmod__(other_native)).alias(self.name)
# Unary
def __invert__(self: PandasLikeSeries) -> PandasLikeSeries:
return self._with_native(~self.native)
# Reductions
def any(self: Self) -> bool:
return self.native.any()
def all(self: Self) -> bool:
return self.native.all()
def min(self: Self) -> Any:
return self.native.min()
def max(self: Self) -> Any:
return self.native.max()
def sum(self: Self) -> float:
return self.native.sum()
def count(self: Self) -> int:
return self.native.count()
def mean(self: Self) -> float:
return self.native.mean()
def median(self: Self) -> float:
if not self.dtype.is_numeric():
msg = "`median` operation not supported for non-numeric input type."
raise InvalidOperationError(msg)
return self.native.median()
def std(self: Self, *, ddof: int) -> float:
return self.native.std(ddof=ddof)
def var(self: Self, *, ddof: int) -> float:
return self.native.var(ddof=ddof)
def skew(self: Self) -> float | None:
ser_not_null = self.native.dropna()
if len(ser_not_null) == 0:
return None
elif len(ser_not_null) == 1:
return float("nan")
elif len(ser_not_null) == 2:
return 0.0
else:
m = ser_not_null - ser_not_null.mean()
m2 = (m**2).mean()
m3 = (m**3).mean()
return m3 / (m2**1.5) if m2 != 0 else float("nan")
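# For reference, the branch above computes the uncorrected moment-based skewness
# g1 = m3 / m2**1.5, where m2 and m3 are the second and third central moments of the
# non-null values. A small check with plain numpy (illustrative only):
#
#     import numpy as np
#     x = np.array([1.0, 2.0, 3.0, 10.0])
#     m = x - x.mean()
#     g1 = (m**3).mean() / (m**2).mean() ** 1.5   # ~1.018, right-skewed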
def len(self: Self) -> int:
return len(self.native)
# Transformations
def is_null(self: Self) -> PandasLikeSeries:
return self._with_native(self.native.isna(), preserve_broadcast=True)
def is_nan(self: Self) -> PandasLikeSeries:
ser = self.native
if self.dtype.is_numeric():
return self._with_native(ser != ser, preserve_broadcast=True) # noqa: PLR0124
msg = f"`.is_nan` only supported for numeric dtype and not {self.dtype}, did you mean `.is_null`?"
raise InvalidOperationError(msg)
def fill_null(
self: Self,
value: Any | None,
strategy: Literal["forward", "backward"] | None,
limit: int | None,
) -> Self:
ser = self.native
if value is not None:
_, value = align_and_extract_native(self, value)
res_ser = self._with_native(ser.fillna(value=value), preserve_broadcast=True)
else:
res_ser = self._with_native(
ser.ffill(limit=limit)
if strategy == "forward"
else ser.bfill(limit=limit),
preserve_broadcast=True,
)
return res_ser
def drop_nulls(self: Self) -> PandasLikeSeries:
return self._with_native(self.native.dropna())
def n_unique(self: Self) -> int:
return self.native.nunique(dropna=False)
def sample(
self: Self,
n: int | None,
*,
fraction: float | None,
with_replacement: bool,
seed: int | None,
) -> Self:
return self._with_native(
self.native.sample(
n=n, frac=fraction, replace=with_replacement, random_state=seed
)
)
def abs(self: Self) -> PandasLikeSeries:
return self._with_native(self.native.abs())
def cum_sum(self: Self, *, reverse: bool) -> Self:
result = (
self.native.cumsum(skipna=True)
if not reverse
else self.native[::-1].cumsum(skipna=True)[::-1]
)
return self._with_native(result)
def unique(self: Self, *, maintain_order: bool) -> PandasLikeSeries:
# pandas always maintains order, as per its docstring:
# "Uniques are returned in order of appearance" # noqa: ERA001
return self._with_native(
self.native.__class__(self.native.unique(), name=self.name)
)
def diff(self: Self) -> PandasLikeSeries:
return self._with_native(self.native.diff())
def shift(self: Self, n: int) -> PandasLikeSeries:
return self._with_native(self.native.shift(n))
def replace_strict(
self: Self,
old: Sequence[Any] | Mapping[Any, Any],
new: Sequence[Any],
*,
return_dtype: DType | type[DType] | None,
) -> PandasLikeSeries:
tmp_name = f"{self.name}_tmp"
dtype_backend = get_dtype_backend(self.native.dtype, self._implementation)
dtype = (
narwhals_to_native_dtype(
return_dtype,
dtype_backend,
self._implementation,
self._backend_version,
self._version,
)
if return_dtype
else None
)
namespace = self.__native_namespace__()
other = namespace.DataFrame(
{self.name: old, tmp_name: namespace.Series(new, dtype=dtype)}
)
result = self._with_native(
self.native.to_frame().merge(other, on=self.name, how="left")[tmp_name]
).alias(self.name)
if result.is_null().sum() != self.is_null().sum():
msg = (
"replace_strict did not replace all non-null values.\n\n"
f"The following did not get replaced: {self.filter(~self.is_null() & result.is_null()).unique(maintain_order=False).to_list()}"
)
raise ValueError(msg)
return result
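# Sketch of the merge-based mapping used by `replace_strict` above (illustrative,
# assumes plain pandas): `old` and `new` become a two-column lookup frame, a left merge
# applies the mapping, and any non-null value that failed to match surfaces as a new
# null, which the null-count comparison then turns into an error.
#
#     import pandas as pd
#     s = pd.Series([1, 2, 3], name="a")
#     lookup = pd.DataFrame({"a": [1, 2, 3], "a_tmp": ["x", "y", "z"]})
#     s.to_frame().merge(lookup, on="a", how="left")["a_tmp"]   # -> ["x", "y", "z"]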
def sort(self: Self, *, descending: bool, nulls_last: bool) -> PandasLikeSeries:
na_position = "last" if nulls_last else "first"
return self._with_native(
self.native.sort_values(ascending=not descending, na_position=na_position)
).alias(self.name)
def alias(self: Self, name: str | Hashable) -> Self:
if name != self.name:
return self._with_native(
rename(
self.native,
name,
implementation=self._implementation,
backend_version=self._backend_version,
),
preserve_broadcast=True,
)
return self
def __array__(self: Self, dtype: Any, *, copy: bool | None) -> _1DArray:
# pandas used to always return object dtype for nullable dtypes.
# So, we intercept __array__ and pass to `to_numpy` ourselves to make
# sure an appropriate numpy dtype is returned.
return self.to_numpy(dtype=dtype, copy=copy)
def to_numpy(self: Self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
# the default is meant to be None, but pandas doesn't allow it?
# https://numpy.org/doc/stable/reference/generated/numpy.ndarray.__array__.html
copy = copy or self._implementation is Implementation.CUDF
dtypes = import_dtypes_module(self._version)
if isinstance(self.dtype, dtypes.Datetime) and self.dtype.time_zone is not None:
s = self.dt.convert_time_zone("UTC").dt.replace_time_zone(None).native
else:
s = self.native
has_missing = s.isna().any()
if has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
if self._implementation is Implementation.PANDAS and self._backend_version < (
1,
): # pragma: no cover
kwargs = {}
else:
kwargs = {"na_value": float("nan")}
return s.to_numpy(
dtype=dtype or PANDAS_TO_NUMPY_DTYPE_MISSING[str(s.dtype)],
copy=copy,
**kwargs,
)
if not has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_NO_MISSING:
return s.to_numpy(
dtype=dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)], copy=copy
)
return s.to_numpy(dtype=dtype, copy=copy)
def to_pandas(self: Self) -> pd.Series[Any]:
if self._implementation is Implementation.PANDAS:
return self.native
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self.native.to_pandas()
elif self._implementation is Implementation.MODIN:
return self.native._to_pandas()
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)
def to_polars(self: Self) -> pl.Series:
import polars as pl # ignore-banned-import
return pl.from_pandas(self.to_pandas())
# --- descriptive ---
def is_unique(self: Self) -> Self:
return self._with_native(~self.native.duplicated(keep=False)).alias(self.name)
def null_count(self: Self) -> int:
return self.native.isna().sum()
def is_first_distinct(self: Self) -> Self:
return self._with_native(~self.native.duplicated(keep="first")).alias(self.name)
def is_last_distinct(self: Self) -> Self:
return self._with_native(~self.native.duplicated(keep="last")).alias(self.name)
def is_sorted(self: Self, *, descending: bool) -> bool:
if not isinstance(descending, bool):
msg = f"argument 'descending' should be boolean, found {type(descending)}"
raise TypeError(msg)
if descending:
return self.native.is_monotonic_decreasing
else:
return self.native.is_monotonic_increasing
def value_counts(
self: Self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
) -> PandasLikeDataFrame:
"""Parallel is unused, exists for compatibility."""
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
index_name_ = "index" if self._name is None else self._name
value_name_ = name or ("proportion" if normalize else "count")
val_count = self.native.value_counts(
dropna=False, sort=False, normalize=normalize
).reset_index()
val_count.columns = [index_name_, value_name_]
if sort:
val_count = val_count.sort_values(value_name_, ascending=False)
return PandasLikeDataFrame(
val_count,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=True,
)
def quantile(
self: Self,
quantile: float,
interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"],
) -> float:
return self.native.quantile(q=quantile, interpolation=interpolation)
def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries:
ser = self.native
_, mask = align_and_extract_native(self, mask)
_, other = align_and_extract_native(self, other)
res = ser.where(mask, other)
return self._with_native(res)
def head(self: Self, n: int) -> Self:
return self._with_native(self.native.head(n))
def tail(self: Self, n: int) -> Self:
return self._with_native(self.native.tail(n))
def round(self: Self, decimals: int) -> Self:
return self._with_native(self.native.round(decimals=decimals))
def to_dummies(
self: Self, *, separator: str, drop_first: bool
) -> PandasLikeDataFrame:
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
plx = self.__native_namespace__()
series = self.native
name = str(self._name) if self._name else ""
null_col_pl = f"{name}{separator}null"
has_nulls = series.isna().any()
result = plx.get_dummies(
series,
prefix=name,
prefix_sep=separator,
drop_first=drop_first,
# dummy_na adds a null-indicator column at the end when the series contains any nulls.
dummy_na=has_nulls,
dtype="int8",
)
if has_nulls:
*cols, null_col_pd = list(result.columns)
output_order = [null_col_pd, *cols]
result = rename(
select_columns_by_name(
result, output_order, self._backend_version, self._implementation
),
columns={null_col_pd: null_col_pl},
implementation=self._implementation,
backend_version=self._backend_version,
)
return PandasLikeDataFrame(
result,
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=True,
)
def gather_every(self: Self, n: int, offset: int) -> Self:
return self._with_native(self.native.iloc[offset::n])
def clip(
self: Self, lower_bound: Self | Any | None, upper_bound: Self | Any | None
) -> Self:
_, lower_bound = (
align_and_extract_native(self, lower_bound) if lower_bound else (None, None)
)
_, upper_bound = (
align_and_extract_native(self, upper_bound) if upper_bound else (None, None)
)
kwargs = {"axis": 0} if self._implementation is Implementation.MODIN else {}
return self._with_native(self.native.clip(lower_bound, upper_bound, **kwargs))
def to_arrow(self: Self) -> ArrowArray:
if self._implementation is Implementation.CUDF:
return self.native.to_arrow()
import pyarrow as pa # ignore-banned-import()
return pa.Array.from_pandas(self.native)
def mode(self: Self) -> Self:
result = self.native.mode()
result.name = self.name
return self._with_native(result)
def cum_count(self: Self, *, reverse: bool) -> Self:
not_na_series = ~self.native.isna()
result = (
not_na_series.cumsum()
if not reverse
else len(self) - not_na_series.cumsum() + not_na_series - 1
)
return self._with_native(result)
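# Worked micro-example for the `reverse=True` branch above (illustrative). For values
# [1, None, 3], `not_na` is [1, 0, 1] and its cumsum is [1, 1, 2], so
# `len(self) - cumsum + not_na - 1` gives [3-1+1-1, 3-1+0-1, 3-2+1-1] = [2, 1, 1]:
# the count of non-null values from each position to the end of the Series.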
def cum_min(self: Self, *, reverse: bool) -> Self:
result = (
self.native.cummin(skipna=True)
if not reverse
else self.native[::-1].cummin(skipna=True)[::-1]
)
return self._with_native(result)
def cum_max(self: Self, *, reverse: bool) -> Self:
result = (
self.native.cummax(skipna=True)
if not reverse
else self.native[::-1].cummax(skipna=True)[::-1]
)
return self._with_native(result)
def cum_prod(self: Self, *, reverse: bool) -> Self:
result = (
self.native.cumprod(skipna=True)
if not reverse
else self.native[::-1].cumprod(skipna=True)[::-1]
)
return self._with_native(result)
def rolling_sum(
self: Self, window_size: int, *, min_samples: int, center: bool
) -> Self:
result = self.native.rolling(
window=window_size, min_periods=min_samples, center=center
).sum()
return self._with_native(result)
def rolling_mean(
self: Self, window_size: int, *, min_samples: int, center: bool
) -> Self:
result = self.native.rolling(
window=window_size, min_periods=min_samples, center=center
).mean()
return self._with_native(result)
def rolling_var(
self: Self, window_size: int, *, min_samples: int, center: bool, ddof: int
) -> Self:
result = self.native.rolling(
window=window_size, min_periods=min_samples, center=center
).var(ddof=ddof)
return self._with_native(result)
def rolling_std(
self: Self, window_size: int, *, min_samples: int, center: bool, ddof: int
) -> Self:
result = self.native.rolling(
window=window_size, min_periods=min_samples, center=center
).std(ddof=ddof)
return self._with_native(result)
def __iter__(self: Self) -> Iterator[Any]:
yield from self.native.__iter__()
def __contains__(self: Self, other: Any) -> bool:
return self.native.isna().any() if other is None else (self.native == other).any()
def is_finite(self: Self) -> Self:
s = self.native
return self._with_native((s > float("-inf")) & (s < float("inf")))
def rank(
self: Self,
method: Literal["average", "min", "max", "dense", "ordinal"],
*,
descending: bool,
) -> Self:
pd_method = "first" if method == "ordinal" else method
name = self.name
if (
self._implementation is Implementation.PANDAS
and self._backend_version < (3,)
and self.dtype.is_integer()
and (null_mask := self.native.isna()).any()
):
# crazy workaround for the case of `na_option="keep"` and nullable
# integer dtypes. This should be supported in pandas > 3.0
# https://github.com/pandas-dev/pandas/issues/56976
ranked_series = (
self.native.to_frame()
.assign(**{f"{name}_is_null": null_mask})
.groupby(f"{name}_is_null")
.rank(
method=pd_method,
na_option="keep",
ascending=not descending,
pct=False,
)[name]
)
else:
ranked_series = self.native.rank(
method=pd_method, na_option="keep", ascending=not descending, pct=False
)
return self._with_native(ranked_series)
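# Sketch of the nullable-integer workaround above (illustrative, assumes plain pandas
# < 3.0): ranking inside groups defined by the null mask preserves `na_option="keep"`
# behaviour, because the all-null group ranks to all-missing while the non-null group
# ranks normally.
#
#     import pandas as pd
#     s = pd.Series([3, None, 1], name="a", dtype="Int64")
#     df = s.to_frame().assign(a_is_null=s.isna())
#     df.groupby("a_is_null").rank(method="min", na_option="keep", ascending=True)["a"]
#     # -> ranks [2, <missing>, 1]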
def hist(
self: Self,
bins: list[float | int] | None,
*,
bin_count: int | None,
include_breakpoint: bool,
) -> PandasLikeDataFrame:
from numpy import linspace
from numpy import zeros
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
ns = self.__native_namespace__()
data: dict[str, Sequence[int | float | str] | _AnyDArray]
if bin_count == 0 or (bins is not None and len(bins) <= 1):
data = {}
if include_breakpoint:
data["breakpoint"] = []
data["count"] = []
return PandasLikeDataFrame(
ns.DataFrame(data),
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=True,
)
elif self.native.count() < 1:
if bins is not None:
data = {"breakpoint": bins[1:], "count": zeros(shape=len(bins) - 1)}
else:
count = cast("int", bin_count)
data = {"breakpoint": linspace(0, 1, count), "count": zeros(shape=count)}
if not include_breakpoint:
del data["breakpoint"]
return PandasLikeDataFrame(
ns.DataFrame(data),
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=True,
)
elif bin_count is not None: # use Polars binning behavior
lower, upper = self.native.min(), self.native.max()
pad_lowest_bin = False
if lower == upper:
lower -= 0.5
upper += 0.5
else:
pad_lowest_bin = True
bins = linspace(lower, upper, bin_count + 1)
if pad_lowest_bin and bins is not None:
bins[0] -= 0.001 * abs(bins[0]) if bins[0] != 0 else 0.001
bin_count = None
# pandas (2.2.*) .value_counts(bins=int) adjusts the lowest bin twice, resulting in improper counts.
# pandas (2.2.*) .value_counts(bins=[...]) adjusts the lowest bin which should not happen since
# the bins were explicitly passed in.
categories = ns.cut(self.native, bins=bins if bin_count is None else bin_count)
# modin (0.32.0) .value_counts(...) silently drops bins with empty observations, .reindex
# is necessary to restore these bins.
result = categories.value_counts(dropna=True, sort=False).reindex(
categories.cat.categories, fill_value=0
)
data = {}
if include_breakpoint:
data["breakpoint"] = bins[1:] if bins is not None else result.index.right
data["count"] = result.reset_index(drop=True)
return PandasLikeDataFrame(
ns.DataFrame(data),
implementation=self._implementation,
backend_version=self._backend_version,
version=self._version,
validate_column_names=True,
)
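# Sketch of the Polars-style binning reproduced above (illustrative, assumes plain
# pandas and numpy): with `bin_count` given, the edges are an even linspace over
# [min, max], the lowest edge is nudged down so the minimum lands in the first bin,
# and empty bins keep a count of zero.
#
#     import numpy as np
#     import pandas as pd
#     s = pd.Series([1.0, 2.0, 2.5, 4.0])
#     edges = np.linspace(s.min(), s.max(), 3 + 1)    # edges for 3 bins
#     edges[0] -= 0.001 * abs(edges[0])               # pad the lowest edge
#     cats = pd.cut(s, bins=edges)
#     cats.value_counts(dropna=True, sort=False)      # counts per bin, zeros kept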
@property
def str(self: Self) -> PandasLikeSeriesStringNamespace:
return PandasLikeSeriesStringNamespace(self)
@property
def dt(self: Self) -> PandasLikeSeriesDateTimeNamespace:
return PandasLikeSeriesDateTimeNamespace(self)
@property
def cat(self: Self) -> PandasLikeSeriesCatNamespace:
return PandasLikeSeriesCatNamespace(self)
@property
def list(self: Self) -> PandasLikeSeriesListNamespace:
if not hasattr(self.native, "list"):
msg = "Series must be of PyArrow List type to support list namespace."
raise TypeError(msg)
return PandasLikeSeriesListNamespace(self)
@property
def struct(self: Self) -> PandasLikeSeriesStructNamespace:
if not hasattr(self.native, "struct"):
msg = "Series must be of PyArrow Struct type to support struct namespace."
raise TypeError(msg)
return PandasLikeSeriesStructNamespace(self)