Files
Buffteks-Website/streamlit-venv/lib/python3.10/site-packages/narwhals/_pandas_like/expr.py
2025-01-10 21:40:35 +00:00

749 lines
25 KiB
Python
Executable File

from __future__ import annotations
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Literal
from narwhals._expression_parsing import reuse_series_implementation
from narwhals._expression_parsing import reuse_series_namespace_implementation
from narwhals._pandas_like.series import PandasLikeSeries
if TYPE_CHECKING:
from typing_extensions import Self
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.namespace import PandasLikeNamespace
from narwhals.typing import DTypes
from narwhals.utils import Implementation
class PandasLikeExpr:
def __init__(
self,
call: Callable[[PandasLikeDataFrame], list[PandasLikeSeries]],
*,
depth: int,
function_name: str,
root_names: list[str] | None,
output_names: list[str] | None,
implementation: Implementation,
backend_version: tuple[int, ...],
dtypes: DTypes,
) -> None:
self._call = call
self._depth = depth
self._function_name = function_name
self._root_names = root_names
self._depth = depth
self._output_names = output_names
self._implementation = implementation
self._backend_version = backend_version
self._dtypes = dtypes
def __repr__(self) -> str: # pragma: no cover
return (
f"PandasLikeExpr("
f"depth={self._depth}, "
f"function_name={self._function_name}, "
f"root_names={self._root_names}, "
f"output_names={self._output_names}"
)
def __narwhals_namespace__(self) -> PandasLikeNamespace:
from narwhals._pandas_like.namespace import PandasLikeNamespace
return PandasLikeNamespace(
self._implementation, self._backend_version, dtypes=self._dtypes
)
def __narwhals_expr__(self) -> None: ...
@classmethod
def from_column_names(
cls: type[Self],
*column_names: str,
implementation: Implementation,
backend_version: tuple[int, ...],
dtypes: DTypes,
) -> Self:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [
PandasLikeSeries(
df._native_frame[column_name],
implementation=df._implementation,
backend_version=df._backend_version,
dtypes=df._dtypes,
)
for column_name in column_names
]
return cls(
func,
depth=0,
function_name="col",
root_names=list(column_names),
output_names=list(column_names),
implementation=implementation,
backend_version=backend_version,
dtypes=dtypes,
)
@classmethod
def from_column_indices(
cls: type[Self],
*column_indices: int,
implementation: Implementation,
backend_version: tuple[int, ...],
dtypes: DTypes,
) -> Self:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [
PandasLikeSeries(
df._native_frame.iloc[:, column_index],
implementation=df._implementation,
backend_version=df._backend_version,
dtypes=df._dtypes,
)
for column_index in column_indices
]
return cls(
func,
depth=0,
function_name="nth",
root_names=None,
output_names=None,
implementation=implementation,
backend_version=backend_version,
dtypes=dtypes,
)
def cast(
self,
dtype: Any,
) -> Self:
return reuse_series_implementation(self, "cast", dtype=dtype)
def __eq__(self, other: PandasLikeExpr | Any) -> Self: # type: ignore[override]
return reuse_series_implementation(self, "__eq__", other=other)
def __ne__(self, other: PandasLikeExpr | Any) -> Self: # type: ignore[override]
return reuse_series_implementation(self, "__ne__", other=other)
def __ge__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__ge__", other=other)
def __gt__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__gt__", other=other)
def __le__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__le__", other=other)
def __lt__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__lt__", other=other)
def __and__(self, other: PandasLikeExpr | bool | Any) -> Self:
return reuse_series_implementation(self, "__and__", other=other)
def __rand__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rand__", other=other)
def __or__(self, other: PandasLikeExpr | bool | Any) -> Self:
return reuse_series_implementation(self, "__or__", other=other)
def __ror__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__ror__", other=other)
def __add__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__add__", other=other)
def __radd__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__radd__", other=other)
def __sub__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__sub__", other=other)
def __rsub__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rsub__", other=other)
def __mul__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__mul__", other=other)
def __rmul__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rmul__", other=other)
def __truediv__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__truediv__", other=other)
def __rtruediv__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rtruediv__", other=other)
def __floordiv__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__floordiv__", other=other)
def __rfloordiv__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rfloordiv__", other=other)
def __pow__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__pow__", other=other)
def __rpow__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rpow__", other=other)
def __mod__(self, other: PandasLikeExpr | Any) -> Self:
return reuse_series_implementation(self, "__mod__", other=other)
def __rmod__(self, other: Any) -> Self:
return reuse_series_implementation(self, "__rmod__", other=other)
# Unary
def __invert__(self) -> Self:
return reuse_series_implementation(self, "__invert__")
# Reductions
def null_count(self) -> Self:
return reuse_series_implementation(self, "null_count", returns_scalar=True)
def n_unique(self) -> Self:
return reuse_series_implementation(self, "n_unique", returns_scalar=True)
def sum(self) -> Self:
return reuse_series_implementation(self, "sum", returns_scalar=True)
def count(self) -> Self:
return reuse_series_implementation(self, "count", returns_scalar=True)
def mean(self) -> Self:
return reuse_series_implementation(self, "mean", returns_scalar=True)
def std(self, *, ddof: int = 1) -> Self:
return reuse_series_implementation(self, "std", ddof=ddof, returns_scalar=True)
def any(self) -> Self:
return reuse_series_implementation(self, "any", returns_scalar=True)
def all(self) -> Self:
return reuse_series_implementation(self, "all", returns_scalar=True)
def max(self) -> Self:
return reuse_series_implementation(self, "max", returns_scalar=True)
def min(self) -> Self:
return reuse_series_implementation(self, "min", returns_scalar=True)
# Other
def clip(self, lower_bound: Any, upper_bound: Any) -> Self:
return reuse_series_implementation(
self, "clip", lower_bound=lower_bound, upper_bound=upper_bound
)
def is_between(
self, lower_bound: Any, upper_bound: Any, closed: str = "both"
) -> Self:
return reuse_series_implementation(
self,
"is_between",
lower_bound=lower_bound,
upper_bound=upper_bound,
closed=closed,
)
def is_null(self) -> Self:
return reuse_series_implementation(self, "is_null")
def fill_null(self, value: Any) -> Self:
return reuse_series_implementation(self, "fill_null", value=value)
def is_in(self, other: Any) -> Self:
return reuse_series_implementation(self, "is_in", other=other)
def arg_true(self) -> Self:
return reuse_series_implementation(self, "arg_true")
def filter(self, *predicates: Any) -> Self:
plx = self.__narwhals_namespace__()
other = plx.all_horizontal(*predicates)
return reuse_series_implementation(self, "filter", other=other)
def drop_nulls(self) -> Self:
return reuse_series_implementation(self, "drop_nulls")
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
return reuse_series_implementation(
self, "sort", descending=descending, nulls_last=nulls_last
)
def abs(self) -> Self:
return reuse_series_implementation(self, "abs")
def cum_sum(self) -> Self:
return reuse_series_implementation(self, "cum_sum")
def unique(self) -> Self:
return reuse_series_implementation(self, "unique")
def diff(self) -> Self:
return reuse_series_implementation(self, "diff")
def shift(self, n: int) -> Self:
return reuse_series_implementation(self, "shift", n=n)
def sample(
self: Self,
n: int | None = None,
*,
fraction: float | None = None,
with_replacement: bool = False,
seed: int | None = None,
) -> Self:
return reuse_series_implementation(
self,
"sample",
n=n,
fraction=fraction,
with_replacement=with_replacement,
seed=seed,
)
def alias(self, name: str) -> Self:
# Define this one manually, so that we can
# override `output_names` and not increase depth
return self.__class__(
lambda df: [series.alias(name) for series in self._call(df)],
depth=self._depth,
function_name=self._function_name,
root_names=self._root_names,
output_names=[name],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
)
def over(self, keys: list[str]) -> Self:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
if self._output_names is None:
msg = (
"Anonymous expressions are not supported in over.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
tmp = df.group_by(*keys, drop_null_keys=False).agg(self)
tmp = df.select(*keys).join(
tmp, how="left", left_on=keys, right_on=keys, suffix="_right"
)
return [tmp[name] for name in self._output_names]
return self.__class__(
func,
depth=self._depth + 1,
function_name=self._function_name + "->over",
root_names=self._root_names,
output_names=self._output_names,
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
)
def is_duplicated(self) -> Self:
return reuse_series_implementation(self, "is_duplicated")
def is_unique(self) -> Self:
return reuse_series_implementation(self, "is_unique")
def is_first_distinct(self) -> Self:
return reuse_series_implementation(self, "is_first_distinct")
def is_last_distinct(self) -> Self:
return reuse_series_implementation(self, "is_last_distinct")
def quantile(
self,
quantile: float,
interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"],
) -> Self:
return reuse_series_implementation(
self, "quantile", quantile, interpolation, returns_scalar=True
)
def head(self, n: int) -> Self:
return reuse_series_implementation(self, "head", n)
def tail(self, n: int) -> Self:
return reuse_series_implementation(self, "tail", n)
def round(self: Self, decimals: int) -> Self:
return reuse_series_implementation(self, "round", decimals)
def len(self: Self) -> Self:
return reuse_series_implementation(self, "len", returns_scalar=True)
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
return reuse_series_implementation(self, "gather_every", n=n, offset=offset)
def mode(self: Self) -> Self:
return reuse_series_implementation(self, "mode")
@property
def str(self: Self) -> PandasLikeExprStringNamespace:
return PandasLikeExprStringNamespace(self)
@property
def dt(self: Self) -> PandasLikeExprDateTimeNamespace:
return PandasLikeExprDateTimeNamespace(self)
@property
def cat(self: Self) -> PandasLikeExprCatNamespace:
return PandasLikeExprCatNamespace(self)
@property
def name(self: Self) -> PandasLikeExprNameNamespace:
return PandasLikeExprNameNamespace(self)
class PandasLikeExprCatNamespace:
def __init__(self, expr: PandasLikeExpr) -> None:
self._expr = expr
def get_categories(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"cat",
"get_categories",
)
class PandasLikeExprStringNamespace:
def __init__(self, expr: PandasLikeExpr) -> None:
self._expr = expr
def len_chars(
self,
) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "str", "len_chars")
def replace(
self,
pattern: str,
value: str,
*,
literal: bool = False,
n: int = 1,
) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "str", "replace", pattern, value, literal=literal, n=n
)
def replace_all(
self,
pattern: str,
value: str,
*,
literal: bool = False,
) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "str", "replace_all", pattern, value, literal=literal
)
def strip_chars(self, characters: str | None = None) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"strip_chars",
characters,
)
def starts_with(self, prefix: str) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"starts_with",
prefix,
)
def ends_with(self, suffix: str) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"ends_with",
suffix,
)
def contains(self, pattern: str, *, literal: bool) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"contains",
pattern,
literal=literal,
)
def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "str", "slice", offset, length
)
def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr,
"str",
"to_datetime",
format,
)
def to_uppercase(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"to_uppercase",
)
def to_lowercase(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr,
"str",
"to_lowercase",
)
class PandasLikeExprDateTimeNamespace:
def __init__(self, expr: PandasLikeExpr) -> None:
self._expr = expr
def date(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "date")
def year(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "year")
def month(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "month")
def day(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "day")
def hour(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "hour")
def minute(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "minute")
def second(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "second")
def millisecond(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "millisecond")
def microsecond(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "microsecond")
def nanosecond(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "nanosecond")
def ordinal_day(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "ordinal_day")
def total_minutes(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "total_minutes")
def total_seconds(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(self._expr, "dt", "total_seconds")
def total_milliseconds(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "total_milliseconds"
)
def total_microseconds(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "total_microseconds"
)
def total_nanoseconds(self) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "total_nanoseconds"
)
def to_string(self, format: str) -> PandasLikeExpr: # noqa: A002
return reuse_series_namespace_implementation(
self._expr, "dt", "to_string", format
)
def replace_time_zone(self, time_zone: str | None) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "replace_time_zone", time_zone
)
def convert_time_zone(self, time_zone: str) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "convert_time_zone", time_zone
)
def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._expr, "dt", "timestamp", time_unit
)
class PandasLikeExprNameNamespace:
def __init__(self: Self, expr: PandasLikeExpr) -> None:
self._expr = expr
def keep(self: Self) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.keep`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), root_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=root_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)
def map(self: Self, function: Callable[[str], str]) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.map`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
output_names = [function(str(name)) for name in root_names]
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), output_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=output_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)
def prefix(self: Self, prefix: str) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.prefix`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
output_names = [prefix + str(name) for name in root_names]
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), output_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=output_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)
def suffix(self: Self, suffix: str) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.suffix`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
output_names = [str(name) + suffix for name in root_names]
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), output_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=output_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)
def to_lowercase(self: Self) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.to_lowercase`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
output_names = [str(name).lower() for name in root_names]
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), output_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=output_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)
def to_uppercase(self: Self) -> PandasLikeExpr:
root_names = self._expr._root_names
if root_names is None:
msg = (
"Anonymous expressions are not supported in `.name.to_uppercase`.\n"
"Instead of `nw.all()`, try using a named expression, such as "
"`nw.col('a', 'b')`\n"
)
raise ValueError(msg)
output_names = [str(name).upper() for name in root_names]
return self._expr.__class__(
lambda df: [
series.alias(name)
for series, name in zip(self._expr._call(df), output_names)
],
depth=self._expr._depth,
function_name=self._expr._function_name,
root_names=root_names,
output_names=output_names,
implementation=self._expr._implementation,
backend_version=self._expr._backend_version,
dtypes=self._expr._dtypes,
)