from __future__ import annotations

from functools import reduce
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Iterable
from typing import Literal
from typing import cast

from narwhals._expression_parsing import combine_root_names
from narwhals._expression_parsing import parse_into_exprs
from narwhals._expression_parsing import reduce_output_names
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.utils import create_native_series
from narwhals._pandas_like.utils import horizontal_concat
from narwhals._pandas_like.utils import vertical_concat

if TYPE_CHECKING:
    from narwhals._pandas_like.typing import IntoPandasLikeExpr
    from narwhals.dtypes import DType
    from narwhals.typing import DTypes
    from narwhals.utils import Implementation


class PandasLikeNamespace:
    """Factory for pandas-like expressions, series and frames.

    Every object created through this namespace is handed the namespace's
    own ``implementation``, ``backend_version`` and ``dtypes``.
    """

    @property
    def selectors(self) -> PandasSelectorNamespace:
        """Return a selector namespace configured like this namespace."""
        return PandasSelectorNamespace(
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    # --- not in spec ---
    def __init__(
        self,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        dtypes: DTypes,
    ) -> None:
        """Store the backend configuration forwarded to every created object."""
        self._implementation = implementation
        self._backend_version = backend_version
        self._dtypes = dtypes

    def _create_expr_from_callable(
        self,
        func: Callable[[PandasLikeDataFrame], list[PandasLikeSeries]],
        *,
        depth: int,
        function_name: str,
        root_names: list[str] | None,
        output_names: list[str] | None,
    ) -> PandasLikeExpr:
        """Wrap ``func`` in a ``PandasLikeExpr`` carrying the given metadata."""
        return PandasLikeExpr(
            func,
            depth=depth,
            function_name=function_name,
            root_names=root_names,
            output_names=output_names,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def _create_series_from_scalar(
        self, value: Any, series: PandasLikeSeries
    ) -> PandasLikeSeries:
        """Build a one-element series holding ``value``.

        The result takes its name and its single index label (the first one)
        from ``series``.
        """
        return PandasLikeSeries._from_iterable(
            [value],
            name=series._native_series.name,
            index=series._native_series.index[0:1],
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def _create_expr_from_series(self, series: PandasLikeSeries) -> PandasLikeExpr:
        """Return an expression that ignores its frame and yields ``series``."""
        return PandasLikeExpr(
            lambda _df: [series],
            depth=0,
            function_name="series",
            root_names=None,
            output_names=None,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def _create_compliant_series(self, value: Any) -> PandasLikeSeries:
        """Delegate to ``create_native_series`` with this namespace's configuration."""
        return create_native_series(
            value,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    # --- selection ---
    def col(self, *column_names: str) -> PandasLikeExpr:
        """Return an expression selecting columns by name."""
        return PandasLikeExpr.from_column_names(
            *column_names,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def nth(self, *column_indices: int) -> PandasLikeExpr:
        """Return an expression selecting columns by index."""
        return PandasLikeExpr.from_column_indices(
            *column_indices,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def all(self) -> PandasLikeExpr:
        """Return an expression yielding one series per column of the frame."""
        return PandasLikeExpr(
            lambda df: [
                PandasLikeSeries(
                    df._native_frame[column_name],
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    dtypes=self._dtypes,
                )
                for column_name in df.columns
            ],
            depth=0,
            function_name="all",
            root_names=None,
            output_names=None,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    def lit(self, value: Any, dtype: DType | None) -> PandasLikeExpr:
        """Return an expression yielding a one-element series named ``"lit"``.

        The series holds ``value``, is indexed by the first index label of the
        frame it is evaluated against, and is cast to ``dtype`` when one is
        given.
        """

        def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
            pandas_series = PandasLikeSeries._from_iterable(
                data=[value],
                name="lit",
                index=df._native_frame.index[0:1],
                implementation=self._implementation,
                backend_version=self._backend_version,
                dtypes=self._dtypes,
            )
            # Test for absence explicitly rather than relying on the
            # truthiness of a dtype object.
            if dtype is not None:
                return pandas_series.cast(dtype)
            return pandas_series

        return PandasLikeExpr(
            lambda df: [_lit_pandas_series(df)],
            depth=0,
            function_name="lit",
            root_names=None,
            output_names=["lit"],
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    # --- reduction ---
    def sum(self, *column_names: str) -> PandasLikeExpr:
        """Return ``col(*column_names).sum()``."""
        return self.col(*column_names).sum()

    def mean(self, *column_names: str) -> PandasLikeExpr:
        """Return ``col(*column_names).mean()``."""
        return self.col(*column_names).mean()

    def max(self, *column_names: str) -> PandasLikeExpr:
        """Return ``col(*column_names).max()``."""
        return self.col(*column_names).max()

    def min(self, *column_names: str) -> PandasLikeExpr:
        """Return ``col(*column_names).min()``."""
        return self.col(*column_names).min()

    def len(self) -> PandasLikeExpr:
        """Return an expression yielding the frame's row count.

        The result is a one-element series named ``"len"`` with index ``[0]``.
        """
        return PandasLikeExpr(
            lambda df: [
                PandasLikeSeries._from_iterable(
                    [len(df._native_frame)],
                    name="len",
                    index=[0],
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    dtypes=self._dtypes,
                )
            ],
            depth=0,
            function_name="len",
            root_names=None,
            output_names=["len"],
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

    # --- horizontal ---
    def sum_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Add the evaluated expressions element-wise, treating nulls as 0."""
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = (s.fill_null(0) for _expr in parsed_exprs for s in _expr._call(df))
            return [reduce(lambda x, y: x + y, series)]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="sum_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def all_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Combine the evaluated expressions element-wise with ``&``."""
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = (s for _expr in parsed_exprs for s in _expr._call(df))
            return [reduce(lambda x, y: x & y, series)]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="all_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def any_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Combine the evaluated expressions element-wise with ``|``."""
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = (s for _expr in parsed_exprs for s in _expr._call(df))
            return [reduce(lambda x, y: x | y, series)]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="any_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def mean_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Average the evaluated expressions element-wise, skipping nulls.

        The numerator is the sum with nulls filled by 0; the denominator is
        the per-row count of non-null values.
        """
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            # Evaluate every expression once and reuse the result for both the
            # numerator and the denominator.
            evaluated = [s for _expr in parsed_exprs for s in _expr._call(df)]
            series = (s.fill_null(0) for s in evaluated)
            non_na = (1 - s.is_null() for s in evaluated)
            return [
                reduce(lambda x, y: x + y, series) / reduce(lambda x, y: x + y, non_na)
            ]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="mean_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def min_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Take the row-wise minimum of the evaluated expressions.

        The series are concatenated horizontally, reduced with the native
        frame's ``min(axis=1)``, and the result is named after the first series.
        """
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = [s for _expr in parsed_exprs for s in _expr._call(df)]

            return [
                PandasLikeSeries(
                    native_series=self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    )
                    ._native_frame.min(axis=1)
                    .rename(series[0].name, copy=False),
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    dtypes=self._dtypes,
                )
            ]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="min_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def max_horizontal(self, *exprs: IntoPandasLikeExpr) -> PandasLikeExpr:
        """Take the row-wise maximum of the evaluated expressions.

        The series are concatenated horizontally, reduced with the native
        frame's ``max(axis=1)``, and the result is named after the first series.
        """
        parsed_exprs = parse_into_exprs(*exprs, namespace=self)

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = [s for _expr in parsed_exprs for s in _expr._call(df)]

            return [
                PandasLikeSeries(
                    native_series=self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    )
                    ._native_frame.max(axis=1)
                    .rename(series[0].name, copy=False),
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    dtypes=self._dtypes,
                )
            ]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="max_horizontal",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )

    def concat(
        self,
        items: Iterable[PandasLikeDataFrame],
        *,
        how: Literal["horizontal", "vertical"],
    ) -> PandasLikeDataFrame:
        """Concatenate frames horizontally or vertically.

        Raises:
            NotImplementedError: if ``how`` is neither ``"horizontal"`` nor
                ``"vertical"``.
        """
        dfs: list[Any] = [item._native_frame for item in items]
        if how == "horizontal":
            return PandasLikeDataFrame(
                horizontal_concat(
                    dfs,
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                ),
                implementation=self._implementation,
                backend_version=self._backend_version,
                dtypes=self._dtypes,
            )
        if how == "vertical":
            return PandasLikeDataFrame(
                vertical_concat(
                    dfs,
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                ),
                implementation=self._implementation,
                backend_version=self._backend_version,
                dtypes=self._dtypes,
            )
        msg = f"Unsupported concatenation method: {how!r}"
        raise NotImplementedError(msg)

    def when(
        self,
        *predicates: IntoPandasLikeExpr,
    ) -> PandasWhen:
        """Start a when/then/otherwise chain.

        All predicates are combined with ``all_horizontal``.

        Raises:
            TypeError: if no predicate is given.
        """
        if not predicates:
            msg = "at least one predicate needs to be provided"
            raise TypeError(msg)

        # This namespace already carries the configuration needed to parse
        # the predicates, so no second namespace has to be built.
        condition = self.all_horizontal(*predicates)

        return PandasWhen(
            condition, self._implementation, self._backend_version, dtypes=self._dtypes
        )

    def concat_str(
        self,
        exprs: Iterable[IntoPandasLikeExpr],
        *more_exprs: IntoPandasLikeExpr,
        separator: str = "",
        ignore_nulls: bool = False,
    ) -> PandasLikeExpr:
        """Concatenate the evaluated expressions row-wise as strings.

        Each value is cast to ``String`` and joined with ``separator``. When
        ``ignore_nulls`` is false, a row with any null yields null; otherwise
        null values are replaced by the empty string before joining.
        """
        parsed_exprs: list[PandasLikeExpr] = [
            *parse_into_exprs(*exprs, namespace=self),
            *parse_into_exprs(*more_exprs, namespace=self),
        ]

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            # Evaluate every expression once; the string cast and the null
            # mask are both derived from the same evaluated series.
            evaluated = [s for _expr in parsed_exprs for s in _expr._call(df)]
            series = (s.cast(self._dtypes.String()) for s in evaluated)
            null_mask = [s.is_null() for s in evaluated]

            if not ignore_nulls:
                null_mask_result = reduce(lambda x, y: x | y, null_mask)
                result = reduce(lambda x, y: x + separator + y, series).zip_with(
                    ~null_mask_result, None
                )
            else:
                init_value, *values = [
                    s.zip_with(~nm, "") for s, nm in zip(series, null_mask)
                ]

                sep_array = init_value.__class__._from_iterable(
                    data=[separator] * len(init_value),
                    name="sep",
                    index=init_value._native_series.index,
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    dtypes=self._dtypes,
                )
                # A separator is emitted after each value that is non-null,
                # i.e. it is keyed on the mask of the *preceding* series.
                # NOTE(review): with this scheme a null in the final position
                # appears to leave a trailing separator - confirm intended.
                separators = (sep_array.zip_with(~nm, "") for nm in null_mask[:-1])
                result = reduce(
                    lambda x, y: x + y,
                    (s + v for s, v in zip(separators, values)),
                    init_value,
                )

            return [result]

        return self._create_expr_from_callable(
            func=func,
            depth=max(x._depth for x in parsed_exprs) + 1,
            function_name="concat_str",
            root_names=combine_root_names(parsed_exprs),
            output_names=reduce_output_names(parsed_exprs),
        )


class PandasWhen:
    """Callable holding the state of a when/then/otherwise chain.

    Calling an instance with a frame evaluates the condition and selects
    between the ``then`` value and the ``otherwise`` value.
    """

    def __init__(
        self,
        condition: PandasLikeExpr,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        then_value: Any = None,
        otherwise_value: Any = None,
        *,
        dtypes: DTypes,
    ) -> None:
        """Store the condition, the branch values and the backend configuration."""
        self._implementation = implementation
        self._backend_version = backend_version
        self._condition = condition
        self._then_value = then_value
        self._otherwise_value = otherwise_value
        self._dtypes = dtypes

    def __call__(self, df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
        """Evaluate the chain against ``df`` and return a single-series list.

        Where the condition holds the ``then`` value is used; elsewhere the
        ``otherwise`` value is used, or null when no ``otherwise`` was set.
        """
        from narwhals._expression_parsing import parse_into_expr
        from narwhals._pandas_like.utils import validate_column_comparand

        # `PandasLikeNamespace` is defined at module level in this file, so it
        # is resolved here at call time without an import.
        plx = PandasLikeNamespace(
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )

        condition = parse_into_expr(self._condition, namespace=plx)._call(df)[0]  # type: ignore[arg-type]
        try:
            value_series = parse_into_expr(self._then_value, namespace=plx)._call(df)[0]  # type: ignore[arg-type]
        except TypeError:
            # `self._then_value` is a scalar and can't be converted to an expression
            value_series = condition.__class__._from_iterable(  # type: ignore[call-arg]
                [self._then_value] * len(condition),
                name="literal",
                index=condition._native_series.index,
                implementation=self._implementation,
                backend_version=self._backend_version,
                dtypes=self._dtypes,
            )

        value_series = cast(PandasLikeSeries, value_series)

        value_series_native = value_series._native_series
        condition_native = validate_column_comparand(value_series_native.index, condition)

        if self._otherwise_value is None:
            # No fallback: rows where the condition is false are left to the
            # native `where` default.
            return [
                value_series._from_native_series(
                    value_series_native.where(condition_native)
                )
            ]
        try:
            otherwise_series = parse_into_expr(
                self._otherwise_value, namespace=plx
            )._call(df)[0]  # type: ignore[arg-type]
        except TypeError:
            # `self._otherwise_value` is a scalar and can't be converted to an expression
            return [
                value_series._from_native_series(
                    value_series_native.where(condition_native, self._otherwise_value)
                )
            ]
        else:
            return [value_series.zip_with(condition, otherwise_series)]

    def then(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasThen:
        """Record the ``then`` value and return a ``PandasThen`` wrapping this object."""
        self._then_value = value

        return PandasThen(
            self,
            depth=0,
            function_name="whenthen",
            root_names=None,
            output_names=None,
            implementation=self._implementation,
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )


class PandasThen(PandasLikeExpr):
    """Expression whose callable is a ``PandasWhen`` with a ``then`` value set."""

    def __init__(
        self,
        call: PandasWhen,
        *,
        depth: int,
        function_name: str,
        root_names: list[str] | None,
        output_names: list[str] | None,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        dtypes: DTypes,
    ) -> None:
        """Assign the expression attributes directly.

        The base-class initializer is not invoked here.
        """
        self._implementation = implementation
        self._backend_version = backend_version
        self._dtypes = dtypes

        self._call = call
        self._depth = depth
        self._function_name = function_name
        self._root_names = root_names
        self._output_names = output_names

    def otherwise(self, value: PandasLikeExpr | PandasLikeSeries | Any) -> PandasLikeExpr:
        """Record the ``otherwise`` value on the wrapped ``PandasWhen`` and return self."""
        # type ignore because we are setting the `_call` attribute to a
        # callable object of type `PandasWhen`, base class has the attribute as
        # only a `Callable`
        self._call._otherwise_value = value  # type: ignore[attr-defined]
        self._function_name = "whenotherwise"
        return self