# Buffteks-Website/streamlit-venv/lib/python3.10/site-packages/narwhals/_arrow/series.py

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import Sequence
from typing import overload

from narwhals._arrow.utils import cast_for_truediv
from narwhals._arrow.utils import floordiv_compat
from narwhals._arrow.utils import narwhals_to_native_dtype
from narwhals._arrow.utils import native_to_narwhals_dtype
from narwhals._arrow.utils import parse_datetime_format
from narwhals._arrow.utils import validate_column_comparand
from narwhals.utils import Implementation
from narwhals.utils import generate_temporary_column_name

if TYPE_CHECKING:
    from types import ModuleType

    import pyarrow as pa
    from typing_extensions import Self

    from narwhals._arrow.dataframe import ArrowDataFrame
    from narwhals._arrow.namespace import ArrowNamespace
    from narwhals.dtypes import DType
    from narwhals.typing import DTypes


class ArrowSeries:
    def __init__(
        self,
        native_series: pa.ChunkedArray,
        *,
        name: str,
        backend_version: tuple[int, ...],
        dtypes: DTypes,
    ) -> None:
        """Wrap a PyArrow chunked array together with its narwhals metadata.

        Arguments:
            native_series: The PyArrow data this series wraps.
            name: Name carried by the series.
            backend_version: Version tuple of the backend library.
            dtypes: Dtypes object handed to the dtype conversion helpers.
        """
        # The wrapped data and its label.
        self._native_series = native_series
        self._name = name
        # Backend bookkeeping, forwarded to every series derived from this one.
        self._backend_version = backend_version
        self._dtypes = dtypes
        self._implementation = Implementation.PYARROW

    def _from_native_series(self, series: Any) -> Self:
        """Build a new series around `series`, reusing this series' metadata.

        A plain `pa.Array` is wrapped into a single-chunk chunked array first;
        any other object is passed to the constructor unchanged.
        """
        import pyarrow as pa  # ignore-banned-import()

        native = pa.chunked_array([series]) if isinstance(series, pa.Array) else series
        metadata = {
            "name": self._name,
            "backend_version": self._backend_version,
            "dtypes": self._dtypes,
        }
        return self.__class__(native, **metadata)

    @classmethod
    def _from_iterable(
        cls: type[Self],
        data: Iterable[Any],
        name: str,
        *,
        backend_version: tuple[int, ...],
        dtypes: DTypes,
    ) -> Self:
        """Create a series from `data` by storing it as a single chunk.

        Arguments:
            data: Values passed to `pa.chunked_array` as the only chunk.
            name: Name of the resulting series.
            backend_version: Version tuple of the backend library.
            dtypes: Dtypes object forwarded to the constructor.
        """
        import pyarrow as pa  # ignore-banned-import()

        single_chunk = pa.chunked_array([data])
        return cls(
            single_chunk, name=name, backend_version=backend_version, dtypes=dtypes
        )

    def __narwhals_namespace__(self) -> ArrowNamespace:
        """Return an `ArrowNamespace` sharing this series' backend version and dtypes."""
        # Imported lazily, inside the method, rather than at module level.
        from narwhals._arrow.namespace import ArrowNamespace

        namespace = ArrowNamespace(
            backend_version=self._backend_version, dtypes=self._dtypes
        )
        return namespace

    def __len__(self) -> int:
        return len(self._native_series)

    def __eq__(self, other: object) -> Self:  # type: ignore[override]
        """Element-wise `==` via `pyarrow.compute.equal`, returned as a new series."""
        import pyarrow.compute as pc

        rhs = validate_column_comparand(other)
        outcome = pc.equal(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __ne__(self, other: object) -> Self:  # type: ignore[override]
        """Element-wise `!=` via `pyarrow.compute.not_equal`, returned as a new series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        outcome = pc.not_equal(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __ge__(self, other: Any) -> Self:
        """Element-wise `>=` via `pyarrow.compute.greater_equal`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        outcome = pc.greater_equal(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __gt__(self, other: Any) -> Self:
        """Element-wise `>` via `pyarrow.compute.greater`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        outcome = pc.greater(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __le__(self, other: Any) -> Self:
        """Element-wise `<=` via `pyarrow.compute.less_equal`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        outcome = pc.less_equal(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __lt__(self, other: Any) -> Self:
        """Element-wise `<` via `pyarrow.compute.less`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        outcome = pc.less(self._native_series, rhs)
        return self._from_native_series(outcome)

    def __and__(self, other: Any) -> Self:
        """Compute `self & other` with `pyarrow.compute.and_kleene`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        conjunction = pc.and_kleene(self._native_series, rhs)
        return self._from_native_series(conjunction)

    def __rand__(self, other: Any) -> Self:
        """Compute `other & self` with `pyarrow.compute.and_kleene` (reflected operand order)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        lhs = validate_column_comparand(other)
        conjunction = pc.and_kleene(lhs, self._native_series)
        return self._from_native_series(conjunction)

    def __or__(self, other: Any) -> Self:
        """Compute `self | other` with `pyarrow.compute.or_kleene`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        disjunction = pc.or_kleene(self._native_series, rhs)
        return self._from_native_series(disjunction)

    def __ror__(self, other: Any) -> Self:
        """Compute `other | self` with `pyarrow.compute.or_kleene` (reflected operand order)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        lhs = validate_column_comparand(other)
        disjunction = pc.or_kleene(lhs, self._native_series)
        return self._from_native_series(disjunction)

    def __add__(self, other: Any) -> Self:
        """Compute `self + other` with `pyarrow.compute.add`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        total = pc.add(self._native_series, rhs)
        return self._from_native_series(total)

    def __radd__(self, other: Any) -> Self:
        """Compute `other + self` by delegating to `self + other`."""
        total = self + other
        return total  # type: ignore[no-any-return]

    def __sub__(self, other: Any) -> Self:
        """Compute `self - other` with `pyarrow.compute.subtract`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        difference = pc.subtract(self._native_series, rhs)
        return self._from_native_series(difference)

    def __rsub__(self, other: Any) -> Self:
        """Compute `other - self` as the negation of `self - other`."""
        forward_difference = self - other
        # Multiplying by -1 flips the sign of the forward difference.
        return forward_difference * (-1)  # type: ignore[no-any-return]

    def __mul__(self, other: Any) -> Self:
        """Compute `self * other` with `pyarrow.compute.multiply`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rhs = validate_column_comparand(other)
        product = pc.multiply(self._native_series, rhs)
        return self._from_native_series(product)

    def __rmul__(self, other: Any) -> Self:
        """Compute `other * self` by delegating to `self * other`."""
        product = self * other
        return product  # type: ignore[no-any-return]

    def __pow__(self, other: Any) -> Self:
        """Compute `self ** other` with `pyarrow.compute.power`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        exponent = validate_column_comparand(other)
        powered = pc.power(self._native_series, exponent)
        return self._from_native_series(powered)

    def __rpow__(self, other: Any) -> Self:
        """Compute `other ** self` with `pyarrow.compute.power` (this series is the exponent)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        base = validate_column_comparand(other)
        powered = pc.power(base, self._native_series)
        return self._from_native_series(powered)

    def __floordiv__(self, other: Any) -> Self:
        """Compute `self // other` through the `floordiv_compat` helper."""
        divisor = validate_column_comparand(other)
        quotient = floordiv_compat(self._native_series, divisor)
        return self._from_native_series(quotient)

    def __rfloordiv__(self, other: Any) -> Self:
        """Compute `other // self` through the `floordiv_compat` helper."""
        dividend = validate_column_comparand(other)
        quotient = floordiv_compat(dividend, self._native_series)
        return self._from_native_series(quotient)

    def __truediv__(self, other: Any) -> Self:
        """Compute `self / other`.

        Operands are first passed through `cast_for_truediv`, then divided with
        `pyarrow.compute.divide`.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        divisor = validate_column_comparand(other)
        is_array_like = isinstance(divisor, (pa.Array, pa.ChunkedArray))
        if not is_array_like:
            # Anything that is not an array is treated as a scalar.
            divisor = pa.scalar(divisor)
        operands = cast_for_truediv(self._native_series, divisor)
        return self._from_native_series(pc.divide(*operands))

    def __rtruediv__(self, other: Any) -> Self:
        """Compute `other / self`.

        Operands are first passed through `cast_for_truediv`, then divided with
        `pyarrow.compute.divide`.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        dividend = validate_column_comparand(other)
        is_array_like = isinstance(dividend, (pa.Array, pa.ChunkedArray))
        if not is_array_like:
            # Anything that is not an array is treated as a scalar.
            dividend = pa.scalar(dividend)
        operands = cast_for_truediv(dividend, self._native_series)
        return self._from_native_series(pc.divide(*operands))

    def __mod__(self, other: Any) -> Self:
        """Compute `self % other` as `self - (self // other) * other`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        divisor = validate_column_comparand(other)
        quotient = (self // divisor)._native_series
        whole_part = pc.multiply(quotient, divisor)
        remainder = pc.subtract(self._native_series, whole_part)
        return self._from_native_series(remainder)

    def __rmod__(self, other: Any) -> Self:
        """Compute `other % self` as `other - (other // self) * self`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        dividend = validate_column_comparand(other)
        quotient = (dividend // self)._native_series
        whole_part = pc.multiply(quotient, native)
        remainder = pc.subtract(dividend, whole_part)
        return self._from_native_series(remainder)

    def __invert__(self) -> Self:
        """Compute `~self` with `pyarrow.compute.invert`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        inverted = pc.invert(self._native_series)
        return self._from_native_series(inverted)

    def len(self) -> int:
        return len(self._native_series)

    def filter(self, other: Any) -> Self:
        """Keep the elements selected by `other`.

        A plain list made up solely of `bool` values is used as the mask as-is;
        every other input goes through `validate_column_comparand` first.
        """
        is_bool_list = isinstance(other, list) and all(
            isinstance(flag, bool) for flag in other
        )
        predicate = other if is_bool_list else validate_column_comparand(other)
        return self._from_native_series(self._native_series.filter(predicate))

    def mean(self) -> int:
        """Return the result of `pyarrow.compute.mean` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.mean(native)  # type: ignore[no-any-return]

    def min(self) -> int:
        """Return the result of `pyarrow.compute.min` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.min(native)  # type: ignore[no-any-return]

    def max(self) -> int:
        """Return the result of `pyarrow.compute.max` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.max(native)  # type: ignore[no-any-return]

    def sum(self) -> int:
        """Return the result of `pyarrow.compute.sum` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.sum(native)  # type: ignore[no-any-return]

    def drop_nulls(self) -> ArrowSeries:
        """Return a new series with null entries removed (`pyarrow.compute.drop_null`)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        without_nulls = pc.drop_null(self._native_series)
        return self._from_native_series(without_nulls)

    def shift(self, n: int) -> Self:
        """Shift the values by `n` positions, filling vacated slots with nulls.

        A positive `n` moves values towards the end of the series, a negative
        `n` towards the start, and `n == 0` returns the data unchanged. The
        result always has the same length as the input: shifting by more than
        the series length yields an all-null series.

        Arguments:
            n: Number of positions to shift by.

        Returns:
            A new series of the same length and type.
        """
        import pyarrow as pa  # ignore-banned-import()

        ca = self._native_series
        length = len(ca)
        # Clamp the shift: without this, |n| > len(ca) would emit |n| nulls and
        # produce a result longer than the input.
        n_nulls = min(abs(n), length)
        if n_nulls == 0:
            return self._from_native_series(ca)

        padding = pa.nulls(n_nulls, ca.type)
        if n > 0:
            kept = ca.slice(0, length - n_nulls)
            result = pa.concat_arrays([padding, *kept.chunks])
        else:
            kept = ca.slice(n_nulls)
            result = pa.concat_arrays([*kept.chunks, padding])
        return self._from_native_series(result)

    def std(self, ddof: int = 1) -> int:
        """Return `pyarrow.compute.stddev` of the native series.

        Arguments:
            ddof: Delta degrees of freedom, forwarded to `stddev`.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.stddev(native, ddof=ddof)  # type: ignore[no-any-return]

    def count(self) -> int:
        """Return the result of `pyarrow.compute.count` (default mode) over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.count(native)  # type: ignore[no-any-return]

    def n_unique(self) -> int:
        """Return how many distinct values the series holds.

        The distinct values come from `pyarrow.compute.unique` and are counted
        with `mode="all"`.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        distinct = pc.unique(self._native_series)
        return pc.count(distinct, mode="all")  # type: ignore[no-any-return]

    def __native_namespace__(self: Self) -> ModuleType:
        """Return the native namespace of this series' implementation.

        Raises:
            AssertionError: If the implementation is not `Implementation.PYARROW`.
        """
        if self._implementation is not Implementation.PYARROW:  # pragma: no cover
            msg = f"Expected pyarrow, got: {type(self._implementation)}"
            raise AssertionError(msg)
        return self._implementation.to_native_namespace()

    @property
    def name(self) -> str:
        return self._name

    def __narwhals_series__(self) -> Self:
        return self

    @overload
    def __getitem__(self, idx: int) -> Any: ...

    @overload
    def __getitem__(self, idx: slice | Sequence[int]) -> Self: ...

    def __getitem__(self, idx: int | slice | Sequence[int]) -> Any | Self:
        if isinstance(idx, int):
            return self._native_series[idx]
        if isinstance(idx, Sequence):
            return self._from_native_series(self._native_series.take(idx))
        return self._from_native_series(self._native_series[idx])

    def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
        """Return a copy with `values[i]` written at row `indices[i]`.

        Arguments:
            indices: Target row position(s). A single `int` is paired with a
                single value. Negative positions count from the end.
            values: Replacement value(s), given as another series of this
                class, a PyArrow array, or anything `pa.array` accepts.

        Returns:
            A new series with the selected rows replaced.

        Notes:
            `replace_with_mask` consumes replacements in ascending row order,
            so the values are reordered to follow their sorted target
            positions. Duplicate positions are not de-duplicated here
            (NOTE(review): PyArrow is expected to reject the resulting
            mask/replacement length mismatch - confirm).
        """
        import numpy as np  # ignore-banned-import
        import pyarrow as pa  # ignore-banned-import
        import pyarrow.compute as pc  # ignore-banned-import

        ca = self._native_series
        single_index = isinstance(indices, int)
        positions = np.asarray([indices] if single_index else indices, dtype=np.int64)
        # Normalise negative positions so that sorting reflects real row order.
        positions = np.where(positions < 0, positions + len(ca), positions)

        mask = np.zeros(len(ca), dtype=bool)
        mask[positions] = True

        if isinstance(values, self.__class__):
            values = validate_column_comparand(values)
        if isinstance(values, pa.ChunkedArray):
            values = values.combine_chunks()
        if not isinstance(values, pa.Array):
            if single_index and not isinstance(values, (list, tuple)):
                # A lone index pairs with a lone value.
                values = [values]
            values = pa.array(values)

        # Reorder the replacements by ascending target position; indexing
        # `values` by the target positions themselves would pick the wrong
        # elements (or go out of bounds).
        order = np.argsort(positions, kind="stable")
        result = pc.replace_with_mask(ca, mask, values.take(order))
        return self._from_native_series(result)

    def to_list(self) -> Any:
        """Return the values as Python objects via the native `to_pylist`."""
        native = self._native_series
        return native.to_pylist()

    def __array__(self, dtype: Any = None, copy: bool | None = None) -> Any:
        """Forward the array-protocol call to the native series' own `__array__`."""
        native = self._native_series
        return native.__array__(dtype=dtype, copy=copy)

    def to_numpy(self) -> Any:
        """Return the result of calling `to_numpy` on the native series."""
        native = self._native_series
        return native.to_numpy()

    def alias(self, name: str) -> Self:
        """Return a new series wrapping the same native data under the name `name`."""
        unchanged = {
            "backend_version": self._backend_version,
            "dtypes": self._dtypes,
        }
        return self.__class__(self._native_series, name=name, **unchanged)

    @property
    def dtype(self: Self) -> DType:
        """Narwhals dtype corresponding to the native series' type."""
        native_type = self._native_series.type
        return native_to_narwhals_dtype(native_type, self._dtypes)

    def abs(self) -> Self:
        """Return the element-wise absolute value (`pyarrow.compute.abs`)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        magnitudes = pc.abs(self._native_series)
        return self._from_native_series(magnitudes)

    def cum_sum(self) -> Self:
        """Return the running total (`pyarrow.compute.cumulative_sum`)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        running_total = pc.cumulative_sum(self._native_series)
        return self._from_native_series(running_total)

    def round(self, decimals: int) -> Self:
        """Round to `decimals` places using the "half_towards_infinity" round mode."""
        import pyarrow.compute as pc  # ignore-banned-import()

        rounded = pc.round(
            self._native_series, decimals, round_mode="half_towards_infinity"
        )
        return self._from_native_series(rounded)

    def diff(self) -> Self:
        """Return the pairwise differences (`pyarrow.compute.pairwise_diff`)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        # The chunks are combined into one array before differencing.
        contiguous = self._native_series.combine_chunks()
        return self._from_native_series(pc.pairwise_diff(contiguous))

    def any(self) -> bool:
        """Return the result of `pyarrow.compute.any` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.any(native)  # type: ignore[no-any-return]

    def all(self) -> bool:
        """Return the result of `pyarrow.compute.all` over the native series."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        return pc.all(native)  # type: ignore[no-any-return]

    def is_between(
        self, lower_bound: Any, upper_bound: Any, closed: str = "both"
    ) -> Self:
        """Flag the elements lying between `lower_bound` and `upper_bound`.

        Arguments:
            lower_bound: Lower end of the interval.
            upper_bound: Upper end of the interval.
            closed: Which ends are inclusive: "left", "right", "none" or "both".

        Raises:
            AssertionError: If `closed` is none of the four accepted values.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        if closed not in ("left", "right", "none", "both"):  # pragma: no cover
            raise AssertionError

        # An inclusive end uses the "or equal" comparison, an exclusive end the strict one.
        lower_op = pc.greater_equal if closed in ("left", "both") else pc.greater
        upper_op = pc.less_equal if closed in ("right", "both") else pc.less

        above_lower = lower_op(native, lower_bound)
        below_upper = upper_op(native, upper_bound)
        return self._from_native_series(pc.and_kleene(above_lower, below_upper))

    def is_empty(self) -> bool:
        return len(self) == 0

    def is_null(self) -> Self:
        """Return a series built from the native `is_null()` result."""
        null_flags = self._native_series.is_null()
        return self._from_native_series(null_flags)

    def cast(self, dtype: DType) -> Self:
        """Cast the series to the native type corresponding to the narwhals `dtype`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native_dtype = narwhals_to_native_dtype(dtype, self._dtypes)
        converted = pc.cast(self._native_series, native_dtype)
        return self._from_native_series(converted)

    def null_count(self: Self) -> int:
        """Return the `null_count` attribute of the native series."""
        native = self._native_series
        return native.null_count  # type: ignore[no-any-return]

    def head(self, n: int) -> Self:
        """Return the leading rows of the series.

        Arguments:
            n: Number of rows to keep. A negative value keeps everything
                except the last `abs(n)` rows.
        """
        native = self._native_series
        # For negative n the slice length is clamped at zero.
        stop = n if n >= 0 else max(0, len(native) + n)
        return self._from_native_series(native.slice(0, stop))

    def tail(self, n: int) -> Self:
        """Return the trailing rows of the series.

        Arguments:
            n: Number of rows to keep. A negative value keeps everything
                except the first `abs(n)` rows.
        """
        native = self._native_series
        # For non-negative n the start offset is clamped at zero.
        start = max(0, len(native) - n) if n >= 0 else abs(n)
        return self._from_native_series(native.slice(start))

    def is_in(self, other: Any) -> Self:
        """Flag the elements that appear in `other` (`pyarrow.compute.is_in`).

        Arguments:
            other: Candidate values; converted with `pa.array`.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        candidates = pa.array(other)
        membership = pc.is_in(self._native_series, value_set=candidates)
        return self._from_native_series(membership)

    def arg_true(self) -> Self:
        """Return the positions found by `numpy.flatnonzero` on the native series."""
        import numpy as np  # ignore-banned-import

        true_positions = np.flatnonzero(self._native_series)
        metadata = {
            "name": self.name,
            "backend_version": self._backend_version,
            "dtypes": self._dtypes,
        }
        return self._from_iterable(true_positions, **metadata)

    def item(self: Self, index: int | None = None) -> Any:
        if index is None:
            if len(self) != 1:
                msg = (
                    "can only call '.item()' if the Series is of length 1,"
                    f" or an explicit index is provided (Series is of length {len(self)})"
                )
                raise ValueError(msg)
            return self._native_series[0]
        return self._native_series[index]

    def value_counts(
        self: Self,
        *,
        sort: bool = False,
        parallel: bool = False,
        name: str | None = None,
        normalize: bool = False,
    ) -> ArrowDataFrame:
        """Count how often each distinct value occurs.

        Arguments:
            sort: Order the result by the count column, descending.
            parallel: Unused; accepted for compatibility only.
            name: Name of the count column. Defaults to "proportion" when
                `normalize` is set, otherwise "count".
            normalize: Divide each count by the total of all counts.

        Returns:
            A two-column frame: the distinct values, then their counts.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        from narwhals._arrow.dataframe import ArrowDataFrame

        index_name_ = self._name if self._name is not None else "index"
        if name:
            value_name_ = name
        elif normalize:
            value_name_ = "proportion"
        else:
            value_name_ = "count"

        counted = pc.value_counts(self._native_series)
        values = counted.field("values")
        counts = counted.field("counts")

        if normalize:
            operands = cast_for_truediv(counts, pc.sum(counts))
            counts = pc.divide(*operands)

        table = pa.Table.from_arrays([values, counts], names=[index_name_, value_name_])
        if sort:
            table = table.sort_by([(value_name_, "descending")])

        return ArrowDataFrame(
            table, backend_version=self._backend_version, dtypes=self._dtypes
        )

    def zip_with(self: Self, mask: Self, other: Self) -> Self:
        """Pick values from this series where `mask` holds, else from `other`.

        Implemented with `pyarrow.compute.if_else` on the combined mask chunks.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        condition = mask._native_series.combine_chunks()
        chosen = pc.if_else(condition, self._native_series, other._native_series)
        return self._from_native_series(chosen)

    def sample(
        self: Self,
        n: int | None = None,
        *,
        fraction: float | None = None,
        with_replacement: bool = False,
        seed: int | None = None,
    ) -> Self:
        """Draw a random sample of rows.

        Arguments:
            n: Number of rows to draw.
            fraction: Share of rows to draw; only consulted when `n` is None.
            with_replacement: Whether the same row may be drawn more than once.
            seed: Seed for the NumPy random generator.
        """
        import numpy as np  # ignore-banned-import
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        num_rows = len(self)

        if fraction is not None and n is None:
            n = int(num_rows * fraction)

        generator = np.random.default_rng(seed=seed)
        candidates = np.arange(0, num_rows)
        drawn = generator.choice(candidates, size=n, replace=with_replacement)

        return self._from_native_series(pc.take(native, drawn))

    def fill_null(self: Self, value: Any) -> Self:
        """Replace null entries with `value`, converted to a scalar of the series' type."""
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        replacement = pa.scalar(value, native.type)
        return self._from_native_series(pc.fill_null(native, replacement))

    def to_frame(self: Self) -> ArrowDataFrame:
        """Return a single-column frame holding this series under its own name."""
        import pyarrow as pa  # ignore-banned-import()

        from narwhals._arrow.dataframe import ArrowDataFrame

        table = pa.Table.from_arrays([self._native_series], names=[self.name])
        frame = ArrowDataFrame(
            table, backend_version=self._backend_version, dtypes=self._dtypes
        )
        return frame

    def to_pandas(self: Self) -> Any:
        """Return a `pandas.Series` built from the native data, keeping the name."""
        import pandas as pd  # ignore-banned-import()

        native = self._native_series
        return pd.Series(native, name=self.name)

    def is_duplicated(self: Self) -> ArrowSeries:
        """Delegate to the frame-level `is_duplicated`, named after this series."""
        frame = self.to_frame()
        duplicated = frame.is_duplicated()
        return duplicated.alias(self.name)

    def is_unique(self: Self) -> ArrowSeries:
        """Delegate to the frame-level `is_unique`, named after this series."""
        frame = self.to_frame()
        unique_flags = frame.is_unique()
        return unique_flags.alias(self.name)

    def is_first_distinct(self: Self) -> Self:
        """Flag the first occurrence of every distinct value.

        Rows are numbered, grouped by value, and the smallest row number per
        group is taken; a row is flagged when its number is one of those minima.
        """
        import numpy as np  # ignore-banned-import
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        row_number = pa.array(np.arange(len(self)))
        # Temporary column name generated against the series' own name.
        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])

        table = pa.Table.from_arrays([self._native_series], names=[self.name])
        numbered = table.append_column(col_token, row_number)
        per_value = numbered.group_by(self.name).aggregate([(col_token, "min")])
        first_distinct_index = per_value.column(f"{col_token}_min")

        return self._from_native_series(pc.is_in(row_number, first_distinct_index))

    def is_last_distinct(self: Self) -> Self:
        """Flag the last occurrence of every distinct value.

        Rows are numbered, grouped by value, and the largest row number per
        group is taken; a row is flagged when its number is one of those maxima.
        """
        import numpy as np  # ignore-banned-import
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        row_number = pa.array(np.arange(len(self)))
        # Temporary column name generated against the series' own name.
        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])

        table = pa.Table.from_arrays([self._native_series], names=[self.name])
        numbered = table.append_column(col_token, row_number)
        per_value = numbered.group_by(self.name).aggregate([(col_token, "max")])
        last_distinct_index = per_value.column(f"{col_token}_max")

        return self._from_native_series(pc.is_in(row_number, last_distinct_index))

    def is_sorted(self: Self, *, descending: bool = False) -> bool:
        """Check whether every element is ordered relative to its successor.

        Arguments:
            descending: Test for non-increasing order instead of non-decreasing.

        Raises:
            TypeError: If `descending` is not a `bool`.
        """
        if not isinstance(descending, bool):
            msg = f"argument 'descending' should be boolean, found {type(descending)}"
            raise TypeError(msg)
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        in_order = pc.greater_equal if descending else pc.less_equal
        # Compare each element with the one that follows it.
        return pc.all(in_order(native[:-1], native[1:]))  # type: ignore[no-any-return]

    def unique(self: Self) -> ArrowSeries:
        """Return the distinct values (`pyarrow.compute.unique`)."""
        import pyarrow.compute as pc  # ignore-banned-import()

        distinct = pc.unique(self._native_series)
        return self._from_native_series(distinct)

    def sort(
        self: Self, *, descending: bool = False, nulls_last: bool = False
    ) -> ArrowSeries:
        """Return a sorted copy of the series.

        Arguments:
            descending: Sort from largest to smallest.
            nulls_last: Place nulls at the end instead of at the start.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        if descending:
            order = "descending"
        else:
            order = "ascending"
        if nulls_last:
            null_placement = "at_end"
        else:
            null_placement = "at_start"

        sorted_indices = pc.array_sort_indices(
            native, order=order, null_placement=null_placement
        )
        return self._from_native_series(pc.take(native, sorted_indices))

    def to_dummies(
        self: Self, *, separator: str = "_", drop_first: bool = False
    ) -> ArrowDataFrame:
        """One-hot encode the series into a frame of `uint8` indicator columns.

        Arguments:
            separator: Text placed between the series name and each value in
                the generated column names.
            drop_first: Omit the first column (in sorted name order).
        """
        import numpy as np  # ignore-banned-import
        import pyarrow as pa  # ignore-banned-import()

        from narwhals._arrow.dataframe import ArrowDataFrame

        encoded = self._native_series.dictionary_encode().combine_chunks()
        categories = encoded.dictionary
        n_rows = len(encoded)

        # One row of flags per category, one column per element of the series.
        indicators = np.zeros((len(categories), n_rows), np.uint8)
        indicators[encoded.indices, np.arange(n_rows)] = 1
        names = [f"{self._name}{separator}{v}" for v in categories]

        frame = ArrowDataFrame(
            pa.Table.from_arrays(indicators, names=names),
            backend_version=self._backend_version,
            dtypes=self._dtypes,
        )
        kept_names = sorted(names)[int(drop_first) :]
        return frame.select(*kept_names)

    def quantile(
        self: Self,
        quantile: float,
        interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"],
    ) -> Any:
        """Return the requested quantile (`pyarrow.compute.quantile`).

        Arguments:
            quantile: The quantile to compute, passed as `q`.
            interpolation: Interpolation method forwarded to PyArrow.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        quantiles = pc.quantile(
            self._native_series, q=quantile, interpolation=interpolation
        )
        # Only the first element of the result is returned.
        return quantiles[0]

    def gather_every(self: Self, n: int, offset: int = 0) -> Self:
        """Return every `n`-th element, starting at position `offset`."""
        strided = self._native_series[offset::n]
        return self._from_native_series(strided)

    def clip(
        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
    ) -> Self:
        """Limit the values to the range `[lower_bound, upper_bound]`.

        Each bound is converted to a scalar of the array's type and applied with
        `max_element_wise` (lower) followed by `min_element_wise` (upper).
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._native_series
        lower = pa.scalar(lower_bound, type=native.type)
        raised = pc.max_element_wise(native, lower)
        upper = pa.scalar(upper_bound, type=raised.type)
        clipped = pc.min_element_wise(raised, upper)

        return self._from_native_series(clipped)

    def to_arrow(self: Self) -> pa.Array:
        """Return the native data with its chunks combined."""
        native = self._native_series
        return native.combine_chunks()

    def mode(self: Self) -> ArrowSeries:
        """Return the value(s) occurring most often.

        Built on `value_counts`: rows whose count equals the maximum count are
        kept, and the column named after this series is returned.
        """
        plx = self.__narwhals_namespace__()
        # Temporary name for the count column, generated against the series' own name.
        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
        counts = self.value_counts(name=col_token, normalize=False)
        has_max_count = plx.col(col_token) == plx.col(col_token).max()
        return counts.filter(has_max_count)[self.name]

    def __iter__(self: Self) -> Iterator[Any]:
        yield from self._native_series.__iter__()

    @property
    def shape(self) -> tuple[int]:
        return (len(self._native_series),)

    @property
    def dt(self) -> ArrowSeriesDateTimeNamespace:
        """Datetime namespace bound to this series."""
        namespace = ArrowSeriesDateTimeNamespace(self)
        return namespace

    @property
    def cat(self) -> ArrowSeriesCatNamespace:
        """Categorical namespace bound to this series."""
        namespace = ArrowSeriesCatNamespace(self)
        return namespace

    @property
    def str(self) -> ArrowSeriesStringNamespace:
        """String namespace bound to this series."""
        namespace = ArrowSeriesStringNamespace(self)
        return namespace


class ArrowSeriesDateTimeNamespace:
    """Temporal (`.dt`) operations for a PyArrow-backed `ArrowSeries`.

    Each method reads the wrapped series' native array, applies PyArrow
    compute kernels or casts to it, and wraps the outcome in a new series via
    `ArrowSeries._from_native_series`.
    """

    def __init__(self: Self, series: ArrowSeries) -> None:
        self._arrow_series = series

    def to_string(self: Self, format: str) -> ArrowSeries:  # noqa: A002
        """Format every value as a string using a strftime-style `format`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        # PyArrow differs from other libraries in that %S also prints out
        # the fractional part of the second, so an explicit fractional
        # directive following %S is dropped before formatting.
        # https://arrow.apache.org/docs/python/generated/pyarrow.compute.strftime.html
        pyarrow_format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(
            pc.strftime(native, pyarrow_format)
        )

    def replace_time_zone(self: Self, time_zone: str | None) -> ArrowSeries:
        """Set the time zone to `time_zone`, or drop it when `time_zone` is None.

        The values first go through `pc.local_timestamp`; when a zone is
        requested, `pc.assume_timezone` is then applied to that result.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        local = pc.local_timestamp(self._arrow_series._native_series)
        result = local if time_zone is None else pc.assume_timezone(local, time_zone)
        return self._arrow_series._from_native_series(result)

    def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:
        """Cast the values to a timestamp type carrying `time_zone`.

        A series without a time zone is first given the "UTC" zone through
        `replace_time_zone`. The native time unit is kept.
        """
        import pyarrow as pa  # ignore-banned-import

        series = self._arrow_series
        if series.dtype.time_zone is None:  # type: ignore[attr-defined]
            source = self.replace_time_zone("UTC")._native_series
        else:
            source = series._native_series

        target_type = pa.timestamp(series._native_series.type.unit, time_zone)
        return series._from_native_series(source.cast(target_type))

    def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries:
        """Return Date/Datetime values as integers expressed in `time_unit`.

        Any `time_unit` other than "ns" or "us" is handled as milliseconds.

        Raises:
            TypeError: If the series dtype is neither Date nor Datetime.
        """
        import pyarrow as pa  # ignore-banned-import
        import pyarrow.compute as pc  # ignore-banned-import

        series = self._arrow_series
        native = series._native_series
        dtype = series.dtype
        dtypes = series._dtypes

        if dtype == dtypes.Datetime:
            unit = dtype.time_unit  # type: ignore[attr-defined]
            as_int = native.cast(pa.int64())

            # Number of decimal sub-second digits carried by each time unit.
            source_digits = {"s": 0, "ms": 3, "us": 6, "ns": 9}.get(unit)
            if source_digits is None:  # pragma: no cover
                msg = f"unexpected time unit {unit}, please report an issue at https://github.com/narwhals-dev/narwhals"
                raise AssertionError(msg)
            target_digits = 9 if time_unit == "ns" else 6 if time_unit == "us" else 3

            # Moving to a finer unit multiplies by a power of ten, moving to a
            # coarser unit floor-divides by one, and equal units need no work.
            shift = target_digits - source_digits
            if shift > 0:
                result = pc.multiply(as_int, 10**shift)
            elif shift < 0:
                result = floordiv_compat(as_int, 10 ** (-shift))
            else:
                result = as_int
        elif dtype == dtypes.Date:
            # The integer form of a date is multiplied by 86400 (seconds per
            # day) and then scaled from seconds to the requested unit.
            seconds = pc.multiply(native.cast(pa.int32()), 86400)
            per_second = (
                1_000_000_000
                if time_unit == "ns"
                else 1_000_000
                if time_unit == "us"
                else 1_000
            )
            result = pc.multiply(seconds, per_second)
        else:
            msg = "Input should be either of Date or Datetime type"
            raise TypeError(msg)
        return series._from_native_series(result)

    def date(self: Self) -> ArrowSeries:
        """Cast the values to PyArrow's `date32` type."""
        import pyarrow as pa  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(native.cast(pa.date32()))

    def year(self: Self) -> ArrowSeries:
        """Extract the year via `pc.year`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.year(native))

    def month(self: Self) -> ArrowSeries:
        """Extract the month via `pc.month`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.month(native))

    def day(self: Self) -> ArrowSeries:
        """Extract the day via `pc.day`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.day(native))

    def hour(self: Self) -> ArrowSeries:
        """Extract the hour via `pc.hour`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.hour(native))

    def minute(self: Self) -> ArrowSeries:
        """Extract the minute via `pc.minute`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.minute(native))

    def second(self: Self) -> ArrowSeries:
        """Extract the second via `pc.second`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.second(native))

    def millisecond(self: Self) -> ArrowSeries:
        """Extract the millisecond component via `pc.millisecond`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.millisecond(native))

    def microsecond(self: Self) -> ArrowSeries:
        """Return PyArrow's millisecond component times 1000 plus its microsecond component."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        from_millis = pc.multiply(pc.millisecond(native), 1000)
        total = pc.add(from_millis, pc.microsecond(native))

        return self._arrow_series._from_native_series(total)

    def nanosecond(self: Self) -> ArrowSeries:
        """Return `microsecond()` times 1000 plus PyArrow's nanosecond component."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        from_micros = pc.multiply(self.microsecond()._native_series, 1000)
        total = pc.add(from_micros, pc.nanosecond(native))
        return self._arrow_series._from_native_series(total)

    def ordinal_day(self: Self) -> ArrowSeries:
        """Extract the day of the year via `pc.day_of_year`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        return self._arrow_series._from_native_series(pc.day_of_year(native))

    def total_minutes(self: Self) -> ArrowSeries:
        """Return the values expressed in minutes, as int64.

        The native values are divided by the number of native-unit ticks in a
        minute. Presumably the series holds durations - the code only
        requires that the native type exposes a `unit`.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        unit = native.type.unit

        # Native-unit ticks per minute.
        ticks_per_minute = {
            "s": 60,
            "ms": 60 * 1_000,
            "us": 60 * 1_000_000,
            "ns": 60 * 1_000_000_000,
        }
        divisor = pa.scalar(ticks_per_minute[unit], type=pa.int64())

        minutes = pc.cast(pc.divide(native, divisor), pa.int64())
        return self._arrow_series._from_native_series(minutes)

    def total_seconds(self: Self) -> ArrowSeries:
        """Return the values expressed in seconds, as int64.

        The native values are divided by the number of native-unit ticks in a
        second.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        unit = native.type.unit

        # Native-unit ticks per second.
        ticks_per_second = {
            "s": 1,
            "ms": 1_000,
            "us": 1_000_000,
            "ns": 1_000_000_000,
        }
        divisor = pa.scalar(ticks_per_second[unit], type=pa.int64())

        seconds = pc.cast(pc.divide(native, divisor), pa.int64())
        return self._arrow_series._from_native_series(seconds)

    def total_milliseconds(self: Self) -> ArrowSeries:
        """Return the values expressed in milliseconds, as int64.

        Second-resolution data is multiplied up; every other unit is divided
        down by its factor.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        unit = native.type.unit

        # Scale between the native unit and milliseconds.
        milli_scale = {
            "s": 1_000,
            "ms": 1,
            "us": 1_000,
            "ns": 1_000_000,
        }
        factor = pa.scalar(milli_scale[unit], type=pa.int64())

        # Only seconds are coarser than milliseconds.
        rescale = pc.multiply if unit == "s" else pc.divide
        millis = pc.cast(rescale(native, factor), pa.int64())
        return self._arrow_series._from_native_series(millis)

    def total_microseconds(self: Self) -> ArrowSeries:
        """Return the values expressed in microseconds, as int64.

        Second- and millisecond-resolution data is multiplied up; the other
        units are divided down by their factor.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        unit = native.type.unit

        # Scale between the native unit and microseconds.
        micro_scale = {
            "s": 1_000_000,
            "ms": 1_000,
            "us": 1,
            "ns": 1_000,
        }
        factor = pa.scalar(micro_scale[unit], type=pa.int64())

        # Seconds and milliseconds are coarser than microseconds.
        rescale = pc.multiply if unit in {"s", "ms"} else pc.divide
        micros = pc.cast(rescale(native, factor), pa.int64())
        return self._arrow_series._from_native_series(micros)

    def total_nanoseconds(self: Self) -> ArrowSeries:
        """Return the values expressed in nanoseconds, as int64.

        Every supported unit is multiplied up by its factor.
        """
        import pyarrow as pa  # ignore-banned-import()
        import pyarrow.compute as pc  # ignore-banned-import()

        native = self._arrow_series._native_series
        unit = native.type.unit

        # Nanoseconds per native-unit tick.
        nanos_per_tick = {
            "s": 1_000_000_000,
            "ms": 1_000_000,
            "us": 1_000,
            "ns": 1,
        }
        factor = pa.scalar(nanos_per_tick[unit], type=pa.int64())

        nanos = pc.cast(pc.multiply(native, factor), pa.int64())
        return self._arrow_series._from_native_series(nanos)


class ArrowSeriesCatNamespace:
    """Categorical (`.cat`) operations for a PyArrow-backed `ArrowSeries`."""

    def __init__(self, series: ArrowSeries) -> None:
        self._arrow_series = series

    def get_categories(self) -> ArrowSeries:
        """Return the unique dictionary values found across all chunks.

        The dictionaries of the individual chunks are concatenated and
        de-duplicated, and the result is wrapped as a single-chunk series.
        """
        import pyarrow as pa  # ignore-banned-import()

        native = self._arrow_series._native_series
        # TODO(Unassigned): this looks potentially expensive - is there no better way?
        # https://github.com/narwhals-dev/narwhals/issues/464
        chunk_dictionaries = [chunk.dictionary for chunk in native.chunks]
        categories = pa.concat_arrays(chunk_dictionaries).unique()
        return self._arrow_series._from_native_series(pa.chunked_array([categories]))


class ArrowSeriesStringNamespace:
    """String (`.str`) operations for a PyArrow-backed `ArrowSeries`.

    Each method applies a PyArrow compute kernel to the wrapped series' native
    array and wraps the outcome in a new series via
    `ArrowSeries._from_native_series`.
    """

    def __init__(self: Self, series: ArrowSeries) -> None:
        self._arrow_series = series

    def len_chars(self) -> ArrowSeries:
        """Return the length of each string as computed by `pc.utf8_length`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        return self._arrow_series._from_native_series(
            pc.utf8_length(self._arrow_series._native_series)
        )

    def replace(
        self, pattern: str, value: str, *, literal: bool = False, n: int = 1
    ) -> ArrowSeries:
        """Replace up to `n` occurrences of `pattern` with `value`.

        Arguments:
            pattern: Text to search for; treated as a regular expression
                unless `literal` is true.
            value: Replacement text.
            literal: Whether `pattern` is a plain substring.
            n: Forwarded to PyArrow as `max_replacements`.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        method = "replace_substring" if literal else "replace_substring_regex"
        return self._arrow_series._from_native_series(
            getattr(pc, method)(
                self._arrow_series._native_series,
                pattern=pattern,
                replacement=value,
                max_replacements=n,
            )
        )

    def replace_all(
        self, pattern: str, value: str, *, literal: bool = False
    ) -> ArrowSeries:
        """Replace occurrences of `pattern` by delegating to `replace` with `n=-1`."""
        return self.replace(pattern, value, literal=literal, n=-1)

    def strip_chars(self: Self, characters: str | None = None) -> ArrowSeries:
        """Trim `characters` from both ends of each string via `pc.utf8_trim`.

        When `characters` is None (or empty), a default set of whitespace
        characters is used instead.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        whitespace = " \t\n\r\v\f"
        return self._arrow_series._from_native_series(
            pc.utf8_trim(
                self._arrow_series._native_series,
                characters or whitespace,
            )
        )

    def starts_with(self: Self, prefix: str) -> ArrowSeries:
        """Check whether the first `len(prefix)` characters of each string equal `prefix`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        return self._arrow_series._from_native_series(
            pc.equal(self.slice(0, len(prefix))._native_series, prefix)
        )

    def ends_with(self: Self, suffix: str) -> ArrowSeries:
        """Check whether the last `len(suffix)` characters of each string equal `suffix`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        # NOTE(review): for an empty suffix the offset is 0, so the whole
        # string is compared against "" - confirm this is the intended result.
        return self._arrow_series._from_native_series(
            pc.equal(self.slice(-len(suffix))._native_series, suffix)
        )

    def contains(self: Self, pattern: str, *, literal: bool = False) -> ArrowSeries:
        """Check whether each string contains `pattern`.

        `pattern` is matched as a plain substring when `literal` is true and
        as a regular expression otherwise.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        check_func = pc.match_substring if literal else pc.match_substring_regex
        return self._arrow_series._from_native_series(
            check_func(self._arrow_series._native_series, pattern)
        )

    def slice(self: Self, offset: int, length: int | None = None) -> ArrowSeries:
        """Return the substring of each value starting at `offset`.

        Arguments:
            offset: Start position, forwarded to PyArrow as `start`.
            length: Number of characters to keep. `None` means "up to the end
                of the string"; `0` yields empty strings.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        # Compare against None explicitly: a length of 0 is a valid request
        # for an empty slice and must not be mistaken for "no length given".
        # NOTE(review): with a negative `offset`, `offset + length` can reach 0
        # or above; how PyArrow treats such a `stop` should be confirmed.
        stop = offset + length if length is not None else None
        return self._arrow_series._from_native_series(
            pc.utf8_slice_codeunits(
                self._arrow_series._native_series, start=offset, stop=stop
            ),
        )

    def to_datetime(self: Self, format: str | None) -> ArrowSeries:  # noqa: A002
        """Parse the strings with `pc.strptime` using microsecond resolution.

        When `format` is None, it is obtained from `parse_datetime_format`
        applied to the native series.
        """
        import pyarrow.compute as pc  # ignore-banned-import()

        if format is None:
            format = parse_datetime_format(self._arrow_series._native_series)

        return self._arrow_series._from_native_series(
            pc.strptime(self._arrow_series._native_series, format=format, unit="us")
        )

    def to_uppercase(self: Self) -> ArrowSeries:
        """Convert each string to upper case via `pc.utf8_upper`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        return self._arrow_series._from_native_series(
            pc.utf8_upper(self._arrow_series._native_series),
        )

    def to_lowercase(self: Self) -> ArrowSeries:
        """Convert each string to lower case via `pc.utf8_lower`."""
        import pyarrow.compute as pc  # ignore-banned-import()

        return self._arrow_series._from_native_series(
            pc.utf8_lower(self._arrow_series._native_series),
        )