Source code for pylegend.core.language.pandas_api.pandas_api_window_series

# Copyright 2026 Goldman Sachs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
A single-column proxy on a window frame.

A ``WindowSeries`` is obtained by bracket-indexing a
:class:`~pylegend.core.tds.pandas_api.frames.pandas_api_window_tds_frame.PandasApiWindowTdsFrame`
with a column name.  It can also be obtained by calling
``expanding()``, ``rolling()``, or ``window_frame_legend_ext()``
directly on a
:class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`
or
:class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`.

Calling an aggregation method (``sum()``, ``mean()``, etc.) on a
``WindowSeries`` returns a
:class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`
(or a
:class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`
when the underlying window was created from a groupby).  Positional
window functions (``first()``, ``last()``, ``shift()``) and the
general-purpose ``window_extend_legend_ext()`` are also available.
The result can then be assigned back to the parent frame.

**Obtaining a WindowSeries**

.. code-block:: python

    # Via bracket notation on a window frame
    ws = frame.expanding(order_by="col")["col"]

    # Via Series shortcut
    ws = frame["col"].expanding(order_by="col")

    # Grouped variant (returns GroupbySeries after aggregation)
    ws = frame.groupby("grp")["val"].expanding(order_by="val")

**Result type preservation**

The type of the returned ``Series`` (or ``GroupbySeries``) matches
the column type.  For example, an integer column produces an
``IntegerSeries`` after ``.sum()``, while ``count()`` always
returns an ``IntegerSeries`` regardless of the source column type.

**Composing with arithmetic**

The ``Series`` returned by a ``WindowSeries`` aggregation supports
arithmetic, so expressions like the following work:

.. code-block:: python

    frame["shifted"] = frame["col"].expanding().sum() - 100
    frame["ratio"]   = frame["a"].expanding().sum() / frame["b"]

Multiple window assignments can be applied sequentially to the
same frame:

.. code-block:: python

    frame["cumsum"]    = frame["col"].expanding().sum()
    frame["roll_mean"] = frame["col2"].rolling(5, order_by="col2").mean()

See Also
--------
PandasApiWindowTdsFrame : The window frame that produces this.
Series : Non-grouped single-column proxy.
GroupbySeries : Grouped single-column proxy.
PandasApiTdsFrame.expanding : Create an expanding window on a frame.
PandasApiTdsFrame.rolling : Create a rolling window on a frame.

Notes
-----
**Differences from pandas:**

- A ``WindowSeries`` is **not** a data container.  It is an
  expression builder that lazily constructs the SQL / Pure query.
  No data is materialised until the result is executed.
- In pandas, ``Expanding['col']`` and ``Rolling['col']`` have
  built-in convenience methods that return a ``Series``.  Here,
  the same convenience methods are available (``sum()``,
  ``mean()``, ``min()``, ``max()``, ``count()``, ``std()``,
  ``var()``), plus positional window methods (``first()``,
  ``last()``, ``shift()``), and a general ``aggregate()`` /
  ``agg()`` method.  ``window_extend_legend_ext()`` is available
  for fully custom window expressions.
- Extra ``*args`` / ``**kwargs`` on ``aggregate()`` are **not
  supported**.
- The ``numeric_only`` parameter on convenience methods is **not
  supported** and must be ``False``.

Examples
--------
.. ipython:: python

    import pylegend
    frame = pylegend.samples.pandas_api.northwind_orders_frame()

    # Assign an expanding sum via WindowSeries
    frame["Cumulative Sum"] = frame.expanding(
        order_by="Order Id"
    )["Order Id"].sum()
    frame.head(5).to_pandas()

    frame = pylegend.samples.pandas_api.northwind_orders_frame()

    # Grouped expanding sum assigned back
    frame["Group Cumsum"] = frame.groupby(
        "Ship Name"
    )["Order Id"].expanding(order_by="Order Id").sum()
    frame.head(5).to_pandas()

"""

from pylegend._typing import (
    PyLegendOptional,
    PyLegendUnion,
    TYPE_CHECKING,
)
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
from pylegend.core.tds.pandas_api.frames.functions.single_column_window_function import ValueFunc, AggFunc
from pylegend.core.tds.pandas_api.frames.helpers.series_helper import get_series_from_col_type, \
    get_groupby_series_from_col_type
from pylegend.core.tds.pandas_api.frames.pandas_api_window_tds_frame import PandasApiWindowTdsFrame

if TYPE_CHECKING:
    from pylegend.core.language.pandas_api.pandas_api_series import Series
    from pylegend.core.language.pandas_api.pandas_api_groupby_series import GroupbySeries


class WindowSeries:

    _window_frame: PandasApiWindowTdsFrame
    _column_name: str

    def __init__(
        self,
        window_frame: PandasApiWindowTdsFrame,
        column_name: str,
    ) -> None:
        self._window_frame = window_frame
        self._column_name = column_name

    @property
    def window_frame(self) -> PandasApiWindowTdsFrame:
        """
        The window frame this series was created from.

        Returns
        -------
        PandasApiWindowTdsFrame
            The exact object passed to the constructor.
        """
        return self._window_frame

    @property
    def column_name(self) -> str:
        """
        Name of the column this series operates on.

        Returns
        -------
        str
            The column name passed to the constructor.
        """
        return self._column_name


[docs]
    def aggregate(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Run a window aggregation over the selected column.

        The aggregation described by ``func`` is evaluated over the
        window configured on this ``WindowSeries``.  The outcome is a
        :class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`,
        or a
        :class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`
        if the window originated from a groupby, and can be assigned
        back to a column of the parent frame.

        Parameters
        ----------
        func : str, callable, list, or dict
            What to aggregate:

            - ``str`` — the name of an aggregation (``'sum'``,
              ``'mean'``, ``'min'``, ``'max'``, ``'count'``, ``'std'``,
              ``'var'``).
            - ``callable`` — takes a column proxy and returns an
              aggregated value.
            - ``list`` — several of the above.
            - ``dict`` — ``{column_name: agg_spec}``.

            A ``str``, ``list`` or callable is wrapped as
            ``{selected_column: func}`` before being forwarded; a
            ``dict`` is forwarded unchanged.
        axis : {0, 'index'}, default 0
            Only ``0`` / ``'index'`` is supported.
        *args
            Not supported.
        **kwargs
            Not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed aggregate values.

        See Also
        --------
        agg : Alias for ``aggregate``.
        sum : Windowed sum convenience method.
        mean : Windowed mean convenience method.
        PandasApiWindowTdsFrame.aggregate : Window aggregate on
            all columns.

        Notes
        -----
        **Differences from pandas:**

        - pandas' ``Expanding['col'].aggregate()`` and
          ``Rolling['col'].aggregate()`` forward ``*args`` and
          ``**kwargs`` to the aggregation function.  Extra positional
          and keyword arguments are **not supported** here.
        - The result is always a single-column proxy (``Series`` or
          ``GroupbySeries``), never a DataFrame.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Expanding sum on a single column
            frame["Expanding Sum"] = frame.expanding(
                order_by="Order Id"
            )["Order Id"].aggregate("sum")
            frame.head(5).to_pandas()

        """
        from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
            PandasApiAppliedFunctionTdsFrame,
        )
        from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
        from pylegend.core.tds.pandas_api.frames.functions.window_aggregate_function import (
            WindowAggregateFunction,
        )

        window = self._window_frame
        origin = window._base_frame
        plain_frame = window.base_frame()
        target_column = self._column_name

        # Restrict non-dict specifications to the selected column only.
        if isinstance(func, (str, list)) or callable(func):
            column_scoped_func: PyLegendAggInput = {target_column: func}
        else:
            column_scoped_func = func

        window_aggregate = WindowAggregateFunction(window, column_scoped_func, axis, *args, **kwargs)
        aggregated_frame = PandasApiAppliedFunctionTdsFrame(window_aggregate)

        produced_columns = aggregated_frame.columns()
        assert len(produced_columns) == 1, (
            "WindowSeries.aggregate() should produce exactly one result column"
        )
        # The result proxy class is picked from the aggregated column's type.
        result_type = produced_columns[0].get_type()

        if not isinstance(origin, PandasApiGroupbyTdsFrame):
            plain_series = get_series_from_col_type(result_type)(plain_frame, target_column)
            plain_series._filtered_frame = aggregated_frame
            return plain_series

        grouped_series_cls = get_groupby_series_from_col_type(result_type)
        # Indexing the groupby frame selects the column on it.
        selection = origin[target_column]
        if isinstance(selection, PandasApiGroupbyTdsFrame):
            grouped_frame = selection  # pragma: no cover
        else:
            # Indexing with a string yields a GroupbySeries; take its groupby frame.
            grouped_frame = selection._base_groupby_frame
        return grouped_series_cls(grouped_frame, aggregated_frame)



[docs]
    def agg(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Run a window aggregation over the selected column.

        Short alias of :meth:`aggregate`: every argument is forwarded
        unchanged and the result of ``aggregate`` is returned as-is.
        Refer to ``aggregate`` for the complete documentation.

        See Also
        --------
        aggregate : Equivalent method (canonical name).
        """
        # Pure pass-through; all handling lives in aggregate().
        return self.aggregate(func, axis, *args, **kwargs)  # pragma: no cover



[docs]
    def sum(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed sum of this column.

        Shorthand for ``aggregate('sum')`` on this window series.

        Parameters
        ----------
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.
        min_count : int, default 0
            Has to stay ``0``; any other value is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed sum values.

        Raises
        ------
        NotImplementedError
            When either parameter is given an unsupported value.

        See Also
        --------
        aggregate : General windowed aggregation.
        mean : Windowed mean.
        PandasApiTdsFrame.sum : Frame-level sum (no window).

        Notes
        -----
        **Differences from pandas:**

        - ``numeric_only`` and ``min_count`` are **not supported** and
          must be left at their defaults.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Expanding sum on a single column
            frame["Expanding Sum"] = frame.expanding(
                order_by="Order Id"
            )["Order Id"].sum()
            frame.head(5).to_pandas()

        """
        # Reject anything but the defaults before delegating.
        non_default_numeric_only = numeric_only is not False
        if non_default_numeric_only:
            raise NotImplementedError("numeric_only=True is not currently supported in sum function.")

        non_default_min_count = min_count != 0
        if non_default_min_count:
            raise NotImplementedError(f"min_count must be 0 in sum function, but got: {min_count}")

        return self.aggregate("sum", 0)



[docs]
    def mean(
        self,
        numeric_only: bool = False,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed mean of this column.

        Shorthand for ``aggregate('mean')`` on this window series.

        Parameters
        ----------
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed mean values.

        Raises
        ------
        NotImplementedError
            When ``numeric_only`` is ``True``.

        See Also
        --------
        aggregate : General windowed aggregation.
        sum : Windowed sum.
        PandasApiTdsFrame.mean : Frame-level mean (no window).

        Notes
        -----
        **Differences from pandas:**

        - ``numeric_only`` is **not supported** and must be ``False``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Rolling mean with a window of 3
            frame["Rolling Mean"] = frame.rolling(
                3, order_by="Order Id"
            )["Order Id"].mean()
            frame.head(5).to_pandas()

        """
        # Only the default value is accepted.
        non_default_numeric_only = numeric_only is not False
        if non_default_numeric_only:
            raise NotImplementedError("numeric_only=True is not currently supported in mean function.")

        return self.aggregate("mean", 0)



[docs]
    def min(
        self,
        numeric_only: bool = False,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed minimum of this column.

        Shorthand for ``aggregate('min')`` on this window series.

        Parameters
        ----------
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed minimum values.

        Raises
        ------
        NotImplementedError
            When ``numeric_only`` is ``True``.

        See Also
        --------
        aggregate : General windowed aggregation.
        max : Windowed maximum.
        PandasApiTdsFrame.min : Frame-level min (no window).

        Notes
        -----
        **Differences from pandas:**

        - ``numeric_only`` is **not supported** and must be ``False``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Expanding min on a single column
            frame["Expanding Min"] = frame.expanding(
                order_by="Order Id"
            )["Order Id"].min()
            frame.head(5).to_pandas()

        """
        # Only the default value is accepted.
        non_default_numeric_only = numeric_only is not False
        if non_default_numeric_only:
            raise NotImplementedError("numeric_only=True is not currently supported in min function.")

        return self.aggregate("min", 0)



[docs]
    def max(
        self,
        numeric_only: bool = False,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed maximum of this column.

        Shorthand for ``aggregate('max')`` on this window series.

        Parameters
        ----------
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed maximum values.

        Raises
        ------
        NotImplementedError
            When ``numeric_only`` is ``True``.

        See Also
        --------
        aggregate : General windowed aggregation.
        min : Windowed minimum.
        PandasApiTdsFrame.max : Frame-level max (no window).

        Notes
        -----
        **Differences from pandas:**

        - ``numeric_only`` is **not supported** and must be ``False``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Expanding max on a single column
            frame["Expanding Max"] = frame.expanding(
                order_by="Order Id"
            )["Order Id"].max()
            frame.head(5).to_pandas()

        """
        # Only the default value is accepted.
        non_default_numeric_only = numeric_only is not False
        if non_default_numeric_only:
            raise NotImplementedError("numeric_only=True is not currently supported in max function.")

        return self.aggregate("max", 0)



[docs]
    def count(self) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed count of non-null values in this column.

        Shorthand for ``aggregate('count')`` on this window series.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed count values.
            Whatever the type of the source column, the result is
            always an ``IntegerSeries`` (or its ``GroupbySeries``
            counterpart).

        See Also
        --------
        aggregate : General windowed aggregation.
        sum : Windowed sum.
        PandasApiTdsFrame.count : Frame-level count (no window).

        Notes
        -----
        **Differences from pandas:**

        - This method has no parameters.  pandas'
          ``Expanding.count()`` / ``Rolling.count()`` take
          ``numeric_only``, which is not supported here.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Expanding count on a single column
            frame["Expanding Count"] = frame.expanding(
                order_by="Order Id"
            )["Order Id"].count()
            frame.head(5).to_pandas()

        """
        # Nothing to validate: delegate straight to the general entry point.
        return self.aggregate("count", 0)



[docs]
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed standard deviation of this column.

        Shorthand that delegates to :meth:`aggregate` with
        ``"std_dev_sample"`` when ``ddof`` is ``1`` and with
        ``"std_dev_population"`` when ``ddof`` is ``0``.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.  ``1`` selects the sample standard
            deviation (``STDDEV_SAMP``), ``0`` the population standard
            deviation (``STDDEV_POP``).
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed standard
            deviation values.

        Raises
        ------
        NotImplementedError
            When ``ddof`` is neither ``0`` nor ``1``, or when
            ``numeric_only`` is ``True``.

        See Also
        --------
        aggregate : General windowed aggregation.
        var : Windowed variance.
        PandasApiTdsFrame.std : Frame-level std (no window).

        Notes
        -----
        **Differences from pandas:**

        - Only ``ddof=0`` (population) and ``ddof=1`` (sample) are
          supported; anything else raises ``NotImplementedError``.
        - ``numeric_only`` is **not supported** and must be ``False``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Rolling standard deviation with a window of 3
            frame["Rolling Std"] = frame.rolling(
                3, order_by="Order Id"
            )["Order Id"].std()
            frame.head(5).to_pandas()

        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in std function.")

        # Translate ddof into the aggregation name, then delegate once.
        if ddof == 1:
            aggregation_name = "std_dev_sample"
        elif ddof == 0:
            aggregation_name = "std_dev_population"
        else:
            raise NotImplementedError(
                f"Only ddof=0 (Population) and ddof=1 (Sample) are supported in std function, but got: {ddof}"
            )
        return self.aggregate(aggregation_name, 0)



[docs]
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Windowed variance of this column.

        Shorthand that delegates to :meth:`aggregate` with
        ``"variance_sample"`` when ``ddof`` is ``1`` and with
        ``"variance_population"`` when ``ddof`` is ``0``.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.  ``1`` selects the sample variance
            (``VAR_SAMP``), ``0`` the population variance
            (``VAR_POP``).
        numeric_only : bool, default False
            Has to stay ``False``; ``True`` is not supported.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the windowed variance values.

        Raises
        ------
        NotImplementedError
            When ``ddof`` is neither ``0`` nor ``1``, or when
            ``numeric_only`` is ``True``.

        See Also
        --------
        aggregate : General windowed aggregation.
        std : Windowed standard deviation.
        PandasApiTdsFrame.var : Frame-level var (no window).

        Notes
        -----
        **Differences from pandas:**

        - Only ``ddof=0`` (population) and ``ddof=1`` (sample) are
          supported; anything else raises ``NotImplementedError``.
        - ``numeric_only`` is **not supported** and must be ``False``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Rolling variance with a window of 3
            frame["Rolling Var"] = frame.rolling(
                3, order_by="Order Id"
            )["Order Id"].var()
            frame.head(5).to_pandas()

        """
        if numeric_only is not False:
            raise NotImplementedError("numeric_only=True is not currently supported in var function.")

        # Translate ddof into the aggregation name, then delegate once.
        if ddof == 1:
            aggregation_name = "variance_sample"
        elif ddof == 0:
            aggregation_name = "variance_population"
        else:
            raise NotImplementedError(
                f"Only ddof=0 (Population) and ddof=1 (Sample) are supported in var function, but got: {ddof}"
            )
        return self.aggregate(aggregation_name, 0)



[docs]
    def window_extend_legend_ext(
            self,
            value_func: "ValueFunc",
            agg_func: "PyLegendOptional[AggFunc]" = None,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Evaluate a custom window expression for the selected column.

        **PyLegend extension** — not present in pandas.

        ``value_func`` is called with three arguments —
        a :class:`PandasApiPartialFrame` (``p``), a
        :class:`PandasApiWindowReference` (``w``), and a
        :class:`PandasApiTdsRow` (``r``) — and has to return a single
        primitive.  The outcome is a ``Series`` (or ``GroupbySeries``)
        that can be assigned back to the parent frame.

        Parameters
        ----------
        value_func : callable
            ``(p, w, r) -> primitive``.

            Typical shapes:

            - ``lambda p, w, r: p.first(w, r)["col"]``  — first value.
            - ``lambda p, w, r: p.last(w, r)["col"]``   — last value.
            - ``lambda p, w, r: p.nth(w, r, 3)["col"]`` — nth value.
            - ``lambda p, w, r: p.lag(r, 1)["col"]``    — lag.
            - ``lambda p, w, r: p.lead(r, 2)["col"]``   — lead.
            - ``lambda p, w, r: r["col"]``               — raw column
              ref (combined with ``agg_func``).
        agg_func : callable or None, default None
            ``(collection) -> primitive``.  When given, an extra
            aggregation step (e.g. ``lambda c: c.sum()``) is applied to
            what ``value_func`` produced.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding the window function result.

        See Also
        --------
        PandasApiWindowTdsFrame.window_extend_legend_ext :
            Same operation applied to all columns.
        first : Convenience wrapper using ``p.first(w, r)["col"]``.
        last : Convenience wrapper using ``p.last(w, r)["col"]``.
        shift : Convenience wrapper for lag/lead.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # nth-value of a single column
            frame["Nth Order"] = frame.window_frame_legend_ext(
                frame_spec=frame.rows_between(),
                order_by="Order Id",
            )["Order Id"].window_extend_legend_ext(
                value_func=lambda p, w, r: p.nth(w, r, 3)["Order Id"],
            )
            frame.head(5).to_pandas()

        """
        from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
            PandasApiAppliedFunctionTdsFrame,
        )
        from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
        from pylegend.core.tds.pandas_api.frames.functions.single_column_window_function import (
            SingleColumnWindowFunction,
        )

        target_column = self._column_name
        window = self._window_frame
        origin = window._base_frame
        plain_frame = window.base_frame()

        window_function = SingleColumnWindowFunction(
            base_window_frame=window,
            value_func=value_func,
            agg_func=agg_func,
        )
        extended_frame = PandasApiAppliedFunctionTdsFrame(window_function)

        produced_columns = extended_frame.columns()
        assert len(produced_columns) == 1, (
            "WindowSeries.window_extend_legend_ext() should produce exactly one result column"
        )
        # The result proxy class is picked from the produced column's type.
        result_type = produced_columns[0].get_type()

        if not isinstance(origin, PandasApiGroupbyTdsFrame):
            plain_series = get_series_from_col_type(result_type)(plain_frame, target_column)
            plain_series._filtered_frame = extended_frame
            return plain_series

        grouped_series_cls = get_groupby_series_from_col_type(result_type)
        selection = origin[target_column]
        if isinstance(selection, PandasApiGroupbyTdsFrame):  # pragma: no cover
            grouped_frame = selection
        else:
            # Otherwise the selection carries its groupby frame as an attribute.
            grouped_frame = selection._base_groupby_frame
        return grouped_series_cls(grouped_frame, extended_frame)



[docs]
    def first(self) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        First value of this column inside the window.

        Generates ``first_value(col) OVER (...)`` in SQL.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding, for every row, the first
            value within the window.

        See Also
        --------
        last : Last value in the window.
        PandasApiWindowTdsFrame.first : All-column version.
        shift : Lag/lead by N rows.

        Notes
        -----
        **Differences from pandas:**

        - ``first()`` is a **pylegend extension**; pandas has no
          ``Expanding['col'].first()`` or ``Rolling['col'].first()``.
        - The work is delegated to ``window_extend_legend_ext`` with
          ``value_func = lambda p, w, r: p.first(w, r)["col"]``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            frame["First Order"] = frame.window_frame_legend_ext(
                frame_spec=frame.rows_between(),
                order_by="Order Id",
            )["Order Id"].first()
            frame.head(5).to_pandas()

        """
        from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
            PandasApiPartialFrame,
            PandasApiWindowReference,
        )
        from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

        def first_in_window(
                p: PandasApiPartialFrame,
                w: PandasApiWindowReference,
                r: PandasApiTdsRow,
                _col: str = self._column_name,  # column name bound at definition time
        ) -> "PyLegendPrimitiveOrPythonPrimitive":
            first_row = p.first(w, r)
            return first_row[_col]

        return self.window_extend_legend_ext(value_func=first_in_window)



[docs]
    def last(self) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Last value of this column inside the window.

        Generates ``last_value(col) OVER (...)`` in SQL.

        Returns
        -------
        Series or GroupbySeries
            Single-column proxy holding, for every row, the last
            value within the window.

        See Also
        --------
        first : First value in the window.
        PandasApiWindowTdsFrame.last : All-column version.
        shift : Lag/lead by N rows.

        Notes
        -----
        **Differences from pandas:**

        - ``last()`` is a **pylegend extension**; pandas has no
          ``Expanding['col'].last()`` or ``Rolling['col'].last()``.
        - The work is delegated to ``window_extend_legend_ext`` with a
          ``value_func`` that returns ``p.last(w, r)["col"]``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            frame["Last Order"] = frame.window_frame_legend_ext(
                frame_spec=frame.rows_between(),
                order_by="Order Id",
            )["Order Id"].last()
            frame.head(5).to_pandas()

        """
        from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
            PandasApiPartialFrame,
            PandasApiWindowReference,
        )
        from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

        def last_in_window(
                p: PandasApiPartialFrame,
                w: PandasApiWindowReference,
                r: PandasApiTdsRow,
                _col: str = self._column_name,  # column name bound at definition time
        ) -> "PyLegendPrimitiveOrPythonPrimitive":
            last_row = p.last(w, r)
            return last_row[_col]

        return self.window_extend_legend_ext(value_func=last_in_window)



[docs]
    def shift(
            self,
            periods: int = 1,
            freq: PyLegendOptional[str] = None,
            axis: PyLegendUnion[int, str] = 0,
            fill_value: PyLegendOptional[object] = None,
            suffix: PyLegendOptional[str] = None,
    ) -> PyLegendUnion["Series", "GroupbySeries"]:
        """
        Shift (lag or lead) this column by one row within the window.

        Generates ``lag(col, N)`` for positive ``periods`` and
        ``lead(col, N)`` for non-positive ``periods`` in SQL, where
        ``N = abs(periods)``.

        Because lag/lead SQL functions do not accept a frame clause,
        ``shift()`` automatically strips the ``frame_spec`` when it is
        the default ``RowsBetween(None, None)`` or ``None``.  If a
        non-default frame spec (e.g. ``rows_between(-2, 2)``) is set,
        a ``ValueError`` is raised.

        Parameters
        ----------
        periods : int, default 1
            Number of rows to shift.  Only ``-1``, ``0`` and ``1`` are
            supported; any other value raises ``NotImplementedError``.

            - ``periods = 1`` -> ``lag(col, 1)`` (look backward).
            - ``periods = -1`` -> ``lead(col, 1)`` (look forward), with
              offset ``abs(periods)``.
            - ``periods = 0`` -> ``lead(col, 0)`` (current row).
        freq : str or None, default None
            **Not supported.**  Raises ``NotImplementedError``.
        axis : {0, 'index'}, default 0
            Only ``0`` / ``'index'`` is supported.
        fill_value : object or None, default None
            **Not supported.**  Raises ``NotImplementedError``.
        suffix : str or None, default None
            **Not supported.**  Raises ``NotImplementedError``.

        Returns
        -------
        Series or GroupbySeries
            A single-column proxy containing the shifted values.

        Raises
        ------
        NotImplementedError
            If ``freq``, ``fill_value`` or ``suffix`` is not ``None``,
            if ``axis`` is neither ``0`` nor ``'index'``, or if
            ``periods`` is not an ``int`` in the range ``-1`` to ``1``.
        ValueError
            If the window has a non-default ``frame_spec`` (only
            ``RowsBetween(None, None)`` or ``None`` are permitted).

        See Also
        --------
        first : First value in the window.
        last : Last value in the window.

        Notes
        -----
        **Differences from pandas:**

        - In pandas, ``Series.shift()`` accepts ``freq``,
          ``fill_value``, and ``suffix``, none of which are supported
          here.
        - In pandas, ``periods`` may be any integer; here only
          ``-1``, ``0`` and ``1`` are accepted.
        - ``shift()`` does **not** mutate the original window frame.
          When the frame spec is ``RowsBetween(None, None)`` it works
          on a shallow copy with ``frame_spec=None`` so that the
          generated SQL omits the ``ROWS BETWEEN`` / ``RANGE BETWEEN``
          clause.

        **Edge cases:**

        - ``shift(periods=0)`` generates ``lead(col, 0)``, which
          returns the current row's value (identity operation).

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Previous row's Order Id (lag by 1)
            frame["Prev Order"] = frame.window_frame_legend_ext(
                order_by="Order Id",
            )["Order Id"].shift(periods=1)
            frame.head(5).to_pandas()

            frame = pylegend.samples.pandas_api.northwind_orders_frame()

            # Next row's Order Id (lead by 1)
            frame["Next Order"] = frame.window_frame_legend_ext(
                order_by="Order Id",
            )["Order Id"].shift(periods=-1)
            frame.head(5).to_pandas()

        """
        # Validate arguments up front, before any project imports or frame copying.
        if freq is not None:
            raise NotImplementedError(
                f"The 'freq' argument of the shift function is not supported, but got: freq={freq!r}"
            )
        if axis not in (0, "index"):
            raise NotImplementedError(
                f"The 'axis' argument of the shift function must be 0 or 'index', but got: axis={axis!r}"
            )
        if fill_value is not None:
            raise NotImplementedError(
                f"The 'fill_value' argument of the shift function is not supported, but got: fill_value={fill_value!r}"
            )
        if suffix is not None:
            raise NotImplementedError(
                f"The 'suffix' argument of the shift function is not supported for WindowSeries, but got: suffix={suffix!r}"
            )
        if not isinstance(periods, int) or abs(periods) > 1:
            # 0 is accepted as well as +/-1, so the message lists all three values.
            raise NotImplementedError(
                "The 'periods' argument of the shift function must be an int (-1, 0 or 1) for WindowSeries, "
                f"but got: periods={periods!r}"
            )

        import copy
        from pylegend.core.language.pandas_api.pandas_api_frame_spec import RowsBetween
        from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
            PandasApiPartialFrame,
            PandasApiWindowReference,
        )
        from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

        # lag/lead window functions do not support a frame clause.
        # Ensure frame_spec is either None or RowsBetween(None, None) (the default),
        # then use a copy of the window frame with frame_spec=None.
        frame_spec = self._window_frame._frame_spec
        if frame_spec is None:
            # Nothing to strip: this series can be used as-is.
            shift_window_series = self
        elif (isinstance(frame_spec, RowsBetween)
              and frame_spec._start is None
              and frame_spec._end is None):
            # Unbounded RowsBetween(None, None): drop it on a shallow copy so the
            # caller's window frame is left unmodified.
            copied_window_frame = copy.copy(self._window_frame)
            copied_window_frame._frame_spec = None
            shift_window_series = WindowSeries(
                window_frame=copied_window_frame,
                column_name=self._column_name,
            )
        else:
            raise ValueError(
                "The shift function (lag/lead) does not support a window frame clause. "
                "frame_spec must be None or RowsBetween(None, None), "
                f"but got: {frame_spec!r}"
            )

        column = self._column_name
        # Positive periods look backward (lag); zero and negative look forward (lead).
        look_back = periods > 0

        def value_func(
                p: PandasApiPartialFrame,
                w: PandasApiWindowReference,
                r: PandasApiTdsRow,
                _col: str = column,
                _offset: int = abs(periods),
        ) -> "PyLegendPrimitiveOrPythonPrimitive":
            shifted_row = p.lag(r, _offset) if look_back else p.lead(r, _offset)
            return shifted_row[_col]

        return shift_window_series.window_extend_legend_ext(value_func=value_func)
PyLegend

Table of Contents

Quick Search

Source code for pylegend.core.language.pandas_api.pandas_api_window_series