# Copyright 2026 Goldman Sachs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A single-column proxy on a window frame.
A ``WindowSeries`` is obtained by bracket-indexing a
:class:`~pylegend.core.tds.pandas_api.frames.pandas_api_window_tds_frame.PandasApiWindowTdsFrame`
with a column name. It can also be obtained by calling
``expanding()``, ``rolling()``, or ``window_frame_legend_ext()``
directly on a
:class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`
or
:class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`.
Calling an aggregation method (``sum()``, ``mean()``, etc.) on a
``WindowSeries`` returns a
:class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`
(or a
:class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`
when the underlying window was created from a groupby). Positional
window functions (``first()``, ``last()``, ``shift()``) and the
general-purpose ``window_extend_legend_ext()`` are also available.
The result can then be assigned back to the parent frame.
**Obtaining a WindowSeries**
.. code-block:: python
# Via bracket notation on a window frame
ws = frame.expanding(order_by="col")["col"]
# Via Series shortcut
ws = frame["col"].expanding(order_by="col")
# Grouped variant (returns GroupbySeries after aggregation)
ws = frame.groupby("grp")["val"].expanding(order_by="val")
**Result type preservation**
The type of the returned ``Series`` (or ``GroupbySeries``) matches
the column type. For example, an integer column produces an
``IntegerSeries`` after ``.sum()``, while ``count()`` always
returns an ``IntegerSeries`` regardless of the source column type.
**Composing with arithmetic**
The ``Series`` returned by a ``WindowSeries`` aggregation supports
arithmetic, so expressions like the following work:
.. code-block:: python
frame["shifted"] = frame["col"].expanding().sum() - 100
frame["ratio"] = frame["a"].expanding().sum() / frame["b"]
Multiple window assignments can be applied sequentially to the
same frame:
.. code-block:: python
frame["cumsum"] = frame["col"].expanding().sum()
frame["roll_mean"] = frame["col2"].rolling(5, order_by="col2").mean()
See Also
--------
PandasApiWindowTdsFrame : The window frame that produces a ``WindowSeries``.
Series : Non-grouped single-column proxy.
GroupbySeries : Grouped single-column proxy.
PandasApiTdsFrame.expanding : Create an expanding window on a frame.
PandasApiTdsFrame.rolling : Create a rolling window on a frame.
Notes
-----
**Differences from pandas:**
- A ``WindowSeries`` is **not** a data container. It is an
expression builder that lazily constructs the SQL / Pure query.
No data is materialised until the result is executed.
- In pandas, ``Expanding['col']`` and ``Rolling['col']`` have
built-in convenience methods that return a ``Series``. Here,
the same convenience methods are available (``sum()``,
``mean()``, ``min()``, ``max()``, ``count()``, ``std()``,
``var()``), plus positional window methods (``first()``,
``last()``, ``shift()``), and a general ``aggregate()`` /
``agg()`` method. ``window_extend_legend_ext()`` is available
for fully custom window expressions.
- Extra ``*args`` / ``**kwargs`` on ``aggregate()`` are **not
supported**.
- The ``numeric_only`` parameter on convenience methods is **not
supported** and must be ``False``.
Examples
--------
.. ipython:: python
import pylegend
frame = pylegend.samples.pandas_api.northwind_orders_frame()
# Assign an expanding sum via WindowSeries
frame["Cumulative Sum"] = frame.expanding(
order_by="Order Id"
)["Order Id"].sum()
frame.head(5).to_pandas()
frame = pylegend.samples.pandas_api.northwind_orders_frame()
# Grouped expanding sum assigned back
frame["Group Cumsum"] = frame.groupby(
"Ship Name"
)["Order Id"].expanding(order_by="Order Id").sum()
frame.head(5).to_pandas()
"""
from pylegend._typing import (
PyLegendOptional,
PyLegendUnion,
TYPE_CHECKING,
)
from pylegend.core.language.pandas_api.pandas_api_aggregate_specification import PyLegendAggInput
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
from pylegend.core.tds.pandas_api.frames.functions.single_column_window_function import ValueFunc, AggFunc
from pylegend.core.tds.pandas_api.frames.helpers.series_helper import get_series_from_col_type, \
get_groupby_series_from_col_type
from pylegend.core.tds.pandas_api.frames.pandas_api_window_tds_frame import PandasApiWindowTdsFrame
if TYPE_CHECKING:
from pylegend.core.language.pandas_api.pandas_api_series import Series
from pylegend.core.language.pandas_api.pandas_api_groupby_series import GroupbySeries
class WindowSeries:
_window_frame: PandasApiWindowTdsFrame
_column_name: str
def __init__(
self,
window_frame: PandasApiWindowTdsFrame,
column_name: str,
) -> None:
self._window_frame = window_frame
self._column_name = column_name
@property
def window_frame(self) -> PandasApiWindowTdsFrame:
    """Window frame this series was selected from (read-only accessor)."""
    return self._window_frame
@property
def column_name(self) -> str:
    """Name of the column this series targets (read-only accessor)."""
    return self._column_name
[docs]
def aggregate(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Run a window aggregation over the selected column.

    The aggregation described by ``func`` is evaluated over the window
    carried by this ``WindowSeries``. The outcome is wrapped in a
    :class:`~pylegend.core.language.pandas_api.pandas_api_series.Series`,
    or in a
    :class:`~pylegend.core.language.pandas_api.pandas_api_groupby_series.GroupbySeries`
    when the window frame sits on top of a groupby frame, so it can be
    assigned back to a frame column.

    Parameters
    ----------
    func : str, callable, list, or dict
        Aggregation specification.

        - ``str`` — a named aggregation (``'sum'``, ``'mean'``,
          ``'min'``, ``'max'``, ``'count'``, ``'std'``, ``'var'``).
        - ``callable`` — receives a column proxy and returns an
          aggregated value.
        - ``list`` — a list of the above.
        - ``dict`` — ``{column_name: agg_spec}``.

        A ``str``, ``list`` or callable is wrapped as
        ``{selected_column: func}`` before being forwarded; a ``dict``
        is forwarded unchanged.
    axis : {0, 'index'}, default 0
        Only ``0`` / ``'index'`` is supported.
    *args
        Not supported.
    **kwargs
        Not supported.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed aggregate values.
        The concrete class is chosen from the type of the result column.

    Raises
    ------
    AssertionError
        If the aggregation does not yield exactly one result column.

    See Also
    --------
    agg : Alias for ``aggregate``.
    sum : Windowed sum convenience method.
    mean : Windowed mean convenience method.
    PandasApiWindowTdsFrame.aggregate : Window aggregate on all columns.

    Notes
    -----
    **Differences from pandas:**

    - pandas forwards ``*args`` / ``**kwargs`` of
      ``Expanding['col'].aggregate()`` and ``Rolling['col'].aggregate()``
      to the aggregation function. Here extra positional and keyword
      arguments are **not supported**.
    - The result is always a single-column proxy (``Series`` or
      ``GroupbySeries``), never a DataFrame.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Expanding sum on a single column
        frame["Expanding Sum"] = frame.expanding(
            order_by="Order Id"
        )["Order Id"].aggregate("sum")
        frame.head(5).to_pandas()
    """
    from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
        PandasApiAppliedFunctionTdsFrame,
    )
    from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
    from pylegend.core.tds.pandas_api.frames.functions.window_aggregate_function import (
        WindowAggregateFunction,
    )

    window_frame = self._window_frame
    origin = window_frame._base_frame
    plain_base = window_frame.base_frame()
    selected = self._column_name

    # Anything that is not already a per-column mapping is scoped to the selected column.
    scoped_func: PyLegendAggInput
    if isinstance(func, (str, list)) or callable(func):
        scoped_func = {selected: func}
    else:
        scoped_func = func

    aggregated_frame = PandasApiAppliedFunctionTdsFrame(
        WindowAggregateFunction(window_frame, scoped_func, axis, *args, **kwargs)
    )

    produced = aggregated_frame.columns()
    assert len(produced) == 1, (
        "WindowSeries.aggregate() should produce exactly one result column"
    )
    result_type = produced[0].get_type()

    if not isinstance(origin, PandasApiGroupbyTdsFrame):
        # Non-grouped window: build a Series on the unwrapped base frame and
        # point it at the frame that carries the aggregate.
        plain_series = get_series_from_col_type(result_type)(plain_base, selected)
        plain_series._filtered_frame = aggregated_frame
        return plain_series

    grouped_cls = get_groupby_series_from_col_type(result_type)
    # Indexing the groupby frame narrows it to the selected column.
    picked = origin[selected]
    if isinstance(picked, PandasApiGroupbyTdsFrame):
        grouped_frame = picked  # pragma: no cover
    else:
        # Indexing returned a series-like object; reuse the groupby frame it wraps.
        grouped_frame = picked._base_groupby_frame
    return grouped_cls(grouped_frame, aggregated_frame)
[docs]
def agg(
        self,
        func: PyLegendAggInput,
        axis: PyLegendUnion[int, str] = 0,
        *args: PyLegendPrimitiveOrPythonPrimitive,
        **kwargs: PyLegendPrimitiveOrPythonPrimitive,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Run a window aggregation over the selected column.

    Short alias of :meth:`aggregate`: every argument is forwarded
    unchanged and the result is returned as-is. See ``aggregate`` for
    the full documentation.

    See Also
    --------
    aggregate : Equivalent method (canonical name).
    """
    forwarded = self.aggregate(func, axis, *args, **kwargs)  # pragma: no cover
    return forwarded  # pragma: no cover
[docs]
def sum(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed sum of this column.

    Shortcut for ``aggregate("sum", 0)`` on this window series, after
    checking that the pandas-compatibility arguments keep their
    defaults.

    Parameters
    ----------
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.
    min_count : int, default 0
        Must be ``0``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed sum values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False`` or ``min_count`` is not
        ``0``. ``numeric_only`` is checked first.

    See Also
    --------
    aggregate : General windowed aggregation.
    mean : Windowed mean.
    PandasApiTdsFrame.sum : Frame-level sum (no window).

    Notes
    -----
    **Differences from pandas:**

    - ``numeric_only`` and ``min_count`` are **not supported** and must
      remain at their default values.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Expanding sum on a single column
        frame["Expanding Sum"] = frame.expanding(
            order_by="Order Id"
        )["Order Id"].sum()
        frame.head(5).to_pandas()
    """
    # Collect the first violated constraint, then raise from a single place.
    complaint = None
    if numeric_only is not False:
        complaint = "numeric_only=True is not currently supported in sum function."
    elif min_count != 0:
        complaint = f"min_count must be 0 in sum function, but got: {min_count}"

    if complaint is not None:
        raise NotImplementedError(complaint)
    return self.aggregate("sum", 0)
[docs]
def mean(
        self,
        numeric_only: bool = False,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed mean of this column.

    Shortcut for ``aggregate("mean", 0)`` on this window series.

    Parameters
    ----------
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed mean values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False``.

    See Also
    --------
    aggregate : General windowed aggregation.
    sum : Windowed sum.
    PandasApiTdsFrame.mean : Frame-level mean (no window).

    Notes
    -----
    **Differences from pandas:**

    - ``numeric_only`` is **not supported** and must be ``False``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Rolling mean with a window of 3
        frame["Rolling Mean"] = frame.rolling(
            3, order_by="Order Id"
        )["Order Id"].mean()
        frame.head(5).to_pandas()
    """
    if numeric_only is False:
        return self.aggregate("mean", 0)
    raise NotImplementedError("numeric_only=True is not currently supported in mean function.")
[docs]
def min(
        self,
        numeric_only: bool = False,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed minimum of this column.

    Shortcut for ``aggregate("min", 0)`` on this window series.

    Parameters
    ----------
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed minimum values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False``.

    See Also
    --------
    aggregate : General windowed aggregation.
    max : Windowed maximum.
    PandasApiTdsFrame.min : Frame-level min (no window).

    Notes
    -----
    **Differences from pandas:**

    - ``numeric_only`` is **not supported** and must be ``False``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Expanding min on a single column
        frame["Expanding Min"] = frame.expanding(
            order_by="Order Id"
        )["Order Id"].min()
        frame.head(5).to_pandas()
    """
    if numeric_only is False:
        return self.aggregate("min", 0)
    raise NotImplementedError("numeric_only=True is not currently supported in min function.")
[docs]
def max(
        self,
        numeric_only: bool = False,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed maximum of this column.

    Shortcut for ``aggregate("max", 0)`` on this window series.

    Parameters
    ----------
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed maximum values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False``.

    See Also
    --------
    aggregate : General windowed aggregation.
    min : Windowed minimum.
    PandasApiTdsFrame.max : Frame-level max (no window).

    Notes
    -----
    **Differences from pandas:**

    - ``numeric_only`` is **not supported** and must be ``False``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Expanding max on a single column
        frame["Expanding Max"] = frame.expanding(
            order_by="Order Id"
        )["Order Id"].max()
        frame.head(5).to_pandas()
    """
    if numeric_only is False:
        return self.aggregate("max", 0)
    raise NotImplementedError("numeric_only=True is not currently supported in max function.")
[docs]
def count(self) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed count of non-null values for this column.

    Shortcut for ``aggregate("count", 0)`` on this window series.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed count values. The
        result is documented to be an ``IntegerSeries`` (or its
        ``GroupbySeries`` equivalent) whatever the source column type.

    See Also
    --------
    aggregate : General windowed aggregation.
    sum : Windowed sum.
    PandasApiTdsFrame.count : Frame-level count (no window).

    Notes
    -----
    **Differences from pandas:**

    - The method takes no parameters. pandas'
      ``Expanding.count()`` / ``Rolling.count()`` accept
      ``numeric_only``, which is not supported here.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Expanding count on a single column
        frame["Expanding Count"] = frame.expanding(
            order_by="Order Id"
        )["Order Id"].count()
        frame.head(5).to_pandas()
    """
    windowed_count = self.aggregate("count", 0)
    return windowed_count
[docs]
def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed standard deviation of this column.

    Delegates to :meth:`aggregate` with ``"std_dev_sample"`` when
    ``ddof == 1`` and ``"std_dev_population"`` when ``ddof == 0``.

    Parameters
    ----------
    ddof : int, default 1
        Degrees of freedom. ``1`` selects the sample standard deviation
        (``STDDEV_SAMP``), ``0`` the population standard deviation
        (``STDDEV_POP``).
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed standard deviation
        values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False`` (checked first), or if
        ``ddof`` is neither ``0`` nor ``1``.

    See Also
    --------
    aggregate : General windowed aggregation.
    var : Windowed variance.
    PandasApiTdsFrame.std : Frame-level std (no window).

    Notes
    -----
    **Differences from pandas:**

    - Only ``ddof=0`` (population) and ``ddof=1`` (sample) are
      supported. Other values raise ``NotImplementedError``.
    - ``numeric_only`` is **not supported** and must be ``False``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Rolling standard deviation with a window of 3
        frame["Rolling Std"] = frame.rolling(
            3, order_by="Order Id"
        )["Order Id"].std()
        frame.head(5).to_pandas()
    """
    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in std function.")

    # Pick the aggregate name first so there is a single delegation point.
    if ddof == 1:
        aggregate_name = "std_dev_sample"
    elif ddof == 0:
        aggregate_name = "std_dev_population"
    else:
        raise NotImplementedError(
            f"Only ddof=0 (Population) and ddof=1 (Sample) are supported in std function, but got: {ddof}"
        )
    return self.aggregate(aggregate_name, 0)
[docs]
def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Compute the windowed variance of this column.

    Delegates to :meth:`aggregate` with ``"variance_sample"`` when
    ``ddof == 1`` and ``"variance_population"`` when ``ddof == 0``.

    Parameters
    ----------
    ddof : int, default 1
        Degrees of freedom. ``1`` selects the sample variance
        (``VAR_SAMP``), ``0`` the population variance (``VAR_POP``).
    numeric_only : bool, default False
        Must be ``False``; any other value is rejected.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the windowed variance values.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is not ``False`` (checked first), or if
        ``ddof`` is neither ``0`` nor ``1``.

    See Also
    --------
    aggregate : General windowed aggregation.
    std : Windowed standard deviation.
    PandasApiTdsFrame.var : Frame-level var (no window).

    Notes
    -----
    **Differences from pandas:**

    - Only ``ddof=0`` (population) and ``ddof=1`` (sample) are
      supported. Other values raise ``NotImplementedError``.
    - ``numeric_only`` is **not supported** and must be ``False``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Rolling variance with a window of 3
        frame["Rolling Var"] = frame.rolling(
            3, order_by="Order Id"
        )["Order Id"].var()
        frame.head(5).to_pandas()
    """
    if numeric_only is not False:
        raise NotImplementedError("numeric_only=True is not currently supported in var function.")

    # Pick the aggregate name first so there is a single delegation point.
    if ddof == 1:
        aggregate_name = "variance_sample"
    elif ddof == 0:
        aggregate_name = "variance_population"
    else:
        raise NotImplementedError(
            f"Only ddof=0 (Population) and ddof=1 (Sample) are supported in var function, but got: {ddof}"
        )
    return self.aggregate(aggregate_name, 0)
[docs]
def window_extend_legend_ext(
        self,
        value_func: "ValueFunc",
        agg_func: "PyLegendOptional[AggFunc]" = None,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Apply a custom window function to this single column.

    **PyLegend extension** — not present in pandas.

    ``value_func`` is handed a :class:`PandasApiPartialFrame` (``p``), a
    :class:`PandasApiWindowReference` (``w``) and a
    :class:`PandasApiTdsRow` (``r``) and must return a single primitive.
    The expression is evaluated over the window carried by this
    ``WindowSeries`` and wrapped in a ``Series`` (or ``GroupbySeries``
    when the window frame sits on a groupby frame) that can be assigned
    back to the parent frame.

    Parameters
    ----------
    value_func : callable
        ``(p, w, r) -> primitive``. Common patterns:

        - ``lambda p, w, r: p.first(w, r)["col"]`` — first value.
        - ``lambda p, w, r: p.last(w, r)["col"]`` — last value.
        - ``lambda p, w, r: p.nth(w, r, 3)["col"]`` — nth value.
        - ``lambda p, w, r: p.lag(r, 1)["col"]`` — lag.
        - ``lambda p, w, r: p.lead(r, 2)["col"]`` — lead.
        - ``lambda p, w, r: r["col"]`` — raw column reference
          (combined with ``agg_func``).
    agg_func : callable or None, default None
        ``(collection) -> primitive``. When given, an extra aggregation
        step (e.g. ``lambda c: c.sum()``) is applied on top of the
        ``value_func`` result.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the window function result. The
        concrete class is chosen from the type of the result column.

    Raises
    ------
    AssertionError
        If the window function does not yield exactly one result column.

    See Also
    --------
    PandasApiWindowTdsFrame.window_extend_legend_ext :
        Same operation applied to all columns.
    first : Convenience wrapper using ``p.first(w, r)["col"]``.
    last : Convenience wrapper using ``p.last(w, r)["col"]``.
    shift : Convenience wrapper for lag/lead.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # nth-value of a single column
        frame["Nth Order"] = frame.window_frame_legend_ext(
            frame_spec=frame.rows_between(),
            order_by="Order Id",
        )["Order Id"].window_extend_legend_ext(
            value_func=lambda p, w, r: p.nth(w, r, 3)["Order Id"],
        )
        frame.head(5).to_pandas()
    """
    from pylegend.core.tds.pandas_api.frames.pandas_api_applied_function_tds_frame import (
        PandasApiAppliedFunctionTdsFrame,
    )
    from pylegend.core.tds.pandas_api.frames.pandas_api_groupby_tds_frame import PandasApiGroupbyTdsFrame
    from pylegend.core.tds.pandas_api.frames.functions.single_column_window_function import (
        SingleColumnWindowFunction,
    )

    selected = self._column_name
    window_frame = self._window_frame
    origin = window_frame._base_frame
    plain_base = window_frame.base_frame()

    window_function = SingleColumnWindowFunction(
        base_window_frame=window_frame,
        value_func=value_func,
        agg_func=agg_func,
    )
    extended_frame = PandasApiAppliedFunctionTdsFrame(window_function)

    produced = extended_frame.columns()
    assert len(produced) == 1, (
        "WindowSeries.window_extend_legend_ext() should produce exactly one result column"
    )
    result_type = produced[0].get_type()

    if not isinstance(origin, PandasApiGroupbyTdsFrame):
        # Non-grouped window: build a Series on the unwrapped base frame and
        # point it at the frame that carries the window expression.
        plain_series = get_series_from_col_type(result_type)(plain_base, selected)
        plain_series._filtered_frame = extended_frame
        return plain_series

    grouped_cls = get_groupby_series_from_col_type(result_type)
    # Indexing the groupby frame narrows it to the selected column.
    picked = origin[selected]
    if isinstance(picked, PandasApiGroupbyTdsFrame):  # pragma: no cover
        grouped_frame = picked
    else:
        # Indexing returned a series-like object; reuse the groupby frame it wraps.
        grouped_frame = picked._base_groupby_frame
    return grouped_cls(grouped_frame, extended_frame)
[docs]
def first(self) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Return the first value in the window for this column.

    Builds a value function equivalent to
    ``lambda p, w, r: p.first(w, r)["col"]`` for the selected column and
    passes it to :meth:`window_extend_legend_ext`. The documentation of
    this API describes the generated SQL as
    ``first_value(col) OVER (...)``.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding, for every row, the first value
        within its window.

    See Also
    --------
    last : Last value in the window.
    PandasApiWindowTdsFrame.first : All-column version.
    shift : Lag/lead by N rows.

    Notes
    -----
    **Differences from pandas:**

    - ``first()`` is a **pylegend extension**. pandas has no
      ``Expanding['col'].first()`` or ``Rolling['col'].first()``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        frame["First Order"] = frame.window_frame_legend_ext(
            frame_spec=frame.rows_between(),
            order_by="Order Id",
        )["Order Id"].first()
        frame.head(5).to_pandas()
    """
    from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
        PandasApiPartialFrame,
        PandasApiWindowReference,
    )
    from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

    selected = self._column_name

    # The column name is bound through a default argument, so the callable
    # still works when invoked with only (p, w, r).
    def first_in_window(
            p: PandasApiPartialFrame,
            w: PandasApiWindowReference,
            r: PandasApiTdsRow,
            _col: str = selected,
    ) -> "PyLegendPrimitiveOrPythonPrimitive":
        leading_row = p.first(w, r)
        return leading_row[_col]

    return self.window_extend_legend_ext(value_func=first_in_window)
[docs]
def last(self) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Return the last value in the window for this column.

    Builds a value function equivalent to
    ``lambda p, w, r: p.last(w, r)["col"]`` for the selected column and
    passes it to :meth:`window_extend_legend_ext`. The documentation of
    this API describes the generated SQL as
    ``last_value(col) OVER (...)``.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding, for every row, the last value
        within its window.

    See Also
    --------
    first : First value in the window.
    PandasApiWindowTdsFrame.last : All-column version.
    shift : Lag/lead by N rows.

    Notes
    -----
    **Differences from pandas:**

    - ``last()`` is a **pylegend extension**. pandas has no
      ``Expanding['col'].last()`` or ``Rolling['col'].last()``.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        frame["Last Order"] = frame.window_frame_legend_ext(
            frame_spec=frame.rows_between(),
            order_by="Order Id",
        )["Order Id"].last()
        frame.head(5).to_pandas()
    """
    from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
        PandasApiPartialFrame,
        PandasApiWindowReference,
    )
    from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

    selected = self._column_name

    # The column name is bound through a default argument, so the callable
    # still works when invoked with only (p, w, r).
    def last_in_window(
            p: PandasApiPartialFrame,
            w: PandasApiWindowReference,
            r: PandasApiTdsRow,
            _col: str = selected,
    ) -> "PyLegendPrimitiveOrPythonPrimitive":
        trailing_row = p.last(w, r)
        return trailing_row[_col]

    return self.window_extend_legend_ext(value_func=last_in_window)
[docs]
def shift(
        self,
        periods: int = 1,
        freq: PyLegendOptional[str] = None,
        axis: int = 0,
        fill_value: PyLegendOptional[object] = None,
        suffix: PyLegendOptional[str] = None,
) -> PyLegendUnion["Series", "GroupbySeries"]:
    """
    Shift (lag or lead) this column by one row within the window.

    A positive ``periods`` builds ``p.lag(r, periods)[col]``; zero or a
    negative value builds ``p.lead(r, -periods)[col]``. The expression
    is then evaluated through :meth:`window_extend_legend_ext`.

    Lag/lead window functions do not take a frame clause. When the
    window's ``frame_spec`` is ``None`` the window is used as-is. When
    it is a ``RowsBetween`` whose start and end are both ``None``, a
    shallow copy of the window frame with ``frame_spec=None`` is used
    instead. Any other frame spec is rejected with ``ValueError``.

    Parameters
    ----------
    periods : int, default 1
        Number of rows to shift. Only ``-1``, ``0`` and ``1`` pass
        validation.

        - ``periods = 1`` → ``lag`` (look backward).
        - ``periods = -1`` → ``lead`` (look forward) with offset
          ``abs(periods)``.
        - ``periods = 0`` → ``lead(col, 0)`` (current row).
    freq : str or None, default None
        **Not supported.** Must be ``None``.
    axis : {0, 'index'}, default 0
        Only ``0`` / ``'index'`` is accepted.
    fill_value : object or None, default None
        **Not supported.** Must be ``None``.
    suffix : str or None, default None
        **Not supported.** Must be ``None``.

    Returns
    -------
    Series or GroupbySeries
        A single-column proxy holding the shifted values.

    Raises
    ------
    NotImplementedError
        If ``freq``, ``fill_value`` or ``suffix`` is not ``None``, if
        ``axis`` is neither ``0`` nor ``'index'``, or if ``periods`` is
        not an ``int`` with ``abs(periods) <= 1``. Checks run in the
        order ``freq``, ``axis``, ``fill_value``, ``suffix``,
        ``periods``.
    ValueError
        If the window has a non-default ``frame_spec`` (only ``None``
        or ``RowsBetween(None, None)`` are permitted).

    See Also
    --------
    first : First value in the window.
    last : Last value in the window.

    Notes
    -----
    **Differences from pandas:**

    - pandas' ``Series.shift()`` accepts ``freq``, ``fill_value`` and
      ``suffix``; none of them are supported here.
    - ``shift()`` does **not** mutate the original window frame. It
      works on a shallow copy whenever the frame spec has to be
      cleared.

    **Edge cases:**

    - ``shift(periods=0)`` takes the lead branch with offset ``0``,
      i.e. the current row's value.

    Examples
    --------
    .. ipython:: python

        import pylegend
        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Previous row's Order Id (lag by 1)
        frame["Prev Order"] = frame.window_frame_legend_ext(
            order_by="Order Id",
        )["Order Id"].shift(periods=1)
        frame.head(5).to_pandas()

        frame = pylegend.samples.pandas_api.northwind_orders_frame()

        # Next row's Order Id (lead by 1)
        frame["Next Order"] = frame.window_frame_legend_ext(
            order_by="Order Id",
        )["Order Id"].shift(periods=-1)
        frame.head(5).to_pandas()
    """
    # Argument guards run in a fixed order and before any project import.
    if freq is not None:
        raise NotImplementedError(
            f"The 'freq' argument of the shift function is not supported, but got: freq={freq!r}"
        )
    if axis not in (0, "index"):
        raise NotImplementedError(
            f"The 'axis' argument of the shift function must be 0 or 'index', but got: axis={axis!r}"
        )
    if fill_value is not None:
        raise NotImplementedError(
            f"The 'fill_value' argument of the shift function is not supported, but got: fill_value={fill_value!r}"
        )
    if suffix is not None:
        raise NotImplementedError(
            f"The 'suffix' argument of the shift function is not supported for WindowSeries, but got: suffix={suffix!r}"
        )
    if not (isinstance(periods, int) and abs(periods) <= 1):
        raise NotImplementedError(
            "The 'periods' argument of the shift function must be an int (1 or -1) for WindowSeries."
        )

    import copy
    from pylegend.core.language.pandas_api.pandas_api_frame_spec import RowsBetween
    from pylegend.core.language.pandas_api.pandas_api_custom_expressions import (
        PandasApiPartialFrame,
        PandasApiWindowReference,
    )
    from pylegend.core.language.pandas_api.pandas_api_tds_row import PandasApiTdsRow

    # lag/lead cannot carry a frame clause: accept only "no spec" or the
    # unbounded RowsBetween(None, None), and run on a spec-free window.
    current_spec = self._window_frame._frame_spec
    if current_spec is None:
        target_series = self
    else:
        is_unbounded_rows = (
            isinstance(current_spec, RowsBetween)
            and current_spec._start is None
            and current_spec._end is None
        )
        if not is_unbounded_rows:
            raise ValueError(
                "The shift function (lag/lead) does not support a window frame clause. "
                "frame_spec must be None or RowsBetween(None, None), "
                f"but got: {current_spec!r}"
            )
        # Clear the spec on a shallow copy so the caller's window frame is untouched.
        spec_free_frame = copy.copy(self._window_frame)
        spec_free_frame._frame_spec = None
        target_series = WindowSeries(
            window_frame=spec_free_frame,
            column_name=self._column_name,
        )

    selected = self._column_name
    look_backward = periods > 0
    distance = periods if look_backward else -periods

    # Column name and offset are bound through defaults, so the callable
    # still works when invoked with only (p, w, r).
    def value_func(
            p: PandasApiPartialFrame,
            w: PandasApiWindowReference,
            r: PandasApiTdsRow,
            _col: str = selected,
            _periods: int = distance,
    ) -> "PyLegendPrimitiveOrPythonPrimitive":
        neighbour = p.lag(r, _periods) if look_backward else p.lead(r, _periods)
        return neighbour[_col]

    return target_series.window_extend_legend_ext(value_func=value_func)