# Copyright 2023 Goldman Sachs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta
from decimal import Decimal as PythonDecimal
from datetime import date, datetime
from pylegend._typing import (
PyLegendSequence,
PyLegendUnion,
)
from pylegend.core.language.shared.primitives.primitive import PyLegendPrimitiveOrPythonPrimitive
from pylegend.core.language import (
PyLegendInteger,
PyLegendFloat,
PyLegendNumber,
PyLegendString,
PyLegendBoolean,
PyLegendDate,
PyLegendDateTime,
PyLegendStrictDate,
PyLegendDecimal,
convert_literal_to_literal_expression,
)
from pylegend.core.language.shared.operations.collection_operation_expressions import (
PyLegendCountExpression,
PyLegendDistinctCountExpression,
PyLegendAverageExpression,
PyLegendIntegerMaxExpression,
PyLegendIntegerMinExpression,
PyLegendIntegerSumExpression,
PyLegendFloatMaxExpression,
PyLegendFloatMinExpression,
PyLegendFloatSumExpression,
PyLegendNumberMaxExpression,
PyLegendNumberMinExpression,
PyLegendNumberSumExpression,
PyLegendStdDevSampleExpression,
PyLegendStdDevPopulationExpression,
PyLegendVarianceSampleExpression,
PyLegendVariancePopulationExpression,
PyLegendDecimalMaxExpression,
PyLegendDecimalMinExpression,
PyLegendDecimalSumExpression,
PyLegendDecimalUniqueValueOnlyExpression,
PyLegendStringMaxExpression,
PyLegendStringMinExpression,
PyLegendJoinStringsExpression,
PyLegendStrictDateMaxExpression,
PyLegendStrictDateMinExpression,
PyLegendDateMaxExpression,
PyLegendDateMinExpression,
PyLegendIntegerUniqueValueOnlyExpression,
PyLegendFloatUniqueValueOnlyExpression,
PyLegendNumberUniqueValueOnlyExpression,
PyLegendStringUniqueValueOnlyExpression,
PyLegendStrictDateUniqueValueOnlyExpression,
PyLegendDateUniqueValueOnlyExpression,
PyLegendDateTimeUniqueValueOnlyExpression,
PyLegendBooleanUniqueValueOnlyExpression,
PyLegendCorrExpression,
PyLegendCovarPopulationExpression,
PyLegendCovarSampleExpression,
PyLegendWavgExpression,
PyLegendMaxByExpression,
PyLegendMinByExpression,
PyLegendMedianExpression,
PyLegendModeExpression,
PyLegendPercentileContExpression,
PyLegendPercentileDiscExpression,
)
# Public names of this module: the collection classes plus the factory
# function; this is the list honoured by ``from <module> import *``.
__all__: PyLegendSequence[str] = [
"PyLegendPrimitiveCollection",
"PyLegendIntegerCollection",
"PyLegendFloatCollection",
"PyLegendNumberCollection",
"PyLegendDecimalCollection",
"PyLegendStringCollection",
"PyLegendBooleanCollection",
"PyLegendDateCollection",
"PyLegendDateTimeCollection",
"PyLegendStrictDateCollection",
"PyLegendNumberPairCollection",
"create_primitive_collection",
]
class PyLegendPrimitiveCollection(metaclass=ABCMeta):
    """
    Root of the primitive collection hierarchy.

    A collection holds exactly one primitive value -- a PyLegend primitive
    or a plain Python literal -- and turns it into aggregate expressions
    evaluated over the rows of a group (``groupby``) or ``window``.
    This base class supplies the aggregations that do not depend on the
    wrapped type (``count`` and ``distinct_count``); sub-classes contribute
    the type-specific ones such as ``sum``, ``max``, ``min`` or ``join``.

    Collections are meant to be created by the framework (for example
    when an aggregation function is applied to a column inside
    ``groupby().agg()``), not instantiated directly by users.
    """

    # The wrapped value; name-mangled, so every class in the hierarchy
    # keeps its own private reference.
    __nested: PyLegendPrimitiveOrPythonPrimitive

    def __init__(self, nested: PyLegendPrimitiveOrPythonPrimitive) -> None:
        self.__nested = nested

    def count(self) -> "PyLegendInteger":
        """
        Number of rows that make up the group.

        Returns
        -------
        PyLegendInteger
            The row count for each group.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.count()
            ).to_pandas().head(3)
        """
        wrapped = self.__nested
        # Python literals have to be lifted into literal expressions;
        # PyLegend primitives already carry their expression.
        operand = (
            convert_literal_to_literal_expression(wrapped)
            if isinstance(wrapped, (bool, int, float, str, date, datetime, PythonDecimal))
            else wrapped.value()
        )
        return PyLegendInteger(PyLegendCountExpression(operand))

    def distinct_count(self) -> "PyLegendInteger":
        """
        Number of distinct values found in the group.

        Returns
        -------
        PyLegendInteger
            The distinct value count for each group.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.distinct_count()
            ).to_pandas().head(3)
        """
        wrapped = self.__nested
        operand = (
            convert_literal_to_literal_expression(wrapped)
            if isinstance(wrapped, (bool, int, float, str, date, datetime, PythonDecimal))
            else wrapped.value()
        )
        return PyLegendInteger(PyLegendDistinctCountExpression(operand))
class PyLegendNumberCollection(PyLegendPrimitiveCollection):
    """
    Collection wrapping a generic numeric expression or literal.

    On top of the ``count`` and ``distinct_count`` inherited from
    :class:`PyLegendPrimitiveCollection`, this class offers the numeric
    aggregations: ``sum``, ``min``, ``max``, ``average`` (alias ``mean``),
    sample / population standard deviation and variance, ``mode``,
    ``percentile`` and ``distinct_value``.

    ``row_mapper`` pairs the collection with a second numeric operand,
    yielding the paired collection used for correlation and covariance
    calculations.
    """

    __nested: PyLegendUnion[int, float, PythonDecimal, PyLegendInteger, PyLegendFloat, PyLegendNumber, PyLegendDecimal]

    def __init__(
            self,
            nested: PyLegendUnion[int, float, PythonDecimal, PyLegendInteger, PyLegendFloat, PyLegendNumber, PyLegendDecimal]
    ) -> None:
        super().__init__(nested)
        self.__nested = nested

    def average(self) -> "PyLegendFloat":
        """
        Arithmetic mean of the group's values.

        Returns
        -------
        PyLegendFloat

        See Also
        --------
        mean : Alias for ``average``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.average()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # Literals are lifted into literal expressions; PyLegend
        # primitives expose their expression through ``value()``.
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendFloat(PyLegendAverageExpression(expression))  # type: ignore

    def mean(self) -> "PyLegendFloat":
        """Alias for :meth:`average`."""
        return self.average()  # pragma: no cover

    def max(self) -> "PyLegendNumber":
        """
        Largest value in the group.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendNumberMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendNumber":
        """
        Smallest value in the group.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendNumberMinExpression(expression))  # type: ignore

    def sum(self) -> "PyLegendNumber":
        """
        Total of the group's values.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.sum()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendNumberSumExpression(expression))  # type: ignore

    def std_dev_sample(self) -> "PyLegendNumber":
        """
        Sample standard deviation of the group's values.

        Returns
        -------
        PyLegendNumber

        See Also
        --------
        std_dev : Alias for ``std_dev_sample``.
        std_dev_population : Population standard deviation.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.std_dev_sample()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendStdDevSampleExpression(expression))  # type: ignore

    def std_dev(self) -> "PyLegendNumber":
        """Alias for :meth:`std_dev_sample`."""
        return self.std_dev_sample()

    def std_dev_population(self) -> "PyLegendNumber":
        """
        Population standard deviation of the group's values.

        Returns
        -------
        PyLegendNumber

        See Also
        --------
        std_dev_sample : Sample standard deviation.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.std_dev_population()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendStdDevPopulationExpression(expression))  # type: ignore

    def variance_sample(self) -> "PyLegendNumber":
        """
        Sample variance of the group's values.

        Returns
        -------
        PyLegendNumber

        See Also
        --------
        variance : Alias for ``variance_sample``.
        variance_population : Population variance.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.variance_sample()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendVarianceSampleExpression(expression))  # type: ignore

    def variance(self) -> "PyLegendNumber":
        """Alias for :meth:`variance_sample`."""
        return self.variance_sample()

    def variance_population(self) -> "PyLegendNumber":
        """
        Population variance of the group's values.

        Returns
        -------
        PyLegendNumber

        See Also
        --------
        variance_sample : Sample variance.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.variance_population()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendVariancePopulationExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendNumber":
        """
        The one distinct value present in the group.

        An error is raised at query time when the group holds more
        than one distinct value.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Order Id")["Order Id"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendNumberUniqueValueOnlyExpression(expression))  # type: ignore

    def mode(self) -> "PyLegendNumber":
        """
        Most frequent value (mode) among the group's values.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.mode()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendNumber(PyLegendModeExpression(expression))  # type: ignore

    def percentile(
            self,
            percentile: float,
            ascending: bool = True,
            continuous: bool = True,
    ) -> "PyLegendNumber":
        """
        Percentile of the group's values.

        Parameters
        ----------
        percentile : float
            The percentile to compute, between 0 and 1.
        ascending : bool, default True
            When ``True`` the values are ordered ascending before the
            percentile is taken.
        continuous : bool, default True
            ``True`` selects the continuous (interpolated) percentile
            (``PERCENTILE_CONT``); ``False`` selects the discrete one
            (``PERCENTILE_DISC``).

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.percentile(0.5)
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (int, float, PythonDecimal)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        # Both percentile expressions are built from the same three
        # arguments, so only the class to instantiate depends on the flag.
        percentile_expression = PyLegendPercentileContExpression if continuous else PyLegendPercentileDiscExpression
        return PyLegendNumber(
            percentile_expression(expression, percentile, ascending)  # type: ignore
        )

    def row_mapper(
            self,
            other: PyLegendUnion[
                int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber, "PyLegendNumberCollection"
            ],
    ) -> "PyLegendNumberPairCollection":
        """
        Combine this collection with a second numeric operand so that
        bivariate aggregations (correlation, covariance, weighted
        average) can be applied to the pair.

        Parameters
        ----------
        other : int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber, or PyLegendNumberCollection
            The second numeric operand.

        Returns
        -------
        PyLegendNumberPairCollection
            A paired collection supporting ``corr``, ``covar_sample``,
            ``covar_population``, and ``wavg_legend_ext``.
        """
        return PyLegendNumberPairCollection(self, other)
class PyLegendIntegerCollection(PyLegendNumberCollection):
    """
    Collection wrapping an integer expression or ``int`` literal.

    ``max``, ``min``, ``sum`` and ``distinct_value`` are redefined so
    that they yield ``PyLegendInteger`` rather than ``PyLegendNumber``;
    every other aggregation comes from
    :class:`PyLegendNumberCollection`.
    """

    __nested: PyLegendUnion[int, PyLegendInteger]

    def __init__(self, nested: PyLegendUnion[int, PyLegendInteger]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendInteger":
        """
        Largest integer in the group.

        Returns
        -------
        PyLegendInteger

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``int`` literals become literal expressions; PyLegend integers
        # expose their expression through ``value()``.
        if isinstance(operand, int):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendInteger(PyLegendIntegerMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendInteger":
        """
        Smallest integer in the group.

        Returns
        -------
        PyLegendInteger

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, int):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendInteger(PyLegendIntegerMinExpression(expression))  # type: ignore

    def sum(self) -> "PyLegendInteger":
        """
        Total of the group's integer values.

        Returns
        -------
        PyLegendInteger

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.sum()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, int):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendInteger(PyLegendIntegerSumExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendInteger":
        """
        The one distinct integer present in the group.

        Returns
        -------
        PyLegendInteger

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Order Id")["Order Id"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, int):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendInteger(PyLegendIntegerUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendFloatCollection(PyLegendNumberCollection):
    """
    Collection wrapping a float expression or ``float`` literal.

    ``max``, ``min``, ``sum`` and ``distinct_value`` are redefined so
    that they yield ``PyLegendFloat`` rather than ``PyLegendNumber``;
    every other aggregation comes from
    :class:`PyLegendNumberCollection`.
    """

    __nested: PyLegendUnion[float, PyLegendFloat]

    def __init__(self, nested: PyLegendUnion[float, PyLegendFloat]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendFloat":
        """
        Largest float in the group.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_float"] = frame["Order Id"] * 1.0
            frame.groupby("Ship Name")["id_float"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``float`` literals become literal expressions; PyLegend floats
        # expose their expression through ``value()``.
        if isinstance(operand, float):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendFloat(PyLegendFloatMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendFloat":
        """
        Smallest float in the group.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_float"] = frame["Order Id"] * 1.0
            frame.groupby("Ship Name")["id_float"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, float):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendFloat(PyLegendFloatMinExpression(expression))  # type: ignore

    def sum(self) -> "PyLegendFloat":
        """
        Total of the group's float values.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_float"] = frame["Order Id"] * 1.0
            frame.groupby("Ship Name")["id_float"].aggregate(
                lambda x: x.sum()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, float):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendFloat(PyLegendFloatSumExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendFloat":
        """
        The one distinct float present in the group.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_float"] = frame["Order Id"] * 1.0
            frame.groupby("Order Id")["id_float"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, float):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendFloat(PyLegendFloatUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendDecimalCollection(PyLegendNumberCollection):
    """
    Collection wrapping a decimal expression or ``Decimal`` literal.

    ``max``, ``min``, ``sum`` and ``distinct_value`` are redefined so
    that they yield ``PyLegendDecimal`` rather than ``PyLegendNumber``;
    every other aggregation comes from
    :class:`PyLegendNumberCollection`.
    """

    __nested: PyLegendUnion[PythonDecimal, PyLegendDecimal]

    def __init__(self, nested: PyLegendUnion[PythonDecimal, PyLegendDecimal]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendDecimal":
        """
        Largest decimal in the group.

        Returns
        -------
        PyLegendDecimal

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_dec"] = frame["Order Id"].to_string().substring(3, 5).parse_decimal(5, 2)
            frame.groupby("Ship Name")["id_dec"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``Decimal`` literals become literal expressions; PyLegend
        # decimals expose their expression through ``value()``.
        if isinstance(operand, PythonDecimal):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDecimal(PyLegendDecimalMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendDecimal":
        """
        Smallest decimal in the group.

        Returns
        -------
        PyLegendDecimal

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_dec"] = frame["Order Id"].to_string().substring(3, 5).parse_decimal(5, 2)
            frame.groupby("Ship Name")["id_dec"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, PythonDecimal):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDecimal(PyLegendDecimalMinExpression(expression))  # type: ignore

    def sum(self) -> "PyLegendDecimal":
        """
        Total of the group's decimal values.

        Returns
        -------
        PyLegendDecimal

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_dec"] = frame["Order Id"].to_string().substring(3, 5).parse_decimal(5, 2)
            frame.groupby("Ship Name")["id_dec"].aggregate(
                lambda x: x.sum()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, PythonDecimal):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDecimal(PyLegendDecimalSumExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendDecimal":
        """
        The one distinct decimal present in the group.

        Returns
        -------
        PyLegendDecimal

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["id_dec"] = frame["Order Id"].to_string().substring(3, 5).parse_decimal(5, 2)
            frame.groupby("Order Id")["id_dec"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, PythonDecimal):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDecimal(PyLegendDecimalUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendStringCollection(PyLegendPrimitiveCollection):
    """
    Collection wrapping a string expression or ``str`` literal.

    Offers ``max``, ``min``, ``join`` (alias ``join_strings``) and
    ``distinct_value`` for string columns, in addition to the ``count``
    and ``distinct_count`` inherited from
    :class:`PyLegendPrimitiveCollection`.
    """

    __nested: PyLegendUnion[str, PyLegendString]

    def __init__(self, nested: PyLegendUnion[str, PyLegendString]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendString":
        """
        Lexicographically greatest string in the group.

        Returns
        -------
        PyLegendString

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Ship Name"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``str`` literals become literal expressions; PyLegend strings
        # expose their expression through ``value()``.
        if isinstance(operand, str):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendString(PyLegendStringMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendString":
        """
        Lexicographically smallest string in the group.

        Returns
        -------
        PyLegendString

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Ship Name"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, str):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendString(PyLegendStringMinExpression(expression))  # type: ignore

    def join(self, separator: str) -> "PyLegendString":
        """
        Concatenate the group's strings, placing ``separator`` between
        consecutive values.

        Parameters
        ----------
        separator : str
            The delimiter inserted between each string.

        Returns
        -------
        PyLegendString

        See Also
        --------
        join_strings : Alias with default separator ``";"``.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Ship Name"].aggregate(
                lambda x: x.join(", ")
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, str):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        # The separator is always a Python literal, so it is lifted
        # unconditionally.
        delimiter = convert_literal_to_literal_expression(separator)
        return PyLegendString(PyLegendJoinStringsExpression(expression, delimiter))  # type: ignore

    def join_strings(self, separator: str = ";") -> "PyLegendString":
        """Alias for :meth:`join` with a default separator of ``";"``."""
        return self.join(separator=separator)

    def distinct_value(self) -> "PyLegendString":
        """
        The one distinct string present in the group.

        Returns
        -------
        PyLegendString

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Ship Name"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, str):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendString(PyLegendStringUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendBooleanCollection(PyLegendPrimitiveCollection):
    """
    Collection wrapping a boolean expression or ``bool`` literal.

    Used when a boolean column takes part in an aggregation context
    (for example inside ``groupby().agg()``). Besides the ``count`` and
    ``distinct_count`` inherited from
    :class:`PyLegendPrimitiveCollection`, it provides
    ``distinct_value``.
    """

    __nested: PyLegendUnion[bool, PyLegendBoolean]

    def __init__(self, nested: PyLegendUnion[bool, PyLegendBoolean]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def distinct_value(self) -> "PyLegendBoolean":
        """
        The one distinct boolean present in the group.

        An error is raised at query time when the group holds more
        than one distinct value.

        Returns
        -------
        PyLegendBoolean
            The unique boolean value for each group.

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["is_large"] = frame["Order Id"] > 11000
            frame.groupby("Ship Name")["is_large"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``bool`` literals become literal expressions; PyLegend booleans
        # expose their expression through ``value()``.
        if isinstance(operand, bool):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendBoolean(PyLegendBooleanUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendDateCollection(PyLegendPrimitiveCollection):
    """
    Collection wrapping a date expression or a ``date`` / ``datetime``
    literal.

    Offers ``max``, ``min`` and ``distinct_value`` for date columns, in
    addition to the ``count`` and ``distinct_count`` inherited from
    :class:`PyLegendPrimitiveCollection`.
    """

    __nested: PyLegendUnion[date, datetime, PyLegendDate]

    def __init__(self, nested: PyLegendUnion[date, datetime, PyLegendDate]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendDate":
        """
        Most recent (maximum) date in the group.

        Returns
        -------
        PyLegendDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Shipped Date"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``date`` / ``datetime`` literals become literal expressions;
        # PyLegend dates expose their expression through ``value()``.
        if isinstance(operand, (date, datetime)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDate(PyLegendDateMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendDate":
        """
        Oldest (minimum) date in the group.

        Returns
        -------
        PyLegendDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Shipped Date"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (date, datetime)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDate(PyLegendDateMinExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendDate":
        """
        The one distinct date present in the group.

        Returns
        -------
        PyLegendDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Shipped Date")["Shipped Date"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, (date, datetime)):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDate(PyLegendDateUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendDateTimeCollection(PyLegendDateCollection):
    """
    Collection wrapping a datetime expression or ``datetime`` literal.

    Only ``distinct_value`` is redefined, so that it yields
    ``PyLegendDateTime``. ``max``, ``min``, ``count`` and
    ``distinct_count`` are taken unchanged from
    :class:`PyLegendDateCollection`.
    """

    __nested: PyLegendUnion[datetime, PyLegendDateTime]

    def __init__(self, nested: PyLegendUnion[datetime, PyLegendDateTime]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def distinct_value(self) -> "PyLegendDateTime":
        """
        The one distinct datetime present in the group.

        Returns
        -------
        PyLegendDateTime

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["dt"] = frame["Shipped Date"].first_hour_of_day()
            frame.groupby("dt")["dt"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``datetime`` literals become literal expressions; PyLegend
        # datetimes expose their expression through ``value()``.
        if isinstance(operand, datetime):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendDateTime(PyLegendDateTimeUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
class PyLegendStrictDateCollection(PyLegendDateCollection):
    """
    Collection wrapping a strict-date (date-only) expression or ``date``
    literal.

    ``max``, ``min`` and ``distinct_value`` are redefined so that they
    yield ``PyLegendStrictDate``. ``count`` and ``distinct_count`` are
    inherited through :class:`PyLegendDateCollection`.
    """

    __nested: PyLegendUnion[date, PyLegendStrictDate]

    def __init__(self, nested: PyLegendUnion[date, PyLegendStrictDate]) -> None:
        super().__init__(nested)
        self.__nested = nested

    def max(self) -> "PyLegendStrictDate":
        """
        Most recent (maximum) strict date in the group.

        Returns
        -------
        PyLegendStrictDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["sd"] = frame["Shipped Date"].date_part()
            frame.groupby("Ship Name")["sd"].aggregate(
                lambda x: x.max()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        # ``date`` literals become literal expressions; PyLegend strict
        # dates expose their expression through ``value()``.
        if isinstance(operand, date):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendStrictDate(PyLegendStrictDateMaxExpression(expression))  # type: ignore

    def min(self) -> "PyLegendStrictDate":
        """
        Oldest (minimum) strict date in the group.

        Returns
        -------
        PyLegendStrictDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["sd"] = frame["Shipped Date"].date_part()
            frame.groupby("Ship Name")["sd"].aggregate(
                lambda x: x.min()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, date):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendStrictDate(PyLegendStrictDateMinExpression(expression))  # type: ignore

    def distinct_value(self) -> "PyLegendStrictDate":
        """
        The one distinct strict date present in the group.

        Returns
        -------
        PyLegendStrictDate

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame["sd"] = frame["Shipped Date"].date_part()
            frame.groupby("sd")["sd"].aggregate(
                lambda x: x.distinct_value()
            ).to_pandas().head(3)
        """
        operand = self.__nested
        if isinstance(operand, date):
            expression = convert_literal_to_literal_expression(operand)
        else:
            expression = operand.value()
        return PyLegendStrictDate(PyLegendStrictDateUniqueValueOnlyExpression(expression))  # type: ignore[arg-type]
def create_primitive_collection(nested: PyLegendPrimitiveOrPythonPrimitive) -> PyLegendPrimitiveCollection:
    """
    Wrap a primitive in the collection class that matches its type.

    The ``isinstance`` checks are ordered from most specific to most generic,
    because some accepted Python types are subclasses of others (``bool`` of
    ``int``, ``datetime`` of ``date``). A generic check placed first would
    capture the more specific values.

    Parameters
    ----------
    nested : PyLegendPrimitiveOrPythonPrimitive
        Python literal or PyLegend primitive expression to wrap.

    Returns
    -------
    PyLegendPrimitiveCollection
        The collection instance built around ``nested``.

    Raises
    ------
    RuntimeError
        If ``nested`` matches none of the supported types.
    """
    # ``bool`` is a subclass of ``int``: it must be tested before the integer
    # branch, otherwise ``True``/``False`` end up in an integer collection and
    # the boolean branch is never reached for Python literals.
    if isinstance(nested, (bool, PyLegendBoolean)):
        return PyLegendBooleanCollection(nested)
    if isinstance(nested, (int, PyLegendInteger)):
        return PyLegendIntegerCollection(nested)
    if isinstance(nested, (float, PyLegendFloat)):
        return PyLegendFloatCollection(nested)
    if isinstance(nested, (PythonDecimal, PyLegendDecimal)):
        return PyLegendDecimalCollection(nested)
    # Generic numeric fallback, tried only after the concrete numeric kinds
    # (presumably those PyLegend classes derive from PyLegendNumber - confirm).
    if isinstance(nested, PyLegendNumber):
        return PyLegendNumberCollection(nested)
    if isinstance(nested, (str, PyLegendString)):
        return PyLegendStringCollection(nested)
    # ``datetime`` is a subclass of ``date``: keep this ahead of the date branch.
    if isinstance(nested, (datetime, PyLegendDateTime)):
        return PyLegendDateTimeCollection(nested)
    if isinstance(nested, (date, PyLegendStrictDate)):
        return PyLegendStrictDateCollection(nested)
    # Generic date fallback, tried only after the concrete date kinds.
    if isinstance(nested, PyLegendDate):
        return PyLegendDateCollection(nested)
    raise RuntimeError(f"Not supported type - {type(nested)}")  # pragma: no cover
class PyLegendNumberPairCollection(PyLegendPrimitiveCollection):
    """
    Collection type for a pair of numeric expressions.

    ``PyLegendNumberPairCollection`` enables bivariate aggregations such
    as correlation, covariance, weighted average, and extrema-by-key
    over two numeric columns. Create one via
    :meth:`PyLegendNumberCollection.row_mapper`.
    """

    # The two operands, already unwrapped from any collection by ``_resolve``.
    __nested_a: PyLegendUnion[int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber]
    __nested_b: PyLegendUnion[int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber]

    def __init__(
            self,
            nested_a: PyLegendUnion[
                int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber, "PyLegendNumberCollection"
            ],
            nested_b: PyLegendUnion[
                int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber, "PyLegendNumberCollection"
            ],
    ) -> None:
        """
        Build the pair from two numeric operands.

        Parameters
        ----------
        nested_a, nested_b
            Numeric literal, numeric PyLegend expression, or a collection
            wrapping one; collections are unwrapped to their operand first.
        """
        resolved_a = self._resolve(nested_a)
        resolved_b = self._resolve(nested_b)
        # Only the first operand is handed to the base-class initializer.
        super().__init__(resolved_a)
        self.__nested_a = resolved_a
        self.__nested_b = resolved_b

    @staticmethod
    def _resolve(
            val: PyLegendUnion[
                int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber, "PyLegendNumberCollection"
            ]
    ) -> PyLegendUnion[int, float, PyLegendInteger, PyLegendFloat, PyLegendDecimal, PyLegendNumber]:
        """
        Return the operand held by ``val`` if it is a collection, else ``val`` itself.
        """
        if isinstance(val, PyLegendPrimitiveCollection):
            # Reads the base class's name-mangled private ``__nested`` attribute directly.
            return val._PyLegendPrimitiveCollection__nested  # type: ignore
        return val

    def _get_nested_exprs(self):  # type: ignore
        """
        Return both operands as an ``(expr_a, expr_b)`` tuple of expressions.

        Python numeric literals are converted with
        ``convert_literal_to_literal_expression``; any other operand
        contributes the result of its ``value()`` method.
        """
        literal_types = (int, float, PythonDecimal)
        nested_expr_a = (
            convert_literal_to_literal_expression(self.__nested_a)
            if isinstance(self.__nested_a, literal_types)
            else self.__nested_a.value()
        )
        nested_expr_b = (
            convert_literal_to_literal_expression(self.__nested_b)
            if isinstance(self.__nested_b, literal_types)
            else self.__nested_b.value()
        )
        return nested_expr_a, nested_expr_b

    def corr(self) -> "PyLegendFloat":
        """
        Pearson correlation coefficient between the two columns.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).corr()
            ).to_pandas().head(3)
        """
        # Shares the operand conversion used by every other aggregation here.
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendFloat(PyLegendCorrExpression(nested_expr_a, nested_expr_b))

    def covar_population(self) -> "PyLegendFloat":
        """
        Population covariance between the two columns.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).covar_population()
            ).to_pandas().head(3)
        """
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendFloat(PyLegendCovarPopulationExpression(nested_expr_a, nested_expr_b))

    def covar_sample(self) -> "PyLegendFloat":
        """
        Sample covariance between the two columns.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).covar_sample()
            ).to_pandas().head(3)
        """
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendFloat(PyLegendCovarSampleExpression(nested_expr_a, nested_expr_b))

    def wavg_legend_ext(self) -> "PyLegendFloat":
        """
        Weighted average using column *a* as values and column *b* as weights.

        .. note::

            This is a Legend extension function.

        Returns
        -------
        PyLegendFloat

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).wavg_legend_ext()
            ).to_pandas().head(3)
        """
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendFloat(PyLegendWavgExpression(nested_expr_a, nested_expr_b))

    def max_by_legend_ext(self) -> "PyLegendNumber":
        """
        Value of column *a* at the row where column *b* is maximum.

        .. note::

            This is a Legend extension function.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).max_by_legend_ext()
            ).to_pandas().head(3)
        """
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendNumber(PyLegendMaxByExpression(nested_expr_a, nested_expr_b))

    def min_by_legend_ext(self) -> "PyLegendNumber":
        """
        Value of column *a* at the row where column *b* is minimum.

        .. note::

            This is a Legend extension function.

        Returns
        -------
        PyLegendNumber

        Examples
        --------
        .. ipython:: python

            import pylegend
            frame = pylegend.samples.pandas_api.northwind_orders_frame()
            frame.groupby("Ship Name")["Order Id"].aggregate(
                lambda x: x.row_mapper(x).min_by_legend_ext()
            ).to_pandas().head(3)
        """
        nested_expr_a, nested_expr_b = self._get_nested_exprs()  # type: ignore
        return PyLegendNumber(PyLegendMinByExpression(nested_expr_a, nested_expr_b))