Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Core selection semantics for the $percentile expression operator.

With ``method: "approximate"`` (a t-digest), small inputs return the actual data
point at rank ceil(p*n). Results are returned as an array, one value per ``p``,
in the same order as ``p``. Captured against MongoDB 8.3.4.
"""

from __future__ import annotations

import pytest

from documentdb_tests.compatibility.tests.core.operator.expressions.accumulator.percentile.utils.percentile_common import ( # noqa: E501
PercentileTest,
percentile_spec,
)
from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import (
assert_expression_result,
execute_expression,
)
from documentdb_tests.framework.parametrize import pytest_params

# Property [Percentile Selection]: $percentile returns the value at the rank
# corresponding to each probability, selecting an order statistic of the input.
#
# Each case passes an input and a ``p`` list through ``percentile_spec`` and
# pins the captured result. The per-case notes show how the expectation lines
# up with the rank ceil(p*n) rule described in the module docstring.
PERCENTILE_SELECTION_TESTS: list[PercentileTest] = [
    # n=3, p=0.5: rank ceil(1.5) = 2, i.e. the second-smallest value.
    PercentileTest(
        "core_median",
        spec=percentile_spec([10, 20, 30], [0.5]),
        expected=[20.0],
        msg="$percentile p=[0.5] over [10,20,30] should return the median [20.0]",
    ),
    # Lower boundary: p=0.0 is expected to yield the smallest element.
    PercentileTest(
        "core_min_p0",
        spec=percentile_spec([10, 20, 30], [0.0]),
        expected=[10.0],
        msg="$percentile p=[0.0] should return the minimum [10.0]",
    ),
    # Upper boundary: p=1.0 gives rank ceil(3) = 3, the largest element.
    PercentileTest(
        "core_max_p1",
        spec=percentile_spec([10, 20, 30], [1.0]),
        expected=[30.0],
        msg="$percentile p=[1.0] should return the maximum [30.0]",
    ),
    # n=1: the only element is the only possible answer.
    PercentileTest(
        "core_single_element",
        spec=percentile_spec([42], [0.5]),
        expected=[42.0],
        msg="$percentile over a single-element array should return that element",
    ),
    # Same values as core_median in a different order; the expectation is unchanged.
    PercentileTest(
        "core_unsorted_input",
        spec=percentile_spec([30, 10, 20], [0.5]),
        expected=[20.0],
        msg="$percentile should sort input internally; median of [30,10,20] is [20.0]",
    ),
    # All values tie, so every order statistic is 7.
    PercentileTest(
        "core_all_equal",
        spec=percentile_spec([7, 7, 7], [0.5]),
        expected=[7.0],
        msg="$percentile over all-equal values should return that value",
    ),
    # Input is 0..9999; rank ceil(0.5 * 10000) = 5000 corresponds to the value 4999.
    PercentileTest(
        "core_large_input",
        spec=percentile_spec(list(range(10_000)), [0.5]),
        expected=[4999.0],
        msg="$percentile should handle a large (10000-element) input",
    ),
]

# Property [P Ordering]: results follow the order of the ``p`` array, including
# descending and duplicate probabilities.
#
# The expected arrays are positional: element i is the answer for p[i].
PERCENTILE_ORDERING_TESTS: list[PercentileTest] = [
    # n=5: ranks ceil(1.25)=2, ceil(2.5)=3, ceil(4.75)=5 -> 20, 30, 50.
    PercentileTest(
        "order_multiple_ascending",
        spec=percentile_spec([10, 20, 30, 40, 50], [0.25, 0.5, 0.95]),
        expected=[20.0, 30.0, 50.0],
        msg="$percentile with multiple ascending p values should return results in p order",
    ),
    # n=5: ranks ceil(4.75)=5 then ceil(0.25)=1 -> 50 first, 10 second.
    PercentileTest(
        "order_descending_p",
        spec=percentile_spec([10, 20, 30, 40, 50], [0.95, 0.05]),
        expected=[50.0, 10.0],
        msg="$percentile should preserve descending p order in the output",
    ),
    # A repeated probability is expected to produce a repeated result, not be collapsed.
    PercentileTest(
        "order_duplicate_p",
        spec=percentile_spec([10, 20, 30], [0.5, 0.5]),
        expected=[20.0, 20.0],
        msg="$percentile with duplicate p values should return a result for each",
    ),
    # Both boundaries in a single request: minimum for 0.0, maximum for 1.0.
    PercentileTest(
        "order_boundaries_both",
        spec=percentile_spec([10, 20, 30], [0.0, 1.0]),
        expected=[10.0, 30.0],
        msg="$percentile with p=[0.0, 1.0] should return [min, max]",
    ),
]

# Every core case in one list: selection cases first, ordering cases second.
PERCENTILE_CORE_ALL_TESTS = [*PERCENTILE_SELECTION_TESTS, *PERCENTILE_ORDERING_TESTS]


@pytest.mark.parametrize("test_case", pytest_params(PERCENTILE_CORE_ALL_TESTS))
def test_percentile_core(collection, test_case: PercentileTest):
    """Evaluate one core $percentile case and check it against its expectation.

    The case's spec is wrapped in a ``$percentile`` expression, run through
    ``execute_expression`` against ``collection``, and the outcome is handed to
    ``assert_expression_result`` together with the case's expected value,
    error code and failure message.
    """
    expression = {"$percentile": test_case.spec}
    outcome = execute_expression(collection, expression)
    assert_expression_result(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Numeric data-type and special-value coverage for $percentile.

The ``approximate`` method computes in double precision and always returns
double values, regardless of input numeric type. Captured against MongoDB 8.3.4.
"""

from __future__ import annotations

import pytest
from bson import Decimal128, Int64

from documentdb_tests.compatibility.tests.core.operator.expressions.accumulator.percentile.utils.percentile_common import ( # noqa: E501
PercentileTest,
percentile_spec,
)
from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import (
assert_expression_result,
execute_expression,
)
from documentdb_tests.framework.parametrize import pytest_params
from documentdb_tests.framework.test_constants import (
FLOAT_INFINITY,
FLOAT_NAN,
FLOAT_NEGATIVE_INFINITY,
)

# Property [Numeric Types]: $percentile accepts all numeric BSON types and
# returns a double under the approximate method.
#
# Every case asks for p=0.5 over three values, so the expectation is always the
# middle value, written as a Python float.
PERCENTILE_NUMERIC_TYPE_TESTS: list[PercentileTest] = [
    # Plain Python ints; presumably sent as int32 by the driver -- confirm if it matters.
    PercentileTest(
        "type_int32",
        spec=percentile_spec([10, 20, 30], [0.5]),
        expected=[20.0],
        msg="$percentile should compute over int32 input and return a double",
    ),
    # Explicit 64-bit integers, passed inside $literal.
    PercentileTest(
        "type_int64",
        spec=percentile_spec({"$literal": [Int64(10), Int64(20), Int64(30)]}, [0.5]),
        expected=[20.0],
        msg="$percentile should compute over int64 input and return a double",
    ),
    # Doubles with a fractional middle value, which must come back unchanged.
    PercentileTest(
        "type_double",
        spec=percentile_spec({"$literal": [10.0, 20.5, 30.0]}, [0.5]),
        expected=[20.5],
        msg="$percentile should compute over double input",
    ),
    # Decimal128 in, plain float expected out: the result type does not follow the input.
    PercentileTest(
        "type_decimal128_returns_double",
        spec=percentile_spec(
            {"$literal": [Decimal128("10"), Decimal128("20"), Decimal128("30")]}, [0.5]
        ),
        expected=[20.0],
        msg="$percentile over Decimal128 input should still return a double (approximate)",
    ),
    # One int, one double and one Decimal128 in the same array; the double is the middle value.
    PercentileTest(
        "type_mixed_numeric",
        spec=percentile_spec({"$literal": [10, 20.5, Decimal128("30")]}, [0.5]),
        expected=[20.5],
        msg="$percentile should compute across mixed numeric types",
    ),
]

# Property [Special Values]: NaN and infinities participate in ordering;
# the approximate method selects an order statistic accordingly.
#
# Each case asks for p=0.5 over three values, so the expectation is whichever
# value ends up in the middle once the special value has been ordered.
PERCENTILE_SPECIAL_VALUE_TESTS: list[PercentileTest] = [
    # With NaN ordered below every number, the order is (NaN, 10, 30) and the middle is 10.
    PercentileTest(
        "special_nan_in_input",
        spec=percentile_spec({"$literal": [10, FLOAT_NAN, 30]}, [0.5]),
        expected=[10.0],
        msg="$percentile should order NaN as the smallest value in the input",
    ),
    # With +Infinity ordered last, the order is (10, 30, +Infinity) and the middle is 30.
    PercentileTest(
        "special_positive_infinity",
        spec=percentile_spec({"$literal": [10, FLOAT_INFINITY, 30]}, [0.5]),
        expected=[30.0],
        msg="$percentile should treat +Infinity as the largest value",
    ),
    # With -Infinity ordered first, the order is (-Infinity, 10, 30) and the middle is 10.
    PercentileTest(
        "special_negative_infinity",
        spec=percentile_spec({"$literal": [FLOAT_NEGATIVE_INFINITY, 10, 30]}, [0.5]),
        expected=[10.0],
        msg="$percentile should treat -Infinity as the smallest value",
    ),
]

# Every data-type case in one list: numeric-type cases first, special values second.
PERCENTILE_DATA_TYPE_ALL_TESTS = [
    *PERCENTILE_NUMERIC_TYPE_TESTS,
    *PERCENTILE_SPECIAL_VALUE_TESTS,
]


@pytest.mark.parametrize("test_case", pytest_params(PERCENTILE_DATA_TYPE_ALL_TESTS))
def test_percentile_data_types(collection, test_case: PercentileTest):
    """Evaluate one numeric-type or special-value $percentile case.

    The case's spec is wrapped in a ``$percentile`` expression, run through
    ``execute_expression`` against ``collection``, and the outcome is checked
    by ``assert_expression_result`` against the case's expected value, error
    code and failure message.
    """
    expression = {"$percentile": test_case.spec}
    outcome = execute_expression(collection, expression)
    assert_expression_result(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Input-form coverage for the $percentile expression operator.

Verifies that ``input`` accepts each expression form (literal array, raw array
expression, scalar, expression operator, field reference, dotted path) and that
an empty input yields [null]. Captured against MongoDB 8.3.4.
"""

from __future__ import annotations

import pytest

from documentdb_tests.compatibility.tests.core.operator.expressions.accumulator.percentile.utils.percentile_common import ( # noqa: E501
PercentileTest,
percentile_spec,
)
from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import (
assert_expression_result,
execute_expression,
execute_expression_with_insert,
)
from documentdb_tests.framework.parametrize import pytest_params

# Property [Input Forms]: ``input`` accepts any expression that resolves to a
# number or an array of numbers.
#
# Cases that set ``document`` need stored data: the test function inserts that
# document before evaluating the expression. All other cases are evaluated
# without inserting anything.
PERCENTILE_INPUT_FORMS_TESTS: list[PercentileTest] = [
    # Array wrapped in $literal.
    PercentileTest(
        "form_literal_array",
        spec=percentile_spec({"$literal": [10, 20, 30]}, [0.5]),
        expected=[20.0],
        msg="$percentile should accept a $literal array as input",
    ),
    # The same array given directly, without the $literal wrapper.
    PercentileTest(
        "form_raw_array",
        spec=percentile_spec([10, 20, 30], [0.5]),
        expected=[20.0],
        msg="$percentile should accept a raw array expression as input",
    ),
    # A bare number rather than an array; the expectation is that number as a float.
    PercentileTest(
        "form_scalar_number",
        spec=percentile_spec(42, [0.5]),
        expected=[42.0],
        msg="$percentile should accept a single scalar number as input",
    ),
    # Input produced by another operator: $concatArrays joins [10, 20] and [30].
    PercentileTest(
        "form_expression_operator",
        spec=percentile_spec({"$concatArrays": [[10, 20], [30]]}, [0.5]),
        expected=[20.0],
        msg="$percentile should accept an expression operator that resolves to an array",
    ),
    # No values at all: the result still has one slot per p, holding null (None).
    PercentileTest(
        "form_empty_array",
        spec=percentile_spec([], [0.5]),
        expected=[None],
        msg="$percentile over an empty array input should return [null]",
    ),
    # "$v" refers to the array stored in field v of the inserted document.
    PercentileTest(
        "form_field_array",
        spec=percentile_spec("$v", [0.5]),
        document={"v": [10, 20, 30]},
        expected=[20.0],
        msg="$percentile should read an array from a field reference",
    ),
    # "$a.b" walks an array of sub-documents; expected 2.0 matches the b values 1, 2, 3.
    PercentileTest(
        "form_dotted_path",
        spec=percentile_spec("$a.b", [0.5]),
        document={"a": [{"b": 1}, {"b": 2}, {"b": 3}]},
        expected=[2.0],
        msg="$percentile should resolve a dotted path over an array of objects",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(PERCENTILE_INPUT_FORMS_TESTS))
def test_percentile_input_form(collection, test_case: PercentileTest):
    """Evaluate one input-form $percentile case and check its result.

    Cases that carry a ``document`` are run with
    ``execute_expression_with_insert`` so the expression can read that
    document's fields; cases without one go through ``execute_expression``.
    Either way the outcome is checked by ``assert_expression_result``.
    """
    expression = {"$percentile": test_case.spec}
    seed_document = test_case.document
    if seed_document is None:
        outcome = execute_expression(collection, expression)
    else:
        outcome = execute_expression_with_insert(collection, expression, seed_document)
    assert_expression_result(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
Loading