diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index c5d4a2e90..6cadfdde5 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -3,6 +3,7 @@ import contextlib import json import platform +import random import re from collections.abc import Generator from datetime import UTC, datetime, timedelta @@ -10,7 +11,22 @@ from time import sleep from unittest.mock import MagicMock, patch +import ijson +import pyarrow.parquet as pq import pytest +from aignx.codegen.exceptions import ForbiddenException +from aignx.codegen.exceptions import NotFoundException as ApiNotFound +from aignx.codegen.models import ( + ItemOutput, + ItemResultReadResponse, + ItemState, + ItemTerminationReason, + RunItemStatistics, + RunOutput, + RunReadResponse, + RunState, + RunTerminationReason, +) from loguru import logger from tenacity import Retrying, retry, stop_after_attempt, wait_exponential from typer.testing import CliRunner @@ -847,8 +863,6 @@ def test_cli_run_list_for_organization(runner: CliRunner) -> None: @pytest.mark.unit def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None: """Check ForbiddenException with --for-organization shows org-specific access denied message.""" - from aignx.codegen.exceptions import ForbiddenException - with patch.object( ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden") ): @@ -862,8 +876,6 @@ def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None: @pytest.mark.unit def test_cli_run_list_forbidden_without_organization(runner: CliRunner) -> None: """Check ForbiddenException without --for-organization shows generic access denied message.""" - from aignx.codegen.exceptions import ForbiddenException - with patch.object( ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden") ): @@ -897,18 +909,6 @@ def 
test_cli_run_describe_not_found(runner: CliRunner, record_property) -> None: @pytest.mark.integration def test_cli_run_describe_json_includes_items(runner: CliRunner) -> None: """Check run describe --format=json includes items in output.""" - from aignx.codegen.models import ( - ItemOutput, - ItemResultReadResponse, - ItemState, - ItemTerminationReason, - RunItemStatistics, - RunOutput, - RunReadResponse, - RunState, - RunTerminationReason, - ) - mock_run_data = RunReadResponse( run_id="test-run-id-123", application_id="test-app", @@ -1111,8 +1111,10 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "") assert results_dir.is_dir(), f"Expected directory {results_dir} not found" files_in_dir = list(results_dir.glob("*")) - assert len(files_in_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" + expected_count = len(SPOT_1_EXPECTED_RESULT_FILES) + assert len(files_in_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_dir)}: " + f"{[f.name for f in files_in_dir]}" ) print(f"Found files in {results_dir}:") for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES: @@ -1133,6 +1135,23 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = 
results_dir / geojson_filename + parquet_row_count = pq.read_metadata(parquet_path).num_rows + with geojson_path.open("rb") as f: + geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item")) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + # Validate the execute command exited successfully assert result.exit_code == 0 @@ -1222,9 +1241,6 @@ def test_cli_run_update_item_metadata_not_dict(runner: CliRunner) -> None: @pytest.mark.sequential def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: """Test dumping and updating custom metadata via CLI commands.""" - import json - import random - unique_tag = f"test_metadata_{datetime.now(tz=UTC).timestamp()}" with submitted_run(runner, tmp_path, CSV_CONTENT_SPOT0, extra_args=["--tags", unique_tag, "--force"]) as run_id: # Step 1: Dump initial custom metadata of run @@ -1313,11 +1329,8 @@ def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Pa @pytest.mark.e2e @pytest.mark.timeout(timeout=240) @pytest.mark.sequential -def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: # noqa: PLR0915 +def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: """Test dumping and updating item custom metadata via CLI commands.""" - import json - import random - unique_tag = f"test_item_metadata_{datetime.now(tz=UTC).timestamp()}" # CSV_CONTENT_SPOT0 uses SPOT_0_FILENAME as external_id, which the describe output surfaces # as "Item External ID: `...`" — the get_external_id() helper below captures it dynamically. 
@@ -1773,8 +1786,6 @@ def test_cli_application_version_document_describe_success(runner: CliRunner, re def test_cli_application_version_document_describe_not_found(runner: CliRunner, record_property) -> None: """`application version document describe` exits 2 with a clear message on 404.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() @@ -1870,8 +1881,6 @@ def test_cli_application_version_document_list_json_empty(runner: CliRunner, rec def test_cli_application_version_document_list_resolve_not_found_text(runner: CliRunner, record_property) -> None: """`application version document list` exits 2 when the application version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-01") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1888,8 +1897,6 @@ def test_cli_application_version_document_list_resolve_not_found_text(runner: Cl def test_cli_application_version_document_list_resolve_not_found_json(runner: CliRunner, record_property) -> None: """`application version document list --format json` emits structured error on 404.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-01") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1976,8 +1983,6 @@ def test_cli_application_version_document_describe_json_success(runner: CliRunne def test_cli_application_version_document_describe_resolve_not_found_text(runner: CliRunner, record_property) -> None: """`describe` exits 2 when the 
application version cannot be resolved (text format).""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1996,8 +2001,6 @@ def test_cli_application_version_document_describe_resolve_not_found_text(runner def test_cli_application_version_document_describe_resolve_not_found_json(runner: CliRunner, record_property) -> None: """`describe --format json` emits structured error when version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -2026,8 +2029,6 @@ def test_cli_application_version_document_describe_resolve_not_found_json(runner def test_cli_application_version_document_describe_not_found_json(runner: CliRunner, record_property) -> None: """`describe --format json` emits structured error when the document is missing.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() @@ -2111,8 +2112,6 @@ def test_cli_application_version_document_download_resolve_not_found( ) -> None: """`download` exits 2 when the application version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-04") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -2142,8 +2141,6 @@ def 
test_cli_application_version_document_download_not_found( ) -> None: """`download` exits 2 with a clear message when the document does not exist.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-04") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.download_to_path.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 59ba189e2..361cd419d 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -9,6 +9,8 @@ from typing import TYPE_CHECKING from unittest.mock import AsyncMock, MagicMock, Mock, patch +import ijson +import pyarrow.parquet as pq import pytest from nicegui.testing import User from typer.testing import CliRunner @@ -354,7 +356,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back( @pytest.mark.flaky(retries=1, delay=5) @pytest.mark.timeout(timeout=60 * 10) @pytest.mark.sequential # Helps on Linux with image analysis step otherwise timing out -async def test_gui_run_download( # noqa: PLR0915 +async def test_gui_run_download( # noqa: PLR0914, PLR0915 user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property ) -> None: """Test that the user can download a run result via the GUI.""" @@ -440,8 +442,9 @@ async def test_gui_run_download( # noqa: PLR0915 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + expected_count = len(SPOT_0_EXPECTED_RESULT_FILES) + assert len(files_in_results_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) @@ -464,6 +467,23 @@ async def 
test_gui_run_download( # noqa: PLR0915 f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = pq.read_metadata(parquet_path).num_rows + with geojson_path.open("rb") as f: + geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item")) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + @pytest.mark.integration @pytest.mark.sequential diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index 634e85da0..0f7699600 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -36,17 +36,17 @@ PIPELINE_GPU_TYPE, PIPELINE_MAX_GPUS_PER_SLIDE, PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES, - SPECIAL_APPLICATION_ID, - SPECIAL_APPLICATION_VERSION, SPOT_0_CRC32C, SPOT_0_GS_URL, SPOT_0_HEIGHT, SPOT_0_RESOLUTION_MPP, SPOT_0_WIDTH, SPOT_1_CRC32C, + SPOT_1_DISEASE, SPOT_1_GS_URL, SPOT_1_HEIGHT, SPOT_1_RESOLUTION_MPP, + SPOT_1_TISSUE, SPOT_1_WIDTH, SPOT_2_CRC32C, SPOT_2_GS_URL, @@ -87,164 +87,102 @@ # Plan to have 100.000 slides processed in total, with 100 slides per application run, # one application run starting every 5 minutes, with a throughput of 1 slide per minute, # given no GPU. 
-SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT = 100 -SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00 = 2000 # Minute 0..9 -SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20 = 2000 # Minute 20..29 -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20 # 20 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24 # 24 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2 # 2 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3 # 3 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30 # 30 minutes -SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60 # 60 minutes +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT = 100 +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00 = 2000 # Minute 0..9 +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20 = 2000 # Minute 20..29 +TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20 # 20 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24 # 24 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2 # 2 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3 # 3 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30 # 30 minutes +TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60 # 60 minutes + + +def _build_wsi_input_item( # noqa: PLR0913, PLR0917 + gs_url: str, + crc32c: str, + width: int, + height: int, + resolution_mpp: float, + expires_seconds: int, + *, + tissue: str = "LUNG", + disease: str = "LUNG_CANCER", +) -> platform.InputItem: + """Build a single WSI InputItem from spot metadata.""" + return platform.InputItem( + external_id=gs_url, + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", + download_url=platform.generate_signed_url( + url=gs_url, + expires_seconds=expires_seconds, + ), + metadata={ + "checksum_base64_crc32c": crc32c, + "width_px": width, + "height_px": height, + "resolution_mpp": resolution_mpp, + "media_type": 
"image/tiff", + "staining_method": "H&E", + "specimen": { + "tissue": tissue, + "disease": disease, + }, + }, + ) + ], + ) def _get_single_spot_payload_for_heta(expires_seconds: int) -> list[platform.InputItem]: """Generates a payload using a single spot.""" return [ - platform.InputItem( - external_id=SPOT_0_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_0_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_0_CRC32C, - "resolution_mpp": SPOT_0_RESOLUTION_MPP, - "width_px": SPOT_0_WIDTH, - "height_px": SPOT_0_HEIGHT, - "media_type": "image/tiff", - "staining_method": "H&E", - "specimen": { - "tissue": "LUNG", - "disease": "LUNG_CANCER", - }, - }, - ) - ], - ), + _build_wsi_input_item( + SPOT_0_GS_URL, SPOT_0_CRC32C, SPOT_0_WIDTH, SPOT_0_HEIGHT, SPOT_0_RESOLUTION_MPP, expires_seconds + ) ] def _get_three_spots_payload_for_test(expires_seconds: int) -> list[platform.InputItem]: """Generates a payload using three spots.""" return [ - platform.InputItem( - external_id=SPOT_1_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_1_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_1_GS_URL, + SPOT_1_CRC32C, + SPOT_1_WIDTH, + SPOT_1_HEIGHT, + SPOT_1_RESOLUTION_MPP, + expires_seconds, + tissue=SPOT_1_TISSUE, + disease=SPOT_1_DISEASE, ), - platform.InputItem( - external_id=SPOT_2_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_2_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_2_CRC32C, - "width_px": 
SPOT_2_WIDTH, - "height_px": SPOT_2_HEIGHT, - "resolution_mpp": SPOT_2_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_2_GS_URL, SPOT_2_CRC32C, SPOT_2_WIDTH, SPOT_2_HEIGHT, SPOT_2_RESOLUTION_MPP, expires_seconds ), - platform.InputItem( - external_id=SPOT_3_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_3_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_3_CRC32C, - "width_px": SPOT_3_WIDTH, - "height_px": SPOT_3_HEIGHT, - "resolution_mpp": SPOT_3_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_3_GS_URL, SPOT_3_CRC32C, SPOT_3_WIDTH, SPOT_3_HEIGHT, SPOT_3_RESOLUTION_MPP, expires_seconds ), ] -def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[platform.InputItem]: - """Generates a payload using count many spots. +def _get_spots_payload_for_test_app(expires_seconds: int, count: int) -> list[platform.InputItem]: + """Generates a minimal payload for the test application using count many spots. - Optimized for large counts (e.g., 100k items): - - Generates signed URL once (all items use same source file) - - Pre-builds metadata dicts once (identical across all items) - - Args: - expires_seconds: Expiration time for signed URLs in seconds. - count: Number of items to generate. - - Returns: - List of InputItem objects for the special application. 
+ Optimized for large counts (e.g., 2000 items): + - Generates signed URL once (all items use the same source file) + - Pre-builds metadata dict once (identical across all items) """ if count <= 0: return [] - - signed_url = platform.generate_signed_url( - url=SPOT_1_GS_URL, - expires_seconds=expires_seconds, - ) - wsi_metadata = { - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - "staining_method": "H&E", - "specimen": { - "tissue": "LUNG", - "disease": "LUNG_CANCER", - }, - } - normalization_metadata = { - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - } + signed_url = platform.generate_signed_url(url=SPOT_1_GS_URL, expires_seconds=expires_seconds) + metadata = {"checksum_base64_crc32c": SPOT_1_CRC32C, "media_type": "image/tiff"} return [ platform.InputItem( external_id=f"{SPOT_1_GS_URL}&spot_index={index}", input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=signed_url, - metadata=wsi_metadata, - ), - platform.InputArtifact( - name="normalization:wsi", - download_url=signed_url, - metadata=normalization_metadata, - ), + platform.InputArtifact(name="whole_slide_image", download_url=signed_url, metadata=metadata), ], ) for index in range(count) @@ -620,108 +558,97 @@ def test_platform_heta_app_submit() -> None: @pytest.mark.e2e -@pytest.mark.stress_only @pytest.mark.long_running -@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS) -def test_platform_special_app_submit() -> None: - """Test application runs with the special application. 
+@pytest.mark.scheduled_only +@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) +def test_platform_heta_app_find_and_validate() -> None: + """Test application runs with the HETA application. - This test submits an application run with the special application and validates the submission. + This test finds an application run with the HETA application submitted earlier and + validates it completed successfully and in time. - The test behavior varies based on the current minute when triggered by cron (*/10): - - Minutes 0-9 (every 6th run): Uses 1000 items instead of 100 - - Minutes 40-49 (every 4th run): Uses 2h due date / 3h deadline instead of 20h due date / 24h deadline + Raises: + AssertionError: If any of the validation checks fail. + """ + _find_and_validate( + application_id=HETA_APPLICATION_ID, + application_version=HETA_APPLICATION_VERSION, + ) + + +@pytest.mark.e2e +@pytest.mark.stress_only +@pytest.mark.long_running +@pytest.mark.timeout(timeout=TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS) +def test_platform_test_app_stress_submit() -> None: + """Test application runs with the test application under stress conditions. + + Submits a large batch of slides and validates the submission. Batch size and + scheduling vary based on the current minute when triggered by cron (*/10): + - Minutes 0-9 (every 6th run): 2000 items + - Minutes 20-29 (every 6th run): 2000 items + - Minutes 40-49 (every 6th run): 100 items, 2h due date / 3h deadline instead of defaults + - All other minutes: 100 items Raises: AssertionError: If any of the validation checks fail. 
""" - # Determine run configuration based on current minute - # Cron runs every 10 minutes (*/10, in _scheduled-test-stress.yml), - # so we check which 10-minute window we're in current_minute = datetime.now(tz=UTC).minute is_on_00 = 0 <= current_minute <= 9 is_on_20 = 20 <= current_minute <= 29 is_on_40 = 40 <= current_minute <= 49 if is_on_00: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00 + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00 elif is_on_20: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20 + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20 else: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT deadline_seconds = ( - SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 + TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 if is_on_40 - else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS + else TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS ) due_date_seconds = ( - SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 + TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 if is_on_40 - else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS + else TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS ) logger.info( - f"Special app submit config: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, " + f"Test app stress submit: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, " f"slide_count={slide_count}, deadline_seconds={deadline_seconds}, due_date_seconds={due_date_seconds}" ) - logger.trace( - f"Generating special application payload with {slide_count} spots for " - f"{SPECIAL_APPLICATION_ID} version {SPECIAL_APPLICATION_VERSION}" - ) - payload = _get_spots_payload_for_special( + payload = _get_spots_payload_for_test_app( expires_seconds=deadline_seconds + 60 * 5, count=slide_count, ) - logger.debug(f"Generated special application payload: {payload}") _submit_and_validate( - 
application_id=SPECIAL_APPLICATION_ID, - application_version=SPECIAL_APPLICATION_VERSION, + application_id=TEST_APPLICATION_ID, + application_version=TEST_APPLICATION_VERSION, payload=payload, deadline_seconds=deadline_seconds, due_date_seconds=due_date_seconds, - tags={"test_platform_special_app_submit", "special", "stress", "stress_only"}, + tags={"test_platform_test_app_stress_submit", "stress", "stress_only"}, ) - logger.debug("Special application payload submitted successfully") @pytest.mark.e2e @pytest.mark.stress_only @pytest.mark.long_running @pytest.mark.scheduled_only -@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) -def test_platform_special_app_find_and_validate() -> None: - """Test application runs with the special application. - - This test finds an application run with the special application submitted earlier and - validates it completed successfully and in time. +@pytest.mark.timeout(timeout=TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS) +def test_platform_test_app_stress_find_and_validate() -> None: + """Find and validate a previously submitted test application stress run. Raises: AssertionError: If any of the validation checks fail. """ _find_and_validate( - application_id=SPECIAL_APPLICATION_ID, - application_version=SPECIAL_APPLICATION_VERSION, - ) - - -@pytest.mark.e2e -@pytest.mark.long_running -@pytest.mark.scheduled_only -@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) -def test_platform_heta_app_find_and_validate() -> None: - """Test application runs with the HETA application. - - This test finds an application run with the HETA application submitted earlier and - validates it completed successfully and in time. - - Raises: - AssertionError: If any of the validation checks fail. 
- """ - _find_and_validate( - application_id=HETA_APPLICATION_ID, - application_version=HETA_APPLICATION_VERSION, + application_id=TEST_APPLICATION_ID, + application_version=TEST_APPLICATION_VERSION, ) diff --git a/tests/aignostics/qupath/gui_test.py b/tests/aignostics/qupath/gui_test.py index 01d9a1b6d..0fdd07a7a 100644 --- a/tests/aignostics/qupath/gui_test.py +++ b/tests/aignostics/qupath/gui_test.py @@ -257,8 +257,9 @@ async def test_gui_run_qupath_install_to_inspect( # noqa: C901, PLR0912, PLR091 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + expected_count = len(SPOT_0_EXPECTED_RESULT_FILES) + assert len(files_in_results_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) diff --git a/tests/constants_test.py b/tests/constants_test.py index f9385b290..aa18676ee 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -19,15 +19,21 @@ SPOT_0_HEIGHT = 7196 SPOT_1_GS_URL = ( - "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" + "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/1603ba4c-398a-49db-926b-c14d8f17dc83.tiff" ) -SPOT_1_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" -SPOT_1_CRC32C = "9l3NNQ==" -SPOT_1_FILESIZE = 14681750 -SPOT_1_RESOLUTION_MPP = 0.46499982 -SPOT_1_WIDTH = 3728 -SPOT_1_HEIGHT = 3640 - +SPOT_1_FILENAME = "1603ba4c-398a-49db-926b-c14d8f17dc83.tiff" +SPOT_1_CRC32C = "MKWV1g==" +SPOT_1_FILESIZE = 8942460 +SPOT_1_RESOLUTION_MPP = 0.25 +SPOT_1_WIDTH = 6649 +SPOT_1_HEIGHT = 6578 +SPOT_1_TISSUE = "BREAST" +SPOT_1_DISEASE = "BREAST_CANCER" + +# SPOT_2, SPOT_3 (and the former SPOT_1 / 9375e3ed): these slides have a known 10x resolution +# ambiguity — certain VIPS 
versions read their MPP as ~0.0465 instead of ~0.465 due to differing +# interpretations of the TIFF ResolutionUnit tag. The values below reflect the correct 0.465 MPP. +# If a test fails with an off-by-10x resolution error, check the VIPS version in use. SPOT_2_GS_URL = ( "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/8c7b079e-8b8a-4036-bfde-5818352b503a.tiff" ) @@ -46,13 +52,23 @@ SPOT_3_WIDTH = 4016 SPOT_3_HEIGHT = 3952 +SPOT_4_GS_URL = ( + "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" +) +SPOT_4_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" +SPOT_4_CRC32C = "9l3NNQ==" +SPOT_4_FILESIZE = 14681750 +SPOT_4_RESOLUTION_MPP = 0.46499982 +SPOT_4_WIDTH = 3728 +SPOT_4_HEIGHT = 3640 + match os.getenv("AIGNOSTICS_PLATFORM_ENVIRONMENT", "production"): case "production": TEST_APPLICATION_ID = "test-app" - TEST_APPLICATION_VERSION = "0.0.6" + TEST_APPLICATION_VERSION = "1.0.0" HETA_APPLICATION_ID = "he-tme" - HETA_APPLICATION_VERSION = "1.1.1" + HETA_APPLICATION_VERSION = "1.2.0" TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = False PIPELINE_GPU_TYPE = "L4" @@ -60,80 +76,88 @@ PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None PIPELINE_MAX_GPUS_PER_SLIDE = 1 PIPELINE_CPU_PROVISIONING_MODE = "SPOT" - PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = ( - 30 # Respected starting with 1.0.0-sl.4.1+internal, until then set to 60min by application itself. - ) - - SPECIAL_APPLICATION_ID = "test-app" - SPECIAL_APPLICATION_VERSION = "0.99.0" + PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25 + # To update file sizes: the tests print every file's actual size before asserting. Run with + # -s to see them, then paste the printed byte values as the second element of each tuple. 
+ # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov + # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) SPOT_1_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 469040, 10), - ("tissue_qc_geojson_polygons.json", 177779, 10), - ("tissue_segmentation_geojson_polygons.json", 205951, 10), - ("readout_generation_slide_readouts.csv", 299654, 10), - ("readout_generation_cell_readouts.csv", 2387860, 10), - ("cell_classification_geojson_polygons.json", 16687724, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10), - 
("tissue_segmentation_csv_class_information.csv", 441, 10), - ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_segmentation_map_image.tiff", 1288632, 10), + ("tissue_qc_geojson_polygons.json", 75293, 10), + ("tissue_segmentation_geojson_polygons.json", 152317, 10), + ("readout_generation_slide_readouts.csv", 299381, 10), + ("readout_generation_cell_readouts.csv", 466725, 10), + ("cell_classification_geojson_polygons.json", 2812005, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), + ("tissue_segmentation_csv_class_information.csv", 446, 10), + ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case "staging": TEST_APPLICATION_ID = "test-app" - TEST_APPLICATION_VERSION = "0.0.6" + TEST_APPLICATION_VERSION = "1.0.0" HETA_APPLICATION_ID = "he-tme" - HETA_APPLICATION_VERSION = "1.1.1" + HETA_APPLICATION_VERSION = "1.2.0" TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = True PIPELINE_GPU_TYPE = "L4" PIPELINE_GPU_PROVISIONING_MODE = "SPOT" PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None - PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 PIPELINE_MAX_GPUS_PER_SLIDE = 1 PIPELINE_CPU_PROVISIONING_MODE = "SPOT" - PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 - - SPECIAL_APPLICATION_ID = "test-app" - SPECIAL_APPLICATION_VERSION = "0.99.0" + PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25 + # See production block above for instructions on how to update these sizes. 
SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) SPOT_1_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 469040, 10), - ("tissue_qc_geojson_polygons.json", 177779, 10), - ("tissue_segmentation_geojson_polygons.json", 205951, 10), - ("readout_generation_slide_readouts.csv", 299654, 10), - ("readout_generation_cell_readouts.csv", 2387860, 10), - ("cell_classification_geojson_polygons.json", 16687724, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10), - ("tissue_segmentation_csv_class_information.csv", 441, 10), - ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_segmentation_map_image.tiff", 1288632, 10), + ("tissue_qc_geojson_polygons.json", 75293, 10), + 
("tissue_segmentation_geojson_polygons.json", 152317, 10), + ("readout_generation_slide_readouts.csv", 299381, 10), + ("readout_generation_cell_readouts.csv", 466725, 10), + ("cell_classification_geojson_polygons.json", 2812005, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), + ("tissue_segmentation_csv_class_information.csv", 446, 10), + ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case _: