From dfe0c753ab5054151f2ec917c07e68d74aec0b78 Mon Sep 17 00:00:00 2001 From: iosefa Date: Fri, 26 Jun 2026 15:46:02 +1300 Subject: [PATCH 1/2] Add in-memory height above ground calculation --- .../getting-started-import-and-preprocess.md | 37 +++++ pyforestscan/filters.py | 131 +++++++++++++++- tests/test_filters.py | 143 +++++++++++++++++- 3 files changed, 308 insertions(+), 3 deletions(-) diff --git a/docs/usage/getting-started-import-and-preprocess.md b/docs/usage/getting-started-import-and-preprocess.md index a68c7ba..fc48bf6 100644 --- a/docs/usage/getting-started-import-and-preprocess.md +++ b/docs/usage/getting-started-import-and-preprocess.md @@ -40,6 +40,43 @@ cleaned_arrays = remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0) classified_arrays = classify_ground_points(cleaned_arrays) ``` +### Adding Height Above Ground + +Forest structure metrics require `HeightAboveGround`; raw elevation (`Z`) is not a substitute. If `HeightAboveGround` is not already present in your point cloud arrays, add it either while reading the data or afterward with `add_height_above_ground`. If you are reading directly from a point cloud file, you can calculate Height Above Ground during import: + +```python +from pyforestscan.handlers import read_lidar + +file_path = "../example_data/20191210_5QKB020880.laz" +arrays = read_lidar(file_path, "EPSG:32605", hag=True) +points = arrays[0] +``` + +If your points are already in memory, use `add_height_above_ground`. The default method uses PDAL's Delaunay Height Above Ground filter and requires ground points classified as `Classification == 2`: + +```python +from pyforestscan.filters import add_height_above_ground + +hag_arrays = add_height_above_ground(classified_arrays) +points = hag_arrays[0] +``` + +You can also calculate Height Above Ground from a DTM raster. This method only requires `X`, `Y`, and `Z` fields in the point array, and the DTM must be in the same coordinate reference system as the points: + +```python +from pyforestscan.filters import add_height_above_ground + +dtm_path = "../example_data/20191210_5QKB020880_DS05_dtm.tif" +hag_arrays = add_height_above_ground(arrays, dtm=dtm_path) +points = hag_arrays[0] +``` + +The DTM method can also be selected explicitly: + +```python +hag_arrays = add_height_above_ground(arrays, method="dtm", dtm=dtm_path) +``` + ## Exporting Point Clouds pyforestscan supports exporting processed point clouds to las and laz formats. To export a point cloud as a LAZ file: diff --git a/pyforestscan/filters.py b/pyforestscan/filters.py index 7dcd545..b353b00 100644 --- a/pyforestscan/filters.py +++ b/pyforestscan/filters.py @@ -1,9 +1,136 @@ from typing import List import math +import os +import numpy as np from pyforestscan.handlers import _build_pdal_pipeline -from pyforestscan.pipeline import _filter_hag, _filter_ground, _filter_statistical_outlier, _filter_smrf, \ - _filter_radius, _select_ground, _filter_voxeldownsize, _filter_pointsourceid +from pyforestscan.pipeline import ( + _filter_hag, + _filter_ground, + _filter_pointsourceid, + _filter_radius, + _filter_smrf, + _filter_statistical_outlier, + _filter_voxeldownsize, + _hag_delaunay, + _hag_raster, + _select_ground, +) + + +def add_height_above_ground(existing_points, method=None, dtm=None) -> List: + """ + Add HeightAboveGround values to in-memory point cloud arrays. + + This applies a PDAL Height Above Ground filter directly to existing + NumPy structured arrays. By default, the Delaunay method uses points + classified as ground (Classification == 2) as the terrain surface. When + ``dtm`` is provided, the DTM raster method is used unless ``method`` is + explicitly set. + + Args: + existing_points (np.ndarray or list): A single structured point cloud + array, or a list/tuple of arrays, containing at least 'X', 'Y', 'Z', + and, for the Delaunay method, 'Classification' fields. + method (str, optional): Height Above Ground method. Supported values + are 'delaunay' and 'dtm' (also accepts 'raster'). If None, this + defaults to 'dtm' when a ``dtm`` path is provided, otherwise + 'delaunay'. + dtm (str or path-like, optional): Path to a DTM GeoTIFF for the DTM + method. + + Returns: + list: Point cloud arrays with a 'HeightAboveGround' dimension. + + Raises: + TypeError: If the input is not a structured NumPy array or iterable of + structured NumPy arrays. + ValueError: If required dimensions are missing, no points are supplied, + an unsupported method is requested, or no ground points + (Classification == 2) are present for the Delaunay method. + FileNotFoundError: If the requested DTM file does not exist. + RuntimeError: If the PDAL HAG pipeline fails. + """ + if method is None: + method = "dtm" if dtm is not None else "delaunay" + elif not isinstance(method, str): + raise TypeError("method must be 'delaunay', 'dtm', or 'raster'.") + + method_lc = method.lower() + if method_lc == "raster": + method_lc = "dtm" + if method_lc not in {"delaunay", "dtm"}: + raise ValueError("method must be one of: 'delaunay', 'dtm', or 'raster'.") + + if method_lc == "dtm": + if dtm is None: + raise ValueError("dtm must be provided when method is 'dtm' or 'raster'.") + dtm_path = os.fspath(dtm) + if not os.path.isfile(dtm_path): + raise FileNotFoundError(f"No such DTM file: '{dtm_path}'") + if not dtm_path.lower().endswith((".tif", ".tiff")): + raise ValueError("The DTM file must be a .tif or .tiff file.") + else: + dtm_path = None + + if isinstance(existing_points, np.ndarray): + arrays = [existing_points] + else: + try: + arrays = list(existing_points) + except TypeError as exc: + raise TypeError( + "existing_points must be a structured NumPy array or an iterable " + "of structured NumPy arrays." + ) from exc + + if not arrays: + raise ValueError("At least one point cloud array is required.") + + required_fields = {"X", "Y", "Z"} + if method_lc == "delaunay": + required_fields.add("Classification") + + total_points = 0 + ground_points = 0 + + for idx, arr in enumerate(arrays): + dtype_names = getattr(getattr(arr, "dtype", None), "names", None) + if dtype_names is None: + raise TypeError( + f"Point cloud array at index {idx} must be a structured NumPy array." + ) + + missing_fields = sorted(required_fields.difference(dtype_names)) + if missing_fields: + raise ValueError( + f"Point cloud array at index {idx} is missing required fields: " + f"{', '.join(missing_fields)}." + ) + + total_points += len(arr) + if len(arr) and method_lc == "delaunay": + ground_points += int(np.count_nonzero(arr["Classification"] == 2)) + + if total_points == 0: + raise ValueError("At least one point is required to calculate HeightAboveGround.") + if method_lc == "delaunay" and ground_points == 0: + raise ValueError( + "At least one ground point with Classification == 2 is required " + "to calculate HeightAboveGround." + ) + + pipeline_stages = [_hag_raster(dtm_path)] if method_lc == "dtm" else [_hag_delaunay()] + + try: + pipeline = _build_pdal_pipeline(arrays, pipeline_stages) + except RuntimeError as e: + raise RuntimeError(f"Height Above Ground Pipeline Failed: {e}") + + if not pipeline.arrays: + raise ValueError("No data returned after HeightAboveGround calculation.") + + return pipeline.arrays def filter_hag(arrays, lower_limit=0, upper_limit=None) -> List: diff --git a/tests/test_filters.py b/tests/test_filters.py index 0cf962b..e9856dc 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -1,7 +1,9 @@ import numpy as np +import pytest +import pyforestscan.filters as filter_module from pyforestscan.filters import ( - filter_hag, filter_ground, filter_select_ground + add_height_above_ground, filter_hag, filter_ground, filter_select_ground ) @@ -30,6 +32,145 @@ def create_synthetic_point_cloud(num_points=100, include_ground=True, classifica return point_cloud +def create_classified_point_cloud(): + """ + Creates a small structured point cloud with classified ground points. + """ + dtype = [('X', 'f8'), ('Y', 'f8'), ('Z', 'f8'), ('Classification', 'u1')] + return np.array([ + (0.0, 0.0, 10.0, 2), + (1.0, 0.0, 10.5, 2), + (0.0, 1.0, 9.5, 2), + (1.0, 1.0, 18.0, 1), + ], dtype=dtype) + + +def create_xyz_point_cloud(): + """ + Creates a small structured point cloud without classification. + """ + dtype = [('X', 'f8'), ('Y', 'f8'), ('Z', 'f8')] + return np.array([ + (0.0, 0.0, 10.0), + (1.0, 0.0, 10.5), + (0.0, 1.0, 9.5), + (1.0, 1.0, 18.0), + ], dtype=dtype) + + +def test_add_height_above_ground(monkeypatch): + """ + Test that add_height_above_ground runs an in-memory HAG Delaunay pipeline. + """ + point_cloud = create_classified_point_cloud() + output_dtype = point_cloud.dtype.descr + [('HeightAboveGround', 'f8')] + output = np.empty(point_cloud.shape, dtype=output_dtype) + for name in point_cloud.dtype.names: + output[name] = point_cloud[name] + output['HeightAboveGround'] = [0.0, 0.0, 0.0, 8.0] + + captured = {} + + class FakePipeline: + arrays = [output] + + def fake_build_pdal_pipeline(arrays, pipeline_stages): + captured["arrays"] = arrays + captured["pipeline_stages"] = pipeline_stages + return FakePipeline() + + monkeypatch.setattr(filter_module, "_build_pdal_pipeline", fake_build_pdal_pipeline) + + hag_arrays = add_height_above_ground(point_cloud) + + assert isinstance(hag_arrays, list) + assert hag_arrays[0] is output + assert captured["arrays"][0] is point_cloud + assert captured["pipeline_stages"] == [{"type": "filters.hag_delaunay"}] + assert "HeightAboveGround" in hag_arrays[0].dtype.names + + +def test_add_height_above_ground_with_dtm(monkeypatch, tmp_path): + """ + Test that add_height_above_ground can use an in-memory DTM HAG pipeline. + """ + point_cloud = create_xyz_point_cloud() + dtm = tmp_path / "dtm.tif" + dtm.write_bytes(b"") + + output_dtype = point_cloud.dtype.descr + [('HeightAboveGround', 'f8')] + output = np.empty(point_cloud.shape, dtype=output_dtype) + for name in point_cloud.dtype.names: + output[name] = point_cloud[name] + output['HeightAboveGround'] = [1.0, 1.5, 0.5, 9.0] + + captured = {} + + class FakePipeline: + arrays = [output] + + def fake_build_pdal_pipeline(arrays, pipeline_stages): + captured["arrays"] = arrays + captured["pipeline_stages"] = pipeline_stages + return FakePipeline() + + monkeypatch.setattr(filter_module, "_build_pdal_pipeline", fake_build_pdal_pipeline) + + hag_arrays = add_height_above_ground(point_cloud, dtm=dtm) + + assert isinstance(hag_arrays, list) + assert hag_arrays[0] is output + assert captured["arrays"][0] is point_cloud + assert captured["pipeline_stages"] == [ + {"type": "filters.hag_dem", "raster": str(dtm)} + ] + assert "HeightAboveGround" in hag_arrays[0].dtype.names + + +def test_add_height_above_ground_dtm_method_requires_dtm(): + """ + Test that the DTM method requires a DTM path. + """ + point_cloud = create_xyz_point_cloud() + + with pytest.raises(ValueError, match="dtm must be provided"): + add_height_above_ground(point_cloud, method="dtm") + + +def test_add_height_above_ground_rejects_unknown_method(): + """ + Test that only supported Height Above Ground methods are accepted. + """ + point_cloud = create_classified_point_cloud() + + with pytest.raises(ValueError, match="method must be one of"): + add_height_above_ground(point_cloud, method="nearest") + + +def test_add_height_above_ground_missing_required_field(): + """ + Test that add_height_above_ground requires the dimensions needed by PDAL. + """ + point_cloud = np.array( + [(0.0, 0.0, 10.0)], + dtype=[('X', 'f8'), ('Y', 'f8'), ('Z', 'f8')] + ) + + with pytest.raises(ValueError, match="Classification"): + add_height_above_ground(point_cloud) + + +def test_add_height_above_ground_requires_ground_points(): + """ + Test that add_height_above_ground validates classified ground points. + """ + point_cloud = create_classified_point_cloud() + point_cloud['Classification'] = 1 + + with pytest.raises(ValueError, match="ground point"): + add_height_above_ground(point_cloud) + + def test_filter_hag(): """ Test that filter_hag correctly filters points within the specified HAG limits. From b859c6eed5b69d824d87127edaf832043661d60c Mon Sep 17 00:00:00 2001 From: iosefa Date: Fri, 26 Jun 2026 17:05:12 +1300 Subject: [PATCH 2/2] Run CI tests with conda Python --- .github/workflows/main.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 20b9b75..289126e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,20 +29,22 @@ jobs: - name: Create Conda environment with Python and PDAL run: | - conda create --name pyforestscan_env python=${{ matrix.python-version }} pdal gdal -c conda-forge -v + conda create --name pyforestscan_env python=${{ matrix.python-version }} pip pdal gdal -c conda-forge -v - name: Activate Conda environment and install dependencies shell: bash -l {0} run: | conda activate pyforestscan_env - pip install -r requirements.txt - pip install -r requirements-dev.txt + python -m pip install --no-user -r requirements.txt + python -m pip install --no-user -r requirements-dev.txt - name: Run tests shell: bash -l {0} + env: + PYTHONNOUSERSITE: "1" run: | conda activate pyforestscan_env - pytest --cov --cov-report=xml + python -m pytest --cov --cov-report=xml - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v5