Skip to content

Commit 26ef5d5

Browse files
feat: validate undeclared SigMF extensions in metadata (#104)
* raise warning on undeclared extensions in metadata
* add test case for declared & undeclared namespaces

---------

Co-authored-by: Teque5 <teque5@gmail.com>
1 parent 2ae107f commit 26ef5d5

File tree

4 files changed

+82
-12
lines changed

4 files changed

+82
-12
lines changed

sigmf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# SPDX-License-Identifier: LGPL-3.0-or-later
66

77
# version of this python module
8-
__version__ = "1.2.10"
8+
__version__ = "1.2.11"
99
# matching version of the SigMF specification
1010
__specification__ = "1.2.5"
1111

sigmf/sigmffile.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,6 @@ def validate(self):
604604
"""
605605
Check schema and throw error if issue.
606606
"""
607-
version = self.get_global_field(self.VERSION_KEY)
608607
validate.validate(self._metadata, self.get_schema())
609608

610609
def archive(self, name=None, fileobj=None):

sigmf/validate.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import logging
1212
import os
1313
import sys
14+
import warnings
1415

1516
# multi-threading library - should work well as I/O will be the primary
1617
# cost for small SigMF files. Swap to ProcessPool if files are large.
@@ -25,6 +26,34 @@
2526
from . import error, schema, sigmffile
2627

2728

29+
def _get_namespaces_declared(metadata: dict) -> set:
    """
    Get set of declared extension namespaces.

    Parameters
    ----------
    metadata : dict
        Metadata dictionary; the extension list is read from the global
        section under `SigMFFile.EXTENSIONS_KEY`.

    Returns
    -------
    set
        The text before the first ":" of each declared extension name.
        Empty when there is no global section or no extension list.
    """
    # use the shared key constants rather than string literals so this stays
    # in step with the rest of the module
    global_info = metadata.get(sigmffile.SigMFFile.GLOBAL_KEY, {})
    extensions = global_info.get(sigmffile.SigMFFile.EXTENSIONS_KEY, [])
    return {ext["name"].split(":")[0] for ext in extensions}
33+
34+
35+
def _get_namespaces_used(metadata: dict) -> set:
36+
"""Get set of used extension namespaces."""
37+
used = set()
38+
39+
def check_dict(ddd: dict):
40+
"""Check keys for non-core namespaces."""
41+
for key in ddd:
42+
if ":" in key:
43+
namespace = key.split(":")[0]
44+
if namespace != "core":
45+
used.add(namespace)
46+
47+
for section in metadata:
48+
if isinstance(metadata[section], dict):
49+
check_dict(metadata[section])
50+
elif isinstance(metadata[section], list):
51+
for item in metadata[section]:
52+
check_dict(item)
53+
54+
return used
55+
56+
2857
def validate(metadata, ref_schema=schema.get_schema()) -> None:
2958
"""
3059
Check that the provided `metadata` dict is valid according to the `ref_schema` dict.
@@ -46,11 +75,22 @@ def validate(metadata, ref_schema=schema.get_schema()) -> None:
4675
"""
4776
jsonschema.validators.validate(instance=metadata, schema=ref_schema)
4877

78+
# check namespaces
79+
undeclared = _get_namespaces_used(metadata) - _get_namespaces_declared(metadata)
80+
if undeclared:
81+
warnings.warn(
82+
f"Found undeclared extensions in use: {', '.join(sorted(undeclared))}. "
83+
f"All extensions should be declared in {sigmffile.SigMFFile.EXTENSIONS_KEY}. "
84+
"This will raise a ValidationError in future versions.",
85+
DeprecationWarning,
86+
stacklevel=2,
87+
)
88+
4989
# ensure captures and annotations have monotonically increasing sample_start
5090
for key in ["captures", "annotations"]:
5191
count = -1
5292
for item in metadata[key]:
53-
new_count = item["core:sample_start"]
93+
new_count = item[sigmffile.SigMFFile.START_INDEX_KEY]
5494
if new_count < count:
5595
raise jsonschema.exceptions.ValidationError(f"{key} has incorrect sample start ordering.")
5696
count = new_count
@@ -121,7 +161,7 @@ def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None:
121161
n_total = len(paths)
122162
# estimate number of CPU cores
123163
# https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python
124-
est_num_workers = len(os.sched_getaffinity(0)) if os.name == 'posix' else os.cpu_count()
164+
est_num_workers = len(os.sched_getaffinity(0)) if os.name == "posix" else os.cpu_count()
125165
# create a thread pool
126166
# https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor
127167
with ThreadPoolExecutor(max_workers=est_num_workers) as executor:

tests/test_validation.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
"""Tests for Validator"""
88

9+
import copy
910
import tempfile
1011
import unittest
1112
from pathlib import Path
@@ -18,9 +19,12 @@
1819
from .testdata import TEST_FLOAT32_DATA, TEST_METADATA
1920

2021

21-
def test_valid_data():
22-
"""ensure the default metadata is OK"""
23-
SigMFFile(TEST_METADATA).validate()
22+
class NominalCases(unittest.TestCase):
    """Cases the validator is expected to accept."""

    def test_nominal(self):
        """unmodified reference metadata should validate without error"""
        # work on a private copy so the shared fixture is never handed out
        reference = copy.deepcopy(TEST_METADATA)
        handle = SigMFFile(reference)
        handle.validate()
2428

2529

2630
class CommandLineValidator(unittest.TestCase):
@@ -32,7 +36,7 @@ def setUp(self):
3236
self.tmp_path = tmp_path = Path(self.tmp_dir.name)
3337
junk_path = tmp_path / "junk"
3438
TEST_FLOAT32_DATA.tofile(junk_path)
35-
some_meta = SigMFFile(TEST_METADATA, data_file=junk_path)
39+
some_meta = SigMFFile(copy.deepcopy(TEST_METADATA), data_file=junk_path)
3640
some_meta.tofile(tmp_path / "a")
3741
some_meta.tofile(tmp_path / "b")
3842
some_meta.tofile(tmp_path / "c", toarchive=True)
@@ -75,13 +79,14 @@ class FailingCases(unittest.TestCase):
7579
"""Cases where the validator should raise an exception."""
7680

7781
def setUp(self):
78-
self.metadata = dict(TEST_METADATA)
82+
self.metadata = copy.deepcopy(TEST_METADATA)
7983

8084
def test_no_version(self):
81-
"""core:version must be present"""
82-
del self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY]
85+
"""version key must be present"""
86+
meta = SigMFFile(copy.deepcopy(self.metadata))
87+
del meta._metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY]
8388
with self.assertRaises(ValidationError):
84-
SigMFFile(self.metadata).validate()
89+
meta.validate()
8590

8691
def test_extra_top_level_key(self):
8792
"""no extra keys allowed on the top level"""
@@ -128,3 +133,29 @@ def test_invalid_hash(self):
128133
self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = "derp"
129134
with self.assertRaises(sigmf.error.SigMFFileError):
130135
SigMFFile(metadata=self.metadata, data_file=temp_file.name)
136+
137+
138+
class CheckNamespace(unittest.TestCase):
    """Cases where namespace issues are involved"""

    def setUp(self):
        # deep copy so edits made by one test do not reach the shared fixture
        self.metadata = copy.deepcopy(TEST_METADATA)

    def test_undeclared_namespace(self):
        """unknown namespace should raise a warning"""
        self.metadata[SigMFFile.GLOBAL_KEY]["other_namespace:key"] = 0
        with self.assertWarns(Warning):
            SigMFFile(self.metadata).validate()

    def test_declared_namespace(self):
        """known namespace should not raise a warning"""
        # local import keeps this test self-contained
        import warnings

        self.metadata[SigMFFile.GLOBAL_KEY]["other_namespace:key"] = 0
        # define other_namespace
        self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.EXTENSIONS_KEY] = [
            {
                "name": "other_namespace",
                "version": "0.0.1",
                "optional": False,
            }
        ]
        # record every warning so the absence of the namespace warning is
        # actually asserted instead of passing silently
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            SigMFFile(self.metadata).validate()
        # match on the message text so unrelated warnings from dependencies
        # do not fail this test
        flagged = [str(item.message) for item in caught if "undeclared extensions" in str(item.message)]
        self.assertEqual(flagged, [])

0 commit comments

Comments
 (0)