Skip to content

Commit ef0d8ce

Browse files
authored
unify sample access via constructor autoscale parameter (#121)
* Make slicing equivalent to reading for integer types
  * e.g. meta[0:10] == meta.read_samples(count=10)
* Breaking API Change: autoscale and raw_components parameters removed from read methods
* Move autoscale configuration from method parameters to SigMFFile constructor
* Remove already deprecated raw_components parameter from all methods
* Update read_samples() and read_samples_in_capture() to use instance autoscale setting
* Add autoscale support to fromfile(), fromarchive(), and SigMFArchiveReader
* Simplify __getitem__ with unified scaling behavior for consistency
* Increment minor version
1 parent 99065b7 commit ef0d8ce

File tree

6 files changed

+134
-69
lines changed

6 files changed

+134
-69
lines changed

docs/source/quickstart.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,24 @@ method-based approach.
115115
Only core **global** fields support attribute access. Capture and annotation
116116
fields must still be accessed using the traditional ``get_captures()`` and
117117
``get_annotations()`` methods.
118+
119+
--------------------------------
120+
Control Fixed-Point Data Scaling
121+
--------------------------------
122+
123+
For fixed-point datasets, you can control whether samples are automatically scaled to floating-point values:
124+
125+
.. code-block:: python
126+
127+
import sigmf
128+
129+
# Default behavior: autoscale fixed-point data to [-1.0, 1.0] range
130+
handle = sigmf.fromfile("fixed_point_data.sigmf")
131+
samples = handle.read_samples() # Returns float32/complex64
132+
133+
# Disable autoscaling to access raw integer values
134+
handle_raw = sigmf.fromfile("fixed_point_data.sigmf", autoscale=False)
135+
raw_samples = handle_raw.read_samples() # Returns original integer types
136+
137+
# Both slicing and read_samples() respect the autoscale setting
138+
assert handle[0:10].dtype == handle.read_samples(count=10).dtype

sigmf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# SPDX-License-Identifier: LGPL-3.0-or-later
66

77
# version of this python module
8-
__version__ = "1.3.0"
8+
__version__ = "1.4.0"
99
# matching version of the SigMF specification
1010
__specification__ = "1.2.6"
1111

sigmf/archivereader.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ class SigMFArchiveReader:
2929
map_readonly : bool, optional
3030
Indicate whether assignments on the numpy.memmap are allowed.
3131
archive_buffer : buffer, optional
32-
32+
Alternative buffer to read archive from.
33+
autoscale : bool, optional
34+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
3335
3436
Raises
3537
------
@@ -41,7 +43,7 @@ class SigMFArchiveReader:
4143
If metadata is invalid.
4244
"""
4345

44-
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
46+
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None, autoscale=True):
4547
if name is not None:
4648
path = Path(name)
4749
if path.suffix != SIGMF_ARCHIVE_EXT:
@@ -90,7 +92,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
9092
if data_offset is None:
9193
raise SigMFFileError("No .sigmf-data file found in archive!")
9294

93-
self.sigmffile = SigMFFile(metadata=json_contents)
95+
self.sigmffile = SigMFFile(metadata=json_contents, autoscale=autoscale)
9496
self.sigmffile.validate()
9597

9698
self.sigmffile.set_data_file(

sigmf/sigmffile.py

Lines changed: 75 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ class SigMFFile(SigMFMetafile):
182182
]
183183
VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS}
184184

185-
def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True):
185+
def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True, autoscale=True):
186186
"""
187187
API for SigMF I/O
188188
@@ -198,13 +198,17 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu
198198
When True will skip calculating hash on data_file (if present) to check against metadata.
199199
map_readonly: bool, default True
200200
Indicates whether assignments on the numpy.memmap are allowed.
201+
autoscale: bool, default True
202+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
203+
for all sample reading operations including slicing.
201204
"""
202205
super().__init__()
203206
self.data_file = None
204207
self.data_buffer = None
205208
self.sample_count = 0
206209
self._memmap = None
207210
self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case
211+
self.autoscale = autoscale
208212

209213
self.set_metadata(metadata)
210214
if global_info is not None:
@@ -310,10 +314,39 @@ def __next__(self):
310314
def __getitem__(self, sli):
311315
mem = self._memmap[sli] # matches behavior of numpy.ndarray.__getitem__()
312316

317+
# apply _return_type conversion if set
313318
if self._return_type is None:
314-
return mem
315-
316-
# is_fixed_point and is_complex
319+
# no special conversion needed
320+
if not self.autoscale:
321+
return mem
322+
else:
323+
# apply autoscaling for fixed-point data when autoscale=True
324+
dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
325+
is_fixedpoint_data = dtype["is_fixedpoint"]
326+
327+
if is_fixedpoint_data:
328+
# apply scaling for fixed-point data
329+
is_unsigned_data = dtype["is_unsigned"]
330+
component_size = dtype["component_size"]
331+
data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4")
332+
333+
data = mem.astype(data_type_out)
334+
data = data.view(np.dtype("f4"))
335+
if is_unsigned_data:
336+
data -= 2 ** (component_size * 8 - 1)
337+
data *= 2 ** -(component_size * 8 - 1)
338+
data = data.view(data_type_out)
339+
if self.is_complex_data:
340+
data = data.view(np.complex64)
341+
# for single-channel complex data, flatten the last dimension
342+
if data.ndim > 1 and self.get_num_channels() == 1:
343+
data = data.flatten()
344+
return data[0] if isinstance(sli, int) else data
345+
else:
346+
# floating-point data, no scaling needed
347+
return mem
348+
349+
# handle complex data type conversion
317350
if self._memmap.ndim == 2:
318351
# num_channels == 1
319352
ray = mem[:, 0].astype(self._return_type) + 1.0j * mem[:, 1].astype(self._return_type)
@@ -740,7 +773,7 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False):
740773
self.dump(fp, pretty=pretty)
741774
fp.write("\n") # text files should end in carriage return
742775

743-
def read_samples_in_capture(self, index=0, autoscale=True):
776+
def read_samples_in_capture(self, index=0):
744777
"""
745778
Reads samples from the specified captures segment in its entirety.
746779
@@ -763,9 +796,9 @@ def read_samples_in_capture(self, index=0, autoscale=True):
763796
"an integer number of samples across channels. It may be invalid."
764797
)
765798

766-
return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size(), autoscale, False)
799+
return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size())
767800

768-
def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False):
801+
def read_samples(self, start_index=0, count=-1):
769802
"""
770803
Reads the specified number of samples starting at the specified index from the associated data file.
771804
@@ -775,16 +808,12 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F
775808
Starting sample index from which to read.
776809
count : int, default -1
777810
Number of samples to read. -1 will read whole file.
778-
autoscale : bool, default True
779-
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
780-
raw_components : bool, default False
781-
If True read and return the sample components (individual I & Q for complex, samples for real)
782-
with no conversions or interleaved channels.
783811
784812
Returns
785813
-------
786814
data : ndarray
787815
Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY.
816+
Scaling behavior depends on the autoscale parameter set during construction.
788817
"""
789818
if count == 0:
790819
raise IOError("Number of samples must be greater than zero, or -1 for all samples.")
@@ -800,9 +829,9 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F
800829

801830
if not self._is_conforming_dataset():
802831
warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous")
803-
return self._read_datafile(first_byte, count * self.num_channels, autoscale, False)
832+
return self._read_datafile(first_byte, count * self.get_num_channels())
804833

805-
def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
834+
def _read_datafile(self, first_byte, nitems):
806835
"""
807836
internal function for reading samples from datafile
808837
"""
@@ -832,18 +861,15 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
832861
# return reshaped view for num_channels
833862
# first dimension will be double size if `is_complex_data`
834863
data = data.reshape(data.shape[0] // num_channels, num_channels)
835-
if not raw_components:
836-
data = data.astype(data_type_out)
837-
if autoscale and is_fixedpoint_data:
838-
data = data.view(np.dtype("f4"))
839-
if is_unsigned_data:
840-
data -= 2 ** (component_size * 8 - 1)
841-
data *= 2 ** -(component_size * 8 - 1)
842-
data = data.view(data_type_out)
843-
if self.is_complex_data:
844-
data = data.view(np.complex64)
845-
else:
846-
data = data.view(component_type_in)
864+
data = data.astype(data_type_out)
865+
if self.autoscale and is_fixedpoint_data:
866+
data = data.view(np.dtype("f4"))
867+
if is_unsigned_data:
868+
data -= 2 ** (component_size * 8 - 1)
869+
data *= 2 ** -(component_size * 8 - 1)
870+
data = data.view(data_type_out)
871+
if self.is_complex_data:
872+
data = data.view(np.complex64)
847873

848874
if self.data_file is not None:
849875
fp.close()
@@ -1144,18 +1170,34 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None):
11441170
return None
11451171

11461172

1147-
def fromarchive(archive_path, dir=None, skip_checksum=False):
1173+
def fromarchive(archive_path, dir=None, skip_checksum=False, autoscale=True):
11481174
"""Extract an archive and return a SigMFFile.
11491175
11501176
The `dir` parameter is no longer used as this function has been changed to
11511177
access SigMF archives without extracting them.
1178+
1179+
Parameters
1180+
----------
1181+
archive_path: str
1182+
Path to `sigmf-archive` tarball.
1183+
dir: str, optional
1184+
No longer used. Kept for compatibility.
1185+
skip_checksum: bool, default False
1186+
Skip dataset checksum calculation.
1187+
autoscale: bool, default True
1188+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
1189+
1190+
Returns
1191+
-------
1192+
SigMFFile
1193+
Instance created from archive.
11521194
"""
11531195
from .archivereader import SigMFArchiveReader
11541196

1155-
return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile
1197+
return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum, autoscale=autoscale).sigmffile
11561198

11571199

1158-
def fromfile(filename, skip_checksum=False):
1200+
def fromfile(filename, skip_checksum=False, autoscale=True):
11591201
"""
11601202
Creates and returns a SigMFFile or SigMFCollection instance with metadata loaded from the specified file.
11611203
@@ -1171,6 +1213,8 @@ def fromfile(filename, skip_checksum=False):
11711213
Path for SigMF Metadata, Dataset, Archive or Collection (with or without extension).
11721214
skip_checksum: bool, default False
11731215
When True will not read entire dataset to calculate hash.
1216+
autoscale: bool, default True
1217+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
11741218
11751219
Returns
11761220
-------
@@ -1187,7 +1231,7 @@ def fromfile(filename, skip_checksum=False):
11871231
ext = file_path.suffix
11881232

11891233
if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn):
1190-
return fromarchive(archive_fn, skip_checksum=skip_checksum)
1234+
return fromarchive(archive_fn, skip_checksum=skip_checksum, autoscale=autoscale)
11911235

11921236
if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn):
11931237
collection_fp = open(collection_fn, "rb")
@@ -1207,7 +1251,7 @@ def fromfile(filename, skip_checksum=False):
12071251
meta_fp.close()
12081252

12091253
data_fn = get_dataset_filename_from_metadata(meta_fn, metadata)
1210-
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum)
1254+
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum, autoscale=autoscale)
12111255

12121256

12131257
def get_sigmf_filenames(filename):

tests/test_archivereader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_access_data_without_untar(self):
6060
if complex_prefix == "c":
6161
# complex data will be half as long
6262
target_count //= 2
63-
self.assertTrue(np.all(np.iscomplex(readback_samples)))
63+
self.assertTrue(np.iscomplexobj(readback_samples))
6464
if num_channels != 1:
6565
# check expected # of channels
6666
self.assertEqual(

0 commit comments

Comments (0)