Skip to content

Commit b329efd

Browse files
committed
[feat] initial support of bjdata draft 4 extension types
1 parent 2bfffcb commit b329efd

File tree

4 files changed

+328
-50
lines changed

4 files changed

+328
-50
lines changed

jdataencode.m

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
% toeplitz, and use _ArrayShape_ to encode the matrix
4646
% MapAsStruct: [0|1] if set to 1, convert containers.Map into
4747
% struct; otherwise, keep it as map
48+
% DateTime: [1|0] if set to 1, convert datetime to string
4849
% Compression: ['zlib'|'gzip','lzma','lz4','lz4hc'] - use zlib method
4950
% to compress data array
5051
% CompressArraySize: [300|int]: only to compress an array if the
@@ -93,6 +94,7 @@
9394
opt.messagepack = jsonopt('MessagePack', 0, opt);
9495
opt.usearrayshape = jsonopt('UseArrayShape', 0, opt) && exist('bandwidth');
9596
opt.annotatearray = jsonopt('AnnotateArray', 0, opt);
97+
opt.datetime = jsonopt('DateTime', 1, opt);
9698

9799
% Performance optimization: pre-compute prefixed field names to avoid
98100
% repeated string concatenation in hot loops
@@ -467,6 +469,10 @@
467469

468470
%% -------------------------------------------------------------------------
469471
function newitem = matlabobject2jd(item, opt)
472+
if (~opt.datetime && (isa(item, 'datetime') || isa(item, 'duration')))
473+
newitem = item;
474+
return
475+
end
470476
try
471477
if numel(item) == 0 % empty object
472478
newitem = struct();

loadbj.m

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,8 @@
107107
else
108108
inputstr = urlread(fname);
109109
end
110-
elseif (~isempty(fname) && any(fname(1) == '[{SCBHiUIulmLMhdDTFZN'))
111-
inputstr = fname;
112110
else
113-
error('input file does not exist or buffer is invalid');
111+
inputstr = fname;
114112
end
115113

116114
inputlen = length(inputstr);
@@ -192,7 +190,7 @@
192190
else
193191
[data{jsoncount}, pos] = parse_array(pos, opt);
194192
end
195-
case {'S', 'C', 'B', 'H', 'i', 'U', 'I', 'u', 'l', 'm', 'L', 'M', 'h', 'd', 'D', 'T', 'F', 'Z', 'N'}
193+
case {'S', 'C', 'B', 'H', 'i', 'U', 'I', 'u', 'l', 'm', 'L', 'M', 'h', 'd', 'D', 'T', 'F', 'Z', 'N', 'E'}
196194
[data{jsoncount}, pos] = parse_value(pos, [], opt);
197195
otherwise
198196
error_pos('Root level structure must start with a valid marker "{[SCBHiUIulmLMhdDTFZN"', opt, pos);
@@ -639,6 +637,8 @@
639637
end
640638
val = [];
641639
pos = pos + 1;
640+
case 'E'
641+
[val, pos] = parse_extension(pos, opt);
642642
otherwise
643643
error_pos('Value expected at position %d', opt, pos);
644644
end
@@ -1162,3 +1162,73 @@
11621162
values{i} = '';
11631163
end
11641164
end
1165+
1166+
%% -------------------------------------------------------------------------
1167+
1168+
function [val, pos] = parse_extension(pos, opt)
% Parse a BJData extension record laid out as [E][type-id][byte-length][payload]
%
% input:
%    pos: index into opt.inputstr_ of the 'E' marker
%    opt: parser options; this function reads opt.inputstr_ (the raw input
%         buffer) and opt.flipendian_ (when true, multi-byte fields are
%         byte-reversed before being typecast)
% output:
%    val: the decoded value
%           type-id 1, 2, 3, 6 -> datetime in the UTC time zone
%           type-id 4          -> datetime built from year/month/day
%           type-id 5, 7       -> duration
%           type-id 8, 9       -> complex double
%           type-id 10         -> jdict wrapping the UUID string
%           anything else      -> jdict wrapping the raw payload bytes
%    pos: index of the first byte following the payload
%
% A payload that runs past the end of the input, or that is shorter than the
% fixed width of its type-id, is reported via error_pos instead of failing
% later with an index or typecast error. Only the leading fixed-width bytes
% of a known type are decoded, so the result is always a scalar.

pos = pos + 1; % step over the 'E' marker
[typeid, pos] = parse_number(pos, opt);
[bytelen, pos] = parse_number(pos, opt);
typeid = double(typeid);
bytelen = double(bytelen);
payloadstart = pos;

if bytelen > 0
    if pos + bytelen - 1 > numel(opt.inputstr_)
        error_pos('Extension payload runs past the end of the input at position %d', opt, payloadstart);
    end
    payload = uint8(opt.inputstr_(pos:pos + bytelen - 1));
    pos = pos + bytelen;
else
    payload = uint8([]);
end

% Minimum payload width (bytes) for type-id 1..10, following the layouts
% noted on each case below; time_s only reads its first three bytes
minlen = [4, 8, 12, 4, 3, 8, 8, 8, 16, 16];
if typeid >= 1 && typeid <= numel(minlen) && typeid == fix(typeid) && numel(payload) < minlen(typeid)
    error_pos('Extension payload is too short for its type-id at position %d', opt, payloadstart);
end

% Swap bytes for Little-Endian (UUID excluded - Big-Endian per RFC 4122)
doswap = opt.flipendian_ && typeid ~= 10;
sw = @(d) d(end:-1:1); % byte reversal helper

switch typeid
    case 1 % epoch_s: uint32
        raw = payload(1:4);
        if doswap
            raw = sw(raw);
        end
        val = datetime(double(typecast(raw, 'uint32')), 'ConvertFrom', 'posixtime', 'TimeZone', 'UTC');
    case {2, 6} % epoch_us, datetime_us: int64
        raw = payload(1:8);
        if doswap
            raw = sw(raw);
        end
        val = datetime(double(typecast(raw, 'int64')) / 1e6, 'ConvertFrom', 'posixtime', 'TimeZone', 'UTC');
    case 3 % epoch_ns: int64 + uint32
        secpart = payload(1:8);
        nspart = payload(9:12);
        if doswap
            % the two fields are reversed independently, not as one run
            secpart = sw(secpart);
            nspart = sw(nspart);
        end
        val = datetime(double(typecast(secpart, 'int64')) + double(typecast(nspart, 'uint32')) / 1e9, ...
                       'ConvertFrom', 'posixtime', 'TimeZone', 'UTC');
    case 4 % date: int16 + 2*uint8
        yearpart = payload(1:2);
        if doswap
            yearpart = sw(yearpart); % month/day are single bytes, no swap
        end
        val = datetime(double(typecast(yearpart, 'int16')), double(payload(3)), double(payload(4)));
    case 5 % time_s: 3*uint8 + reserved
        val = duration(double(payload(1)), double(payload(2)), double(payload(3)));
    case 7 % timedelta_us: int64
        raw = payload(1:8);
        if doswap
            raw = sw(raw);
        end
        val = duration(0, 0, double(typecast(raw, 'int64')) / 1e6);
    case 8 % complex64: 2*float32
        repart = payload(1:4);
        impart = payload(5:8);
        if doswap
            repart = sw(repart);
            impart = sw(impart);
        end
        val = complex(double(typecast(repart, 'single')), double(typecast(impart, 'single')));
    case 9 % complex128: 2*float64
        repart = payload(1:8);
        impart = payload(9:16);
        if doswap
            repart = sw(repart);
            impart = sw(impart);
        end
        val = complex(typecast(repart, 'double'), typecast(impart, 'double'));
    case 10 % uuid: 16 bytes Big-Endian
        % two lowercase hex digits per byte, then the 8-4-4-4-12 grouping
        h = lower(reshape(dec2hex(payload, 2)', 1, []));
        val = jdict([h(1:8) '-' h(9:12) '-' h(13:16) '-' h(17:20) '-' h(21:32)], ...
                    'schema', struct('type', 'string', 'format', 'uuid'));
    otherwise % unknown extension: keep the bytes and remember the type-id
        val = jdict(payload, 'schema', struct('type', 'bytes', 'exttype', int32(typeid)));
end

savebj.m

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,13 @@
243243
skippreencode = false;
244244
if (opt.formatversion >= 4 && ~opt.messagepack && ~opt.ubjson)
245245
if (isa(obj, 'table') && size(obj, 1) > 1)
246-
[cansoa, ~] = table2soa(obj, opt);
246+
cansoa = table2soa(obj, opt);
247247
skippreencode = cansoa;
248248
end
249249
end
250250

251251
if (~skippreencode && jsonopt('PreEncode', 1, opt))
252-
obj = jdataencode(obj, 'Base64', 0, 'UseArrayZipSize', opt.messagepack, opt);
252+
obj = jdataencode(obj, 'Base64', 0, 'UseArrayZipSize', opt.messagepack, 'DateTime', 0, opt);
253253
end
254254

255255
dozip = opt.compression;
@@ -321,6 +321,12 @@
321321
txt = struct2ubjson(name, item, level, opt);
322322
elseif (isnumeric(item) || islogical(item))
323323
txt = mat2ubjson(name, item, level, opt);
324+
elseif (isa(item, 'datetime'))
325+
txt = ext2ubjson(name, item, 'datetime', opt);
326+
elseif (isa(item, 'duration'))
327+
txt = ext2ubjson(name, item, 'duration', opt);
328+
elseif (isa(item, 'jdict'))
329+
txt = jdict2ubjson(name, item, level, opt);
324330
elseif (ischar(item))
325331
if (numel(item) >= opt.compressstringsize)
326332
txt = mat2ubjson(name, item, level, opt);
@@ -339,6 +345,8 @@
339345
txt = struct2ubjson(name, jdataencode(item), level, opt);
340346
elseif (isobject(item))
341347
txt = matlabobject2ubjson(name, item, level, opt);
348+
elseif (~isreal(item) && isnumeric(item))
349+
txt = ext2ubjson(name, item, 'complex', opt);
342350
else
343351
txt = any2ubjson(name, item, level, opt);
344352
end
@@ -1744,3 +1752,130 @@
17441752
else
17451753
newdata = data;
17461754
end
1755+
1756+
%% -------------------------------------------------------------------------
1757+
function txt = ext2ubjson(name, item, dtype, opt)
% Unified extension encoder for datetime, duration and complex values
%
% input:
%    name: field name to prefix to the record; no prefix is written when empty
%    item: the value to encode - a scalar, an array, or an empty array
%    dtype: one of 'datetime', 'duration' or 'complex'
%    opt: encoder options; this function reads opt.messagepack, opt.ubjson,
%         opt.AM_ and opt.unpackhex
% output:
%    txt: the encoded byte string
%
% When opt.messagepack or opt.ubjson is set, no extension record is written:
% datetime is stored as its char() text, duration as seconds, and complex as
% a struct with fields 're' and 'im'.
%
% Otherwise a scalar becomes a single record from encode_ext_scalar, and any
% other size (including empty) becomes the per-element records wrapped
% between opt.AM_{1} and opt.AM_{2} (presumably the array open/close
% markers - confirm against where opt.AM_ is set).

if opt.messagepack || opt.ubjson
    switch dtype
        case 'datetime'
            txt = str2ubjson(name, char(item), 0, opt);
        case 'duration'
            txt = mat2ubjson(name, seconds(item), 0, opt);
        case 'complex'
            txt = struct2ubjson(name, struct('re', real(item), 'im', imag(item)), 0, opt);
        otherwise
            % fail with a clear message rather than "output not assigned"
            error('ext2ubjson: unsupported extension data type "%s"', dtype);
    end
    return
end

% Route everything that is not exactly one element through the array path.
% An empty datetime/duration must not reach the scalar encoder: its scalar
% tests (e.g. hour(val) || minute(val)) are undefined for empty operands.
if numel(item) ~= 1
    parts = cell(1, numel(item) + 2);
    parts{1} = opt.AM_{1};
    for i = 1:numel(item)
        parts{i + 1} = encode_ext_scalar(item(i), dtype, opt);
    end
    parts{end} = opt.AM_{2};
    txt = [parts{:}];
else
    txt = encode_ext_scalar(item, dtype, opt);
end
if ~isempty(name)
    txt = [N_(decodevarname(name, opt.unpackhex), opt) txt];
end
1785+
1786+
%% -------------------------------------------------------------------------
1787+
function txt = encode_ext_scalar(val, dtype, opt)
% Encode one datetime, duration or complex scalar as a BJData extension
%
% input:
%    val: the scalar to encode
%    dtype: 'datetime', 'duration' or 'complex'
%    opt: encoder options; reads opt.ZM_ and opt.flipendian_
% output:
%    txt: opt.ZM_ for NaT / NaN input, otherwise [E][typeid][len][payload]
%
% Type-ids produced here:
%    4 - date (int16 year, uint8 month, uint8 day): unzoned datetime whose
%        hour, minute and second are all zero
%    6 - datetime_us (int64 microseconds): fractional seconds, or a posix
%        time outside the uint32 range
%    1 - epoch_s (uint32 seconds): every other datetime
%    7 - timedelta_us (int64 microseconds): duration
%    8 - complex64 (2 x float32): single-class input
%    9 - complex128 (2 x float64): any other complex input

if strcmp(dtype, 'datetime')
    if isnat(val)
        txt = opt.ZM_;
        return
    end
    zone = val.TimeZone;
    unzoned = isempty(zone);
    if unzoned
        % posixtime needs a zone to be well defined; treat unzoned as UTC
        val.TimeZone = 'UTC';
    end
    epoch = posixtime(val);
    hasClock = hour(val) || minute(val) || second(val);
    if unzoned && ~hasClock
        typeid = 4;
        yearbytes = typecast(int16(year(val)), 'uint8');
        payload = [yearbytes, uint8([month(val), day(val)])];
    elseif mod(second(val), 1) ~= 0 || epoch < 0 || epoch > 4294967295
        typeid = 6;
        payload = typecast(int64(round(epoch * 1e6)), 'uint8');
    else
        typeid = 1;
        payload = typecast(uint32(epoch), 'uint8');
    end
elseif strcmp(dtype, 'duration')
    if isnan(val)
        txt = opt.ZM_;
        return
    end
    typeid = 7;
    microsec = round(seconds(val) * 1e6);
    payload = typecast(int64(microsec), 'uint8');
elseif strcmp(dtype, 'complex')
    if isa(val, 'single')
        typeid = 8;
        repart = typecast(single(real(val)), 'uint8');
        impart = typecast(single(imag(val)), 'uint8');
    else
        typeid = 9;
        repart = typecast(double(real(val)), 'uint8');
        impart = typecast(double(imag(val)), 'uint8');
    end
    payload = [repart, impart];
end

% Byte-swap each multi-byte field on its own when the byte order is flipped
if opt.flipendian_
    switch typeid
        case 4 % date: only the int16 year spans more than one byte
            payload(1:2) = payload([2 1]);
        case 8 % complex64: two independent 4-byte floats
            payload = payload([4:-1:1, 8:-1:5]);
        case 9 % complex128: two independent 8-byte floats
            payload = payload([8:-1:1, 16:-1:9]);
        otherwise % single-field payloads: reverse the whole run
            nbytes = length(payload);
            if nbytes > 1
                payload = payload(nbytes:-1:1);
            end
    end
end

idfield = I_(uint8(typeid), opt);
lenfield = I_(uint8(length(payload)), opt);
txt = ['E' idfield lenfield char(payload)];
1842+
1843+
%% -------------------------------------------------------------------------
1844+
function txt = jdict2ubjson(name, item, level, opt)
% Encode a jdict object, using its schema to pick a BJData extension type
%
% input:
%    name: field name to prefix to the record; no prefix is written when empty
%    item: a jdict object (any other input is passed on to struct2ubjson)
%    level: nesting level, forwarded to the fallback encoders
%    opt: encoder options; reads opt.messagepack, opt.ubjson, opt.unpackhex
% output:
%    txt: the encoded byte string
%
% Dispatch on item.schema:
%    format == 'uuid' -> extension type 10 with the 16 UUID bytes
%    type == 'bytes'  -> extension carrying item.data, with type-id taken
%                        from schema.exttype (0 when that field is absent)
%    anything else    -> encoded as a plain struct
% When opt.messagepack or opt.ubjson is set, no extension record is written:
% the UUID is stored as a string and the raw bytes as a uint8 array.

if ~isa(item, 'jdict')
    txt = struct2ubjson(name, item, level, opt);
    return
end
s = item.schema;
noext = opt.messagepack || opt.ubjson;

% NOTE(review): the meaning of item.getattr('$', '') is not visible here; it
% is kept as the first operand exactly as before - confirm it yields a
% logical scalar. The isfield guard stops schemas without a 'format' field
% (such as the 'bytes' schema) from raising a missing-field error.
if item.getattr('$', '') && isfield(s, 'format') && strcmp(s.format, 'uuid')
    % UUID string
    uuidstr = char(item);
    if noext
        txt = str2ubjson(name, uuidstr, level, opt);
        return
    end
    hexstr = strrep(uuidstr, '-', '');
    if numel(hexstr) ~= 32 || ~all(isstrprop(hexstr, 'xdigit'))
        error('jdict2ubjson: invalid UUID string "%s"', uuidstr);
    end
    % one row per byte (two hex digits each), converted in a single call
    payload = uint8(hex2dec(reshape(hexstr, 2, [])'))';
    txt = ['E' I_(uint8(10), opt) I_(uint8(16), opt) char(payload)];
elseif isfield(s, 'type') && strcmp(s.type, 'bytes')
    % Raw extension bytes
    payload = uint8(item.data);
    payload = payload(:)'; % force a row so the concatenation below is valid
    if noext
        txt = mat2ubjson(name, payload, level, opt);
        return
    end
    typeid = uint32(0);
    if isfield(s, 'exttype')
        typeid = uint32(s.exttype);
    end
    txt = ['E' I_(typeid, opt) I_(uint32(length(payload)), opt) char(payload)];
else
    % Generic jdict - encode as struct
    txt = struct2ubjson(name, struct(item), level, opt);
    return
end

% Both extension branches get the same optional name prefix
if ~isempty(name)
    txt = [N_(decodevarname(name, opt.unpackhex), opt) txt];
end

0 commit comments

Comments
 (0)