@@ -26,12 +26,11 @@ import Control.Monad.ST.Run (runByteArrayST)
2626import Data.Bits (countLeadingZeros ,complement ,unsafeShiftR ,(.&.) ,(.|.) )
2727import Data.Bits (testBit )
2828import Data.Bytes.Builder (Builder )
29- import Data.Foldable (foldMap )
3029import Data.Int (Int32 )
3130import Data.Primitive (ByteArray (ByteArray ),newByteArray )
3231import Data.Primitive (writeByteArray ,byteArrayFromListN ,sizeofByteArray )
3332import Data.Primitive (MutableByteArray (.. ),unsafeFreezeByteArray )
34- import Data.Primitive (readByteArray ,copyMutableByteArray , indexByteArray )
33+ import Data.Primitive (readByteArray ,copyMutableByteArray )
3534import Data.Text.Short (ShortText )
3635import Data.Word (Word8 ,Word32 ,Word64 )
3736import Data.Word.Zigzag (toZigzag32 ,toZigzag64 )
@@ -45,7 +44,6 @@ import qualified Data.Bytes as Bytes
4544import qualified Data.Bytes.Builder as B
4645import qualified Data.Bytes.Builder.Bounded as Bounded
4746import qualified Data.Bytes.Builder.Bounded.Unsafe as Unsafe
48- import qualified Data.Bytes.Text.Ascii as Ascii
4947import qualified Data.ByteString.Short as SBS
5048import qualified Data.Number.Scientific as Sci
5149import qualified Data.Text.Short as TS
@@ -56,46 +54,56 @@ import qualified Prelude
5654-- | Encode a Json 'Value' to the Smile binary format.
5755-- This encoder does not produce backreferences.
5856encode :: Value -> Builder
59- {-# noinline encode #-}
60- encode v0 = header <> recurse v0
61- where
62- header = B. bytes $ Ascii. fromString " :)\n \x00 "
63- recurse :: Value -> Builder
64- recurse (Object obj) = B. word8 0xFA <> foldMap recMember obj <> B. word8 0xFB
65- recurse (Array arr) = B. word8 0xF8 <> foldMap recurse arr <> B. word8 0xF9
66- recurse (String str) = encodeString str
67- recurse (Number x)
57+ {-# inline encode #-}
58+ encode v0 = B. ascii4 ' :' ' )' ' \n ' ' \x00 ' <> encodeNoHeader v0
59+
60+ -- The "rebuild" trick was adapted from the fast-builder library. It
61+ -- results in a 2x performance gain on the twitter benchmark.
62+ -- This function is marked noinline to ensure that its performance is
63+ -- stable.
64+ encodeNoHeader :: Value -> Builder
65+ {-# noinline encodeNoHeader #-}
66+ encodeNoHeader val = B. rebuild $ case val of
67+ Object obj ->
68+ B. word8 0xFA
69+ <>
70+ foldMap (\ Member {key,value} -> encodeKey key <> encodeNoHeader value) obj
71+ <>
72+ B. word8 0xFB
73+ Array arr -> B. word8 0xF8 <> foldMap encodeNoHeader arr <> B. word8 0xF9
74+ String str -> encodeString str
75+ Number x
6876 | Just i32 <- Sci. toInt32 x
6977 , - 16 <= i32 && i32 <= 15
7078 , w5 <- fromIntegral @ Word32 @ Word8 (toZigzag32 i32)
71- = B. word8 (0xC0 + w5)
79+ -> B. word8 (0xC0 + w5)
7280 | Just i32 <- Sci. toInt32 x
73- = B. fromBounded Nat. constant (Bounded. word8 0x24 `Bounded.append` vlqSmile64 (fromIntegral @ Word32 @ Word64 (toZigzag32 i32)))
81+ -> B. fromBounded Nat. constant (Bounded. word8 0x24 `Bounded.append` vlqSmile64 (fromIntegral @ Word32 @ Word64 (toZigzag32 i32)))
7482 | Just i64 <- Sci. toInt64 x
75- = B. fromBounded Nat. constant (Bounded. word8 0x25 `Bounded.append` vlqSmile64 (toZigzag64 i64))
76- | otherwise = Sci. withExposed encodeSmallDecimal encodeBigDecimal x
77- recurse Null = B. word8 0x21
78- recurse False = B. word8 0x22
79- recurse True = B. word8 0x23
80- recMember :: Member -> Builder
81- recMember Member {key,value} = encodeKey key <> recurse value
82- encodeSmallDecimal :: Int -> Int -> Builder
83- encodeSmallDecimal ! c ! e = encodeBigDecimal ( fromIntegral c) ( fromIntegral e)
84- encodeBigDecimal :: Integer -> Integer -> Builder
85- encodeBigDecimal c e = case e of
86- 0 -> encodeBigInteger c
87- _ -> B. word8 0x2A -- bigdecimal token tag
88- <> vlqSmile ( fromIntegral @ Word32 @ Natural
89- $ toZigzag32 scale)
90- <> vlqSmile (fromIntegral @ Int @ Natural $ sizeofByteArray raw) -- size of byte digits
91- <> B. sevenEightSmile (Bytes. fromByteArray raw) -- 7/8 encoding of byte digits
92- where
93- scale :: Int32
94- -- WARNING smile can't handle exponents outside int32_t, so this truncates
95- -- WARNING "scale" is what Java BigDecimal thinks, which is
96- -- negative of all mathematics since exponential notation was invented 💩
97- scale = fromIntegral @ Integer @ Int32 (- e)
98- raw = integerToBase256ByteArray c
83+ -> B. fromBounded Nat. constant (Bounded. word8 0x25 `Bounded.append` vlqSmile64 (toZigzag64 i64))
84+ | otherwise -> Sci. withExposed encodeSmallDecimal encodeBigDecimal x
85+ Null -> B. word8 0x21
86+ False -> B. word8 0x22
87+ True -> B. word8 0x23
88+
89+ encodeSmallDecimal :: Int -> Int -> Builder
90+ encodeSmallDecimal ! c ! e = encodeBigDecimal ( fromIntegral c) ( fromIntegral e)
91+
92+ encodeBigDecimal :: Integer -> Integer -> Builder
93+ encodeBigDecimal c e = case e of
94+ 0 -> encodeBigInteger c
95+ _ -> B. word8 0x2A -- bigdecimal token tag
96+ <> vlqSmile ( fromIntegral @ Word32 @ Natural
97+ $ toZigzag32 scale)
98+ <> vlqSmile (fromIntegral @ Int @ Natural $ sizeofByteArray raw) -- size of byte digits
99+ <> B. sevenEightSmile (Bytes. fromByteArray raw) -- 7/8 encoding of byte digits
100+ where
101+ scale :: Int32
102+ -- WARNING smile can't handle exponents outside int32_t, so this truncates
103+ -- WARNING "scale" is what Java BigDecimal thinks, which is
104+ -- negative of all mathematics since exponential notation was invented 💩
105+ scale = fromIntegral @ Integer @ Int32 (- e)
106+ raw = integerToBase256ByteArray c
99107
100108-- | Encode a number using as SMILE @BigInteger@ token type (prefix @0x26@).
101109encodeBigInteger :: Integer -> Builder
@@ -215,7 +223,6 @@ encodeAsciiString !str
215223
216224-- | Encode a string.
217225encodeString :: ShortText -> Builder
218- {-# inline encodeString #-}
219226encodeString ! str = case SBS. length (TS. toShortByteString str) of
220227 0 -> B. word8 0x20
221228 n -> case TS. isAscii str of
@@ -228,7 +235,6 @@ encodeString !str = case SBS.length (TS.toShortByteString str) of
228235
229236-- | Encode a key.
230237encodeKey :: ShortText -> Builder
231- {-# inline encodeKey #-}
232238encodeKey ! str = case SBS. length (TS. toShortByteString str) of
233239 0 -> B. word8 0x20
234240 n | n <= 64
0 commit comments