Skip to content

Commit 0dfbe27

Browse files
committed
Huge performance boost for json and smile from rebuild
1 parent bcf91c6 commit 0dfbe27

4 files changed

Lines changed: 50 additions & 42 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ stack.yaml
2626
*.swp
2727
test_results/**
2828
cabal.project.local
29+
cabal.profile.project

cabal.project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ source-repository-package
88
source-repository-package
99
type: git
1010
location: https://github.com/byteverse/bytebuild
11-
tag: 4f03a98100237863bb4a9ac75d4de6723a8e0961
11+
tag: ed7a6c283c0d5cde915eb2685b7e370c5fc17b17

src/Json.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ decode = P.parseBytesEither do
153153

154154
-- | Encode a JSON syntax tree.
155155
encode :: Value -> BLDR.Builder
156-
encode = \case
156+
{-# noinline encode #-}
157+
encode v0 = BLDR.rebuild $ case v0 of
157158
True -> BLDR.ascii4 't' 'r' 'u' 'e'
158159
False -> BLDR.ascii5 'f' 'a' 'l' 's' 'e'
159160
Null -> BLDR.ascii4 'n' 'u' 'l' 'l'

src/Json/Smile.hs

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@ import Control.Monad.ST.Run (runByteArrayST)
2626
import Data.Bits (countLeadingZeros,complement,unsafeShiftR,(.&.),(.|.))
2727
import Data.Bits (testBit)
2828
import Data.Bytes.Builder (Builder)
29-
import Data.Foldable (foldMap)
3029
import Data.Int (Int32)
3130
import Data.Primitive (ByteArray(ByteArray),newByteArray)
3231
import Data.Primitive (writeByteArray,byteArrayFromListN,sizeofByteArray)
3332
import Data.Primitive (MutableByteArray(..),unsafeFreezeByteArray)
34-
import Data.Primitive (readByteArray,copyMutableByteArray,indexByteArray)
33+
import Data.Primitive (readByteArray,copyMutableByteArray)
3534
import Data.Text.Short (ShortText)
3635
import Data.Word (Word8,Word32,Word64)
3736
import Data.Word.Zigzag (toZigzag32,toZigzag64)
@@ -45,7 +44,6 @@ import qualified Data.Bytes as Bytes
4544
import qualified Data.Bytes.Builder as B
4645
import qualified Data.Bytes.Builder.Bounded as Bounded
4746
import qualified Data.Bytes.Builder.Bounded.Unsafe as Unsafe
48-
import qualified Data.Bytes.Text.Ascii as Ascii
4947
import qualified Data.ByteString.Short as SBS
5048
import qualified Data.Number.Scientific as Sci
5149
import qualified Data.Text.Short as TS
@@ -56,46 +54,56 @@ import qualified Prelude
5654
-- | Encode a Json 'Value' to the Smile binary format.
5755
-- This encoder does not produce backreferences.
5856
encode :: Value -> Builder
59-
{-# noinline encode #-}
60-
encode v0 = header <> recurse v0
61-
where
62-
header = B.bytes $ Ascii.fromString ":)\n\x00"
63-
recurse :: Value -> Builder
64-
recurse (Object obj) = B.word8 0xFA <> foldMap recMember obj <> B.word8 0xFB
65-
recurse (Array arr) = B.word8 0xF8 <> foldMap recurse arr <> B.word8 0xF9
66-
recurse (String str) = encodeString str
67-
recurse (Number x)
57+
{-# inline encode #-}
58+
encode v0 = B.ascii4 ':' ')' '\n' '\x00' <> encodeNoHeader v0
59+
60+
-- The "rebuild" trick was adapted from the fast-builder library. It
61+
-- results in a 2x performance gain on the twitter benchmark.
62+
-- This function is marked noinline to ensure that its performance is
63+
-- stable.
64+
encodeNoHeader :: Value -> Builder
65+
{-# noinline encodeNoHeader #-}
66+
encodeNoHeader val = B.rebuild $ case val of
67+
Object obj ->
68+
B.word8 0xFA
69+
<>
70+
foldMap (\Member{key,value} -> encodeKey key <> encodeNoHeader value) obj
71+
<>
72+
B.word8 0xFB
73+
Array arr -> B.word8 0xF8 <> foldMap encodeNoHeader arr <> B.word8 0xF9
74+
String str -> encodeString str
75+
Number x
6876
| Just i32 <- Sci.toInt32 x
6977
, -16 <= i32 && i32 <= 15
7078
, w5 <- fromIntegral @Word32 @Word8 (toZigzag32 i32)
71-
= B.word8 (0xC0 + w5)
79+
-> B.word8 (0xC0 + w5)
7280
| Just i32 <- Sci.toInt32 x
73-
= B.fromBounded Nat.constant (Bounded.word8 0x24 `Bounded.append` vlqSmile64 (fromIntegral @Word32 @Word64 (toZigzag32 i32)))
81+
-> B.fromBounded Nat.constant (Bounded.word8 0x24 `Bounded.append` vlqSmile64 (fromIntegral @Word32 @Word64 (toZigzag32 i32)))
7482
| Just i64 <- Sci.toInt64 x
75-
= B.fromBounded Nat.constant (Bounded.word8 0x25 `Bounded.append` vlqSmile64 (toZigzag64 i64))
76-
| otherwise = Sci.withExposed encodeSmallDecimal encodeBigDecimal x
77-
recurse Null = B.word8 0x21
78-
recurse False = B.word8 0x22
79-
recurse True = B.word8 0x23
80-
recMember :: Member -> Builder
81-
recMember Member{key,value} = encodeKey key <> recurse value
82-
encodeSmallDecimal :: Int -> Int -> Builder
83-
encodeSmallDecimal !c !e = encodeBigDecimal (fromIntegral c) (fromIntegral e)
84-
encodeBigDecimal :: Integer -> Integer -> Builder
85-
encodeBigDecimal c e = case e of
86-
0 -> encodeBigInteger c
87-
_ -> B.word8 0x2A -- bigdecimal token tag
88-
<> vlqSmile ( fromIntegral @Word32 @Natural
89-
$ toZigzag32 scale)
90-
<> vlqSmile (fromIntegral @Int @Natural $ sizeofByteArray raw) -- size of byte digits
91-
<> B.sevenEightSmile (Bytes.fromByteArray raw) -- 7/8 encoding of byte digits
92-
where
93-
scale :: Int32
94-
-- WARNING smile can't handle exponents outside int32_t, so this truncates
95-
-- WARNING "scale" is what Java BigDecimal thinks, which is
96-
-- negative of all mathematics since exponential notation was invented 💩
97-
scale = fromIntegral @Integer @Int32 (-e)
98-
raw = integerToBase256ByteArray c
83+
-> B.fromBounded Nat.constant (Bounded.word8 0x25 `Bounded.append` vlqSmile64 (toZigzag64 i64))
84+
| otherwise -> Sci.withExposed encodeSmallDecimal encodeBigDecimal x
85+
Null -> B.word8 0x21
86+
False -> B.word8 0x22
87+
True -> B.word8 0x23
88+
89+
encodeSmallDecimal :: Int -> Int -> Builder
90+
encodeSmallDecimal !c !e = encodeBigDecimal (fromIntegral c) (fromIntegral e)
91+
92+
encodeBigDecimal :: Integer -> Integer -> Builder
93+
encodeBigDecimal c e = case e of
94+
0 -> encodeBigInteger c
95+
_ -> B.word8 0x2A -- bigdecimal token tag
96+
<> vlqSmile ( fromIntegral @Word32 @Natural
97+
$ toZigzag32 scale)
98+
<> vlqSmile (fromIntegral @Int @Natural $ sizeofByteArray raw) -- size of byte digits
99+
<> B.sevenEightSmile (Bytes.fromByteArray raw) -- 7/8 encoding of byte digits
100+
where
101+
scale :: Int32
102+
-- WARNING smile can't handle exponents outside int32_t, so this truncates
103+
-- WARNING "scale" is what Java BigDecimal thinks, which is
104+
-- negative of all mathematics since exponential notation was invented 💩
105+
scale = fromIntegral @Integer @Int32 (-e)
106+
raw = integerToBase256ByteArray c
99107

100108
-- | Encode a number using the SMILE @BigInteger@ token type (prefix @0x26@).
101109
encodeBigInteger :: Integer -> Builder
@@ -215,7 +223,6 @@ encodeAsciiString !str
215223

216224
-- | Encode a string.
217225
encodeString :: ShortText -> Builder
218-
{-# inline encodeString #-}
219226
encodeString !str = case SBS.length (TS.toShortByteString str) of
220227
0 -> B.word8 0x20
221228
n -> case TS.isAscii str of
@@ -228,7 +235,6 @@ encodeString !str = case SBS.length (TS.toShortByteString str) of
228235

229236
-- | Encode a key.
230237
encodeKey :: ShortText -> Builder
231-
{-# inline encodeKey #-}
232238
encodeKey !str = case SBS.length (TS.toShortByteString str) of
233239
0 -> B.word8 0x20
234240
n | n <= 64

0 commit comments

Comments
 (0)