Skip to content

Commit a87c94d

Browse files
committed
Correctly handle non-BMP characters
1 parent 2ec6e80 commit a87c94d

2 files changed

Lines changed: 23 additions & 2 deletions

File tree

src/Json.hs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ data Value
6969

7070
-- | Exceptions that can happen while parsing JSON. Do not pattern
7171
-- match on values of this type. New data constructors may be added
72-
-- at any time.
72+
-- at any time without a major version bump.
7373
data SyntaxException
7474
= EmptyInput
7575
| ExpectedColon
@@ -322,7 +322,16 @@ encodeUtf8Char !marr !ix !c
322322
PM.writeByteArray marr (ix + 2)
323323
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (ord c))))
324324
pure (ix + 3)
325-
| otherwise = error "encodeUtf8Char: write this"
325+
| otherwise = do
326+
PM.writeByteArray marr ix
327+
(fromIntegral @Int @Word8 (unsafeShiftR (ord c) 18 .|. 0b11110000))
328+
PM.writeByteArray marr (ix + 1)
329+
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (unsafeShiftR (ord c) 12))))
330+
PM.writeByteArray marr (ix + 2)
331+
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (unsafeShiftR (ord c) 6))))
332+
PM.writeByteArray marr (ix + 3)
333+
(0b10000000 .|. (0b00111111 .&. (fromIntegral @Int @Word8 (ord c))))
334+
pure (ix + 4)
326335

327336
byteArrayToShortByteString :: ByteArray -> BSS.ShortByteString
328337
byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x

test/Main.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
{-# language LambdaCase #-}
22
{-# language OverloadedStrings #-}
33

4+
import Data.ByteString.Short.Internal (ShortByteString(SBS))
5+
import Data.Bytes (Bytes)
6+
import Data.Primitive (ByteArray(ByteArray))
47
import Data.Scientific (Scientific,scientific)
8+
import Data.Text.Short (ShortText)
59
import Test.Tasty (defaultMain,testGroup,TestTree)
610
import Test.Tasty.HUnit ((@=?))
711
import Twitter100 (encodedTwitter100,byteStringTwitter100)
@@ -36,6 +40,10 @@ tests = testGroup "Tests"
3640
Right (J.Object (Exts.fromList [Exts.fromList [J.Member "foo" J.True, J.Member "bar" J.False]]))
3741
@=?
3842
J.decode (Bytes.fromAsciiString "{\"foo\" : true, \"bar\": false }")
43+
, THU.testCase "D" $
44+
Right (J.String "Smile: 😂")
45+
@=?
46+
J.decode (shortTextToBytes "\"Smile: 😂\"")
3947
, THU.testCase "Twitter100" $
4048
case J.decode (Bytes.fromByteArray encodedTwitter100) of
4149
Left _ -> fail "nope"
@@ -62,3 +70,7 @@ toAesonValue = \case
6270
HM.empty mbrs
6371
J.Array vals -> AE.Array $ Exts.fromList $ foldr
6472
(\x xs -> toAesonValue x : xs) [] vals
73+
74+
shortTextToBytes :: ShortText -> Bytes
75+
shortTextToBytes str = case TS.toShortByteString str of
76+
SBS x -> let y = ByteArray x in Bytes.fromByteArray y

0 commit comments

Comments
 (0)