Skip to content

Commit 296b22b

Browse files
committed
Add take for ascii text parser
1 parent 746c754 commit 296b22b

3 files changed

Lines changed: 19 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Revision history for bytesmith
22

3+
## 0.3.14.0 -- 2025-??-??
4+
5+
* Add `take` for ASCII parser.
6+
37
## 0.3.13.0 -- 2025-07-11
48

59
* Add `hexFixedWord16#`

bytesmith.cabal

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cabal-version: 3.0
22
name: bytesmith
3-
version: 0.3.13.0
3+
version: 0.3.14.0
44
synopsis: Nonresumable byte parser
55
description:
66
Parse bytes as fast as possible. This is a nonresumable parser
@@ -48,6 +48,7 @@ library
4848
, natural-arithmetic >=0.1.3
4949
, primitive >=0.7 && <0.10
5050
, text-short >=0.1.3 && <0.2
51+
, text >=0.2.1
5152
, wide-word >=0.1.0.9 && <0.2
5253

5354
hs-source-dirs: src

src/Data/Bytes/Parser/Ascii.hs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ module Data.Bytes.Parser.Ascii
3232
, opt
3333

3434
-- * Match Many
35+
, take
3536
, shortTrailedBy
3637
, takeShortWhile
3738

@@ -52,7 +53,7 @@ module Data.Bytes.Parser.Ascii
5253
, Latin.decWord32
5354
) where
5455

55-
import Prelude hiding (any, fail, length, takeWhile)
56+
import Prelude hiding (any, fail, length, takeWhile, take)
5657

5758
import Control.Monad.ST (runST)
5859
import Data.Bits (clearBit)
@@ -62,9 +63,11 @@ import Data.Char (ord)
6263
import Data.Text.Short (ShortText)
6364
import Data.Word (Word8)
6465
import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#))
66+
import Data.Text.Internal (Text(Text))
6567

6668
import qualified Data.ByteString.Short.Internal as BSS
6769
import qualified Data.Bytes as Bytes
70+
import qualified Data.Bytes.Parser as Parser
6871
import qualified Data.Bytes.Parser.Latin as Latin
6972
import qualified Data.Bytes.Parser.Unsafe as Unsafe
7073
import qualified Data.Primitive as PM
@@ -103,6 +106,15 @@ skipTrailedBy e !c = do
103106
else go
104107
go
105108

109+
-- | Consume a fixed number of ASCII characters (all less than codepoint 128), failing with the given error if any consumed byte is 128 or greater.
110+
take :: e -> Int -> Parser e s Text
111+
{-# INLINE take #-}
112+
take e !n = do
113+
bs@(Bytes arr off len) <- Parser.take e n
114+
if Bytes.all (\w -> w < 128) bs
115+
then pure (Text arr off len)
116+
else Parser.fail e
117+
106118
{- | Consume characters matching the predicate. This stops when it
107119
encounters a non-matching character or when it encounters a byte
108120
above @0x7F@. This never fails.

0 commit comments

Comments
 (0)