@@ -45,47 +45,47 @@ const BMP_MAX = 0xFFFF;
4545 * @return {GraphemeSegmenter } iterator for grapheme cluster segments
4646 */
4747export function * graphemeSegments ( input ) {
48+ let cp = input . codePointAt ( 0 ) ;
49+
50+ // do nothing on empty string
51+ if ( cp == null ) return ;
52+
53+ /** Current cursor position. */
54+ let cursor = cp <= BMP_MAX ? 1 : 2 ;
55+
4856 /** Total length of the input string. */
4957 let len = input . length ;
5058
51- // do nothing on empty string
52- if ( len === 0 ) return ;
59+ /** Category of codepoint immediately preceding cursor */
60+ let catBefore = cat ( cp ) ;
5361
54- let cp = /** @type {number }*/ ( input . codePointAt ( 0 ) ) ;
62+ /** @type {GraphemeCategoryNum } Category of codepoint at cursor (the one following `catBefore`'s codepoint). */
63+ let catAfter = 0 ;
5564
56- /** Memoize the beginning code point of the segment . */
57- let _hd = cp ;
65+ /** The number of RIS codepoints preceding `cursor` . */
66+ let risCount = 0 ;
5867
5968 /**
6069 * Emoji state for GB11: tracks if we've seen Extended_Pictographic followed by Extend* ZWJ
6170 * Only relevant when catBefore === ZWJ && catAfter === Extended_Pictographic
6271 */
6372 let emoji = false ;
6473
65- /** The number of RI codepoints preceding `cursor`. */
66- let riCount = 0 ;
67-
6874 /** InCB=Consonant - segment started with Indic consonant */
6975 let consonant = false ;
7076
7177 /** InCB=Linker - seen a linker after consonant */
7278 let linker = false ;
7379
74- /** Category of codepoint immediately preceding cursor */
75- let catBefore = cat ( cp ) ;
80+ let index = 0 ;
7681
77- /** Memoize the beginning category of the segment */
82+ /** Beginning category of a segment */
7883 let _catBegin = catBefore ;
7984
80- /** @type {GraphemeCategoryNum } Category of codepoint immediately preceding cursor. */
81- let catAfter = 0 ;
82-
83- let index = 0 ;
84- let cursor = 0 ;
85+ /** Memoize the beginning code point of the segment. */
86+ let _hd = cp ;
8587
8688 while ( cursor < len ) {
87- cursor += cp <= BMP_MAX ? 1 : 2 ;
88-
8989 cp = /** @type {number } */ ( input . codePointAt ( cursor ) ) ;
9090 catAfter = cat ( cp ) ;
9191
@@ -117,8 +117,8 @@ export function* graphemeSegments(input) {
117117 }
118118 // GB12, GB13: RI × RI (odd count means no break)
119119 else if ( catBefore === 10 && catAfter === 10 ) {
120- // riCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
121- boundary = riCount ++ % 2 === 1 ;
120+ // risCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
121+ boundary = risCount ++ % 2 === 1 ;
122122 }
123123 // GB6: L × (L | V | LV | LVT)
124124 else if ( catBefore === 5 ) {
@@ -150,7 +150,7 @@ export function* graphemeSegments(input) {
150150
151151 // Reset segment state
152152 emoji = false ;
153- riCount = 0 ;
153+ risCount = 0 ;
154154 index = cursor ;
155155 _catBegin = catAfter ;
156156 _hd = cp ;
@@ -181,6 +181,7 @@ export function* graphemeSegments(input) {
181181 }
182182 }
183183
184+ cursor += cp <= BMP_MAX ? 1 : 2 ;
184185 catBefore = catAfter ;
185186 }
186187
0 commit comments