Skip to content

Commit 0d7b461

Browse files
zherczeg authored and dbatyai committed
Correctly handle celestial plane codepoints in ES5.1. (#3510)
Fixes #3498.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
1 parent d6070a9 commit 0d7b461

4 files changed

Lines changed: 34 additions & 7 deletions

File tree

jerry-core/lit/lit-char-helpers.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point
223223
/* TODO: detect these ranges correctly. */
224224
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
225225
}
226+
#else /* !ENABLED (JERRY_ES2015) */
227+
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
226228
#endif /* ENABLED (JERRY_ES2015) */
227229

228230
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
@@ -252,6 +254,8 @@ lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point
252254
/* TODO: detect these ranges correctly. */
253255
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
254256
}
257+
#else /* !ENABLED (JERRY_ES2015) */
258+
JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
255259
#endif /* ENABLED (JERRY_ES2015) */
256260

257261
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)

jerry-core/parser/js/js-lexer.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -693,12 +693,12 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
693693

694694
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
695695
{
696+
#if ENABLED (JERRY_ES2015)
696697
utf8_length = lit_read_code_point_from_utf8 (source_p,
697698
(lit_utf8_size_t) (source_end_p - source_p),
698699
&code_point);
699700
decoded_length = utf8_length;
700701

701-
#if ENABLED (JERRY_ES2015)
702702
/* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
703703
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
704704
&& source_p + 3 < source_end_p)
@@ -717,11 +717,23 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
717717
char_count = 2;
718718
}
719719
}
720-
else if (source_p[0] >= LEXER_UTF8_4BYTE_START)
720+
else if (source_p[0] >= LIT_UTF8_4_BYTE_MARKER)
721721
{
722722
decoded_length = 2 * 3;
723723
has_escape = true;
724724
}
725+
#else /* !ENABLED (JERRY_ES2015) */
726+
if (code_point < LIT_UTF8_4_BYTE_MARKER)
727+
{
728+
utf8_length = lit_read_code_point_from_utf8 (source_p,
729+
(lit_utf8_size_t) (source_end_p - source_p),
730+
&code_point);
731+
decoded_length = utf8_length;
732+
}
733+
else
734+
{
735+
code_point = 0;
736+
}
725737
#endif /* ENABLED (JERRY_ES2015) */
726738
}
727739

@@ -1091,7 +1103,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
10911103
}
10921104
#endif /* ENABLED (JERRY_ES2015) */
10931105

1094-
if (*source_p >= LEXER_UTF8_4BYTE_START)
1106+
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
10951107
{
10961108
/* Processing 4 byte unicode sequence (even if it is
10971109
* after a backslash). Always converted to two 3 byte
@@ -1893,7 +1905,7 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
18931905
}
18941906

18951907
#if ENABLED (JERRY_ES2015)
1896-
if (*source_p >= LEXER_UTF8_4BYTE_START)
1908+
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
18971909
{
18981910
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
18991911

@@ -2113,7 +2125,7 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
21132125
}
21142126
#endif /* ENABLED (JERRY_ES2015) */
21152127

2116-
if (*source_p >= LEXER_UTF8_4BYTE_START)
2128+
if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
21172129
{
21182130
/* Processing 4 byte unicode sequence (even if it is
21192131
* after a backslash). Always converted to two 3 byte
@@ -3028,7 +3040,7 @@ lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier *
30283040

30293041
escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
30303042
}
3031-
else if (*left_p >= LEXER_UTF8_4BYTE_START)
3043+
else if (*left_p >= LIT_UTF8_4_BYTE_MARKER)
30323044
{
30333045
lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
30343046
escape_size = 3 * 2;

jerry-core/parser/js/js-lexer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ typedef enum
201201
#define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2
202202
#define LEXER_NEWLINE_LS_PS_BYTE_23(source) \
203203
((source)[1] == LIT_UTF8_2_BYTE_CODE_POINT_MIN && ((source)[2] | 0x1) == 0xa9)
204-
#define LEXER_UTF8_4BYTE_START 0xf0
205204

206205
#define LEXER_IS_LEFT_BRACKET(type) \
207206
((type) == LEXER_LEFT_BRACE || (type) == LEXER_LEFT_PAREN || (type) == LEXER_LEFT_SQUARE)

tests/unit-core/test-api-errortype.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,17 @@ main (void)
6262
jerry_release_value (test_values[idx]);
6363
}
6464

65+
char test_source[] = "\xF0\x9D\x84\x9E";
66+
67+
jerry_value_t result = jerry_parse (NULL,
68+
0,
69+
(const jerry_char_t *) test_source,
70+
sizeof (test_source) - 1,
71+
JERRY_PARSE_NO_OPTS);
72+
TEST_ASSERT (jerry_value_is_error (result));
73+
TEST_ASSERT (jerry_get_error_type (result) == JERRY_ERROR_SYNTAX);
74+
75+
jerry_release_value (result);
76+
6577
jerry_cleanup ();
6678
} /* main */

0 commit comments

Comments
 (0)