Skip to content

Commit 2960dff

Browse files
LaszloLango authored and rerobika committed
Support all literals in 'jerry_get_literals_from_snapshot' when C format is used. (#3472)
The literals must be saved with hex escapes to support strings that contain special characters, such as `var s = 'hello",\n"world';`.

JerryScript-DCO-1.0-Signed-off-by: László Langó <lango@inf.u-szeged.hu>
1 parent 08da8bc commit 2960dff

2 files changed

Lines changed: 41 additions & 94 deletions

File tree

jerry-core/api/jerry-snapshot.c

Lines changed: 25 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -1551,70 +1551,6 @@ jerry_append_number_to_buffer (uint8_t *buffer_p, /**< buffer */
15511551
utf8_str_size);
15521552
} /* jerry_append_number_to_buffer */
15531553

1554-
/**
1555-
* Check whether the passed ecma-string is a valid identifier.
1556-
*
1557-
* @return true - if the ecma-string is a valid identifier,
1558-
* false - otherwise
1559-
*/
1560-
static bool
1561-
ecma_string_is_valid_identifier (const ecma_string_t *string_p)
1562-
{
1563-
ECMA_STRING_TO_UTF8_STRING (string_p, str_buffer_p, str_buffer_size);
1564-
1565-
const uint8_t *str_p = str_buffer_p;
1566-
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
1567-
1568-
while (str_p < str_end_p)
1569-
{
1570-
lit_code_point_t code_point = *str_p;
1571-
lit_utf8_size_t utf8_length = 1;
1572-
1573-
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
1574-
{
1575-
utf8_length = lit_read_code_point_from_utf8 (str_p,
1576-
(lit_utf8_size_t) (str_end_p - str_p),
1577-
&code_point);
1578-
1579-
#if ENABLED (JERRY_ES2015)
1580-
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
1581-
&& str_p + 3 < str_end_p)
1582-
{
1583-
lit_code_point_t low_surrogate;
1584-
lit_read_code_point_from_utf8 (str_p + 3,
1585-
(lit_utf8_size_t) (str_end_p - (str_p + 3)),
1586-
&low_surrogate);
1587-
1588-
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
1589-
{
1590-
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
1591-
(ecma_char_t) low_surrogate);
1592-
utf8_length = 2 * 3;
1593-
}
1594-
}
1595-
#endif /* ENABLED (JERRY_ES2015) */
1596-
}
1597-
1598-
if (str_p == str_buffer_p)
1599-
{
1600-
if (!lit_code_point_is_identifier_start (code_point))
1601-
{
1602-
break;
1603-
}
1604-
}
1605-
else if (!lit_code_point_is_identifier_part (code_point))
1606-
{
1607-
break;
1608-
}
1609-
1610-
str_p += utf8_length;
1611-
}
1612-
1613-
ECMA_FINALIZE_UTF8_STRING (str_buffer_p, str_buffer_size);
1614-
1615-
return str_p == str_end_p;
1616-
} /* ecma_string_is_valid_identifier */
1617-
16181554
#endif /* ENABLED (JERRY_SNAPSHOT_SAVE) */
16191555

16201556
/**
@@ -1667,14 +1603,7 @@ jerry_get_literals_from_snapshot (const uint32_t *snapshot_p, /**< input snapsho
16671603
{
16681604
ecma_string_t *literal_p = ecma_get_string_from_value (buffer_p[i]);
16691605

1670-
/* NOTE:
1671-
* We don't save a literal (in C format) which isn't a valid
1672-
* identifier or it's a magic string.
1673-
* TODO:
1674-
* Save all of the literals in C format as well.
1675-
*/
1676-
if (ecma_get_string_magic (literal_p) == LIT_MAGIC_STRING__COUNT
1677-
&& (!is_c_format || ecma_string_is_valid_identifier (literal_p)))
1606+
if (ecma_get_string_magic (literal_p) == LIT_MAGIC_STRING__COUNT)
16781607
{
16791608
literal_count++;
16801609
}
@@ -1702,14 +1631,7 @@ jerry_get_literals_from_snapshot (const uint32_t *snapshot_p, /**< input snapsho
17021631
{
17031632
ecma_string_t *literal_p = ecma_get_string_from_value (buffer_p[i]);
17041633

1705-
/* NOTE:
1706-
* We don't save a literal (in C format) which isn't a valid
1707-
* identifier or it's a magic string.
1708-
* TODO:
1709-
* Save all of the literals in C format as well.
1710-
*/
1711-
if (ecma_get_string_magic (literal_p) == LIT_MAGIC_STRING__COUNT
1712-
&& (!is_c_format || ecma_string_is_valid_identifier (literal_p)))
1634+
if (ecma_get_string_magic (literal_p) == LIT_MAGIC_STRING__COUNT)
17131635
{
17141636
literal_array[literal_idx++] = literal_p;
17151637
}
@@ -1743,7 +1665,29 @@ jerry_get_literals_from_snapshot (const uint32_t *snapshot_p, /**< input snapsho
17431665
for (lit_utf8_size_t i = 0; i < literal_count; i++)
17441666
{
17451667
lit_buf_p = jerry_append_chars_to_buffer (lit_buf_p, buffer_end_p, " \"", 0);
1746-
lit_buf_p = jerry_append_ecma_string_to_buffer (lit_buf_p, buffer_end_p, literal_array[i]);
1668+
ECMA_STRING_TO_UTF8_STRING (literal_array[i], str_buffer_p, str_buffer_size);
1669+
for (lit_utf8_size_t j = 0; j < str_buffer_size; j++)
1670+
{
1671+
uint8_t byte = str_buffer_p[j];
1672+
if (byte < 32 || byte > 127)
1673+
{
1674+
lit_buf_p = jerry_append_chars_to_buffer (lit_buf_p, buffer_end_p, "\\x", 0);
1675+
ecma_char_t hex_digit = (ecma_char_t) (byte >> 4);
1676+
*lit_buf_p++ = (lit_utf8_byte_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
1677+
hex_digit = (lit_utf8_byte_t) (byte & 0xf);
1678+
*lit_buf_p++ = (lit_utf8_byte_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0'));
1679+
}
1680+
else
1681+
{
1682+
if (byte == '\\' || byte == '"')
1683+
{
1684+
*lit_buf_p++ = '\\';
1685+
}
1686+
*lit_buf_p++ = byte;
1687+
}
1688+
}
1689+
1690+
ECMA_FINALIZE_UTF8_STRING (str_buffer_p, str_buffer_size);
17471691
lit_buf_p = jerry_append_chars_to_buffer (lit_buf_p, buffer_end_p, "\"", 0);
17481692

17491693
if (i < literal_count - 1)

tests/unit-core/test-snapshot.c

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -400,7 +400,7 @@ main (void)
400400

401401
static jerry_char_t literal_buffer_c[LITERAL_BUFFER_SIZE];
402402
static uint32_t literal_snapshot_buffer[SNAPSHOT_BUFFER_SIZE];
403-
static const jerry_char_t code_for_c_format[] = "var object = { aa:'fo o', Bb:'max', aaa:'xzy0' };";
403+
static const jerry_char_t code_for_c_format[] = "var object = { aa:'fo\" o\\n \\\\', Bb:'max', aaa:'xzy0' };";
404404

405405
jerry_value_t generate_result;
406406
generate_result = jerry_generate_snapshot (NULL,
@@ -411,35 +411,37 @@ main (void)
411411
literal_snapshot_buffer,
412412
SNAPSHOT_BUFFER_SIZE);
413413

414-
TEST_ASSERT (!jerry_value_is_error (generate_result)
415-
&& jerry_value_is_number (generate_result));
414+
TEST_ASSERT (!jerry_value_is_error (generate_result));
415+
TEST_ASSERT (jerry_value_is_number (generate_result));
416416

417417
size_t snapshot_size = (size_t) jerry_get_number_value (generate_result);
418418
jerry_release_value (generate_result);
419-
TEST_ASSERT (snapshot_size == 120);
419+
TEST_ASSERT (snapshot_size == 124);
420420

421421
const size_t lit_c_buf_sz = jerry_get_literals_from_snapshot (literal_snapshot_buffer,
422422
snapshot_size,
423423
literal_buffer_c,
424424
LITERAL_BUFFER_SIZE,
425425
true);
426-
TEST_ASSERT (lit_c_buf_sz == 200);
426+
TEST_ASSERT (lit_c_buf_sz == 239);
427427

428428
static const char *expected_c_format = (
429-
"jerry_length_t literal_count = 4;\n\n"
430-
"jerry_char_t *literals[4] =\n"
429+
"jerry_length_t literal_count = 5;\n\n"
430+
"jerry_char_t *literals[5] =\n"
431431
"{\n"
432432
" \"Bb\",\n"
433433
" \"aa\",\n"
434434
" \"aaa\",\n"
435-
" \"xzy0\"\n"
435+
" \"xzy0\",\n"
436+
" \"fo\\\" o\\x0A \\\\\"\n"
436437
"};\n\n"
437-
"jerry_length_t literal_sizes[4] =\n"
438+
"jerry_length_t literal_sizes[5] =\n"
438439
"{\n"
439440
" 2 /* Bb */,\n"
440441
" 2 /* aa */,\n"
441442
" 3 /* aaa */,\n"
442-
" 4 /* xzy0 */\n"
443+
" 4 /* xzy0 */,\n"
444+
" 8 /* fo\" o\n \\ */\n"
443445
"};\n"
444446
);
445447

@@ -452,9 +454,10 @@ main (void)
452454
literal_buffer_list,
453455
LITERAL_BUFFER_SIZE,
454456
false);
455-
456-
TEST_ASSERT (lit_list_buf_sz == 30);
457-
TEST_ASSERT (!strncmp ((char *) literal_buffer_list, "2 Bb\n2 aa\n3 aaa\n4 fo o\n4 xzy0\n", lit_list_buf_sz));
457+
TEST_ASSERT (lit_list_buf_sz == 34);
458+
TEST_ASSERT (!strncmp ((char *) literal_buffer_list,
459+
"2 Bb\n2 aa\n3 aaa\n4 xzy0\n8 fo\" o\n \\\n",
460+
lit_list_buf_sz));
458461

459462
jerry_cleanup ();
460463
}

0 commit comments

Comments
 (0)