Skip to content

Commit c09400c

Browse files
Share some common code.
1 parent accd150 commit c09400c

1 file changed

Lines changed: 33 additions & 33 deletions

File tree

Modules/unicodedata.c

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1445,6 +1445,35 @@ _check_alias_and_seq(Py_UCS4* code, int with_named_seq)
14451445
return 1;
14461446
}
14471447

1448+
static Py_UCS4
1449+
parse_hex_code(const char *name, int namelen)
1450+
{
1451+
if (namelen < 4 || namelen > 6) {
1452+
return (Py_UCS4)-1;
1453+
}
1454+
if (*name == '0') {
1455+
return (Py_UCS4)-1;
1456+
}
1457+
int v = 0;
1458+
while (namelen--) {
1459+
v *= 16;
1460+
Py_UCS1 c = Py_TOUPPER(*name);
1461+
if (c >= '0' && c <= '9') {
1462+
v += c - '0';
1463+
}
1464+
else if (c >= 'A' && c <= 'F') {
1465+
v += c - 'A' + 10;
1466+
}
1467+
else {
1468+
return (Py_UCS4)-1;
1469+
}
1470+
name++;
1471+
}
1472+
if (v > 0x10ffff) {
1473+
return (Py_UCS4)-1;
1474+
}
1475+
return v;
1476+
}
14481477

14491478
static int
14501479
_getcode(const char* name, int namelen, Py_UCS4* code)
@@ -1474,50 +1503,21 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
14741503
/* Check for CJK unified ideographs. */
14751504
if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
14761505
/* Four or five hexdigits must follow. */
1477-
unsigned int v;
1478-
v = 0;
1479-
name += 22;
1480-
namelen -= 22;
1481-
if (namelen != 4 && namelen != 5)
1506+
Py_UCS4 v = parse_hex_code(name + 22, namelen - 22);
1507+
if (!is_cjk_unified_ideograph(v)) {
14821508
return 0;
1483-
while (namelen--) {
1484-
v *= 16;
1485-
Py_UCS1 c = Py_TOUPPER(*name);
1486-
if (c >= '0' && c <= '9')
1487-
v += c - '0';
1488-
else if (c >= 'A' && c <= 'F')
1489-
v += c - 'A' + 10;
1490-
else
1491-
return 0;
1492-
name++;
14931509
}
1494-
if (!is_cjk_unified_ideograph(v))
1495-
return 0;
14961510
*code = v;
14971511
return 1;
14981512
}
14991513

15001514
/* Check for Tangut ideographs. */
15011515
if (PyOS_strnicmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
15021516
/* Five hexdigits must follow. */
1503-
unsigned int v = 0;
1504-
name += 17;
1505-
namelen -= 17;
1506-
if (namelen != 5)
1517+
Py_UCS4 v = parse_hex_code(name + 17, namelen - 17);
1518+
if (!is_tangut_ideograph(v)) {
15071519
return 0;
1508-
while (namelen--) {
1509-
v *= 16;
1510-
Py_UCS1 c = Py_TOUPPER(*name);
1511-
if (c >= '0' && c <= '9')
1512-
v += c - '0';
1513-
else if (c >= 'A' && c <= 'F')
1514-
v += c - 'A' + 10;
1515-
else
1516-
return 0;
1517-
name++;
15181520
}
1519-
if (!is_tangut_ideograph(v))
1520-
return 0;
15211521
*code = v;
15221522
return 1;
15231523
}

0 commit comments

Comments
 (0)