@@ -1445,6 +1445,35 @@ _check_alias_and_seq(Py_UCS4* code, int with_named_seq)
14451445 return 1 ;
14461446}
14471447
1448+ static Py_UCS4
1449+ parse_hex_code (const char * name , int namelen )
1450+ {
1451+ if (namelen < 4 || namelen > 6 ) {
1452+ return (Py_UCS4 )- 1 ;
1453+ }
1454+ if (* name == '0' ) {
1455+ return (Py_UCS4 )- 1 ;
1456+ }
1457+ int v = 0 ;
1458+ while (namelen -- ) {
1459+ v *= 16 ;
1460+ Py_UCS1 c = Py_TOUPPER (* name );
1461+ if (c >= '0' && c <= '9' ) {
1462+ v += c - '0' ;
1463+ }
1464+ else if (c >= 'A' && c <= 'F' ) {
1465+ v += c - 'A' + 10 ;
1466+ }
1467+ else {
1468+ return (Py_UCS4 )- 1 ;
1469+ }
1470+ name ++ ;
1471+ }
1472+ if (v > 0x10ffff ) {
1473+ return (Py_UCS4 )- 1 ;
1474+ }
1475+ return v ;
1476+ }
14481477
14491478static int
14501479_getcode (const char * name , int namelen , Py_UCS4 * code )
@@ -1474,50 +1503,21 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
14741503 /* Check for CJK unified ideographs. */
14751504 if (PyOS_strnicmp (name , "CJK UNIFIED IDEOGRAPH-" , 22 ) == 0 ) {
14761505 /* Four or five hexdigits must follow. */
1477- unsigned int v ;
1478- v = 0 ;
1479- name += 22 ;
1480- namelen -= 22 ;
1481- if (namelen != 4 && namelen != 5 )
1506+ Py_UCS4 v = parse_hex_code (name + 22 , namelen - 22 );
1507+ if (!is_cjk_unified_ideograph (v )) {
14821508 return 0 ;
1483- while (namelen -- ) {
1484- v *= 16 ;
1485- Py_UCS1 c = Py_TOUPPER (* name );
1486- if (c >= '0' && c <= '9' )
1487- v += c - '0' ;
1488- else if (c >= 'A' && c <= 'F' )
1489- v += c - 'A' + 10 ;
1490- else
1491- return 0 ;
1492- name ++ ;
14931509 }
1494- if (!is_cjk_unified_ideograph (v ))
1495- return 0 ;
14961510 * code = v ;
14971511 return 1 ;
14981512 }
14991513
15001514 /* Check for Tangut ideographs. */
15011515 if (PyOS_strnicmp (name , "TANGUT IDEOGRAPH-" , 17 ) == 0 ) {
15021516 /* Five hexdigits must follow. */
1503- unsigned int v = 0 ;
1504- name += 17 ;
1505- namelen -= 17 ;
1506- if (namelen != 5 )
1517+ Py_UCS4 v = parse_hex_code (name + 17 , namelen - 17 );
1518+ if (!is_tangut_ideograph (v )) {
15071519 return 0 ;
1508- while (namelen -- ) {
1509- v *= 16 ;
1510- Py_UCS1 c = Py_TOUPPER (* name );
1511- if (c >= '0' && c <= '9' )
1512- v += c - '0' ;
1513- else if (c >= 'A' && c <= 'F' )
1514- v += c - 'A' + 10 ;
1515- else
1516- return 0 ;
1517- name ++ ;
15181520 }
1519- if (!is_tangut_ideograph (v ))
1520- return 0 ;
15211521 * code = v ;
15221522 return 1 ;
15231523 }
0 commit comments