Skip to content

Commit caf7182

Browse files
committed
gh-145668: Optimize FOR_ITER virtual iterators for bytes, bytearray, and str
1 parent 149c465 commit caf7182

2 files changed

Lines changed: 54 additions & 7 deletions

File tree

Python/bytecodes.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3277,8 +3277,10 @@ dummy_func(
32773277
_Py_GatherStats_GetIter(iterable);
32783278
#endif
32793279
PyTypeObject *tp = PyStackRef_TYPE(iterable);
3280-
if (tp == &PyTuple_Type || tp == &PyList_Type) {
3281-
/* Leave iterable on stack and pushed tagged 0 */
3280+
if (tp == &PyTuple_Type || tp == &PyList_Type ||
3281+
tp == &PyBytes_Type || tp == &PyByteArray_Type ||
3282+
tp == &PyUnicode_Type) {
3283+
/* Leave iterable on stack and push tagged 0 */
32823284
iter = iterable;
32833285
DEAD(iterable);
32843286
index_or_null = PyStackRef_TagInt(0);

Python/ceval.c

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3704,20 +3704,65 @@ static _PyStackRef
37043704
foriter_next(PyObject *seq, _PyStackRef index)
37053705
{
37063706
assert(PyStackRef_IsTaggedInt(index));
3707-
assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq));
37083707
intptr_t i = PyStackRef_UntagInt(index);
3708+
37093709
if (PyTuple_CheckExact(seq)) {
37103710
size_t size = PyTuple_GET_SIZE(seq);
37113711
if ((size_t)i >= size) {
37123712
return PyStackRef_NULL;
37133713
}
37143714
return PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, i));
37153715
}
3716-
PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i);
3717-
if (item == NULL) {
3718-
return PyStackRef_NULL;
3716+
3717+
if (PyList_CheckExact(seq)) {
3718+
PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i);
3719+
if (item == NULL) {
3720+
return PyStackRef_NULL;
3721+
}
3722+
return PyStackRef_FromPyObjectSteal(item);
37193723
}
3720-
return PyStackRef_FromPyObjectSteal(item);
3724+
3725+
if (PyBytes_CheckExact(seq)) {
3726+
Py_ssize_t size = PyBytes_GET_SIZE(seq);
3727+
if (i < 0 || i >= size) {
3728+
return PyStackRef_NULL;
3729+
}
3730+
unsigned char ch = (unsigned char)PyBytes_AS_STRING(seq)[i];
3731+
PyObject *item = PyLong_FromUnsignedLong((unsigned long)ch);
3732+
if (item == NULL) {
3733+
return PyStackRef_ERROR;
3734+
}
3735+
return PyStackRef_FromPyObjectSteal(item);
3736+
}
3737+
3738+
if (PyByteArray_CheckExact(seq)) {
3739+
Py_ssize_t size = PyByteArray_GET_SIZE(seq);
3740+
if (i < 0 || i >= size) {
3741+
return PyStackRef_NULL;
3742+
}
3743+
unsigned char ch = (unsigned char)PyByteArray_AS_STRING(seq)[i];
3744+
PyObject *item = PyLong_FromUnsignedLong((unsigned long)ch);
3745+
if (item == NULL) {
3746+
return PyStackRef_ERROR;
3747+
}
3748+
return PyStackRef_FromPyObjectSteal(item);
3749+
}
3750+
3751+
if (PyUnicode_CheckExact(seq)) {
3752+
Py_ssize_t size = PyUnicode_GET_LENGTH(seq);
3753+
if (i < 0 || i >= size) {
3754+
return PyStackRef_NULL;
3755+
}
3756+
// Iteration over str yields 1-character substrings.
3757+
PyObject *item = PyUnicode_Substring(seq, i, i + 1);
3758+
if (item == NULL) {
3759+
return PyStackRef_ERROR;
3760+
}
3761+
return PyStackRef_FromPyObjectSteal(item);
3762+
}
3763+
3764+
// Unsupported type: no iterator-protocol fallback happens here; an error is returned to the caller.
3765+
return PyStackRef_ERROR;
37213766
}
37223767

37233768
_PyStackRef _PyForIter_VirtualIteratorNext(PyThreadState* tstate, _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef* index_ptr)

0 commit comments

Comments
 (0)