Skip to content

Commit 910caae

Browse files
authored
use fast path in blendSegment to bump up FPS (#5464)
* use fast path in blendSegment to bump up FPS
1 parent 35ce05a commit 910caae

File tree

1 file changed

+63
-6
lines changed

1 file changed

+63
-6
lines changed

wled00/FX_fcn.cpp

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,6 +1367,7 @@ static uint8_t _dummy (uint8_t a, uint8_t b) { return a; } // dummy (same as
13671367
void WS2812FX::blendSegment(const Segment &topSegment) const {
13681368
typedef uint8_t(*FuncType)(uint8_t, uint8_t);
13691369
// function pointer array: fill with _dummy if using special case: avoid OOB access and always provide a valid path
1370+
// note: making the function array static const uses more ram and comes at no significant speed gain
13701371
FuncType funcs[] = {
13711372
_dummy, _dummy, _dummy, _subtract,
13721373
_difference, _average, _dummy, _divide,
@@ -1398,14 +1399,71 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
13981399
const size_t matrixSize = Segment::maxWidth * Segment::maxHeight;
13991400
const size_t startIndx = XY(topSegment.start, topSegment.startY);
14001401
const size_t stopIndx = startIndx + length;
1401-
const unsigned progress = topSegment.progress();
1402-
const unsigned progInv = 0xFFFFU - progress;
14031402
uint8_t opacity = topSegment.currentBri(); // returns transitioned opacity for style FADE
14041403
uint8_t cct = topSegment.currentCCT();
14051404
if (gammaCorrectCol) opacity = gamma8inv(opacity); // use inverse gamma on brightness for correct color scaling after gamma correction (see #5343 for details)
14061405

1407-
Segment::setClippingRect(0, 0); // disable clipping by default
1406+
const Segment *segO = topSegment.getOldSegment();
1407+
const bool hasGrouping = topSegment.groupLength() != 1;
14081408

1409+
// fast path: handle the default case - no transitions, no grouping/spacing, no mirroring; the 2D branch additionally requires no CCT buffer, while the 1D branch handles CCT itself
1410+
if (!segO && blendingStyle == TRANSITION_FADE && !hasGrouping && !topSegment.mirror && !topSegment.mirror_y) {
1411+
if (isMatrix && stopIndx <= matrixSize && !_pixelCCT) {
1412+
#ifndef WLED_DISABLE_2D
1413+
// Calculate pointer steps to avoid 'if' and 'XY()' inside loops
1414+
int x_inc = 1;
1415+
int y_inc = Segment::maxWidth;
1416+
int start_offset = XY(topSegment.start, topSegment.startY);
1417+
1418+
// adjust starting position and steps based on Reverse/Transpose
1419+
// note: transpose is handled in separate loop so it is still fast and no branching is needed in default path
1420+
if (!topSegment.transpose) {
1421+
if (topSegment.reverse) { start_offset += (width - 1); x_inc = -1; }
1422+
if (topSegment.reverse_y) { start_offset += (height - 1) * Segment::maxWidth; y_inc = -Segment::maxWidth; }
1423+
1424+
for (int y = 0; y < height; y++) {
1425+
uint32_t* pRow = &_pixels[start_offset + y * y_inc];
1426+
const int y_width = y * width;
1427+
for (int x = 0; x < width; x++) {
1428+
uint32_t* p = pRow + x * x_inc;
1429+
uint32_t c_a = topSegment.getPixelColorRaw(x + y_width);
1430+
*p = color_blend(*p, segblend(c_a, *p), opacity);
1431+
}
1432+
}
1433+
} else { // transposed
1434+
for (int y = 0; y < height; y++) {
1435+
const int px = topSegment.reverse ? (height - y - 1) : y; // source pixel: swap y into x, reverse if needed
1436+
for (int x = 0; x < width; x++) {
1437+
const int py = topSegment.reverse_y ? (width - x - 1) : x; // source pixel: swap x into y, reverse if needed
1438+
const uint32_t c_a = topSegment.getPixelColorRaw(px + py * height); // height = virtual width
1439+
const size_t idx = XY(topSegment.start + x, topSegment.startY + y); // write logical (non swapped) pixel coordinate
1440+
_pixels[idx] = color_blend(_pixels[idx], segblend(c_a, _pixels[idx]), opacity);
1441+
}
1442+
}
1443+
}
1444+
return;
1445+
#endif
1446+
} else if (!isMatrix) {
1447+
// 1D fast path, include CCT as it is more common on 1D setups
1448+
uint32_t* strip = _pixels;
1449+
int start = topSegment.start;
1450+
int off = topSegment.offset;
1451+
for (int i = 0; i < length; i++) {
1452+
uint32_t c_a = topSegment.getPixelColorRaw(i);
1453+
int p = topSegment.reverse ? (length - i - 1) : i;
1454+
int idx = start + p + off;
1455+
if (idx >= topSegment.stop) idx -= length;
1456+
strip[idx] = color_blend(strip[idx], segblend(c_a, strip[idx]), opacity);
1457+
if (_pixelCCT) _pixelCCT[idx] = cct;
1458+
}
1459+
return;
1460+
}
1461+
}
1462+
1463+
// slow path: handle everything the fast path skips - transitions (incl. clipping), grouping/spacing, mirroring, and CCT pixels on matrix setups
1464+
Segment::setClippingRect(0, 0); // disable clipping by default
1465+
const unsigned progress = topSegment.progress();
1466+
const unsigned progInv = 0xFFFFU - progress;
14091467
const unsigned dw = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * width / 0xFFFFU + 1;
14101468
const unsigned dh = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * height / 0xFFFFU + 1;
14111469
const unsigned orgBS = blendingStyle;
@@ -1466,7 +1524,6 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
14661524
#ifndef WLED_DISABLE_2D
14671525
const int nCols = topSegment.virtualWidth();
14681526
const int nRows = topSegment.virtualHeight();
1469-
const Segment *segO = topSegment.getOldSegment();
14701527
const int oCols = segO ? segO->virtualWidth() : nCols;
14711528
const int oRows = segO ? segO->virtualHeight() : nRows;
14721529

@@ -1562,8 +1619,8 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
15621619
}
15631620
#endif
15641621
} else {
1622+
// 1D Slow Path
15651623
const int nLen = topSegment.virtualLength();
1566-
const Segment *segO = topSegment.getOldSegment();
15671624
const int oLen = segO ? segO->virtualLength() : nLen;
15681625

15691626
const auto setMirroredPixel = [&](int i, uint32_t c, uint8_t o) {
@@ -1643,7 +1700,7 @@ void WS2812FX::show() {
16431700

16441701
if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) {
16451702
// clear frame buffer
1646-
for (size_t i = 0; i < totalLen; i++) _pixels[i] = BLACK; // memset(_pixels, 0, sizeof(uint32_t) * getLengthTotal());
1703+
memset(_pixels, 0, sizeof(uint32_t) * totalLen);
16471704
// blend all segments into (cleared) buffer
16481705
for (Segment &seg : _segments) if (seg.isActive() && (seg.on || seg.isInTransition())) {
16491706
blendSegment(seg); // blend segment's buffer into frame buffer

0 commit comments

Comments
 (0)