Skip to content

Commit 549a343

Browse files
committed
BUGFIX: ensure in cloud environments, only one content release is building at any given time
We usually rely on prunner to ensure that only one build is running at any given time. However, when running in a cloud environment with no shared storage, the prunner data folder is not shared between instances. In this case, during a deployment, two containers run concurrently, with two separate prunner instances (the old and the new one), which do not see each other. We could fix this in prunner itself, but this would be a bigger undertaking (different storage backends for prunner), or we can work around this in DecoupledContentStore. This is what we do here.
1 parent 4e7c162 commit 549a343

6 files changed

Lines changed: 102 additions & 1 deletion

File tree

Classes/Command/ContentReleasePrepareCommandController.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
namespace Flowpack\DecoupledContentStore\Command;
55

6+
use Flowpack\DecoupledContentStore\Core\ConcurrentBuildLockService;
67
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\PrunnerJobId;
78
use Flowpack\DecoupledContentStore\PrepareContentRelease\Infrastructure\RedisContentReleaseService;
89
use Neos\Flow\Annotations as Flow;
@@ -21,6 +22,12 @@ class ContentReleasePrepareCommandController extends CommandController
2122
*/
2223
protected $redisContentReleaseService;
2324

25+
/**
26+
* @Flow\Inject
27+
* @var ConcurrentBuildLockService
28+
*/
29+
protected $concurrentBuildLock;
30+
2431
public function createContentReleaseCommand(string $contentReleaseIdentifier, string $prunnerJobId)
2532
{
2633
$contentReleaseIdentifier = ContentReleaseIdentifier::fromString($contentReleaseIdentifier);
@@ -30,6 +37,13 @@ public function createContentReleaseCommand(string $contentReleaseIdentifier, st
3037
$this->redisContentReleaseService->createContentRelease($contentReleaseIdentifier, $prunnerJobId, $logger);
3138
}
3239

40+
public function ensureAllOtherInProgressContentReleasesWillBeTerminatedCommand(string $contentReleaseIdentifier)
41+
{
42+
$contentReleaseIdentifier = ContentReleaseIdentifier::fromString($contentReleaseIdentifier);
43+
44+
$this->concurrentBuildLock->ensureAllOtherInProgressContentReleasesWillBeTerminated($contentReleaseIdentifier);
45+
}
46+
3347
public function registerManualTransferJobCommand(string $contentReleaseIdentifier, string $prunnerJobId)
3448
{
3549
$contentReleaseIdentifier = ContentReleaseIdentifier::fromString($contentReleaseIdentifier);
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php
2+
3+
namespace Flowpack\DecoupledContentStore\Core;
4+
5+
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\ContentReleaseIdentifier;
6+
use Flowpack\DecoupledContentStore\Core\Infrastructure\RedisClientManager;
7+
use Neos\Flow\Annotations as Flow;
8+
9+
/**
10+
* We usually rely on prunner to ensure that only one build is running at any given time.
11+
*
12+
* However, when running in a cloud environment with no shared storage, the prunner data folder is not shared between
13+
* instances. In this case, during a deployment, two containers run concurrently, with two separate prunner instances
14+
* (the old and the new one), which do not see each other.
15+
*
16+
* We could fix this in prunner itself, but this would be a bigger undertaking (different storage backends for prunner),
17+
* or we can work around this in DecoupledContentStore. This is what this class does.
18+
*
19+
* ## Main Idea
20+
*
21+
* - We use a special redis key "contentStore:concurrentBuildLock" which is set to the current being-built release ID in
22+
* `./flow contentReleasePrepare:ensureAllOtherInProgressContentReleasesWillBeTerminated`
23+
* - In the "Enumerate" and "Render" phases, we periodically check whether the concurrentBuildLock is set to the currently
24+
* in-progress content release. If NO, we abort.
25+
*
26+
* @Flow\Scope("singleton")
27+
*/
28+
class ConcurrentBuildLockService
29+
{
30+
31+
/**
32+
* @Flow\Inject
33+
* @var RedisClientManager
34+
*/
35+
protected $redisClientManager;
36+
37+
public function ensureAllOtherInProgressContentReleasesWillBeTerminated(ContentReleaseIdentifier $contentReleaseIdentifier)
38+
{
39+
$this->redisClientManager->getPrimaryRedis()->set('contentStore:concurrentBuildLock', $contentReleaseIdentifier->getIdentifier());
40+
}
41+
42+
public function assertNoOtherContentReleaseWasStarted(ContentReleaseIdentifier $contentReleaseIdentifier)
43+
{
44+
$concurrentBuildLock = ContentReleaseIdentifier::fromString($this->redisClientManager->getPrimaryRedis()->get('contentStore:concurrentBuildLock'));
45+
46+
if (!$contentReleaseIdentifier->equals($concurrentBuildLock)) {
47+
// the concurrent build lock is different (i.e. newer) than our currently-running content release.
48+
// Thus, we abort the in-progress content release as quickly as we can - by DYING.
49+
50+
echo '!!!!! Hard-aborting the current job ' . $concurrentBuildLock->getIdentifier() . ' because the concurrentBuildLock contains ' . $concurrentBuildLock->getIdentifier() . "\n\n";
51+
echo "This is no error during deployment, but should never happen outside a deployment.\n\n It can only happen if two prunner instances run concurrently.\n\n";
52+
exit(1);
53+
}
54+
}
55+
56+
}

Classes/NodeEnumeration/NodeEnumerator.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Flowpack\DecoupledContentStore\NodeEnumeration;
44

55

6+
use Flowpack\DecoupledContentStore\Core\ConcurrentBuildLockService;
67
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\ContentReleaseIdentifier;
78
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\RedisInstanceIdentifier;
89
use Flowpack\DecoupledContentStore\Core\Infrastructure\ContentReleaseLogger;
@@ -37,6 +38,12 @@ class NodeEnumerator
3738
*/
3839
protected $redisContentReleaseService;
3940

41+
/**
42+
* @Flow\Inject
43+
* @var ConcurrentBuildLockService
44+
*/
45+
protected $concurrentBuildLockService;
46+
4047
/**
4148
* @Flow\InjectConfiguration("nodeRendering.nodeTypeWhitelist")
4249
* @var string
@@ -55,6 +62,7 @@ public function enumerateAndStoreInRedis(?Site $site, ContentReleaseLogger $cont
5562

5663
$this->redisEnumerationRepository->clearDocumentNodesEnumeration($releaseIdentifier);
5764
foreach (GeneratorUtility::createArrayBatch($this->enumerateAll($site, $contentReleaseLogger), 100) as $enumeration) {
65+
$this->concurrentBuildLockService->assertNoOtherContentReleaseWasStarted($releaseIdentifier);
5866
// $enumeration is an array of EnumeratedNode, with at most 100 elements in it.
5967
// TODO: EXTENSION POINT HERE, TO ADD ADDITIONAL ENUMERATIONS (.metadata.json f.e.)
6068
// TODO: not yet fully sure how to handle Enumeration

Classes/NodeRendering/NodeRenderOrchestrator.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flowpack\DecoupledContentStore\NodeRendering;
66

7+
use Flowpack\DecoupledContentStore\Core\ConcurrentBuildLockService;
78
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\RedisInstanceIdentifier;
89
use Flowpack\DecoupledContentStore\NodeRendering\Dto\DocumentNodeCacheKey;
910
use Flowpack\DecoupledContentStore\NodeRendering\Dto\RenderingStatistics;
@@ -93,6 +94,12 @@ class NodeRenderOrchestrator
9394
*/
9495
protected $redisContentReleaseService;
9596

97+
/**
98+
* @Flow\Inject
99+
* @var ConcurrentBuildLockService
100+
*/
101+
protected $concurrentBuildLockService;
102+
96103
private const EXIT_ERRORSTATUSCODE_RELEASE_ALREADY_COMPLETED = 1;
97104
private const EXIT_ERRORSTATUSCODE_EMPTY_ENUMERATION = 2;
98105
private const EXIT_ERRORSTATUSCODE_RETRY_LIMIT_REACHED = 3;
@@ -140,6 +147,7 @@ public function renderContentRelease(ContentReleaseIdentifier $contentReleaseIde
140147
}
141148

142149
$contentReleaseLogger->info('Starting iteration ' . $i);
150+
$this->concurrentBuildLockService->assertNoOtherContentReleaseWasStarted($contentReleaseIdentifier);
143151

144152
$this->redisRenderingStatisticsStore->addStatisticsIteration($contentReleaseIdentifier, RenderingStatistics::create(0, 0, []));
145153

@@ -200,11 +208,12 @@ public function renderContentRelease(ContentReleaseIdentifier $contentReleaseIde
200208
$jobsWorkedThroughOverLastTenSeconds = $previousRemainingJobs - $remainingJobsCount;
201209
$renderingsPerSecondDataPoints[] = $jobsWorkedThroughOverLastTenSeconds / 10;
202210

203-
204211
$contentReleaseLogger->debug('Waiting... ', [
205212
'numberOfQueuedJobs' => $remainingJobsCount,
206213
'numberOfRenderingsInProgress' => $this->redisRenderingQueue->numberOfRenderingsInProgress($contentReleaseIdentifier),
207214
]);
215+
216+
$this->concurrentBuildLockService->assertNoOtherContentReleaseWasStarted($contentReleaseIdentifier);
208217
}
209218
}
210219

Classes/NodeRendering/NodeRenderer.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
namespace Flowpack\DecoupledContentStore\NodeRendering;
66

77
use Flowpack\DecoupledContentStore\ContentReleaseManager;
8+
use Flowpack\DecoupledContentStore\Core\ConcurrentBuildLockService;
89
use Flowpack\DecoupledContentStore\NodeRendering\ProcessEvents\DocumentRenderedEvent;
910
use Flowpack\DecoupledContentStore\NodeRendering\ProcessEvents\ExitEvent;
1011
use Flowpack\DecoupledContentStore\NodeRendering\ProcessEvents\QueueEmptyEvent;
@@ -112,6 +113,12 @@ class NodeRenderer
112113
*/
113114
protected $persistenceManager;
114115

116+
/**
117+
* @Flow\Inject
118+
* @var ConcurrentBuildLockService
119+
*/
120+
protected $concurrentBuildLockService;
121+
115122

116123
public function render(ContentReleaseIdentifier $contentReleaseIdentifier, ContentReleaseLogger $contentReleaseLogger, RendererIdentifier $rendererIdentifier)
117124
{
@@ -133,6 +140,7 @@ public function render(ContentReleaseIdentifier $contentReleaseIdentifier, Conte
133140
// determining what needs to be done. We just need to wait a bit and retry.
134141
$contentReleaseLogger->debug('Rendering queue currently empty; we wait a bit see if there is work for us.');
135142
sleep(2);
143+
$this->concurrentBuildLockService->assertNoOtherContentReleaseWasStarted($contentReleaseIdentifier);
136144
continue;
137145
}
138146

@@ -149,6 +157,11 @@ public function render(ContentReleaseIdentifier $contentReleaseIdentifier, Conte
149157
yield DocumentRenderedEvent::create();
150158

151159
$i++;
160+
161+
if ($i % 5 === 0) {
162+
$this->concurrentBuildLockService->assertNoOtherContentReleaseWasStarted($contentReleaseIdentifier);
163+
}
164+
152165
if ($i % 20 === 0) {
153166
$contentReleaseLogger->info('Restarting after 20 renders.');
154167
yield ExitEvent::createWithStatusCode(193);

pipelines_template.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ pipelines:
3333
prepare_finished:
3434
script:
3535
- ./flow contentReleasePrepare:createContentRelease {{ .contentReleaseId }} {{ .__jobID }}
36+
- ./flow contentReleasePrepare:ensureAllOtherInProgressContentReleasesWillBeTerminated {{ .contentReleaseId }}
3637

3738
################################################################################
3839
# 1) ENUMERATION

0 commit comments

Comments
 (0)