Skip to content

Commit 3a786ef

Browse files
authored
Merge pull request #154 from kdambekalns/fulltext-removed-hidden
BUGFIX: Correctly handle removed/hidden nodes in fulltext index
2 parents aa9769d + f6c06fd commit 3a786ef

1 file changed

Lines changed: 92 additions & 50 deletions

File tree

  • Classes/Flowpack/ElasticSearch/ContentRepositoryAdaptor/Indexer

Classes/Flowpack/ElasticSearch/ContentRepositoryAdaptor/Indexer/NodeIndexer.php

Lines changed: 92 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ public function getIndex()
134134
}
135135

136136
/**
137-
* index this node, and add it to the current bulk request.
137+
* Index this node, and add it to the current bulk request.
138138
*
139139
* @param NodeInterface $node
140140
* @param string $targetWorkspaceName In case this is triggered during publishing, a workspace name will be passed in
@@ -163,20 +163,20 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
163163
$contextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $contextPath);
164164
}
165165

166-
$contextPathHash = sha1($contextPath);
166+
$documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
167167
$nodeType = $node->getNodeType();
168168

169169
$mappingType = $this->getIndex()->findType(NodeTypeMappingBuilder::convertNodeTypeNameToMappingName($nodeType));
170170

171171
if ($this->bulkProcessing === false) {
172-
// Remove document with the same contextPathHash but different NodeType, required after NodeType change
173-
$this->logger->log(sprintf('NodeIndexer: Search and remove duplicate document if needed. ID: %s', $contextPath, $node->getNodeType()->getName(), $contextPathHash), LOG_DEBUG, null, 'ElasticSearch (CR)');
172+
// Remove document with the same documentIdentifier but different NodeType, required after NodeType change
173+
$this->logger->log(sprintf('NodeIndexer (%s): Search and remove duplicate document for node %s (%s) if needed.', $documentIdentifier, $contextPath, $node->getIdentifier()), LOG_DEBUG, null, 'ElasticSearch (CR)');
174174
$this->getIndex()->request('DELETE', '/_query', [], json_encode([
175175
'query' => [
176176
'bool' => [
177177
'must' => [
178178
'ids' => [
179-
'values' => [$contextPathHash]
179+
'values' => [$documentIdentifier]
180180
]
181181
],
182182
'must_not' => [
@@ -189,23 +189,15 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
189189
]));
190190
}
191191

192-
if ($node->isRemoved()) {
193-
// TODO: handle deletion from the fulltext index as well
194-
$mappingType->deleteDocumentById($contextPathHash);
195-
$this->logger->log(sprintf('NodeIndexer: Removed node %s from index (node flagged as removed). ID: %s', $contextPath, $contextPathHash), LOG_DEBUG, null, 'ElasticSearch (CR)');
196-
197-
return;
198-
}
199-
200192
$logger = $this->logger;
201193
$fulltextIndexOfNode = [];
202-
$nodePropertiesToBeStoredInIndex = $this->extractPropertiesAndFulltext($node, $fulltextIndexOfNode, function ($propertyName) use ($logger, $contextPathHash) {
203-
$logger->log(sprintf('NodeIndexer (%s) - Property "%s" not indexed because no configuration found.', $contextPathHash, $propertyName), LOG_DEBUG, null, 'ElasticSearch (CR)');
194+
$nodePropertiesToBeStoredInIndex = $this->extractPropertiesAndFulltext($node, $fulltextIndexOfNode, function ($propertyName) use ($logger, $documentIdentifier, $node) {
195+
$logger->log(sprintf('NodeIndexer (%s) - Property "%s" not indexed because no configuration found, node type %s.', $documentIdentifier, $propertyName, $node->getNodeType()->getName()), LOG_DEBUG, null, 'ElasticSearch (CR)');
204196
});
205197

206198
$document = new ElasticSearchDocument($mappingType,
207199
$nodePropertiesToBeStoredInIndex,
208-
$contextPathHash
200+
$documentIdentifier
209201
);
210202

211203
$documentData = $document->getData();
@@ -263,62 +255,92 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
263255
$this->updateFulltext($node, $fulltextIndexOfNode, $targetWorkspaceName);
264256
}
265257

266-
$this->logger->log(sprintf('NodeIndexer: Added / updated node %s. ID: %s Context: %s', $contextPath, $contextPathHash, json_encode($node->getContext()->getProperties())), LOG_DEBUG, null, 'ElasticSearch (CR)');
258+
$this->logger->log(sprintf('NodeIndexer (%s): Indexed node %s.', $documentIdentifier, $contextPath), LOG_DEBUG, null, 'ElasticSearch (CR)');
259+
};
260+
261+
$handleNode = function (NodeInterface $node, \TYPO3\TYPO3CR\Domain\Service\Context $context) use ($targetWorkspaceName, $indexer) {
262+
$nodeFromContext = $context->getNodeByIdentifier($node->getIdentifier());
263+
if ($nodeFromContext instanceof NodeInterface) {
264+
$indexer($nodeFromContext, $targetWorkspaceName);
265+
} else {
266+
$documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
267+
if ($node->isRemoved()) {
268+
$this->removeNode($node, $context->getWorkspaceName());
269+
$this->logger->log(sprintf('NodeIndexer (%s): Removed node with identifier %s, no longer in workspace %s', $documentIdentifier, $node->getIdentifier(), $context->getWorkspaceName()), LOG_DEBUG, null, 'ElasticSearch (CR)');
270+
} else {
271+
$this->logger->log(sprintf('NodeIndexer (%s): Could not index node with identifier %s, not found in workspace %s', $documentIdentifier, $node->getIdentifier(), $context->getWorkspaceName()), LOG_DEBUG, null, 'ElasticSearch (CR)');
272+
}
273+
}
267274
};
268275

276+
$workspaceName = $targetWorkspaceName ?: $node->getContext()->getWorkspaceName();
269277
$dimensionCombinations = $this->contentDimensionCombinator->getAllAllowedCombinations();
270-
$workspaceName = $targetWorkspaceName ?: 'live';
271-
$nodeIdentifier = $node->getIdentifier();
272278
if ($dimensionCombinations !== []) {
273279
foreach ($dimensionCombinations as $combination) {
274-
$context = $this->contextFactory->create(['workspaceName' => $workspaceName, 'dimensions' => $combination]);
275-
$node = $context->getNodeByIdentifier($nodeIdentifier);
276-
if ($node !== null) {
277-
$indexer($node, $targetWorkspaceName);
278-
}
280+
$context = $this->contextFactory->create(['workspaceName' => $workspaceName, 'dimensions' => $combination, 'invisibleContentShown' => true]);
281+
$handleNode($node, $context);
279282
}
280283
} else {
281-
$context = $this->contextFactory->create(['workspaceName' => $workspaceName]);
282-
$node = $context->getNodeByIdentifier($nodeIdentifier);
283-
if ($node !== null) {
284-
$indexer($node, $targetWorkspaceName);
285-
}
284+
$context = $this->contextFactory->create(['workspaceName' => $workspaceName, 'invisibleContentShown' => true]);
285+
$handleNode($node, $context);
286+
}
287+
}
288+
289+
/**
290+
* Returns a stable identifier for the Elasticsearch document representing the node
291+
*
292+
* @param NodeInterface $node
293+
* @param string $targetWorkspaceName
294+
* @return string
295+
*/
296+
protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null) {
297+
$contextPath = $node->getContextPath();
298+
299+
if ($targetWorkspaceName !== null) {
300+
$contextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $contextPath);
286301
}
302+
303+
return sha1($contextPath);
287304
}
288305

289306
/**
290307
*
291308
*
292309
* @param NodeInterface $node
293310
* @param array $fulltextIndexOfNode
294-
* @param string $targetWorkspaceName
311+
* @param string|null $targetWorkspaceName
295312
* @return void
296313
*/
297-
protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNode, $targetWorkspaceName = null)
314+
protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNode, $targetWorkspaceName)
298315
{
299-
if ((($targetWorkspaceName !== null && $targetWorkspaceName !== 'live') || $node->getWorkspace()->getName() !== 'live') || count($fulltextIndexOfNode) === 0) {
300-
return;
301-
}
302-
303316
$closestFulltextNode = $node;
304317
while (!$this->isFulltextRoot($closestFulltextNode)) {
305318
$closestFulltextNode = $closestFulltextNode->getParent();
306319
if ($closestFulltextNode === null) {
307320
// root of hierarchy, no fulltext root found anymore, abort silently...
308-
$this->logger->log('No fulltext root found for ' . $node->getPath(), LOG_WARNING);
321+
$this->logger->log(sprintf('NodeIndexer: No fulltext root found for node %s (%s)', $node->getPath(), $node->getIdentifier()), LOG_WARNING, null, 'ElasticSearch (CR)');
309322

310323
return;
311324
}
312325
}
313326

314-
$closestFulltextNodeContextPath = str_replace($closestFulltextNode->getContext()->getWorkspace()->getName(), 'live', $closestFulltextNode->getContextPath());
315-
$closestFulltextNodeContextPathHash = sha1($closestFulltextNodeContextPath);
327+
$closestFulltextNodeContextPath = $closestFulltextNode->getContextPath();
328+
if ($targetWorkspaceName !== null) {
329+
$closestFulltextNodeContextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $closestFulltextNodeContextPath);
330+
}
331+
$closestFulltextNodeDocumentIdentifier = sha1($closestFulltextNodeContextPath);
332+
333+
if ($closestFulltextNode->isRemoved()) {
334+
// fulltext root is removed, abort silently...
335+
$this->logger->log(sprintf('NodeIndexer (%s): Fulltext root found for %s (%s) not updated, it is removed', $closestFulltextNodeDocumentIdentifier, $node->getPath(), $node->getIdentifier()), LOG_DEBUG, null, 'ElasticSearch (CR)');
336+
return;
337+
}
316338

317339
$this->currentBulkRequest[] = [
318340
[
319341
'update' => [
320342
'_type' => NodeTypeMappingBuilder::convertNodeTypeNameToMappingName($closestFulltextNode->getNodeType()->getName()),
321-
'_id' => $closestFulltextNodeContextPathHash
343+
'_id' => $closestFulltextNodeDocumentIdentifier
322344
]
323345
],
324346
// http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-update.html
@@ -328,7 +350,14 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
328350
if (!ctx._source.containsKey("__fulltextParts")) {
329351
ctx._source.__fulltextParts = new LinkedHashMap();
330352
}
331-
ctx._source.__fulltextParts[identifier] = fulltext;
353+
354+
if (nodeIsRemoved || nodeIsHidden || fulltext.size() == 0) {
355+
if (ctx._source.__fulltextParts.containsKey(identifier)) {
356+
ctx._source.__fulltextParts.remove(identifier);
357+
}
358+
} else {
359+
ctx._source.__fulltextParts[identifier] = fulltext;
360+
}
332361
ctx._source.__fulltext = new LinkedHashMap();
333362
334363
Iterator<LinkedHashMap.Entry<String, LinkedHashMap>> fulltextByNode = ctx._source.__fulltextParts.entrySet().iterator();
@@ -350,6 +379,8 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
350379
',
351380
'params' => [
352381
'identifier' => $node->getIdentifier(),
382+
'nodeIsRemoved' => $node->isRemoved(),
383+
'nodeIsHidden' => $node->isHidden(),
353384
'fulltext' => $fulltextIndexOfNode
354385
],
355386
'upsert' => [
@@ -361,6 +392,8 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
361392
'lang' => 'groovy'
362393
]
363394
];
395+
396+
$this->logger->log(sprintf('NodeIndexer (%s): Updated fulltext index for %s (%s)', $closestFulltextNodeDocumentIdentifier, $closestFulltextNodeContextPath, $closestFulltextNode->getIdentifier()), LOG_WARNING, null, 'ElasticSearch (CR)');
364397
}
365398

366399
/**
@@ -385,33 +418,42 @@ protected function isFulltextRoot(NodeInterface $node)
385418
* Schedule node removal into the current bulk request.
386419
*
387420
* @param NodeInterface $node
421+
* @param string $targetWorkspaceName
388422
* @return string
389423
*/
390-
public function removeNode(NodeInterface $node)
424+
public function removeNode(NodeInterface $node, $targetWorkspaceName = null)
391425
{
392426
if ($this->settings['indexAllWorkspaces'] === false) {
393-
if ($node->getContext()->getWorkspaceName() !== 'live') {
427+
// we are only supposed to index the live workspace.
428+
// We need to check the workspace at two occasions; checking the
429+
// $targetWorkspaceName and the workspace name of the node's context as fallback
430+
if ($targetWorkspaceName !== null && $targetWorkspaceName !== 'live') {
431+
return;
432+
}
433+
434+
if ($targetWorkspaceName === null && $node->getContext()->getWorkspaceName() !== 'live') {
394435
return;
395436
}
396437
}
397438

398-
// TODO: handle deletion from the fulltext index as well
399-
$identifier = sha1($node->getContextPath());
439+
$documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
400440

401441
$this->currentBulkRequest[] = [
402442
[
403443
'delete' => [
404444
'_type' => NodeTypeMappingBuilder::convertNodeTypeNameToMappingName($node->getNodeType()),
405-
'_id' => $identifier
445+
'_id' => $documentIdentifier
406446
]
407447
]
408448
];
409449

410-
$this->logger->log(sprintf('NodeIndexer: Removed node %s from index (node actually removed). Persistence ID: %s', $node->getContextPath(), $identifier), LOG_DEBUG, null, 'ElasticSearch (CR)');
450+
$this->updateFulltext($node, [], $targetWorkspaceName);
451+
452+
$this->logger->log(sprintf('NodeIndexer (%s): Removed node %s (%s) from index.', $documentIdentifier, $node->getContextPath(), $node->getIdentifier()), LOG_DEBUG, null, 'ElasticSearch (CR)');
411453
}
412454

413455
/**
414-
* perform the current bulk request
456+
* Perform the current bulk request
415457
*
416458
* @return void
417459
*/
@@ -427,7 +469,7 @@ public function flush()
427469
foreach ($bulkRequestTuple as $bulkRequestItem) {
428470
$itemAsJson = json_encode($bulkRequestItem);
429471
if ($itemAsJson === false) {
430-
$this->logger->log('Indexing Error: Bulk request item could not be encoded as JSON - ' . json_last_error_msg(), LOG_ERR, $bulkRequestItem);
472+
$this->logger->log('NodeIndexer: Bulk request item could not be encoded as JSON - ' . json_last_error_msg(), LOG_ERR, $bulkRequestItem, 'ElasticSearch (CR)');
431473
continue 2;
432474
}
433475
$tupleAsJson .= $itemAsJson . chr(10);
@@ -437,10 +479,10 @@ public function flush()
437479

438480
if ($content !== '') {
439481
$responseAsLines = $this->getIndex()->request('POST', '/_bulk', [], $content)->getOriginalResponse()->getContent();
440-
foreach (explode("\n", $responseAsLines) as $responseLine) {
482+
foreach (explode(chr(10), $responseAsLines) as $responseLine) {
441483
$response = json_decode($responseLine);
442484
if (!is_object($response) || (isset($response->errors) && $response->errors !== false)) {
443-
$this->logger->log('Indexing Error: ' . $responseLine, LOG_ERR);
485+
$this->logger->log('NodeIndexer: ' . $responseLine, LOG_ERR, null, 'ElasticSearch (CR)');
444486
}
445487
}
446488
}

0 commit comments

Comments
 (0)