Skip to content

Commit dbea4f6

Browse files
authored
Merge pull request #2 from kitsunet/support-es20
TASK: Adjust script parts to 2.x and finish deletion
2 parents 4522420 + 9fcab09 commit dbea4f6

2 files changed

Lines changed: 68 additions & 63 deletions

File tree

Classes/Flowpack/ElasticSearch/ContentRepositoryAdaptor/Command/NodeIndexCommandController.php

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
use TYPO3\Flow\Annotations as Flow;
1515
use TYPO3\Flow\Cli\CommandController;
1616
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Mapping\NodeTypeMappingBuilder;
17+
use TYPO3\TYPO3CR\Domain\Model\NodeInterface;
1718
use TYPO3\TYPO3CR\Domain\Service\ContentDimensionCombinator;
1819
use TYPO3\TYPO3CR\Search\Indexer\NodeIndexingManager;
1920

@@ -72,16 +73,6 @@ class NodeIndexCommandController extends CommandController
7273
*/
7374
protected $nodeTypeMappingBuilder;
7475

75-
/**
76-
* @var integer
77-
*/
78-
protected $indexedNodes;
79-
80-
/**
81-
* @var integer
82-
*/
83-
protected $countedIndexedNodes;
84-
8576
/**
8677
* @var integer
8778
*/
@@ -193,22 +184,17 @@ public function buildCommand($limit = null, $update = false, $workspace = null,
193184

194185
$count = 0;
195186
$this->limit = $limit;
196-
$this->indexedNodes = 0;
197-
$this->countedIndexedNodes = 0;
198187

199188
if ($workspace === null && $this->settings['indexAllWorkspaces'] === false) {
200189
$workspace = 'live';
201190
}
202191

203192
if ($workspace === null) {
204193
foreach ($this->workspaceRepository->findAll() as $workspace) {
205-
$this->indexWorkspace($workspace->getName());
206-
207-
$count = $count + $this->countedIndexedNodes;
194+
$count += $this->indexWorkspace($workspace->getName());
208195
}
209196
} else {
210-
$this->indexWorkspace($workspace);
211-
$count = $count + $this->countedIndexedNodes;
197+
$count = $this->indexWorkspace($workspace);
212198
}
213199

214200
$this->nodeIndexingManager->flushQueues();
@@ -250,14 +236,17 @@ public function cleanupCommand()
250236
*/
251237
protected function indexWorkspace($workspaceName)
252238
{
239+
$indexedNodes = 0;
253240
$combinations = $this->contentDimensionCombinator->getAllAllowedCombinations();
254241
if ($combinations === array()) {
255-
$this->indexWorkspaceWithDimensions($workspaceName);
242+
$indexedNodes += $this->indexWorkspaceWithDimensions($workspaceName);
256243
} else {
257244
foreach ($combinations as $combination) {
258-
$this->indexWorkspaceWithDimensions($workspaceName, $combination);
245+
$indexedNodes += $this->indexWorkspaceWithDimensions($workspaceName, $combination);
259246
}
260247
}
248+
249+
return $indexedNodes;
261250
}
262251

263252
/**
@@ -267,36 +256,39 @@ protected function indexWorkspace($workspaceName)
267256
*/
268257
protected function indexWorkspaceWithDimensions($workspaceName, array $dimensions = array())
269258
{
259+
$indexedNodes = 0;
270260
$context = $this->contextFactory->create(array('workspaceName' => $workspaceName, 'dimensions' => $dimensions));
271261
$rootNode = $context->getRootNode();
272262

273-
$this->traverseNodes($rootNode);
263+
$indexedNodes += $this->traverseNodes($rootNode);
274264

275265
if ($dimensions === array()) {
276-
$this->outputLine('Workspace "' . $workspaceName . '" without dimensions done. (Indexed ' . $this->indexedNodes . ' nodes)');
266+
$this->outputLine('Workspace "' . $workspaceName . '" without dimensions done. (Indexed ' . $indexedNodes . ' nodes)');
277267
} else {
278-
$this->outputLine('Workspace "' . $workspaceName . '" and dimensions "' . json_encode($dimensions) . '" done. (Indexed ' . $this->indexedNodes . ' nodes)');
268+
$this->outputLine('Workspace "' . $workspaceName . '" and dimensions "' . json_encode($dimensions) . '" done. (Indexed ' . $indexedNodes . ' nodes)');
279269
}
280270

281-
$this->countedIndexedNodes = $this->countedIndexedNodes + $this->indexedNodes;
282-
$this->indexedNodes = 0;
271+
return $indexedNodes;
283272
}
284273

285274
/**
286-
* @param \TYPO3\TYPO3CR\Domain\Model\NodeInterface $currentNode
287-
* @return void
275+
* @param NodeInterface $currentNode
276+
* @param integer $traversedUntilNow
277+
* @return integer Indexed nodes in this traversal
288278
*/
289-
protected function traverseNodes(\TYPO3\TYPO3CR\Domain\Model\NodeInterface $currentNode)
279+
protected function traverseNodes(NodeInterface $currentNode, $traversedUntilNow = 0)
290280
{
291-
if ($this->limit !== null && $this->indexedNodes > $this->limit) {
292-
return;
281+
if ($this->limit !== null && $traversedUntilNow > $this->limit) {
282+
return $traversedUntilNow;
293283
}
294284

295285
$this->nodeIndexingManager->indexNode($currentNode);
296-
$this->indexedNodes++;
286+
$traversedUntilNow++;
297287

298288
foreach ($currentNode->getChildNodes() as $childNode) {
299-
$this->traverseNodes($childNode);
289+
$traversedUntilNow = $this->traverseNodes($childNode, $traversedUntilNow);
300290
}
291+
292+
return $traversedUntilNow;
301293
}
302294
}

Classes/Flowpack/ElasticSearch/ContentRepositoryAdaptor/Indexer/NodeIndexer.php

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Exception;
1515
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Mapping\NodeTypeMappingBuilder;
16-
use Flowpack\ElasticSearch\Domain\Model\Client;
1716
use Flowpack\ElasticSearch\Domain\Model\Document as ElasticSearchDocument;
1817
use Flowpack\ElasticSearch\Domain\Model\Index;
1918
use TYPO3\Flow\Annotations as Flow;
@@ -259,16 +258,18 @@ protected function appendToBulkRequest(NodeInterface $node, ElasticSearchDocumen
259258
[
260259
'update' => [
261260
'_type' => $document->getType()->getName(),
262-
'_id' => $document->getId()
261+
'_id' => $document->getId(),
262+
'_index' => $this->getIndexName(),
263+
'_retry_on_conflict' => 3
263264
]
264265
],
265266
// http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-update.html
266267
[
267268
'script' => [
268269
'inline' => '
269-
fulltext = (ctx._source.containsKey("__fulltext") ? ctx._source.__fulltext : new LinkedHashMap());
270-
fulltextParts = (ctx._source.containsKey("__fulltextParts") ? ctx._source.__fulltextParts : new LinkedHashMap());
271-
ctx._source = newData;
270+
fulltext = (ctx._source.containsKey("__fulltext") ? ctx._source.__fulltext : new HashMap());
271+
fulltextParts = (ctx._source.containsKey("__fulltextParts") ? ctx._source.__fulltextParts : new HashMap());
272+
_source = newData;
272273
ctx._source.__fulltext = fulltext;
273274
ctx._source.__fulltextParts = fulltextParts
274275
',
@@ -308,7 +309,8 @@ protected function removeDuplicateDocuments($contextPath, $contextPathHash, Node
308309
{
309310
$type = NodeTypeMappingBuilder::convertNodeTypeNameToMappingName($node->getNodeType()->getName());
310311
$this->logger->log(sprintf('NodeIndexer: Check duplicate nodes for %s (%s). ContentContextHash: %s', $contextPath, $type, $contextPathHash), LOG_DEBUG, null, 'ElasticSearch (CR)');
311-
$result = $this->getIndex()->request('GET', '/_search?scroll=1m&search_type=scan', [], json_encode([
312+
$result = $this->getIndex()->request('GET', '/_search?scroll=1m', [], json_encode([
313+
'sort' => ['_doc'],
312314
'query' => [
313315
'bool' => [
314316
'must' => [
@@ -327,14 +329,33 @@ protected function removeDuplicateDocuments($contextPath, $contextPathHash, Node
327329
$treatedContent = $result->getTreatedContent();
328330
$scrollId = $treatedContent['_scroll_id'];
329331

330-
$result = $this->getIndex()->request('GET', '/_search/scroll?scroll=1m', [], $scrollId, false);
331-
$treatedContent = $result->getTreatedContent();
332+
333+
$mapHitToDeleteRequest = function ($hit) {
334+
$bulkRequest[] = json_encode([
335+
'delete' => [
336+
'_type' => $hit['_type'],
337+
'_id' => $hit['_id']
338+
]
339+
]);
340+
};
341+
342+
$bulkRequest = [];
332343
while (isset($treatedContent['hits']['hits']) && $treatedContent['hits']['hits'] !== []) {
333344
$hits = $treatedContent['hits']['hits'];
334-
$this->logger->log(sprintf('NodeIndexer: Check duplicate nodes for %s (%s), found %d document(s). ContentContextHash: %s', $contextPath, $type, count($hits), $contextPathHash), LOG_DEBUG, null, 'ElasticSearch (CR)');
345+
$bulkRequest = array_merge($bulkRequest, array_map($mapHitToDeleteRequest, $hits));
335346
$result = $this->getIndex()->request('GET', '/_search/scroll?scroll=1m', [], $scrollId, false);
336347
$treatedContent = $result->getTreatedContent();
337348
}
349+
350+
$this->logger->log(sprintf('NodeIndexer: Check duplicate nodes for %s (%s), found %d document(s). ContentContextHash: %s', $contextPath, $type, count($bulkRequest), $contextPathHash), LOG_DEBUG, null, 'ElasticSearch (CR)');
351+
if ($bulkRequest !== []) {
352+
$this->getIndex()->request('POST', '/_bulk', [], implode("\n", $bulkRequest));
353+
}
354+
$this->searchClient->request('DELETE', '/_search/scroll', [], json_encode([
355+
'scroll_id' => [
356+
$scrollId
357+
]
358+
]));
338359
}
339360

340361
/**
@@ -376,29 +397,21 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
376397
// first, update the __fulltextParts, then re-generate the __fulltext from all __fulltextParts
377398
'script' => [
378399
'inline' => '
379-
if (!ctx._source.containsKey("__fulltextParts")) {
380-
ctx._source.__fulltextParts = new LinkedHashMap();
381-
}
382-
ctx._source.__fulltextParts[identifier] = fulltext;
383-
ctx._source.__fulltext = new LinkedHashMap();
384-
385-
Iterator<LinkedHashMap.Entry<String, LinkedHashMap>> fulltextByNode = ctx._source.__fulltextParts.entrySet().iterator();
386-
for (fulltextByNode; fulltextByNode.hasNext();) {
387-
Iterator<LinkedHashMap.Entry<String, String>> elementIterator = fulltextByNode.next().getValue().entrySet().iterator();
388-
for (elementIterator; elementIterator.hasNext();) {
389-
Map.Entry<String, String> element = elementIterator.next();
390-
String value;
391-
392-
if (ctx._source.__fulltext.containsKey(element.key)) {
393-
value = ctx._source.__fulltext[element.key] + " " + element.value.trim();
394-
} else {
395-
value = element.value.trim();
396-
}
397-
398-
ctx._source.__fulltext[element.key] = value;
399-
}
400-
}
401-
',
400+
ctx._source.__fulltext = new HashMap();
401+
if (!ctx._source.containsKey("__fulltextParts")) {
402+
ctx._source.__fulltextParts = new HashMap();
403+
}
404+
ctx._source.__fulltextParts[identifier] = fulltext;
405+
ctx._source.__fulltextParts.each { originNodeIdentifier, partContent -> partContent.each { bucketKey, content ->
406+
if (ctx._source.__fulltext.containsKey(bucketKey)) {
407+
value = ctx._source.__fulltext[bucketKey] + " " + content.trim();
408+
} else {
409+
value = content.trim();
410+
}
411+
ctx._source.__fulltext[bucketKey] = value;
412+
}
413+
}
414+
',
402415
'params' => [
403416
'identifier' => $node->getIdentifier(),
404417
'fulltext' => $fulltextIndexOfNode

0 commit comments

Comments
 (0)