@@ -134,7 +134,7 @@ public function getIndex()
134134 }
135135
136136 /**
137- * index this node, and add it to the current bulk request.
137+ * Index this node, and add it to the current bulk request.
138138 *
139139 * @param NodeInterface $node
140140 * @param string $targetWorkspaceName In case this is triggered during publishing, a workspace name will be passed in
@@ -163,20 +163,20 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
163163 $ contextPath = str_replace ($ node ->getContext ()->getWorkspace ()->getName (), $ targetWorkspaceName , $ contextPath );
164164 }
165165
166- $ contextPathHash = sha1 ( $ contextPath );
166+ $ documentIdentifier = $ this -> calculateDocumentIdentifier ( $ node , $ targetWorkspaceName );
167167 $ nodeType = $ node ->getNodeType ();
168168
169169 $ mappingType = $ this ->getIndex ()->findType (NodeTypeMappingBuilder::convertNodeTypeNameToMappingName ($ nodeType ));
170170
171171 if ($ this ->bulkProcessing === false ) {
172- // Remove document with the same contextPathHash but different NodeType, required after NodeType change
173- $ this ->logger ->log (sprintf ('NodeIndexer: Search and remove duplicate document if needed. ID: %s ' , $ contextPath , $ node ->getNodeType ()-> getName (), $ contextPathHash ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
172+ // Remove document with the same documentIdentifier but different NodeType, required after NodeType change
173+ $ this ->logger ->log (sprintf ('NodeIndexer (%s) : Search and remove duplicate document for node %s (%s) if needed. ' , $ documentIdentifier , $ contextPath , $ node ->getIdentifier () ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
174174 $ this ->getIndex ()->request ('DELETE ' , '/_query ' , [], json_encode ([
175175 'query ' => [
176176 'bool ' => [
177177 'must ' => [
178178 'ids ' => [
179- 'values ' => [$ contextPathHash ]
179+ 'values ' => [$ documentIdentifier ]
180180 ]
181181 ],
182182 'must_not ' => [
@@ -189,23 +189,15 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
189189 ]));
190190 }
191191
192- if ($ node ->isRemoved ()) {
193- // TODO: handle deletion from the fulltext index as well
194- $ mappingType ->deleteDocumentById ($ contextPathHash );
195- $ this ->logger ->log (sprintf ('NodeIndexer: Removed node %s from index (node flagged as removed). ID: %s ' , $ contextPath , $ contextPathHash ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
196-
197- return ;
198- }
199-
200192 $ logger = $ this ->logger ;
201193 $ fulltextIndexOfNode = [];
202- $ nodePropertiesToBeStoredInIndex = $ this ->extractPropertiesAndFulltext ($ node , $ fulltextIndexOfNode , function ($ propertyName ) use ($ logger , $ contextPathHash ) {
203- $ logger ->log (sprintf ('NodeIndexer (%s) - Property "%s" not indexed because no configuration found. ' , $ contextPathHash , $ propertyName ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
194+ $ nodePropertiesToBeStoredInIndex = $ this ->extractPropertiesAndFulltext ($ node , $ fulltextIndexOfNode , function ($ propertyName ) use ($ logger , $ documentIdentifier , $ node ) {
195+ $ logger ->log (sprintf ('NodeIndexer (%s) - Property "%s" not indexed because no configuration found, node type %s . ' , $ documentIdentifier , $ propertyName, $ node -> getNodeType ()-> getName () ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
204196 });
205197
206198 $ document = new ElasticSearchDocument ($ mappingType ,
207199 $ nodePropertiesToBeStoredInIndex ,
208- $ contextPathHash
200+ $ documentIdentifier
209201 );
210202
211203 $ documentData = $ document ->getData ();
@@ -263,62 +255,92 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
263255 $ this ->updateFulltext ($ node , $ fulltextIndexOfNode , $ targetWorkspaceName );
264256 }
265257
266- $ this ->logger ->log (sprintf ('NodeIndexer: Added / updated node %s. ID: %s Context: %s ' , $ contextPath , $ contextPathHash , json_encode ($ node ->getContext ()->getProperties ())), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
258+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Indexed node %s. ' , $ documentIdentifier , $ contextPath ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
259+ };
260+
261+ $ handleNode = function (NodeInterface $ node , \TYPO3 \TYPO3CR \Domain \Service \Context $ context ) use ($ targetWorkspaceName , $ indexer ) {
262+ $ nodeFromContext = $ context ->getNodeByIdentifier ($ node ->getIdentifier ());
263+ if ($ nodeFromContext instanceof NodeInterface) {
264+ $ indexer ($ nodeFromContext , $ targetWorkspaceName );
265+ } else {
266+ $ documentIdentifier = $ this ->calculateDocumentIdentifier ($ node , $ targetWorkspaceName );
267+ if ($ node ->isRemoved ()) {
268+ $ this ->removeNode ($ node , $ context ->getWorkspaceName ());
269+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Removed node with identifier %s, no longer in workspace %s ' , $ documentIdentifier , $ node ->getIdentifier (), $ context ->getWorkspaceName ()), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
270+ } else {
271+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Could not index node with identifier %s, not found in workspace %s ' , $ documentIdentifier , $ node ->getIdentifier (), $ context ->getWorkspaceName ()), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
272+ }
273+ }
267274 };
268275
276+ $ workspaceName = $ targetWorkspaceName ?: $ node ->getContext ()->getWorkspaceName ();
269277 $ dimensionCombinations = $ this ->contentDimensionCombinator ->getAllAllowedCombinations ();
270- $ workspaceName = $ targetWorkspaceName ?: 'live ' ;
271- $ nodeIdentifier = $ node ->getIdentifier ();
272278 if ($ dimensionCombinations !== []) {
273279 foreach ($ dimensionCombinations as $ combination ) {
274- $ context = $ this ->contextFactory ->create (['workspaceName ' => $ workspaceName , 'dimensions ' => $ combination ]);
275- $ node = $ context ->getNodeByIdentifier ($ nodeIdentifier );
276- if ($ node !== null ) {
277- $ indexer ($ node , $ targetWorkspaceName );
278- }
280+ $ context = $ this ->contextFactory ->create (['workspaceName ' => $ workspaceName , 'dimensions ' => $ combination , 'invisibleContentShown ' => true ]);
281+ $ handleNode ($ node , $ context );
279282 }
280283 } else {
281- $ context = $ this ->contextFactory ->create (['workspaceName ' => $ workspaceName ]);
282- $ node = $ context ->getNodeByIdentifier ($ nodeIdentifier );
283- if ($ node !== null ) {
284- $ indexer ($ node , $ targetWorkspaceName );
285- }
284+ $ context = $ this ->contextFactory ->create (['workspaceName ' => $ workspaceName , 'invisibleContentShown ' => true ]);
285+ $ handleNode ($ node , $ context );
286+ }
287+ }
288+
/**
 * Returns a stable identifier for the Elasticsearch document representing the node
 *
 * The identifier is the SHA1 hash of the node's context path. If a target
 * workspace name is given, the workspace segment of the context path is
 * exchanged for it before hashing.
 *
 * Only the workspace segment is exchanged: a plain str_replace() over the whole
 * context path would also rewrite node path segments or dimension values that
 * happen to contain the workspace name, which yields a different hash than the
 * one calculated when the node is indexed directly in the target workspace.
 *
 * @param NodeInterface $node
 * @param string $targetWorkspaceName Workspace name to calculate the identifier for, NULL to use the node's own workspace
 * @return string
 */
protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null)
{
    $contextPath = $node->getContextPath();

    if ($targetWorkspaceName !== null) {
        // NOTE(review): assumes the workspace is encoded as "@<workspaceName>" right after
        // the node path and that node paths never contain "@" - confirm against getContextPath().
        $workspaceSegment = '@' . $node->getContext()->getWorkspace()->getName();
        $segmentPosition = strpos($contextPath, $workspaceSegment);
        if ($segmentPosition !== false) {
            $contextPath = substr_replace($contextPath, '@' . $targetWorkspaceName, $segmentPosition, strlen($workspaceSegment));
        }
    }

    return sha1($contextPath);
}
288305
289306 /**
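290307 * Schedule an update of the fulltext data stored on the closest fulltext root of the given node into the current bulk request.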
291308 *
292309 * @param NodeInterface $node
293310 * @param array $fulltextIndexOfNode
294- * @param string $targetWorkspaceName
311+ * @param string|null $targetWorkspaceName
295312 * @return void
296313 */
297- protected function updateFulltext (NodeInterface $ node , array $ fulltextIndexOfNode , $ targetWorkspaceName = null )
314+ protected function updateFulltext (NodeInterface $ node , array $ fulltextIndexOfNode , $ targetWorkspaceName )
298315 {
299- if ((($ targetWorkspaceName !== null && $ targetWorkspaceName !== 'live ' ) || $ node ->getWorkspace ()->getName () !== 'live ' ) || count ($ fulltextIndexOfNode ) === 0 ) {
300- return ;
301- }
302-
303316 $ closestFulltextNode = $ node ;
304317 while (!$ this ->isFulltextRoot ($ closestFulltextNode )) {
305318 $ closestFulltextNode = $ closestFulltextNode ->getParent ();
306319 if ($ closestFulltextNode === null ) {
307320 // root of hierarchy, no fulltext root found anymore, abort silently...
308- $ this ->logger ->log (' No fulltext root found for ' . $ node ->getPath (), LOG_WARNING );
321+ $ this ->logger ->log (sprintf ( ' NodeIndexer: No fulltext root found for node %s (%) ' , $ node ->getPath (), $ node -> getIdentifier ()), LOG_WARNING , null , ' ElasticSearch (CR) ' );
309322
310323 return ;
311324 }
312325 }
313326
314- $ closestFulltextNodeContextPath = str_replace ($ closestFulltextNode ->getContext ()->getWorkspace ()->getName (), 'live ' , $ closestFulltextNode ->getContextPath ());
315- $ closestFulltextNodeContextPathHash = sha1 ($ closestFulltextNodeContextPath );
327+ $ closestFulltextNodeContextPath = $ closestFulltextNode ->getContextPath ();
328+ if ($ targetWorkspaceName !== null ) {
329+ $ closestFulltextNodeContextPath = str_replace ($ node ->getContext ()->getWorkspace ()->getName (), $ targetWorkspaceName , $ closestFulltextNodeContextPath );
330+ }
331+ $ closestFulltextNodeDocumentIdentifier = sha1 ($ closestFulltextNodeContextPath );
332+
333+ if ($ closestFulltextNode ->isRemoved ()) {
334+ // fulltext root is removed, abort silently...
335+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Fulltext root found for %s (%s) not updated, it is removed ' , $ closestFulltextNodeDocumentIdentifier , $ node ->getPath (), $ node ->getIdentifier ()), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
336+ return ;
337+ }
316338
317339 $ this ->currentBulkRequest [] = [
318340 [
319341 'update ' => [
320342 '_type ' => NodeTypeMappingBuilder::convertNodeTypeNameToMappingName ($ closestFulltextNode ->getNodeType ()->getName ()),
321- '_id ' => $ closestFulltextNodeContextPathHash
343+ '_id ' => $ closestFulltextNodeDocumentIdentifier
322344 ]
323345 ],
324346 // http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-update.html
@@ -328,7 +350,14 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
328350 if (!ctx._source.containsKey("__fulltextParts")) {
329351 ctx._source.__fulltextParts = new LinkedHashMap();
330352 }
331- ctx._source.__fulltextParts[identifier] = fulltext;
353+
354+ if (nodeIsRemoved || nodeIsHidden || fulltext.size() == 0) {
355+ if (ctx._source.__fulltextParts.containsKey(identifier)) {
356+ ctx._source.__fulltextParts.remove(identifier);
357+ }
358+ } else {
359+ ctx._source.__fulltextParts[identifier] = fulltext;
360+ }
332361 ctx._source.__fulltext = new LinkedHashMap();
333362
334363 Iterator<LinkedHashMap.Entry<String, LinkedHashMap>> fulltextByNode = ctx._source.__fulltextParts.entrySet().iterator();
@@ -350,6 +379,8 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
350379 ' ,
351380 'params ' => [
352381 'identifier ' => $ node ->getIdentifier (),
382+ 'nodeIsRemoved ' => $ node ->isRemoved (),
383+ 'nodeIsHidden ' => $ node ->isHidden (),
353384 'fulltext ' => $ fulltextIndexOfNode
354385 ],
355386 'upsert ' => [
@@ -361,6 +392,8 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
361392 'lang ' => 'groovy '
362393 ]
363394 ];
395+
396+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Updated fulltext index for %s (%s) ' , $ closestFulltextNodeDocumentIdentifier , $ closestFulltextNodeContextPath , $ closestFulltextNode ->getIdentifier ()), LOG_WARNING , null , 'ElasticSearch (CR) ' );
364397 }
365398
366399 /**
@@ -385,33 +418,42 @@ protected function isFulltextRoot(NodeInterface $node)
385418 * Schedule node removal into the current bulk request.
386419 *
387420 * @param NodeInterface $node
421+ * @param string $targetWorkspaceName
389423 * @return void
389423 */
public function removeNode(NodeInterface $node, $targetWorkspaceName = null)
{
    if ($this->settings['indexAllWorkspaces'] === false) {
        // Only the live workspace is indexed in this mode. The explicitly given
        // target workspace decides; the workspace of the node's context is only
        // consulted when no target workspace was passed in.
        $effectiveWorkspaceName = $targetWorkspaceName !== null
            ? $targetWorkspaceName
            : $node->getContext()->getWorkspaceName();

        if ($effectiveWorkspaceName !== 'live') {
            return;
        }
    }

    $documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName);
    $mappingName = NodeTypeMappingBuilder::convertNodeTypeNameToMappingName($node->getNodeType());

    // A bulk request entry is a list of items; a delete needs just the action line.
    $deleteAction = [
        'delete' => [
            '_type' => $mappingName,
            '_id' => $documentIdentifier
        ]
    ];
    $this->currentBulkRequest[] = [$deleteAction];

    // Passing an empty fulltext array for the removed node.
    $this->updateFulltext($node, [], $targetWorkspaceName);

    $message = sprintf(
        'NodeIndexer (%s): Removed node %s (%s) from index.',
        $documentIdentifier,
        $node->getContextPath(),
        $node->getIdentifier()
    );
    $this->logger->log($message, LOG_DEBUG, null, 'ElasticSearch (CR)');
}
412454
413455 /**
414- * perform the current bulk request
456+ * Perform the current bulk request
415457 *
416458 * @return void
417459 */
@@ -427,7 +469,7 @@ public function flush()
427469 foreach ($ bulkRequestTuple as $ bulkRequestItem ) {
428470 $ itemAsJson = json_encode ($ bulkRequestItem );
429471 if ($ itemAsJson === false ) {
430- $ this ->logger ->log ('Indexing Error : Bulk request item could not be encoded as JSON - ' . json_last_error_msg (), LOG_ERR , $ bulkRequestItem );
472+ $ this ->logger ->log ('NodeIndexer : Bulk request item could not be encoded as JSON - ' . json_last_error_msg (), LOG_ERR , $ bulkRequestItem, ' ElasticSearch (CR) ' );
431473 continue 2 ;
432474 }
433475 $ tupleAsJson .= $ itemAsJson . chr (10 );
@@ -437,10 +479,10 @@ public function flush()
437479
438480 if ($ content !== '' ) {
439481 $ responseAsLines = $ this ->getIndex ()->request ('POST ' , '/_bulk ' , [], $ content )->getOriginalResponse ()->getContent ();
440- foreach (explode ("\n" , $ responseAsLines ) as $ responseLine ) {
482+ foreach (explode (chr ( 10 ) , $ responseAsLines ) as $ responseLine ) {
441483 $ response = json_decode ($ responseLine );
442484 if (!is_object ($ response ) || (isset ($ response ->errors ) && $ response ->errors !== false )) {
443- $ this ->logger ->log ('Indexing Error : ' . $ responseLine , LOG_ERR );
485+ $ this ->logger ->log ('NodeIndexer : ' . $ responseLine , LOG_ERR , null , ' ElasticSearch (CR) ' );
444486 }
445487 }
446488 }
0 commit comments