@@ -134,7 +134,7 @@ public function getIndex()
134134 }
135135
136136 /**
137- * index this node, and add it to the current bulk request.
137+ * Index this node, and add it to the current bulk request.
138138 *
139139 * @param NodeInterface $node
140140 * @param string $targetWorkspaceName In case this is triggered during publishing, a workspace name will be passed in
@@ -163,20 +163,20 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
163163 $ contextPath = str_replace ($ node ->getContext ()->getWorkspace ()->getName (), $ targetWorkspaceName , $ contextPath );
164164 }
165165
166- $ contextPathHash = sha1 ( $ contextPath );
166+ $ documentIdentifier = $ this -> calculateDocumentIdentifier ( $ node , $ targetWorkspaceName );
167167 $ nodeType = $ node ->getNodeType ();
168168
169169 $ mappingType = $ this ->getIndex ()->findType (NodeTypeMappingBuilder::convertNodeTypeNameToMappingName ($ nodeType ));
170170
171171 if ($ this ->bulkProcessing === false ) {
172- // Remove document with the same contextPathHash but different NodeType, required after NodeType change
173- $ this ->logger ->log (sprintf ('NodeIndexer: Search and remove duplicate document if needed. ID: %s ' , $ contextPath , $ node ->getNodeType ()-> getName (), $ contextPathHash ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
172+ // Remove document with the same documentIdentifier but different NodeType, required after NodeType change
173+ $ this ->logger ->log (sprintf ('NodeIndexer (%s) : Search and remove duplicate document for node %s (%s) if needed. ' , $ documentIdentifier , $ contextPath , $ node ->getIdentifier () ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
174174 $ this ->getIndex ()->request ('DELETE ' , '/_query ' , [], json_encode ([
175175 'query ' => [
176176 'bool ' => [
177177 'must ' => [
178178 'ids ' => [
179- 'values ' => [$ contextPathHash ]
179+ 'values ' => [$ documentIdentifier ]
180180 ]
181181 ],
182182 'must_not ' => [
@@ -199,13 +199,13 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
199199
200200 $ logger = $ this ->logger ;
201201 $ fulltextIndexOfNode = [];
202- $ nodePropertiesToBeStoredInIndex = $ this ->extractPropertiesAndFulltext ($ node , $ fulltextIndexOfNode , function ($ propertyName ) use ($ logger , $ contextPathHash ) {
203- $ logger ->log (sprintf ('NodeIndexer (%s) - Property "%s" not indexed because no configuration found. ' , $ contextPathHash , $ propertyName ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
202+ $ nodePropertiesToBeStoredInIndex = $ this ->extractPropertiesAndFulltext ($ node , $ fulltextIndexOfNode , function ($ propertyName ) use ($ logger , $ documentIdentifier , $ node ) {
203+ $ logger ->log (sprintf ('NodeIndexer (%s) - Property "%s" not indexed because no configuration found, node type %s . ' , $ documentIdentifier , $ propertyName, $ node -> getNodeType ()-> getName () ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
204204 });
205205
206206 $ document = new ElasticSearchDocument ($ mappingType ,
207207 $ nodePropertiesToBeStoredInIndex ,
208- $ contextPathHash
208+ $ documentIdentifier
209209 );
210210
211211 $ documentData = $ document ->getData ();
@@ -263,7 +263,7 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
263263 $ this ->updateFulltext ($ node , $ fulltextIndexOfNode , $ targetWorkspaceName );
264264 }
265265
266- $ this ->logger ->log (sprintf ('NodeIndexer: Added / updated node %s. ID: %s Context: %s ' , $ contextPath , $ contextPathHash , json_encode ( $ node -> getContext ()-> getProperties ()) ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
266+ $ this ->logger ->log (sprintf ('NodeIndexer (%s) : Added / updated node %s. ' , $ documentIdentifier , $ contextPath ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
267267 };
268268
269269 $ dimensionCombinations = $ this ->contentDimensionCombinator ->getAllAllowedCombinations ();
@@ -286,6 +286,23 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null)
286286 }
287287 }
288288
/**
 * Computes the identifier of the Elasticsearch document that represents a node.
 *
 * The identifier is the SHA-1 hash of the node's context path. If a target
 * workspace name is given, the name of the node's own workspace is replaced by
 * the target workspace name inside the context path before it is hashed.
 *
 * NOTE(review): str_replace() substitutes every occurrence of the workspace
 * name in the context path, not only the workspace part. A node path that
 * happens to contain the workspace name would be rewritten as well; confirm
 * whether that can occur in practice.
 *
 * @param NodeInterface $node The node the document identifier is calculated for
 * @param string $targetWorkspaceName Optional workspace name to substitute for the node's workspace name; NULL keeps the context path untouched
 * @return string The SHA-1 hash (hexadecimal) of the resulting context path
 */
protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null)
{
    $pathToHash = $node->getContextPath();

    // Without a target workspace the node's own context path is hashed as-is.
    if ($targetWorkspaceName === null) {
        return sha1($pathToHash);
    }

    $currentWorkspaceName = $node->getContext()->getWorkspace()->getName();

    return sha1(str_replace($currentWorkspaceName, $targetWorkspaceName, $pathToHash));
}

305+
289306 /**
290307 *
291308 *
@@ -305,20 +322,20 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
305322 $ closestFulltextNode = $ closestFulltextNode ->getParent ();
306323 if ($ closestFulltextNode === null ) {
307324 // root of hierarchy, no fulltext root found anymore, abort silently...
308- $ this ->logger ->log ('No fulltext root found for ' . $ node ->getPath (), LOG_WARNING );
325+ $ this ->logger ->log ('NodeIndexer: No fulltext root found for ' . $ node ->getPath (), LOG_WARNING );
309326
310327 return ;
311328 }
312329 }
313330
314331 $ closestFulltextNodeContextPath = str_replace ($ closestFulltextNode ->getContext ()->getWorkspace ()->getName (), 'live ' , $ closestFulltextNode ->getContextPath ());
315- $ closestFulltextNodeContextPathHash = sha1 ($ closestFulltextNodeContextPath );
332+ $ closestFulltextNodeDocumentIdentifier = sha1 ($ closestFulltextNodeContextPath );
316333
317334 $ this ->currentBulkRequest [] = [
318335 [
319336 'update ' => [
320337 '_type ' => NodeTypeMappingBuilder::convertNodeTypeNameToMappingName ($ closestFulltextNode ->getNodeType ()->getName ()),
321- '_id ' => $ closestFulltextNodeContextPathHash
338+ '_id ' => $ closestFulltextNodeDocumentIdentifier
322339 ]
323340 ],
324341 // http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-update.html
@@ -361,6 +378,8 @@ protected function updateFulltext(NodeInterface $node, array $fulltextIndexOfNod
361378 'lang ' => 'groovy '
362379 ]
363380 ];
381+
382+ $ this ->logger ->log (sprintf ('NodeIndexer (%s): Updated fulltext index for %s (%s) ' , $ closestFulltextNodeDocumentIdentifier , $ closestFulltextNodeContextPath , $ closestFulltextNode ->getIdentifier ()), LOG_WARNING );
364383 }
365384
366385 /**
@@ -396,22 +415,22 @@ public function removeNode(NodeInterface $node)
396415 }
397416
398417 // TODO: handle deletion from the fulltext index as well
399- $ identifier = sha1 ( $ node -> getContextPath () );
418+ $ documentIdentifier = $ this -> calculateDocumentIdentifier ( $ node );
400419
401420 $ this ->currentBulkRequest [] = [
402421 [
403422 'delete ' => [
404423 '_type ' => NodeTypeMappingBuilder::convertNodeTypeNameToMappingName ($ node ->getNodeType ()),
405- '_id ' => $ identifier
424+ '_id ' => $ documentIdentifier
406425 ]
407426 ]
408427 ];
409428
410- $ this ->logger ->log (sprintf ('NodeIndexer: Removed node %s from index (node actually removed). Persistence ID: %s ' , $ node ->getContextPath (), $ identifier ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
429+ $ this ->logger ->log (sprintf ('NodeIndexer (%s) : Removed node %s (%s) from index. ' , $ documentIdentifier , $ node ->getContextPath (), $ node -> getIdentifier () ), LOG_DEBUG , null , 'ElasticSearch (CR) ' );
411430 }
412431
413432 /**
414- * perform the current bulk request
433+ * Perform the current bulk request
415434 *
416435 * @return void
417436 */
@@ -427,7 +446,7 @@ public function flush()
427446 foreach ($ bulkRequestTuple as $ bulkRequestItem ) {
428447 $ itemAsJson = json_encode ($ bulkRequestItem );
429448 if ($ itemAsJson === false ) {
430- $ this ->logger ->log ('Indexing Error: Bulk request item could not be encoded as JSON - ' . json_last_error_msg (), LOG_ERR , $ bulkRequestItem );
449+ $ this ->logger ->log ('Indexing Error: Bulk request item could not be encoded as JSON - ' . json_last_error_msg (), LOG_ERR , $ bulkRequestItem, ' ElasticSearch (CR) ' );
431450 continue 2 ;
432451 }
433452 $ tupleAsJson .= $ itemAsJson . chr (10 );
@@ -437,10 +456,10 @@ public function flush()
437456
438457 if ($ content !== '' ) {
439458 $ responseAsLines = $ this ->getIndex ()->request ('POST ' , '/_bulk ' , [], $ content )->getOriginalResponse ()->getContent ();
440- foreach (explode ("\n" , $ responseAsLines ) as $ responseLine ) {
459+ foreach (explode (chr ( 10 ) , $ responseAsLines ) as $ responseLine ) {
441460 $ response = json_decode ($ responseLine );
442461 if (!is_object ($ response ) || (isset ($ response ->errors ) && $ response ->errors !== false )) {
443- $ this ->logger ->log ('Indexing Error: ' . $ responseLine , LOG_ERR );
462+ $ this ->logger ->log ('Indexing Error: ' . $ responseLine , LOG_ERR , null , ' ElasticSearch (CR) ' );
444463 }
445464 }
446465 }
0 commit comments