Skip to content

Commit 6d271b9

Browse files
committed
FEATURE: Implement asset indexing using attachment-ingest plugin
1 parent 2f80e3c commit 6d271b9

3 files changed

Lines changed: 273 additions & 0 deletions

File tree

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\AssetExtraction;
5+
6+
/*
7+
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
8+
*
9+
* (c) Contributors of the Neos Project - www.neos.io
10+
*
11+
* This package is Open Source Software. For the full copyright and license
12+
* information, please view the LICENSE file which was distributed with this
13+
* source code.
14+
*/
15+
16+
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Dto\AssetContent;
17+
use Neos\Media\Domain\Model\AssetInterface;
18+
19+
/**
 * Contract for services that turn a media asset into an AssetContent DTO.
 */
interface AssetExtractorInterface
{
    /**
     * Extract content and metadata from the given asset.
     *
     * @param AssetInterface $asset The asset to extract from
     * @return AssetContent The extraction result
     */
    public function extract(AssetInterface $asset): AssetContent;
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\AssetExtraction;
5+
6+
/*
7+
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
8+
*
9+
* (c) Contributors of the Neos Project - www.neos.io
10+
*
11+
* This package is Open Source Software. For the full copyright and license
12+
* information, please view the LICENSE file which was distributed with this
13+
* source code.
14+
*/
15+
16+
use Neos\Flow\Annotations as FLow;
17+
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Dto\AssetContent;
18+
use Flowpack\ElasticSearch\ContentRepositoryAdaptor\ElasticSearchClient;
19+
use Neos\Flow\Log\Utility\LogEnvironment;
20+
use Neos\Media\Domain\Model\AssetInterface;
21+
use Neos\Utility\Arrays;
22+
use Psr\Log\LoggerInterface;
23+
24+
/**
 * Asset extractor that sends the base64 encoded asset to the Elasticsearch
 * "_ingest/pipeline/_simulate" endpoint with an "attachment" processor and
 * maps the returned attachment data onto an AssetContent DTO.
 *
 * @Flow\Scope("singleton")
 */
class IngestAttachmentAssetExtractor implements AssetExtractorInterface
{
    /**
     * @Flow\Inject
     * @var ElasticSearchClient
     */
    protected $elasticsearchClient;

    /**
     * @Flow\Inject
     * @var LoggerInterface
     */
    protected $logger;

    /**
     * Run the asset through a simulated ingest pipeline and return what was extracted.
     *
     * If the response carries no attachment data, a warning is logged and an
     * AssetContent with empty values (and a content length of 0) is returned.
     *
     * @param AssetInterface $asset
     * @return AssetContent
     */
    public function extract(AssetInterface $asset): AssetContent
    {
        $request = [
            'pipeline' => [
                'description' => 'Attachment Extraction',
                'processors' => [
                    [
                        'attachment' => [
                            'field' => 'neos_asset',
                            'indexed_chars' => 100000,
                            'ignore_missing' => true,
                        ],
                    ],
                ],
            ],
            'docs' => [
                [
                    '_source' => [
                        'neos_asset' => $this->getAssetContent($asset),
                    ],
                ],
            ],
        ];

        $result = $this->elasticsearchClient->request('POST', '_ingest/pipeline/_simulate', [], json_encode($request))->getTreatedContent();

        // The attachment node is absent when the processor skipped the document
        // (e.g. the asset content was null) or the simulation failed.
        $extractedAsset = is_array($result) ? Arrays::getValueByPath($result, 'docs.0.doc._source.attachment') : null;

        if (is_array($extractedAsset)) {
            $this->logger->debug(sprintf('Extracted asset %s of type %s. Extracted %s characters of content', $asset->getResource()->getFilename(), $extractedAsset['content_type'] ?? '', $extractedAsset['content_length'] ?? 0), LogEnvironment::fromMethodName(__METHOD__));
        } else {
            $this->logger->warning(sprintf('Asset %s could not be extracted: the ingest pipeline returned no attachment data', $asset->getResource()->getFilename()), LogEnvironment::fromMethodName(__METHOD__));
            $extractedAsset = [];
        }

        return new AssetContent(
            $extractedAsset['content'] ?? '',
            $extractedAsset['title'] ?? '',
            $extractedAsset['name'] ?? '',
            $extractedAsset['author'] ?? '',
            $extractedAsset['keywords'] ?? '',
            $extractedAsset['date'] ?? '',
            $extractedAsset['content_type'] ?? '',
            // AssetContent requires an int here; with strict_types a '' default would raise a TypeError.
            (int)($extractedAsset['content_length'] ?? 0),
            $extractedAsset['language'] ?? ''
        );
    }

    /**
     * Read the asset's resource stream and return its content base64 encoded.
     *
     * @param AssetInterface $asset
     * @return null|string The base64 encoded content, or null if the stream could not be opened or read
     */
    protected function getAssetContent(AssetInterface $asset): ?string
    {
        $stream = $asset->getResource()->getStream();
        // NOTE(review): getStream() presumably returns false on failure - guard before attaching a filter.
        if (!is_resource($stream)) {
            return null;
        }

        try {
            stream_filter_append($stream, 'convert.base64-encode');
            $result = stream_get_contents($stream);
        } finally {
            // Release the handle opened above so repeated extractions do not accumulate open streams.
            fclose($stream);
        }

        return $result !== false ? $result : null;
    }
}

Classes/Dto/AssetContent.php

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Flowpack\ElasticSearch\ContentRepositoryAdaptor\Dto;
5+
6+
/*
7+
* This file is part of the Flowpack.ElasticSearch.ContentRepositoryAdaptor package.
8+
*
9+
* (c) Contributors of the Neos Project - www.neos.io
10+
*
11+
* This package is Open Source Software. For the full copyright and license
12+
* information, please view the LICENSE file which was distributed with this
13+
* source code.
14+
*/
15+
16+
/**
 * Value holder for the content and metadata extracted from an asset.
 */
class AssetContent
{
    /**
     * Extracted text content
     *
     * @var string
     */
    protected $content;

    /**
     * @var string
     */
    protected $title;

    /**
     * @var string
     */
    protected $name;

    /**
     * @var string
     */
    protected $author;

    /**
     * @var string
     */
    protected $keywords;

    /**
     * @var string
     */
    protected $date;

    /**
     * @var string
     */
    protected $contentType;

    /**
     * @var int
     */
    protected $contentLength;

    /**
     * @var string
     */
    protected $language;

    /**
     * Store all extracted values; none of them is modified afterwards by this class.
     *
     * @param string $content
     * @param string $title
     * @param string $name
     * @param string $author
     * @param string $keywords
     * @param string $date
     * @param string $contentType
     * @param int $contentLength
     * @param string $language
     */
    public function __construct(
        string $content,
        string $title,
        string $name,
        string $author,
        string $keywords,
        string $date,
        string $contentType,
        int $contentLength,
        string $language
    ) {
        $this->content = $content;
        $this->title = $title;
        $this->name = $name;
        $this->author = $author;
        $this->keywords = $keywords;
        $this->date = $date;
        $this->contentType = $contentType;
        $this->contentLength = $contentLength;
        $this->language = $language;
    }

    /**
     * @return string The content passed to the constructor
     */
    public function getContent(): string
    {
        return $this->content;
    }

    /**
     * @return string The title passed to the constructor
     */
    public function getTitle(): string
    {
        return $this->title;
    }

    /**
     * @return string The name passed to the constructor
     */
    public function getName(): string
    {
        return $this->name;
    }

    /**
     * @return string The author passed to the constructor
     */
    public function getAuthor(): string
    {
        return $this->author;
    }

    /**
     * @return string The keywords passed to the constructor
     */
    public function getKeywords(): string
    {
        return $this->keywords;
    }

    /**
     * @return string The date passed to the constructor
     */
    public function getDate(): string
    {
        return $this->date;
    }

    /**
     * @return string The content type passed to the constructor
     */
    public function getContentType(): string
    {
        return $this->contentType;
    }

    /**
     * @return int The content length passed to the constructor
     */
    public function getContentLength(): int
    {
        return $this->contentLength;
    }

    /**
     * @return string The language passed to the constructor
     */
    public function getLanguage(): string
    {
        return $this->language;
    }
}

0 commit comments

Comments
 (0)