Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/Document/ContentStream/ContentStream.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,36 @@ public function getText(Document $document, Page $page, LineGroupingStrategy $li

$previousTextElementOnLine = null;
foreach ($positionedTextElementsForLine as $positionedTextElement) {
if (($positionedTextElementText = $positionedTextElement->getText($document, $page)) === '') {
$elementText = $positionedTextElement->getText($document, $page);
if ($elementText === '') {
$previousTextElementOnLine = $positionedTextElement;
continue;
}

if ($previousTextElementOnLine !== null
&& ($positionedTextElement->absoluteMatrix->offsetX - $previousTextElementOnLine->absoluteMatrix->offsetX - $positionedTextElement->getFont($document, $page)->getWidthForChars($previousTextElementOnLine->getCodePoints(), $previousTextElementOnLine->textState, $previousTextElementOnLine->absoluteMatrix)) >= ($previousTextElementOnLine->textState->fontSize ?? 10) * $previousTextElementOnLine->absoluteMatrix->scaleX * 0.40
&& str_ends_with($text, ' ') === false && str_starts_with($positionedTextElementText, ' ') === false) {
$text .= ' ';
if ($previousTextElementOnLine !== null) {
// The gap between two elements is what remains of the horizontal distance once the previous
// element's own advance is subtracted. That advance is reconstructed by getAdvanceWidth() because
// Tj/TJ do not move the text matrix here; ignoring it (as the old next-element-font width did) left
// the TJ kerning term in the gap and forced a slack threshold.
$gap = $positionedTextElement->absoluteMatrix->offsetX
- $previousTextElementOnLine->absoluteMatrix->offsetX
- $previousTextElementOnLine->getAdvanceWidth($document, $page);

$wordBreakThreshold = ($previousTextElementOnLine->textState->fontSize ?? 10)
* $previousTextElementOnLine->absoluteMatrix->scaleX
* ($previousTextElementOnLine->textState->scale / 100)
* PositionedTextElement::WORD_BREAK_THRESHOLD_EM;

if (
$gap >= $wordBreakThreshold
&& str_ends_with($text, ' ') === false
&& str_starts_with($elementText, ' ') === false
) {
$text .= ' ';
}
}

$text .= $positionedTextElementText;
$text .= $elementText;
$previousTextElementOnLine = $positionedTextElement;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,35 @@ public function getHeight(): float {
* abs($this->absoluteMatrix->scaleY)
* ($this->textState->scale / 100);
}

/**
 * Horizontal advance, in device space, produced by showing this element.
 *
 * Follows the glyph displacement formula of the PDF spec §9.4.4:
 *
 *   tx = ((w0 - Tj/1000) * Tfs + Tc + Tw) * Th
 *
 * The per-glyph part (widths plus character/word spacing, scaled by the matrix) is delegated to the font;
 * the TJ adjustment part is added here, and the sum is multiplied by the horizontal scaling Th.
 *
 * Reconstructed here because Tj/TJ do not advance the text matrix in this parser.
 */
public function getAdvanceWidth(Document $document, Page $page): float {
    // Glyph term: whatever the font reports for this element's code points under the current text state.
    $glyphTerm = $this->getFont($document, $page)->getWidthForChars(
        $this->getCodePoints(),
        $this->textState,
        $this->absoluteMatrix
    );

    $horizontalMatrixScale = $this->absoluteMatrix->scaleX;
    $emSize = $this->textState->fontSize ?? 10;

    // TJ term: adjustments are expressed in thousandths of an em and are subtracted from the advance.
    $adjustmentInEm = $this->getTotalOffset() / 1000;
    $adjustmentTerm = -($adjustmentInEm) * $emSize * $horizontalMatrixScale;

    // Horizontal scaling (Th) is stored as a percentage.
    $horizontalScaling = $this->textState->scale / 100;

    return ($glyphTerm + $adjustmentTerm) * $horizontalScaling;
}

/**
 * Adds up the TJ adjustment numbers carried by this element's text segments.
 *
 * Segments without an adjustment (offset of null) are ignored. The result is in thousandths of an em.
 */
public function getTotalOffset(): float {
    $sum = 0.0;

    foreach ($this->textSegments as $segment) {
        $adjustment = $segment->offset;
        if ($adjustment === null) {
            continue;
        }

        $sum += $adjustment;
    }

    return $sum;
}
}
7 changes: 6 additions & 1 deletion src/Document/Object/Decorator/Font.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,12 @@ public function getWidthForChar(int $characterCode, TextState $textState, Transf
$characterWidth = $this->getDefaultWidth();
}

return ($characterWidth * ($textState->fontSize ?? 10) + $textState->charSpace) * $transformationMatrix->scaleX;
// Word spacing (Tw) applies only to the single-byte character code 32, and never to composite (Type0) fonts (spec §9.3.3).
$wordSpace = ($textState->wordSpace !== 0.0 && $characterCode === 32 && $this->getDescendantFonts() === [])
? $textState->wordSpace
: 0.0;

return ($characterWidth * ($textState->fontSize ?? 10) + $textState->charSpace + $wordSpace) * $transformationMatrix->scaleX;
}

/** @param list<int> $chars */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ creationDate: null
modificationDate: null
pages:
-
content: Helloworld
content: 'Hello world'
2 changes: 1 addition & 1 deletion tests/Samples/files/gdocs-hello-world-simple/contents.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ creationDate: null
modificationDate: null
pages:
-
content: Helloworld
content: 'Hello world'
Original file line number Diff line number Diff line change
Expand Up @@ -12,56 +12,56 @@ modificationDate: null
pages:
-
content: |-
Namquodmolestiasvelcorporis
Nam quod molestias vel corporis
aperiam.
Loremipsumdolorsitamet.EtomnisperferendisEtquisquamquilaboriosamexplicaboet
natuscorruptiautrepudiandaeiurequoinventoreitaqueetodioatque.Quinecessitatibus
oditetcommodiaccusamusEtfuga!
Quidistinctiopraesentiumsedcorporisreiciendiseum
molestiaeeius.
Quiaperiamaspernaturremconsecteturullamestquodquiarchitectocorporiset
repellendusexpedita!ExfugaducimusEaipsametnobisarchitectosedrationedeseruntet
iuresapientequianimiQuis.CumquodfaciliseosexercitationemquamEstullamest
galisumassumendacumdistinctiotemporaquiquasirerumetpariaturharum.
Estinciduntrepellatautiustoodit.
EtmagnamincidunteumpossimusperspiciatisQuopariaturautperferendislaboriosam
eumrepellatipsautdebitisipsautdistinctiodolores!EtrerumexpeditaAutullamadullam
delenitiestfacilisconsequaturutarchitectonobis.
Nondebitisexpeditaeareprehenderitasperioresetvoluptatemquos.
NonminimasolutaSeddelectusquirepellendusdoloremetgalisumquia.Nonsuntdebitis
abvitaeaspernaturExminuseumoptioassumendaestvoluptatemillumetharumipsam
remveniamculpa.AutadipiscideseruntnoninventorerationeHicistevelsintundenon
solutanullainculpaiureetquiarecusandae?QuiveniamquiaidmaioresexcepturiQui
doloresetofficiaaperiamabsintexcepturi.
Estmolestiasillumestdolorempraesentiumcumsolutanesciunt.
UtvelitaspernaturutearumdebitisQuiullamautquoddolorumquomolestiaemagniet
corruptiminima.IddolorerrorabdignissimoscorporisQuocommodi.Etsolutadoloribus
Eosquaeinmagnampraesentiumautaccusantiumlaboriosamnonnemoreprehenderit.
EtevenietducimusUtvoluptatemetporrovelitidsuscipitimpedit!VelculpaitaqueEosfuga
auttotamlaudantiumquievenietmolestiaeveldoloresfugiat.
EstsaepesolutaetverovoluptatemQuomaxime!EostemporaquasadnesciuntitaqueEa
nihiletullamrepellatetvoluptatemdelectus.AutvoluptatumpariaturIdmolestiascum
nobismolestias.UtvelitdistinctioExconsectetureosdebitisperspiciatisaminuscommodi
eosdoloribusautemvelaccusamussequietquidemreiciendis.
Utautemexcepturiutsequilaboriosamestquasdebitisetplaceatconsequatur!
Lorem ipsum dolor sit amet. Et omnis perferendis Et quisquam qui laboriosam explicabo et
natus corrupti aut repudiandae iure quo inventore itaque et odio atque. Qui necessitatibus
odit et commodi accusamus Et fuga!
Qui distinctio praesentium sed corporis reiciendis eum
molestiae eius.
Qui aperiam aspernatur rem consectetur ullamest quod qui architecto corporis et
repellendus expedita! Ex fuga ducimus Ea ipsam et nobis architecto sed ratione deserunt et
iure sapiente qui animi Quis. Cum quod facilis eos exercitationem quam Est ullam est
galisum assumenda cum distinctio tempora qui quasi rerum et pariatur harum.
Est incidunt repellat aut iusto odit.
Et magnam incidunt eum possimus perspiciatis Quo pariatur aut perferendis laboriosam
eum repellat ipsa ut debitis ipsa ut distinctio dolores! Et rerum expeditaAut ullam ad ullam
deleniti est facilis consequatur ut architecto nobis.
Non debitis expedita ea reprehenderit asperiores et voluptatem quos.
Non minima soluta Sed delectus qui repellendus dolorem et galisum quia. Non sunt debitis
ab vitae aspernatur Ex minus eum optio assumenda est voluptatem illum et harum ipsam
rem veniam culpa. Aut adipisci deserunt non inventore ratione Hic iste vel sint unde non
soluta nulla in culpa iure et quia recusandae? Qui veniam quia id maiores excepturiQui
dolores et officia aperiam ab sint excepturi.
Est molestias illum est dolorem praesentium cum soluta nesciunt.
Ut velit aspernatur ut earum debitis Qui ullam aut quod dolorum quo molestiae magni et
corrupti minima. Id dolor error ab dignissimos corporis Quo commodi. Et soluta doloribus
Eos quae in magnam praesentium aut accusantium laboriosam non nemo reprehenderit.
Et eveniet ducimus Ut voluptatem et porro velit id suscipit impedit! Vel culpa itaque Eos fuga
aut totam laudantium qui eveniet molestiae vel dolores fugiat.
Est saepe soluta et vero voluptatemQuo maxime! Eos tempora quas ad nesciunt itaque Ea
nihil et ullam repellat et voluptatem delectus. Aut voluptatum pariatur Id molestias cum
nobis molestias. Ut velit distinctioEx consectetur eos debitis perspiciatis a minus commodi
eos doloribus autem vel accusamus sequi et quidem reiciendis.
Ut autem excepturi ut sequi laboriosam est quas debitis et placeat consequatur!
-
content: |-
Quiautemvoluptaseumdeseruntdolor.
Innesciuntquiaquiasperiorescorruptisedperspiciatistemporaidminusarchitecto!
Rembeataequosautfugitvero.
Atadipiscienimidreiciendisofficiis.
Sedsapientequiaquisintdoloremvelimpeditmagni.
Etdictavoluptatemsitfugitsaepevelnullaetdebitisiureetipsavoluptas?Estfacereharum
etinventoreevenietAutrerumestmolestiasquamquiperspiciatismagnam.
QuieligendisaepeetnequeautemAomnisetpariaturrerumestdolorererumestfacere
quasi.Sitdoloremquiaetveritatisremaliasdeleniti.Etvoluptatemlaborumetimpedit
atqueautsuscipit.RemundequoscumreprehenderittemporeQuieaquesitrerumdistinctio
utipsamculpaaberrorsuntetquaeratQuis.
Inrecusandaedoloridlaudantiumomnisautvelitautperferendisomnisquitenetur
explicabo.EtmaximeassumendaetipsamitaqueAutdelenitiutexcepturialiasetfugit
facerenonmodilaudantium.QuisapientererumnonsuntipsumAdmaximeestvelit
eligendiautquaeratofficiaestarchitectomagnam?
1. Etsequiipsumetexpeditaipsumeumdolorelaborum.
2. Idenimundeeoscommodiquidemeteiusquaerat.
3. EsttemporeveritatissedaliquamQuis.
4. Quiquastemporautvoluptatesdoloribusestfacilisdeserunt33distinctiointernos.
Qui autem voluptas eum deserunt dolor.
In nesciunt quia qui asperiores corrupti sed perspiciatis tempora id minus architecto!
Rem beatae quos aut fugit vero.
At adipisci enim id reiciendis officiis.
Sed sapiente quia qui sint dolorem vel impedit magni.
Et dicta voluptatem sit fugit saepevel nulla et debitis iure et ipsa voluptas? Est facere harum
et inventore eveniet Aut rerum est molestias quam qui perspiciatis magnam.
Qui eligendi saepe et neque autem A omnis et pariatur rerum est dolore rerum est facere
quasi. Sit dolorem quiaet veritatis rem alias deleniti. Et voluptatem laborum et impedit
atqueaut suscipit. Rem unde quos cum reprehenderit temporeQui eaque sit rerum distinctio
ut ipsam culpa ab error sunt et quaerat Quis.
In recusandae dolor id laudantium omnisaut velit aut perferendis omnis qui tenetur
explicabo. Et maxime assumenda et ipsam itaqueAut deleniti ut excepturi alias et fugit
facere non modi laudantium. Qui sapiente rerum non sunt ipsum Ad maxime est velit
eligendi aut quaerat officia est architecto magnam?
1. Et sequi ipsum et expedita ipsum eum dolore laborum.
2. Id enim unde eos commodi quidem et eius quaerat.
3. Est tempore veritatis sed aliquam Quis.
4. Qui quas tempora ut voluptates doloribus est facilis deserunt 33 distinctio internos.
2 changes: 1 addition & 1 deletion tests/Samples/files/issue-152/contents.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pages:
HORTUS BERGERIE ROUGE 2023 75CL AOP PIC ST LOUP210B7520237,951 669,501 13CRD
HORTUS BERGERIE BLANC 2023 75CL IGP VAL DE MONTFERRAND36B7520236,80 244,801 12,5CRD
HORTUS DOMAINE GRANDE CUVEE ROUGE 2022 75CL AOP PIC SAINT LOUP24B75202213,60326,401 13CRD
HORTUS BIB LE LOUP DANS LA BERGERIE ROUGE 3L IGP PAYS D'HERAULT30B3NM11,85355,501 13CRD
HORTUS BIB LE LOUP DANS LA BERGERIE ROUGE 3L IGP PAYS D'HERAULT30 B3NM11,85355,501 13CRD
Total :570
A Noter : seules les marchandises accompagnées d'un bon de livraison seront réceptionnées.
Merci de bien vouloir nous confirmer par retour la disponibilité, le millésime et le prix.
Expand Down
Loading
Loading