Skip to content

Commit 7e8f6eb

Browse files
committed
wip
1 parent 1a09a51 commit 7e8f6eb

5 files changed

Lines changed: 177 additions & 56 deletions

File tree

src/EPPlus.Fonts.OpenType/OpenTypeFonts.cs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Date Author Change
1515
using EPPlus.Fonts.OpenType.FontCache;
1616
using EPPlus.Fonts.OpenType.Scanner;
1717
using EPPlus.Fonts.OpenType.Tables;
18+
using EPPlus.Fonts.OpenType.Tables.Cmap;
1819
using EPPlus.Fonts.OpenType.Utils.Platform;
1920
using System;
2021
using System.Collections.Generic;
@@ -189,7 +190,18 @@ public static OpenTypeFont GetFontData(
189190
bool searchSystemDirectories = true,
190191
bool ignoreCache = false)
191192
{
192-
return GetFontDataOpen(fontDirectories, fontName, subFamily, searchSystemDirectories, ignoreCache);
193+
var font = GetFontDataOpen(fontDirectories, fontName, subFamily, searchSystemDirectories, ignoreCache);
194+
uint[] codePoints = { 0x1F600, 97, 98, 99 }; // 😀, a, b, c
195+
196+
foreach (uint cp in codePoints)
197+
{
198+
ushort glyphId;
199+
bool found = font.CmapTable.TryGetGlyphId(cp, out glyphId);
200+
201+
char display = cp <= 0xFFFF ? (char)cp : '?';
202+
Console.WriteLine($"U+{cp:X} ('{display}'): found={found}, glyphId={glyphId}");
203+
}
204+
return font;
193205
}
194206

195207
/// <summary>

src/EPPlus.Fonts.OpenType/Subsetting/CmapSubsetProcessor.cs

Lines changed: 116 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ public void Discover(FontSubsettingContext context)
3131
{
3232
ushort oldGid;
3333
if (context.OriginalFont.CmapTable.TryGetGlyphId(codePoint, out oldGid))
34+
{
35+
// DEBUG: Print the mapping
36+
Console.WriteLine($"Code point 0x{codePoint:X} → Glyph ID {oldGid}");
37+
}
38+
else
39+
{
40+
// IMPORTANT: If this runs for an emoji, it means the font does not contain it!
41+
Console.WriteLine($"Code point 0x{codePoint:X} NOT FOUND in font!");
42+
}
43+
if (context.OriginalFont.CmapTable.TryGetGlyphId(codePoint, out oldGid))
3444
{
3545
if (!context.IncludedGlyphs.Contains(oldGid))
3646
{
@@ -48,21 +58,15 @@ public void Discover(FontSubsettingContext context)
4858

4959
public void Rewrite(FontSubsettingContext context)
5060
{
51-
// --- PHASE 3: REWRITE ---
52-
// Now context.OldToNewGlyphId IS populated. We can safely map
53-
// Unicode -> OldGID -> NewGID.
54-
5561
// Build mapping: Unicode code point → NEW glyph ID in subset
5662
Dictionary<uint, ushort> cmapMapping = new Dictionary<uint, ushort>();
5763

58-
5964
foreach (uint codePoint in context.UsedCodePoints)
6065
{
6166
ushort oldGid;
6267
if (context.OriginalFont.CmapTable.TryGetGlyphId(codePoint, out oldGid))
6368
{
6469
ushort newGid;
65-
// Map the old ID to the new dense ID (0, 1, 2...)
6670
if (context.OldToNewGlyphId.TryGetValue(oldGid, out newGid))
6771
{
6872
cmapMapping[codePoint] = newGid;
@@ -74,55 +78,126 @@ public void Rewrite(FontSubsettingContext context)
7478
}
7579
}
7680

77-
// DEBUG: Visa vad som faktiskt läggs in
78-
Console.WriteLine("=== cmapMapping innehåll ===");
79-
Console.WriteLine($"'T' (84) i cmapMapping: {(cmapMapping.ContainsKey(84) ? cmapMapping[84].ToString() : "SAKNAS")}");
80-
Console.WriteLine($"'A' (65) i cmapMapping: {(cmapMapping.ContainsKey(65) ? cmapMapping[65].ToString() : "SAKNAS")}");
81-
82-
// Visa också OldToNewGlyphId för 'T'
83-
ushort tOldGid;
84-
context.OriginalFont.CmapTable.TryGetGlyphId(84, out tOldGid);
85-
Console.WriteLine($"'T' OldGID: {tOldGid}");
86-
Console.WriteLine($"OldToNewGlyphId[{tOldGid}]: {(context.OldToNewGlyphId.ContainsKey(tOldGid) ? context.OldToNewGlyphId[tOldGid].ToString() : "SAKNAS")}");
87-
88-
8981
// Always map code point 0 to .notdef (required by spec)
9082
cmapMapping[0] = 0;
9183

92-
// Create format 4 subtable using the provided class names
93-
// This creates the internal segment structure (Start/EndCount, IdDelta, etc.)
94-
CmapSubtable4 format4 = CmapFormat4.CreateFromMappings(cmapMapping);
84+
// Check if we need Format 12 (for code points > 0xFFFF like emoji)
85+
bool needsFormat12 = cmapMapping.Keys.Any(cp => cp > 0xFFFF);
9586

9687
// Build new cmap table
9788
CmapTable newCmap = new CmapTable();
9889
newCmap.Version = 0;
99-
// Note: NumTables is usually updated automatically when records are added,
100-
// but we set it to be explicit.
101-
newCmap.NumTables = 2;
10290

103-
// (3,1) – Windows Unicode BMP
104-
EncodingRecord winRecord = new EncodingRecord(Platforms.Windows, 1, 0);
105-
winRecord.Subtable = format4;
91+
if (needsFormat12)
92+
{
93+
// Create Format 12 subtable for full Unicode support
94+
var format12 = CreateFormat12Subtable(cmapMapping);
95+
96+
// Also create Format 4 for BMP characters (backwards compatibility)
97+
var bmpMapping = cmapMapping.Where(kvp => kvp.Key <= 0xFFFF)
98+
.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
99+
var format4 = CmapFormat4.CreateFromMappings(bmpMapping);
100+
101+
// Add Format 12 record (3,10) – Windows Unicode UCS-4 (full range)
102+
EncodingRecord format12Record = new EncodingRecord(Platforms.Windows, 10, 0);
103+
format12Record.Subtable = format12;
104+
newCmap.EncodingRecords.Add(format12Record);
105+
newCmap.SubTables.Add(format12);
106+
107+
// Add Format 4 record (3,1) – Windows Unicode BMP (backwards compatibility)
108+
EncodingRecord format4Record = new EncodingRecord(Platforms.Windows, 1, 0);
109+
format4Record.Subtable = format4;
110+
newCmap.EncodingRecords.Add(format4Record);
111+
newCmap.SubTables.Add(format4);
112+
113+
newCmap.NumTables = 2;
114+
}
115+
else
116+
{
117+
// Only BMP characters - Format 4 is sufficient
118+
CmapSubtable4 format4 = CmapFormat4.CreateFromMappings(cmapMapping);
106119

107-
// (0,3) – Unicode BMP
108-
EncodingRecord unicodeRecord = new EncodingRecord(Platforms.Unicode, 3, 0);
109-
unicodeRecord.Subtable = format4;
120+
// (3,1) – Windows Unicode BMP
121+
EncodingRecord winRecord = new EncodingRecord(Platforms.Windows, 1, 0);
122+
winRecord.Subtable = format4;
110123

111-
newCmap.EncodingRecords.Add(winRecord);
112-
newCmap.EncodingRecords.Add(unicodeRecord);
124+
// (0,3) – Unicode BMP
125+
EncodingRecord unicodeRecord = new EncodingRecord(Platforms.Unicode, 3, 0);
126+
unicodeRecord.Subtable = format4;
113127

114-
// Add the subtable to the table's internal list
115-
newCmap.SubTables.Add(format4);
128+
newCmap.EncodingRecords.Add(winRecord);
129+
newCmap.EncodingRecords.Add(unicodeRecord);
130+
newCmap.SubTables.Add(format4);
131+
newCmap.NumTables = 2;
132+
}
116133

117-
// Replace cmap in subset font.
118-
// Now GetMinCharCode() will return the correct value to the validator.
119134
context.SubsetFont.AddOrReplaceTable(newCmap);
135+
}
136+
137+
/// <summary>
/// Builds a cmap format 12 subtable from a code point → glyph ID mapping.
/// Entries are visited in ascending code point order, and every run in which
/// both the code points and the glyph IDs increase by exactly one is collapsed
/// into a single group.
/// </summary>
/// <param name="mapping">Code point to glyph ID pairs to encode.</param>
/// <returns>
/// A subtable whose <c>Groups</c>, <c>NumGroups</c> and <c>Length</c> have been populated.
/// </returns>
private CmapSubtable12 CreateFormat12Subtable(Dictionary<uint, ushort> mapping)
{
    var result = new CmapSubtable12();

    // Groups are emitted in ascending code point order.
    var ordered = mapping.OrderBy(entry => entry.Key).ToArray();

    if (ordered.Length == 0)
    {
        // No mappings at all: the subtable consists of its 16-byte header only.
        result.NumGroups = 0;
        result.Length = 16;
        return result;
    }

    // State of the run that is currently being extended.
    uint runFirst = ordered[0].Key;
    uint runLast = runFirst;
    uint runFirstGid = ordered[0].Value;

    foreach (var entry in ordered.Skip(1))
    {
        uint cp = entry.Key;
        ushort gid = entry.Value;

        // The run is broken when either the code point or the glyph ID
        // is not the direct successor expected for the open run.
        bool breaksRun = cp != runLast + 1 ||
                         gid != runFirstGid + (cp - runFirst);

        if (breaksRun)
        {
            // Flush the finished run, then open a new one at this entry.
            result.Groups.Add(new SequencialMapGroup
            {
                StartCharCode = runFirst,
                EndCharCode = runLast,
                StartGlyphId = runFirstGid
            });

            runFirst = cp;
            runFirstGid = gid;
        }

        // Whether extended or freshly opened, the run now ends at this code point.
        runLast = cp;
    }

    // The last run is still open when the loop finishes.
    result.Groups.Add(new SequencialMapGroup
    {
        StartCharCode = runFirst,
        EndCharCode = runLast,
        StartGlyphId = runFirstGid
    });

    result.NumGroups = (uint)result.Groups.Count;

    // Total size: 16-byte header plus 12 bytes per group.
    result.Length = 16 + (result.NumGroups * 12);

    return result;
}
127202
}
128203
}

src/EPPlus.Fonts.OpenType/Subsetting/GlyphAndLocaSubsetProcessor.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ public void Rewrite(FontSubsettingContext context)
9090
context.SubsetFont.AddOrReplaceTable(LocaTable.CreateSubset(offsets, context.SubsetFont.HeadTable.IndexToLocFormat));
9191
Console.WriteLine($"=== GlyfAndLocaSubsetProcessor ===");
9292
Console.WriteLine($"NewToOldGlyphId count: {context.NewToOldGlyphId.Count}");
93-
Console.WriteLine($"NewGID 42 → OldGID: {context.NewToOldGlyphId[42]}");
9493
}
9594

9695
private static bool IsEmpty(Glyph g)

src/EPPlus.Fonts.OpenType/Tables/Cmap/CmapTable.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,9 @@ public bool TryGetGlyphId(uint codePoint, out ushort glyphId)
204204
glyphId = 0;
205205

206206
var preferred = GetPreferredSubtable();
207-
if (preferred != null)
207+
if (preferred != null && preferred.TryGetGlyphId(codePoint, out glyphId) && glyphId != 0)
208208
{
209-
return preferred.TryGetGlyphId(codePoint, out glyphId) && glyphId != 0;
209+
return true;
210210
}
211211

212212
// Fallback: loop over all of them

src/EPPlus.Fonts.OpenType/TextShaping/TextShaper.cs

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -215,17 +215,50 @@ private List<ShapedGlyph> MapToGlyphs(string text)
215215
var cmapTable = _font.CmapTable;
216216
var hmtxTable = _font.HmtxTable;
217217

218-
for (ushort i = 0; i < text.Length; i++)
218+
int i = 0;
219+
while (i < text.Length)
219220
{
220-
char c = text[i];
221+
uint codePoint;
222+
int charCount;
221223

222-
// Map character to glyph ID
223-
int glyphId = cmapTable.MapCharToGlyph(c);
224+
// Check if this is a surrogate pair
225+
if (i < text.Length - 1 && char.IsHighSurrogate(text[i]))
226+
{
227+
// Potential surrogate pair: 2 chars → 1 Unicode code point
228+
char high = text[i];
229+
char low = text[i + 1];
230+
231+
if (char.IsLowSurrogate(low))
232+
{
233+
// Valid pair - convert to code point
234+
codePoint = (uint)char.ConvertToUtf32(high, low);
235+
charCount = 2;
236+
}
237+
else
238+
{
239+
// Invalid surrogate pair - treat as .notdef and skip high surrogate
240+
codePoint = 0;
241+
charCount = 1;
242+
}
243+
}
244+
else if (char.IsSurrogate(text[i]))
245+
{
246+
// Lone surrogate (invalid) - treat as .notdef
247+
codePoint = 0;
248+
charCount = 1;
249+
}
250+
else
251+
{
252+
// Normal BMP character
253+
codePoint = text[i];
254+
charCount = 1;
255+
}
224256

225-
// Handle missing glyphs (use .notdef)
226-
if (glyphId < 0)
257+
// Map code point to glyph ID
258+
ushort glyphId;
259+
if (!cmapTable.TryGetGlyphId(codePoint, out glyphId))
227260
{
228-
glyphId = 0; // .notdef
261+
glyphId = 0; // .notdef if not found
229262
}
230263

231264
// Get base advance width from hmtx (BEFORE any kerning)
@@ -234,14 +267,16 @@ private List<ShapedGlyph> MapToGlyphs(string text)
234267
glyphs.Add(new ShapedGlyph
235268
{
236269
GlyphId = (ushort)glyphId,
237-
BaseAdvance = baseAdvance, // ← Store original advance
238-
XAdvance = baseAdvance, // ← Initially same as base
270+
BaseAdvance = baseAdvance,
271+
XAdvance = baseAdvance,
239272
YAdvance = 0,
240273
XOffset = 0,
241274
YOffset = 0,
242-
ClusterIndex = i,
243-
CharCount = 1
275+
ClusterIndex = (ushort)i, // Points to FIRST char of the cluster
276+
CharCount = (byte)charCount // 1 for normal, 2 for surrogate pair
244277
});
278+
279+
i += charCount; // Skip both chars if surrogate pair
245280
}
246281

247282
return glyphs;

0 commit comments

Comments
 (0)