Skip to content

Commit 1bbb778

Browse files
authored
Merge pull request #653 from mathjax/better-ranges
Improve handling of arbitrary Unicode in TeX input.
2 parents e22efb4 + b98cbbe commit 1bbb778

3 files changed

Lines changed: 76 additions & 40 deletions

File tree

ts/core/MmlTree/MmlNodes/mo.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ export class MmlMo extends AbstractMmlTokenNode {
6464
indentshiftlast: 'indentshift'
6565
};
6666

67-
/**
68-
* Unicode ranges and their default TeX classes
69-
*/
70-
public static RANGES = RANGES;
71-
7267
/**
7368
* The MathML spacing values for the TeX classes
7469
*/
@@ -389,7 +384,7 @@ export class MmlMo extends AbstractMmlTokenNode {
389384
this.lspace = (def[0] + 1) / 18;
390385
this.rspace = (def[1] + 1) / 18;
391386
} else {
392-
let range = this.getRange(mo);
387+
let range = getRange(mo);
393388
if (range) {
394389
if (this.getProperty('texClass') === undefined) {
395390
this.texClass = range[2];

ts/core/MmlTree/OperatorDictionary.ts

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -80,36 +80,74 @@ export const MO = {
8080
* The default TeX classes for the various unicode blocks, and their names
8181
*/
8282
export const RANGES: RangeDef[] = [
83-
[0x20, 0x7F, TEXCLASS.REL, 'BasicLatin'],
84-
[0xA0, 0xFF, TEXCLASS.ORD, 'Latin1Supplement'],
85-
[0x100, 0x17F, TEXCLASS.ORD, 'LatinExtendedA'],
86-
[0x180, 0x24F, TEXCLASS.ORD, 'LatinExtendedB'],
87-
[0x2B0, 0x2FF, TEXCLASS.ORD, 'SpacingModLetters'],
88-
[0x300, 0x36F, TEXCLASS.ORD, 'CombDiacritMarks'],
89-
[0x370, 0x3FF, TEXCLASS.ORD, 'GreekAndCoptic'],
90-
[0x1E00, 0x1EFF, TEXCLASS.ORD, 'LatinExtendedAdditional'],
91-
[0x2000, 0x206F, TEXCLASS.PUNCT, 'GeneralPunctuation'],
92-
[0x2070, 0x209F, TEXCLASS.ORD, 'SuperAndSubscripts'],
93-
[0x20A0, 0x20CF, TEXCLASS.ORD, 'Currency'],
94-
[0x20D0, 0x20FF, TEXCLASS.ORD, 'CombDiactForSymbols'],
95-
[0x2100, 0x214F, TEXCLASS.ORD, 'LetterlikeSymbols'],
96-
[0x2150, 0x218F, TEXCLASS.ORD, 'NumberForms'],
97-
[0x2190, 0x21FF, TEXCLASS.REL, 'Arrows'],
98-
[0x2200, 0x22FF, TEXCLASS.BIN, 'MathOperators'],
99-
[0x2300, 0x23FF, TEXCLASS.ORD, 'MiscTechnical'],
100-
[0x2460, 0x24FF, TEXCLASS.ORD, 'EnclosedAlphaNums'],
101-
[0x2500, 0x259F, TEXCLASS.ORD, 'BoxDrawing'],
102-
[0x25A0, 0x25FF, TEXCLASS.ORD, 'GeometricShapes'],
103-
[0x2700, 0x27BF, TEXCLASS.ORD, 'Dingbats'],
104-
[0x27C0, 0x27EF, TEXCLASS.ORD, 'MiscMathSymbolsA'],
105-
[0x27F0, 0x27FF, TEXCLASS.REL, 'SupplementalArrowsA'],
106-
[0x2900, 0x297F, TEXCLASS.REL, 'SupplementalArrowsB'],
107-
[0x2980, 0x29FF, TEXCLASS.ORD, 'MiscMathSymbolsB'],
108-
[0x2A00, 0x2AFF, TEXCLASS.BIN, 'SuppMathOperators'],
109-
[0x2B00, 0x2BFF, TEXCLASS.ORD, 'MiscSymbolsAndArrows'],
110-
[0x1D400, 0x1D7FF, TEXCLASS.ORD, 'MathAlphabets']
83+
[0x0020, 0x007F, TEXCLASS.REL, 'mo'], // Basic Latin
84+
[0x00A0, 0x024F, TEXCLASS.ORD, 'mi'], // Latin-1 Supplement, Latin Extended-A, Latin Extended-B
85+
[0x02B0, 0x036F, TEXCLASS.ORD, 'mo'], // Spacing modifier letters, Combining Diacritical Marks
86+
[0x0370, 0x1A20, TEXCLASS.ORD, 'mi'], // Greek and Coptic (through) Tai Tham
87+
[0x1AB0, 0x1AFF, TEXCLASS.ORD, 'mo'], // Combining Diacritical Marks Extended
88+
[0x1B00, 0x1DBF, TEXCLASS.ORD, 'mi'], // Balinese (through) Phonetic Extensions Supplement
89+
[0x1DC0, 0x1DFF, TEXCLASS.ORD, 'mo'], // Combining Diacritical Marks Supplement
90+
[0x1E00, 0x1FFF, TEXCLASS.ORD, 'mi'], // Latin Extended Additional, Greek Extended
91+
[0x2000, 0x206F, TEXCLASS.ORD, 'mo'], // General Punctuation
92+
[0x2070, 0x209F, TEXCLASS.ORD, 'mo'], // Superscript and Subscripts (through) Combining Diacritical Marks for Symbols
93+
[0x2100, 0x214F, TEXCLASS.ORD, 'mi'], // Letterlike Symbols
94+
[0x2150, 0x218F, TEXCLASS.ORD, 'mn'], // Number Forms
95+
[0x2190, 0x21FF, TEXCLASS.REL, 'mo'], // Arrows
96+
[0x2200, 0x22FF, TEXCLASS.BIN, 'mo'], // Mathematical Operators
97+
[0x2300, 0x23FF, TEXCLASS.ORD, 'mo'], // Miscellaneous Technical
98+
[0x2460, 0x24FF, TEXCLASS.ORD, 'mn'], // Enclosed Alphanumerics
99+
[0x2500, 0x27EF, TEXCLASS.ORD, 'mo'], // Box Drawing (through) Miscellaneous Math Symbols-A
100+
[0x27F0, 0x27FF, TEXCLASS.REL, 'mo'], // Supplemental Arrows-A
101+
[0x2800, 0x28FF, TEXCLASS.ORD, 'mtext'], // Braille Patterns
102+
[0x2900, 0x297F, TEXCLASS.REL, 'mo'], // Supplemental Arrows-B
103+
[0x2980, 0x29FF, TEXCLASS.ORD, 'mo'], // Miscellaneous Math Symbols-B
104+
[0x2A00, 0x2AFF, TEXCLASS.BIN, 'mo'], // Supplemental Math Operators
105+
[0x2B00, 0x2B2F, TEXCLASS.ORD, 'mo'], // Miscellaneous Symbols and Arrows
106+
[0x2B30, 0x2B4F, TEXCLASS.REL, 'mo'], // Arrows from above
107+
[0x2B50, 0x2BFF, TEXCLASS.ORD, 'mo'], // Rest of above
108+
[0x2C00, 0x2DE0, TEXCLASS.ORD, 'mi'], // Glagolitic (through) Ethiopic Extended
109+
[0x2E00, 0x2E7F, TEXCLASS.ORD, 'mo'], // Supplemental Punctuation
110+
[0x2E80, 0x2FDF, TEXCLASS.ORD, 'mi'], // CJK Radicals Supplement (through) Kangxi Radicals
111+
[0x2FF0, 0x303F, TEXCLASS.ORD, 'mo'], // Ideographic Desc. Characters, CJK Symbols and Punctuation
112+
[0x3040, 0xA82F, TEXCLASS.ORD, 'mi'], // Hiragana (through) Syloti Nagri
113+
[0xA830, 0xA83F, TEXCLASS.ORD, 'mn'], // Common Indic Number Forms
114+
[0xA840, 0xD7FF, TEXCLASS.ORD, 'mi'], // Phags-pa (through) Hangul Jamo Extended-B
115+
[0xF900, 0xFDFF, TEXCLASS.ORD, 'mi'], // CJK Compatibility Ideographs (through) Arabic Presentation Forms-A
116+
[0xFE00, 0xFE6F, TEXCLASS.ORD, 'mo'], // Variation Selector (through) Small Form Variants
117+
[0xFE70, 0x100FF, TEXCLASS.ORD, 'mi'], // Arabic Presentation Forms-B (through) Linear B Ideograms
118+
[0x10100, 0x1018F, TEXCLASS.ORD, 'mn'], // Aegean Numbers, Ancient Greek Numbers
119+
[0x10190, 0x123FF, TEXCLASS.ORD, 'mi'], // Ancient Symbols (through) Cuneiform
120+
[0x12400, 0x1247F, TEXCLASS.ORD, 'mn'], // Cuneiform Numbers and Punctuation
121+
[0x12480, 0x1BC9F, TEXCLASS.ORD, 'mi'], // Early Dynastic Cuneiform (through) Duployan
122+
[0x1BCA0, 0x1D25F, TEXCLASS.ORD, 'mo'], // Shorthand Format Controls (through) TaiXuan Jing Symbols
123+
[0x1D360, 0x1D37F, TEXCLASS.ORD, 'mn'], // Counting Rod Numerals
124+
[0x1D400, 0x1D7CD, TEXCLASS.ORD, 'mi'], // Math Alphanumeric Symbols
125+
[0x1D7CE, 0x1D7FF, TEXCLASS.ORD, 'mn'], // Numerals from above
126+
[0x1DF00, 0x1F7FF, TEXCLASS.ORD, 'mo'], // Mahjong Tiles (through) Geometric Shapes Extended
127+
[0x1F800, 0x1F8FF, TEXCLASS.REL, 'mo'], // Supplemental Arrows-C
128+
[0x1F900, 0x1F9FF, TEXCLASS.ORD, 'mo'], // Supplemental Symbols and Pictographs
129+
[0x20000, 0x2FA1F, TEXCLASS.ORD, 'mi'], // CJK Unified Ideographs Ext. B (through) CJK Compatibility Ideographs Supp.
111130
];
112131

132+
/**
133+
* Get the Unicode range for the first character of a string
134+
*
135+
* @param {string} text The character to check
136+
* @return {RangeDef|null} The range containing that character, or null
137+
*/
138+
export function getRange(text: string): RangeDef | null {
139+
const n = text.codePointAt(0);
140+
for (const range of RANGES) {
141+
if (n <= range[1]) {
142+
if (n >= range[0]) {
143+
return range;
144+
}
145+
break;
146+
}
147+
}
148+
return null;
149+
}
150+
113151
/**
114152
* The default MathML spacing for the various TeX classes.
115153
*/

ts/input/tex/base/BaseConfiguration.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import {CharacterMap} from '../SymbolMap.js';
3131
import * as bitem from './BaseItems.js';
3232
import {AbstractTags} from '../Tags.js';
3333
import './BaseMappings.js';
34-
34+
import {getRange} from '../../../core/MmlTree/OperatorDictionary.js';
3535

3636
/**
3737
* Remapping some ASCII characters to their Unicode operator equivalent.
@@ -53,13 +53,16 @@ export function Other(parser: TexParser, char: string) {
5353
let def = font ?
5454
// @test Other Font
5555
{mathvariant: parser.stack.env['font']} : {};
56-
const remap = (MapHandler.getMap('remap') as CharacterMap).
57-
lookup(char);
56+
const remap = (MapHandler.getMap('remap') as CharacterMap).lookup(char);
57+
const range = getRange(char);
58+
const type = (range ? range[3] : 'mo');
5859
// @test Other
5960
// @test Other Remap
60-
let mo = parser.create('token', 'mo', def, (remap ? remap.char : char));
61-
NodeUtil.setProperty(mo, 'fixStretchy', true);
62-
parser.configuration.addNode('fixStretchy', mo);
61+
let mo = parser.create('token', type, def, (remap ? remap.char : char));
62+
if (type === 'mo') {
63+
NodeUtil.setProperty(mo, 'fixStretchy', true);
64+
parser.configuration.addNode('fixStretchy', mo);
65+
}
6366
parser.Push(mo);
6467
}
6568

0 commit comments

Comments
 (0)