Skip to content

Commit 1f50f00

Browse files
mmaterarocky
authored and committed
wip: implementing character encoding in boxes_to_text and ToString
1 parent 93cc41c commit 1f50f00

3 files changed

Lines changed: 29 additions & 7 deletions

File tree

mathics/builtin/strings.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from mathics.core.expression import (Expression, Symbol, SymbolFailed, SymbolFalse, SymbolTrue, String, Integer,
1717
from_python, string_list)
1818
from mathics.builtin.lists import python_seq, convert_seq
19+
from mathics_scanner.characters import replace_wl_with_plain_text
20+
from mathics.settings import SYSTEM_CHARACTER_ENCODING
1921

2022

2123
_regex_longest = {
@@ -254,6 +256,20 @@ def unpack_bytes(codes):
254256
return unpack('B' * len(codes), codes)
255257

256258

259+
class SystemCharacterEncoding(Predefined):
    """
    <dl>
    <dt>$SystemCharacterEncoding
        <dd>gives the default character encoding taken from the
        'SYSTEM_CHARACTER_ENCODING' value in 'mathics.settings'.
    </dl>
    """

    name = "$SystemCharacterEncoding"

    # The rule must define the symbol this class names; keying it on
    # '$CharacterEncoding' would attach the value to a different symbol,
    # which has its own builtin class (CharacterEncoding) in this module.
    #
    # The right-hand side is wrapped in double quotes so that it is a
    # string literal (e.g. "UTF-8") rather than the bare text UTF-8,
    # mirroring how the ToString option default is written ('"Unicode"').
    rules = {
        '$SystemCharacterEncoding': '"' + SYSTEM_CHARACTER_ENCODING + '"',
    }
271+
272+
257273
class CharacterEncoding(Predefined):
258274
"""
259275
<dl>
@@ -1576,19 +1592,19 @@ class ToString(Builtin):
15761592
= U2
15771593
"""
15781594

1579-
options = {'CharacterEncoding' : '"Unicode"',
1595+
options = { 'CharacterEncoding' : '"Unicode"',
15801596
'FormatType' : 'OutputForm',
15811597
'NumberMarks': '$NumberMarks',
15821598
'PageHeight' : 'Infinity',
15831599
'PageWidth' : 'Infinity',
15841600
'TotalHeight' : 'Infinity',
15851601
'TotalWidth' : 'Infinity'}
15861602

1587-
def apply(self, value, evaluation, **options):
    'ToString[value_, OptionsPattern[ToString]]'
    # NOTE: the string literal above is the rule pattern for this method,
    # so it has to stay the first statement and must not be reworded.
    #
    # Render `value` in OutputForm and return the resulting text as a
    # String, honouring the CharacterEncoding option.
    #
    # The option arrives as an expression object, while boxes_to_text()
    # compares `encoding` against plain Python strings ("UTF-8",
    # "Unicode"), so it is converted to a Python string first.
    # NOTE(review): assumes option values expose get_string_value(),
    # returning None for non-strings - confirm against
    # mathics.core.expression.
    encoding_option = options['options']["System`CharacterEncoding"]
    encoding = encoding_option.get_string_value()
    if encoding is None:
        # Not a string value: fall back to the default declared in
        # ToString's `options` table.
        encoding = "Unicode"

    boxes = value.format(evaluation, 'System`OutputForm')
    text = boxes.boxes_to_text(evaluation=evaluation, encoding=encoding)
    return String(text)
15931609

15941610

mathics/core/expression.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from mathics.core.convert import sympy_symbol_prefix, SympyExpression
1818
import base64
1919

20+
from mathics_scanner.characters import replace_wl_with_plain_text
2021

2122
def fully_qualified_symbol_name(name) -> bool:
2223
return (
@@ -1904,7 +1905,9 @@ def do_copy(self) -> "Symbol":
19041905
return Symbol(self.name)
19051906

19061907
def boxes_to_text(self, **options) -> str:
    """Return this symbol's name as plain text.

    When the ``encoding`` option is absent, "UTF-8" or "Unicode", the
    name is returned unchanged. For any other encoding the name is passed
    through ``replace_wl_with_plain_text`` with ``False`` as second
    argument (presumably selecting the non-Unicode replacements - confirm
    against mathics_scanner).
    """
    text = str(self.name)
    encoding = options.get("encoding", "UTF-8")
    if encoding not in ("UTF-8", "Unicode"):
        text = replace_wl_with_plain_text(text, False)
    return text
19081911

19091912
def atom_to_boxes(self, f, evaluation) -> "String":
19101913
return String(evaluation.definitions.shorten_name(self.name))
@@ -2723,7 +2726,9 @@ def boxes_to_text(self, show_string_characters=False, **options) -> str:
27232726
):
27242727
value = value[1:-1]
27252728

2726-
return value
2729+
if options.get("encoding", "UTF-8") in ("UTF-8", "Unicode"):
2730+
return value
2731+
return replace_wl_with_plain_text(value, False)
27272732

27282733
def boxes_to_xml(self, show_string_characters=False, **options) -> str:
27292734
from mathics.core.parser import is_symbol_name

mathics/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,4 @@
7272
# whatever it is that setting this thing did.
7373
default_pymathics_modules = []
7474

75+
# Encoding name imported by mathics.builtin.strings: "UTF-8" when the
# interpreter's default string encoding is utf-8, otherwise "ASCII".
# NOTE(review): sys.getdefaultencoding() describes the interpreter, not the
# terminal or locale - confirm this is the intended source.
SYSTEM_CHARACTER_ENCODING = (
    "UTF-8" if sys.getdefaultencoding() == "utf-8" else "ASCII"
)

0 commit comments

Comments
 (0)