From 14a44eb4b9cb5cb1611d4b3ffedb1152ebbf95f0 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:18:00 +0100 Subject: [PATCH 01/10] Fix hang on UTF-16 LE BOM file --- simplecpp.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 7afc17ab..b63bd0fd 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -282,9 +282,10 @@ class simplecpp::TokenList::Stream { // character is non-ASCII character then replace it with 0xff if (isUtf16) { (void)get(); - const auto ch2 = static_cast(peek()); - unget(); - const int ch16 = makeUtf16Char(ch, ch2); + const int ch2 = peek(); + if (ch2 != EOF) + unget(); + const int ch16 = makeUtf16Char(ch, static_cast(ch2)); ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); } From 4cc9e60ff86db4416bbaf1c101808ff3a81a5041 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:19:02 +0100 Subject: [PATCH 02/10] Update simplecpp.cpp --- simplecpp.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index b63bd0fd..4baa3728 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -276,16 +276,18 @@ class simplecpp::TokenList::Stream { } unsigned char peekChar() { - auto ch = static_cast(peek()); + const int pk = peek(); + auto ch = static_cast(pk); + if (pk == EOF) + return ch; // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the // character is non-ASCII character then replace it with 0xff if (isUtf16) { (void)get(); - const int ch2 = peek(); - if (ch2 != EOF) - unget(); - const int ch16 = makeUtf16Char(ch, static_cast(ch2)); + const int ch2 = static_cast(peek()); + unget(); + const int ch16 = makeUtf16Char(ch, ch2); ch = static_cast(((ch16 >= 0x80) ? 
0xff : ch16)); } From b9287529cfaece6702b4eedbfe83f46f6c0c6fe6 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:26:49 +0100 Subject: [PATCH 03/10] Update simplecpp.cpp --- simplecpp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 4baa3728..55264794 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -285,7 +285,7 @@ class simplecpp::TokenList::Stream { // character is non-ASCII character then replace it with 0xff if (isUtf16) { (void)get(); - const int ch2 = static_cast(peek()); + const auto ch2 = static_cast(peek()); unget(); const int ch16 = makeUtf16Char(ch, ch2); ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); From bcb50eca8f66d75f33be75869a84d8a9fc2aaa85 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 14:40:23 +0200 Subject: [PATCH 04/10] Update simplecpp.cpp [skip ci] --- simplecpp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 55264794..66fe9ebc 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -278,8 +278,8 @@ class simplecpp::TokenList::Stream { unsigned char peekChar() { const int pk = peek(); auto ch = static_cast(pk); - if (pk == EOF) - return ch; + //if (pk == EOF) + //return ch; // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. 
If the // character is non-ASCII character then replace it with 0xff From 9f371acbabbcde2bdf42072ff196bcc17e5bc6f1 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 14:49:12 +0200 Subject: [PATCH 05/10] Update integration_test.py --- integration_test.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/integration_test.py b/integration_test.py index 3ca2fd02..b7571ec0 100644 --- a/integration_test.py +++ b/integration_test.py @@ -502,4 +502,17 @@ def test_define(record_property, tmpdir): # #589 assert exitcode == 0 assert stderr == "test.cpp:1: syntax error: failed to expand 'TEST_P', Invalid ## usage when expanding 'TEST_P': Unexpected token ')'\n" - assert stdout == '\n' \ No newline at end of file + assert stdout == '\n' + +def test_utf16_bom(tmpdir): + test_file = os.path.join(tmpdir, "test.cpp") + with open(test_file, 'w') as f: + f.write(b'\xFF\xFE\x00\x3B') + + args = [test_file] + + exitcode, stdout, stderr = simplecpp(args, cwd=tmpdir) + + assert exitcode == 0 + assert stderr == '' + assert stdout == '\n' From 47f267cbb5e285f87b3136d7259a6375cebb92cf Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 14:53:25 +0200 Subject: [PATCH 06/10] Update integration_test.py --- integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_test.py b/integration_test.py index b7571ec0..e1121014 100644 --- a/integration_test.py +++ b/integration_test.py @@ -507,7 +507,7 @@ def test_define(record_property, tmpdir): # #589 def test_utf16_bom(tmpdir): test_file = os.path.join(tmpdir, "test.cpp") with open(test_file, 'w') as f: - f.write(b'\xFF\xFE\x00\x3B') + f.write("\xFF\xFE\x00\x3B") args = [test_file] From ed1fef48c3c0a9b16f82b4351f12bac0bcd9cefc Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 
16:29:55 +0200 Subject: [PATCH 07/10] Update integration_test.py --- integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_test.py b/integration_test.py index e1121014..72cb0322 100644 --- a/integration_test.py +++ b/integration_test.py @@ -507,7 +507,7 @@ def test_define(record_property, tmpdir): # #589 def test_utf16_bom(tmpdir): test_file = os.path.join(tmpdir, "test.cpp") with open(test_file, 'w') as f: - f.write("\xFF\xFE\x00\x3B") + f.write("\xFF\xFE\x3B\x00") args = [test_file] From ebe842fe0fc45f7d2073b4f70142d0b42363f638 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 16:49:51 +0200 Subject: [PATCH 08/10] Update simplecpp.cpp --- simplecpp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 66fe9ebc..55264794 100644 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -278,8 +278,8 @@ class simplecpp::TokenList::Stream { unsigned char peekChar() { const int pk = peek(); auto ch = static_cast(pk); - //if (pk == EOF) - //return ch; + if (pk == EOF) + return ch; // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. 
If the // character is non-ASCII character then replace it with 0xff From b004fad49ef1474a64a5e8e40a0f67b56add10c0 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 16:54:54 +0200 Subject: [PATCH 09/10] Update integration_test.py --- integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_test.py b/integration_test.py index 72cb0322..55cce19c 100644 --- a/integration_test.py +++ b/integration_test.py @@ -515,4 +515,4 @@ def test_utf16_bom(tmpdir): assert exitcode == 0 assert stderr == '' - assert stdout == '\n' + assert stdout == ';\n' From 202f26529ba59ddec10ad851c9ee20b6382de003 Mon Sep 17 00:00:00 2001 From: chrchr-github <78114321+chrchr-github@users.noreply.github.com> Date: Wed, 13 May 2026 20:12:35 +0200 Subject: [PATCH 10/10] Update integration_test.py --- integration_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_test.py b/integration_test.py index 55cce19c..c000b27b 100644 --- a/integration_test.py +++ b/integration_test.py @@ -506,8 +506,8 @@ def test_define(record_property, tmpdir): # #589 def test_utf16_bom(tmpdir): test_file = os.path.join(tmpdir, "test.cpp") - with open(test_file, 'w') as f: - f.write("\xFF\xFE\x3B\x00") + with open(test_file, 'wb') as f: + f.write(b'\xFF\xFE\x3B\x00') args = [test_file]