Return int from Stream::peekChar() so EOF is passed through unchanged.

peekChar() previously cast the result of peek() to unsigned char. This
patch makes it return int, returns early when peek() yields EOF, and
widens isNameChar() to accept int. It also adds an integration test that
preprocesses a file containing the bytes FF FE (UTF-16 BOM) followed by
3B 00 (';') and expects ';' on stdout with empty stderr.

diff --git a/integration_test.py b/integration_test.py
index 3ca2fd02..c000b27b 100644
--- a/integration_test.py
+++ b/integration_test.py
@@ -502,4 +502,17 @@ def test_define(record_property, tmpdir): # #589
 
     assert exitcode == 0
     assert stderr == "test.cpp:1: syntax error: failed to expand 'TEST_P', Invalid ## usage when expanding 'TEST_P': Unexpected token ')'\n"
-    assert stdout == '\n'
\ No newline at end of file
+    assert stdout == '\n'
+
+def test_utf16_bom(tmpdir):
+    test_file = os.path.join(tmpdir, "test.cpp")
+    with open(test_file, 'wb') as f:
+        f.write(b'\xFF\xFE\x3B\x00')
+
+    args = [test_file]
+
+    exitcode, stdout, stderr = simplecpp(args, cwd=tmpdir)
+
+    assert exitcode == 0
+    assert stderr == ''
+    assert stdout == ';\n'
diff --git a/simplecpp.cpp b/simplecpp.cpp
index a7ced05a..3faa6894 100644
--- a/simplecpp.cpp
+++ b/simplecpp.cpp
@@ -275,8 +275,10 @@ class simplecpp::TokenList::Stream {
         return ch;
     }
 
-    unsigned char peekChar() {
-        auto ch = static_cast<unsigned char>(peek());
+    int peekChar() {
+        int ch = peek();
+        if (ch == EOF)
+            return ch;
 
         // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
         // character is non-ASCII character then replace it with 0xff
@@ -285,7 +287,7 @@ class simplecpp::TokenList::Stream {
             const auto ch2 = static_cast<unsigned char>(peek());
             unget();
             const int ch16 = makeUtf16Char(ch, ch2);
-            ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
+            ch = (ch16 >= 0x80) ? 0xff : ch16;
         }
 
         // Handling of newlines..
@@ -598,7 +600,7 @@ std::string simplecpp::TokenList::stringify(bool linenrs) const
     return ret.str();
 }
 
-static bool isNameChar(unsigned char ch)
+static bool isNameChar(int ch)
 {
     return std::isalnum(ch) || ch == '_' || ch == '$';
 }