Skip to content
15 changes: 14 additions & 1 deletion integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,4 +502,17 @@ def test_define(record_property, tmpdir): # #589

assert exitcode == 0
assert stderr == "test.cpp:1: syntax error: failed to expand 'TEST_P', Invalid ## usage when expanding 'TEST_P': Unexpected token ')'\n"
assert stdout == '\n'
assert stdout == '\n'

def test_utf16_bom(tmpdir):
    """A file made of the bytes FF FE 3B 00 must be preprocessed to ';'.

    FF FE is a UTF-16 byte order mark and 3B 00 is ';' stored low byte
    first; the run is expected to succeed without writing to stderr.
    """
    source_path = os.path.join(tmpdir, "test.cpp")
    with open(source_path, 'wb') as handle:
        handle.write(b'\xFF\xFE\x3B\x00')

    exitcode, stdout, stderr = simplecpp([source_path], cwd=tmpdir)

    assert exitcode == 0
    assert stderr == ''
    assert stdout == ';\n'
10 changes: 6 additions & 4 deletions simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,10 @@ class simplecpp::TokenList::Stream {
return ch;
}

unsigned char peekChar() {
auto ch = static_cast<unsigned char>(peek());
int peekChar() {
int ch = peek();
if (ch == EOF)
return ch;
Comment thread
chrchr-github marked this conversation as resolved.

// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
// character is a non-ASCII character then replace it with 0xff
Expand All @@ -285,7 +287,7 @@ class simplecpp::TokenList::Stream {
const auto ch2 = static_cast<unsigned char>(peek());
unget();
const int ch16 = makeUtf16Char(ch, ch2);
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
ch = (ch16 >= 0x80) ? 0xff : ch16;
}

// Handling of newlines..
Expand Down Expand Up @@ -598,7 +600,7 @@ std::string simplecpp::TokenList::stringify(bool linenrs) const
return ret.str();
}

static bool isNameChar(unsigned char ch)
/**
 * Tell whether `ch` may appear in an identifier: an alphanumeric
 * character, '_' or '$'.
 *
 * `ch` is an int, so it may carry EOF or some other value that does not
 * fit in an unsigned char. std::isalnum is only defined for EOF and for
 * values representable as unsigned char, so everything outside 0..0xff
 * is rejected up front instead of being handed to it.
 *
 * @param ch character code to classify
 * @return true if `ch` is alphanumeric, '_' or '$'; false otherwise
 */
static bool isNameChar(int ch)
{
    // Negative values (including EOF) and values above 0xff are never
    // name characters; returning early keeps std::isalnum well defined.
    if (ch < 0 || ch > 0xff)
        return false;
    return std::isalnum(ch) != 0 || ch == '_' || ch == '$';
}
Expand Down
Loading