Skip to content

Commit

Permalink
Special-case ## pasting to string/character constants (issue danmar#168
Browse files Browse the repository at this point in the history
…) (danmar#255)

This enables the use of macros to append user-defined literal suffixes (operator "") to string and character literals.
  • Loading branch information
patrickdowling authored Mar 18, 2022
1 parent e4cb748 commit e7d85ea
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 43 deletions.
2 changes: 1 addition & 1 deletion run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def cleanup(out):
'has_attribute.cpp',
'header_lookup1.c', # missing include <stddef.h>
'line-directive-output.c',
'macro_paste_hashhash.c',
# 'macro_paste_hashhash.c',
'microsoft-ext.c',
'normalize-3.c', # gcc has different output \uAC00 vs \U0000AC00 on cygwin/linux
'pr63831-1.c', # __has_attribute => works differently on cygwin/linux
Expand Down
114 changes: 72 additions & 42 deletions simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ static bool isOct(const std::string &s)
return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8');
}

/**
 * Tell whether a token's text is wrapped in double quotes.
 * Only the first and last characters are inspected; the content in between
 * is not parsed.
 */
static bool isStringLiteral(const std::string &s)
{
    const std::string::size_type len = s.size();
    // A single quote character on its own is not a complete literal.
    if (len <= 1)
        return false;
    return s[0] == '"' && s[len - 1] == '"';
}

/**
 * Tell whether a token's text is wrapped in single quotes.
 * Only the delimiters are checked; the content is not parsed, so forms such
 * as 'a', '\t', '\000', '\xff', 'abcd' and even '' are all accepted.
 */
static bool isCharLiteral(const std::string &s)
{
    const std::string::size_type len = s.size();
    // A single quote character on its own is not a complete literal.
    if (len <= 1)
        return false;
    return s[0] == '\'' && s[len - 1] == '\'';
}

static const simplecpp::TokenString DEFINE("define");
static const simplecpp::TokenString UNDEF("undef");
Expand Down Expand Up @@ -1922,7 +1933,8 @@ namespace simplecpp {
throw invalidHashHash(tok->location, name());

bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>";
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual)
bool canBeConcatenatedStringOrChar = isStringLiteral(A->str()) || isCharLiteral(A->str());
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar)
throw invalidHashHash(tok->location, name());

Token *B = tok->next->next;
Expand All @@ -1933,55 +1945,73 @@ namespace simplecpp {
(!canBeConcatenatedWithEqual && B->op == '='))
throw invalidHashHash(tok->location, name());

std::string strAB;

const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];
// Superficial check; more in-depth would in theory be possible _after_ expandArg
if (canBeConcatenatedStringOrChar && (B->number || !B->name))
throw invalidHashHash(tok->location, name());

TokenList tokensB(files);
if (expandArg(&tokensB, B, parametertokens)) {
if (tokensB.empty())
strAB = A->str();
else if (varargs && A->op == ',') {
strAB = ",";
const Token *nextTok = B->next;

if (canBeConcatenatedStringOrChar) {
// It seems clearer to handle this case separately even though the code is similar-ish, but we don't want to merge here.
// TODO The question is whether the ## or varargs may still apply, and how to provoke?
if (expandArg(&tokensB, B, parametertokens)) {
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
} else {
strAB = A->str() + tokensB.cfront()->str();
tokensB.deleteToken(tokensB.front());
tokensB.push_back(new Token(*B));
tokensB.back()->location = loc;
}
} else {
strAB = A->str() + B->str();
}

const Token *nextTok = B->next;
if (varargs && tokensB.empty() && tok->previous->str() == ",")
output->deleteToken(A);
else if (strAB != "," && macros.find(strAB) == macros.end()) {
A->setstr(strAB);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
TokenList output2(files);
output2.push_back(new Token(strAB, tok->location));
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
output->deleteToken(A);
output->takeTokens(output2);
} else {
output->deleteToken(A);
TokenList tokens(files);
tokens.push_back(new Token(strAB, tok->location));
// for function like macros, push the (...)
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
const MacroMap::const_iterator it = macros.find(strAB);
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
if (tok2)
nextTok = tok2->next;
std::string strAB;

const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];

if (expandArg(&tokensB, B, parametertokens)) {
if (tokensB.empty())
strAB = A->str();
else if (varargs && A->op == ',') {
strAB = ",";
} else {
strAB = A->str() + tokensB.cfront()->str();
tokensB.deleteToken(tokensB.front());
}
} else {
strAB = A->str() + B->str();
}

if (varargs && tokensB.empty() && tok->previous->str() == ",")
output->deleteToken(A);
else if (strAB != "," && macros.find(strAB) == macros.end()) {
A->setstr(strAB);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
TokenList output2(files);
output2.push_back(new Token(strAB, tok->location));
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
output->deleteToken(A);
output->takeTokens(output2);
} else {
output->deleteToken(A);
TokenList tokens(files);
tokens.push_back(new Token(strAB, tok->location));
// for function like macros, push the (...)
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
const MacroMap::const_iterator it = macros.find(strAB);
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
if (tok2)
nextTok = tok2->next;
}
}
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
}
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
}

return nextTok;
Expand Down
120 changes: 120 additions & 0 deletions test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,105 @@ static void hashhash13()
ASSERT_EQUALS("\n& ab", preprocess(code2));
}

// Pasting an identifier suffix onto a string literal argument via ## must
// produce the same output as the hand-written literal+suffix on the line before.
static void hashhash_string_literal()
{
    const char input[] =
        "#define UL(x) x##_ul\n"
        "\"ABC\"_ul;\n"
        "UL(\"ABC\");";
    const char expected[] = "\n\"ABC\" _ul ;\n\"ABC\" _ul ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// The string literal reaches the ## operator through an outer macro (STR)
// that forwards its argument to CONCAT.
static void hashhash_string_wrapped()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "#define STR(x) CONCAT(x,s)\n"
        "STR(\"ABC\");";
    const char expected[] = "\n\n\"ABC\" s ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a single-character literal.
static void hashhash_char_literal()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('a');";
    const char expected[] = "\n'a' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a multi-character literal ('abcd').
static void hashhash_multichar_literal()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('abcd');";
    const char expected[] = "\n'abcd' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a character literal whose content is an
// escaped single quote.
static void hashhash_char_escaped()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('\\'');";
    const char expected[] = "\n'\\'' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// The right-hand operand of ## is an empty macro argument; only the string
// literal is expected in the output.
static void hashhash_string_nothing()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "CONCAT(\"ABC\",);";
    const char expected[] = "\n\"ABC\" ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// A string literal pasted with a character literal.
static void hashhash_string_char()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "CONCAT(\"ABC\", 'c');";

    // This is accepted today, although arguably it should not be: the
    // resulting token sequence is not useful.
    const char expected[] = "\n\"ABC\" 'c' ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// A string literal pasted with an identifier that is itself a macro name (LIT).
static void hashhash_string_name()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "#define LIT _literal\n"
        "CONCAT(\"string\", LIT);";

    // TODO is this correct? clang rejects this because it is not really valid,
    // but gcc seems to accept it.
    // see https://gist.github.com/patrickdowling/877a25294f069bf059f3b07f9b5b7039
    const char expected[] = "\n\n\"string\" LIT ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Two chained ## operators with a string literal as the leftmost operand,
// reached through a forwarding macro (PASTER).
static void hashhashhash_int_literal()
{
    const char input[] =
        "#define CONCAT(a,b,c) a##b##c\n"
        "#define PASTER(a,b,c) CONCAT(a,b,c)\n"
        "PASTER(\"123\",_i,ul);";
    const char expected[] = "\n\n\"123\" _iul ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// A number pasted with an identifier suffix yields a single token, just like
// the hand-written suffixed number on the following line.
static void hashhash_int_literal()
{
    const char input[] =
        "#define PASTE(a,b) a##b\n"
        "PASTE(123,_i);\n"
        "1234_i;\n";
    const char expected[] = "\n123_i ;\n1234_i ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

static void hashhash_invalid_1()
{
const char code[] = "#define f(a) (##x)\nf(1)";
Expand All @@ -1087,6 +1186,16 @@ static void hashhash_invalid_2()
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList));
}

static void hashhash_invalid_3()
{
const char code[] =
"#define BAD(x) x##12345\nBAD(\"ABC\")";

simplecpp::OutputList outputList;
preprocess(code, simplecpp::DUI(), &outputList);
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD'.\n", toString(outputList));
}

static void has_include_1()
{
const char code[] = "#ifdef __has_include\n"
Expand Down Expand Up @@ -2306,8 +2415,19 @@ int main(int argc, char **argv)
TEST_CASE(hashhash11); // #60: #define x # # #
TEST_CASE(hashhash12);
TEST_CASE(hashhash13);
TEST_CASE(hashhash_string_literal);
TEST_CASE(hashhash_string_wrapped);
TEST_CASE(hashhash_char_literal);
TEST_CASE(hashhash_multichar_literal);
TEST_CASE(hashhash_char_escaped);
TEST_CASE(hashhash_string_nothing);
TEST_CASE(hashhash_string_char);
TEST_CASE(hashhash_string_name);
TEST_CASE(hashhashhash_int_literal);
TEST_CASE(hashhash_int_literal);
TEST_CASE(hashhash_invalid_1);
TEST_CASE(hashhash_invalid_2);
TEST_CASE(hashhash_invalid_3);

// c++17 __has_include
TEST_CASE(has_include_1);
Expand Down

0 comments on commit e7d85ea

Please sign in to comment.