From 7a139a800e5ee81a3c2ec3c7b0e7c2ced4be10f6 Mon Sep 17 00:00:00 2001 From: acqn Date: Sat, 6 Aug 2022 18:45:49 +0800 Subject: [PATCH 1/5] Added # and ## as punctuators recognized by the tokenizer. Reorganized the token enum. --- src/cc65/scanner.c | 9 +++ src/cc65/scanner.h | 143 ++++++++++++++++++++++++++------------------- 2 files changed, 91 insertions(+), 61 deletions(-) diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index c7e9bb6c2..9f0498c26 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -1012,6 +1012,15 @@ void NextToken (void) SetTok (TOK_COMP); break; + case '#': + NextChar (); + if (CurC == '#') { + SetTok (TOK_DOUBLE_HASH); + } else { + NextTok.Tok = TOK_HASH; + } + break; + default: UnknownChar (CurC); diff --git a/src/cc65/scanner.h b/src/cc65/scanner.h index cd34cbbe8..2ed59771c 100644 --- a/src/cc65/scanner.h +++ b/src/cc65/scanner.h @@ -79,6 +79,10 @@ typedef enum token_t { TOK_FASTCALL, TOK_CDECL, + /* Address sizes */ + TOK_FAR, + TOK_NEAR, + /* Tokens denoting types */ TOK_FIRST_TYPE, TOK_ENUM = TOK_FIRST_TYPE, @@ -95,94 +99,100 @@ typedef enum token_t { TOK_VOID, TOK_LAST_TYPE = TOK_VOID, - /* Control statements */ + /* Selection statements */ + TOK_IF, + TOK_ELSE, + TOK_SWITCH, + + /* Iteration statements */ + TOK_WHILE, TOK_DO, TOK_FOR, - TOK_GOTO, - TOK_IF, - TOK_RETURN, - TOK_SWITCH, - TOK_WHILE, - TOK_ASM, + /* Jump statements */ + TOK_GOTO, + TOK_CONTINUE, + TOK_BREAK, + TOK_RETURN, + + /* Labels */ TOK_CASE, TOK_DEFAULT, - TOK_BREAK, - TOK_CONTINUE, - TOK_ELSE, - TOK_ELLIPSIS, + + /* Misc. */ + TOK_ATTRIBUTE, + TOK_PRAGMA, + TOK_STATIC_ASSERT, + TOK_ASM, TOK_SIZEOF, - TOK_IDENT, - TOK_SEMI, - - /* Primary operators */ - TOK_LBRACK, + /* Punctuators */ + TOK_FIRST_PUNC, + TOK_LBRACK = TOK_FIRST_PUNC, + TOK_RBRACK, TOK_LPAREN, + TOK_RPAREN, + TOK_LCURLY, + TOK_RCURLY, TOK_DOT, TOK_PTR_REF, - - TOK_LCURLY, - TOK_RBRACK, - TOK_COMP, TOK_INC, - TOK_PLUS_ASSIGN, - TOK_PLUS, - TOK_COMMA, TOK_DEC, - TOK_MINUS_ASSIGN, - TOK_RCURLY, - TOK_MINUS, - TOK_MUL_ASSIGN, + TOK_ADDR, + TOK_AND = TOK_ADDR, /* Alias */ TOK_STAR, TOK_MUL = TOK_STAR, /* Alias */ - TOK_DIV_ASSIGN, - TOK_DIV, - TOK_BOOL_AND, - TOK_AND_ASSIGN, - TOK_AND, - TOK_NE, + TOK_PLUS, + TOK_MINUS, + TOK_COMP, TOK_BOOL_NOT, - TOK_BOOL_OR, - TOK_OR_ASSIGN, - TOK_OR, - TOK_EQ, - TOK_ASSIGN, - - /* Inequalities */ - TOK_LE, - TOK_LT, - TOK_GE, - TOK_GT, - - TOK_SHL_ASSIGN, - TOK_SHL, - TOK_SHR_ASSIGN, - TOK_SHR, - TOK_XOR_ASSIGN, - TOK_XOR, - TOK_MOD_ASSIGN, + TOK_DIV, TOK_MOD, + TOK_SHL, + TOK_SHR, + TOK_LT, + TOK_GT, + TOK_LE, + TOK_GE, + TOK_EQ, + TOK_NE, + TOK_XOR, + TOK_OR, + TOK_BOOL_AND, + TOK_BOOL_OR, TOK_QUEST, TOK_COLON, - TOK_RPAREN, + TOK_SEMI, + TOK_ELLIPSIS, + TOK_ASSIGN, + TOK_MUL_ASSIGN, + TOK_DIV_ASSIGN, + TOK_MOD_ASSIGN, + TOK_PLUS_ASSIGN, + TOK_MINUS_ASSIGN, + TOK_SHL_ASSIGN, + TOK_SHR_ASSIGN, + TOK_AND_ASSIGN, + TOK_XOR_ASSIGN, + TOK_OR_ASSIGN, + TOK_COMMA, + TOK_HASH, + TOK_HASH_HASH, + TOK_DOUBLE_HASH = TOK_HASH_HASH, /* Alias */ + TOK_LAST_PUNC = TOK_DOUBLE_HASH, + + /* Primary expressions */ TOK_SCONST, TOK_ICONST, TOK_CCONST, TOK_FCONST, TOK_WCSCONST, - - TOK_ATTRIBUTE, - TOK_STATIC_ASSERT, - TOK_FAR, - TOK_NEAR, + TOK_IDENT, TOK_A, TOK_X, TOK_Y, TOK_AX, - TOK_EAX, - - TOK_PRAGMA + TOK_EAX } token_t; @@ -220,6 +230,17 @@ extern int NextLineDisabled; /* Disabled to read next line */ +#if defined(HAVE_INLINE) +INLINE int TokIsPunc (const Token* T) +/* Return true if the token is a punctuator */ +{ + return (T->Tok >= TOK_FIRST_PUNC && T->Tok <= TOK_LAST_PUNC); +} +#else +# define TokIsPunc(T) \ + ((T)->Tok >= TOK_FIRST_PUNC && (T)->Tok <= TOK_LAST_PUNC) +#endif + #if defined(HAVE_INLINE) INLINE int TokIsStorageClass (const Token* T) /* Return true if the token is a storage class specifier */ From 4bb4f033ea9774ae04c8fdaf42c34f8b572859db Mon Sep 17 00:00:00 2001 From: acqn Date: Sun, 7 Aug 2022 16:14:45 +0800 Subject: [PATCH 2/5] Fixed the bug that C keywords were not simply recognized as identifiers in preprocessing. --- src/cc65/ppexpr.c | 10 ++++++++-- src/cc65/scanner.c | 19 ++++++++++++------- src/cc65/scanner.h | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/cc65/ppexpr.c b/src/cc65/ppexpr.c index af2c1de3b..788fb27d5 100644 --- a/src/cc65/ppexpr.c +++ b/src/cc65/ppexpr.c @@ -215,6 +215,12 @@ static void PPhie11 (PPExpr* Expr) } } + + /* Check for excessive expressions */ + if (!TokIsPunc (&CurTok)) { + PPError ("Missing binary operator"); + PPErrorSkipLine (); + } } @@ -854,7 +860,7 @@ void ParsePPExprInLine (PPExpr* Expr) /* Initialize the parser status */ PPEvaluationFailed = 0; PPEvaluationEnabled = 1; - NextLineDisabled = 1; + PPParserRunning = 1; /* Parse */ PPExprInit (Expr); @@ -867,5 +873,5 @@ void ParsePPExprInLine (PPExpr* Expr) } /* Restore parser status */ - NextLineDisabled = 0; + PPParserRunning = 0; } diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index 9f0498c26..af3ddaab5 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -69,7 +69,7 @@ Token CurTok; /* The current token */ Token NextTok; /* The next token */ -int NextLineDisabled; /* Disabled to read next line */ +int PPParserRunning; /* Is tokenizer used by the preprocessor */ @@ -189,8 +189,10 @@ static int SkipWhite (void) { while (1) { while (CurC == '\0') { - /* If reading next line fails or is forbidden, bail out */ - if (NextLineDisabled || PreprocessNextLine () == 0) { + /* If reading next line fails or is disabled with directives, bail + ** out. + */ + if (PPParserRunning || PreprocessNextLine () == 0) { return 0; } } @@ -759,11 +761,14 @@ void NextToken (void) /* Check for keywords and identifiers */ if (IsSym (token)) { - /* Check for a keyword */ - if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) { - /* Reserved word found */ - return; + if (!PPParserRunning) { + /* Check for a keyword */ + if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) { + /* Reserved word found */ + return; + } } + /* No reserved word, check for special symbols */ if (token[0] == '_' && token[1] == '_') { /* Special symbols */ diff --git a/src/cc65/scanner.h b/src/cc65/scanner.h index 2ed59771c..194f27b7c 100644 --- a/src/cc65/scanner.h +++ b/src/cc65/scanner.h @@ -220,7 +220,7 @@ struct Token { extern Token CurTok; /* The current token */ extern Token NextTok; /* The next token */ -extern int NextLineDisabled; /* Disabled to read next line */ +extern int PPParserRunning; /* Is tokenizer used by the preprocessor */ From 5cca1e8b1d8b6ac6adfd90de93326ee179863d5b Mon Sep 17 00:00:00 2001 From: acqn Date: Thu, 11 Aug 2022 10:55:16 +0800 Subject: [PATCH 3/5] Fixed parsing numeric constants. --- src/cc65/scanner.c | 178 +++++++++++++++++++++++++++++---------------- src/cc65/scanner.h | 8 ++ 2 files changed, 123 insertions(+), 63 deletions(-) diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index af3ddaab5..11d356281 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -248,6 +248,45 @@ int IsSym (char* S) +int IsPPNumber (int Cur, int Next) +/* Return 1 if the two successive characters indicate a pp-number, otherwise +** return 0. +*/ +{ + return Cur != '.' ? IsDigit (Cur) : IsDigit (Next); +} + + + +void CopyPPNumber (StrBuf* Target) +/* Copy a pp-number from the input to Target */ +{ + int Std; + + if (!IsPPNumber (CurC, NextC)) { + return; + } + + /* P-exp is only valid in C99 and later */ + Std = IS_Get (&Standard); + while (IsIdent (CurC) || IsDigit (CurC) || CurC == '.') { + SB_AppendChar (Target, CurC); + if (NextC == '+' || NextC == '-') { + if (CurC == 'e' || CurC == 'E' || + (Std >= STD_C99 && (CurC == 'p' || CurC == 'P'))) { + SB_AppendChar (Target, NextC); + NextChar (); + } else { + NextChar (); + break; + } + } + NextChar (); + } +} + + + static void UnknownChar (char C) /* Error message for unknown character */ { @@ -460,76 +499,77 @@ static void StringConst (void) static void NumericConst (void) /* Parse a numeric constant */ { - unsigned Base; /* Temporary number base */ - unsigned Prefix; /* Base according to prefix */ - StrBuf S = STATIC_STRBUF_INITIALIZER; + unsigned Base; /* Temporary number base according to prefix */ + unsigned Index; + StrBuf Src = AUTO_STRBUF_INITIALIZER; int IsFloat; char C; unsigned DigitVal; unsigned long IVal; /* Value */ + /* Get the pp-number first, then parse on it */ + CopyPPNumber (&Src); + SB_Terminate (&Src); + SB_Reset (&Src); + /* Check for a leading hex, octal or binary prefix and determine the ** possible integer types. */ - if (CurC == '0') { + if (SB_Peek (&Src) == '0') { /* Gobble 0 and examine next char */ - NextChar (); - if (toupper (CurC) == 'X') { - Base = Prefix = 16; - NextChar (); /* gobble "x" */ - } else if (toupper (CurC) == 'B' && IS_Get (&Standard) >= STD_CC65) { - Base = Prefix = 2; - NextChar (); /* gobble 'b' */ + SB_Skip (&Src); + if (toupper (SB_Peek (&Src)) == 'X' && + IsXDigit (SB_LookAt (&Src, SB_GetIndex (&Src) + 1))) { + Base = 16; + SB_Skip (&Src); /* gobble "x" */ + } else if (toupper (SB_Peek (&Src)) == 'B' && + IS_Get (&Standard) >= STD_CC65 && + IsDigit (SB_LookAt (&Src, SB_GetIndex (&Src) + 1))) { + Base = 2; + SB_Skip (&Src); /* gobble 'b' */ } else { Base = 10; /* Assume 10 for now - see below */ - Prefix = 8; /* Actual prefix says octal */ } } else { - Base = Prefix = 10; + Base = 10; } - /* Because floating point numbers don't have octal prefixes (a number - ** with a leading zero is decimal), we first have to read the number - ** before converting it, so we can determine if it's a float or an - ** integer. + /* Because floating point numbers don't have octal prefixes (a number with + ** a leading zero is decimal), we first have to read the number before + ** converting it, so we can determine if it's a float or an integer. */ - while (IsXDigit (CurC) && HexVal (CurC) < Base) { - SB_AppendChar (&S, CurC); - NextChar (); + Index = SB_GetIndex (&Src); + while ((C = SB_Peek (&Src)) != '\0' && (Base <= 10 ? IsDigit (C) : IsXDigit (C))) { + SB_Skip (&Src); } - SB_Terminate (&S); /* The following character tells us if we have an integer or floating ** point constant. Note: Hexadecimal floating point constants aren't ** supported in C89. */ - IsFloat = (CurC == '.' || - (Base == 10 && toupper (CurC) == 'E') || - (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99)); + IsFloat = (C == '.' || + (Base == 10 && toupper (C) == 'E') || + (Base == 16 && toupper (C) == 'P' && IS_Get (&Standard) >= STD_C99)); - /* If we don't have a floating point type, an octal prefix results in an - ** octal base. - */ - if (!IsFloat && Prefix == 8) { + /* An octal prefix for an integer type results in an octal base */ + if (!IsFloat && Base == 10 && SB_LookAt (&Src, 0) == '0') { Base = 8; } - /* Since we do now know the correct base, convert the remembered input - ** into a number. - */ - SB_Reset (&S); + /* Since we now know the correct base, convert the input into a number */ + SB_SetIndex (&Src, Index); IVal = 0; - while ((C = SB_Get (&S)) != '\0') { + while ((C = SB_Peek (&Src)) != '\0' && (Base <= 10 ? IsDigit (C) : IsXDigit (C))) { DigitVal = HexVal (C); if (DigitVal >= Base) { - Error ("Numeric constant contains digits beyond the radix"); + Error ("Invalid digit \"%c\" beyond radix %u constant", C, Base); + SB_Clear (&Src); + break; } IVal = (IVal * Base) + DigitVal; + SB_Skip (&Src); } - /* We don't need the string buffer any longer */ - SB_Done (&S); - /* Distinguish between integer and floating point constants */ if (!IsFloat) { @@ -540,27 +580,32 @@ static void NumericConst (void) ** possible to convert the data to unsigned long even if the IT_ULONG ** flag were not set, but we are not doing that. */ - if (toupper (CurC) == 'U') { + if (toupper (SB_Peek (&Src)) == 'U') { /* Unsigned type */ - NextChar (); - if (toupper (CurC) != 'L') { + SB_Skip (&Src); + if (toupper (SB_Peek (&Src)) != 'L') { Types = IT_UINT | IT_ULONG; } else { - NextChar (); + SB_Skip (&Src); Types = IT_ULONG; } - } else if (toupper (CurC) == 'L') { + } else if (toupper (SB_Peek (&Src)) == 'L') { /* Long type */ - NextChar (); - if (toupper (CurC) != 'U') { + SB_Skip (&Src); + if (toupper (SB_Peek (&Src)) != 'U') { Types = IT_LONG | IT_ULONG; WarnTypes = IT_ULONG; } else { - NextChar (); + SB_Skip (&Src); Types = IT_ULONG; } } else { - if (Prefix == 10) { + if (SB_Peek (&Src) != '\0') { + Error ("Invalid suffix \"%s\" on integer constant", + SB_GetConstBuf (&Src) + SB_GetIndex (&Src)); + } + + if (Base == 10) { /* Decimal constants are of any type but uint */ Types = IT_INT | IT_LONG | IT_ULONG; WarnTypes = IT_LONG | IT_ULONG; @@ -624,16 +669,16 @@ static void NumericConst (void) Double FVal = FP_D_FromInt (IVal); /* Convert to double */ /* Check for a fractional part and read it */ - if (CurC == '.') { + if (SB_Peek (&Src) == '.') { Double Scale; /* Skip the dot */ - NextChar (); + SB_Skip (&Src); /* Read fractional digits */ Scale = FP_D_Make (1.0); - while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) { + while (IsXDigit (SB_Peek (&Src)) && (DigitVal = HexVal (SB_Peek (&Src))) < Base) { /* Get the value of this digit */ Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale); /* Add it to the float value */ @@ -641,25 +686,25 @@ static void NumericConst (void) /* Scale base */ Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal)); /* Skip the digit */ - NextChar (); + SB_Skip (&Src); } } /* Check for an exponent and read it */ - if ((Base == 16 && toupper (CurC) == 'F') || - (Base == 10 && toupper (CurC) == 'E')) { + if ((Base == 16 && toupper (SB_Peek (&Src)) == 'P') || + (Base == 10 && toupper (SB_Peek (&Src)) == 'E')) { unsigned Digits; unsigned Exp; /* Skip the exponent notifier */ - NextChar (); + SB_Skip (&Src); /* Read an optional sign */ - if (CurC == '-') { - NextChar (); - } else if (CurC == '+') { - NextChar (); + if (SB_Peek (&Src) == '-') { + SB_Skip (&Src); + } else if (SB_Peek (&Src) == '+') { + SB_Skip (&Src); } /* Read exponent digits. Since we support only 32 bit floats @@ -670,11 +715,11 @@ static void NumericConst (void) */ Digits = 0; Exp = 0; - while (IsDigit (CurC)) { + while (IsDigit (SB_Peek (&Src))) { if (++Digits <= 3) { - Exp = Exp * 10 + HexVal (CurC); + Exp = Exp * 10 + HexVal (SB_Peek (&Src)); } - NextChar (); + SB_Skip (&Src); } /* Check for errors: We must have exponent digits, and not more @@ -693,10 +738,14 @@ static void NumericConst (void) } /* Check for a suffix and determine the type of the constant */ - if (toupper (CurC) == 'F') { - NextChar (); + if (toupper (SB_Peek (&Src)) == 'F') { + SB_Skip (&Src); NextTok.Type = type_float; } else { + if (SB_Peek (&Src) != '\0') { + Error ("Invalid suffix \"%s\" on floating constant", + SB_GetConstBuf (&Src) + SB_GetIndex (&Src)); + } NextTok.Type = type_double; } @@ -705,6 +754,9 @@ static void NumericConst (void) NextTok.Tok = TOK_FCONST; } + + /* We don't need the string buffer any longer */ + SB_Done (&Src); } @@ -746,7 +798,7 @@ void NextToken (void) } /* Determine the next token from the lookahead */ - if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) { + if (IsPPNumber (CurC, NextC)) { /* A number */ NumericConst (); return; diff --git a/src/cc65/scanner.h b/src/cc65/scanner.h index 194f27b7c..c47d660d5 100644 --- a/src/cc65/scanner.h +++ b/src/cc65/scanner.h @@ -284,6 +284,14 @@ void SymName (char* S); int IsSym (char* S); /* If a symbol follows, read it and return 1, otherwise return 0 */ +int IsPPNumber (int Cur, int Next); +/* Return 1 if the two successive characters indicate a pp-number, otherwise +** return 0. +*/ + +void CopyPPNumber (StrBuf* Target); +/* Copy a pp-number from the input to Target */ + void NextToken (void); /* Get next token from input stream */ From 624e5025b03aa9f7a8949a4fe3fca35e66952cca Mon Sep 17 00:00:00 2001 From: acqn Date: Wed, 17 Aug 2022 22:28:00 +0800 Subject: [PATCH 4/5] Fixed parsing wide char constants. --- src/cc65/expr.c | 1 + src/cc65/ppexpr.c | 1 + src/cc65/scanner.c | 25 ++++++++++++++++++------- src/cc65/scanner.h | 3 ++- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/cc65/expr.c b/src/cc65/expr.c index dbcddd4f9..f3003507a 100644 --- a/src/cc65/expr.c +++ b/src/cc65/expr.c @@ -1234,6 +1234,7 @@ static void Primary (ExprDesc* E) case TOK_ICONST: case TOK_CCONST: + case TOK_WCCONST: /* Character and integer constants */ E->IVal = CurTok.IVal; E->Flags = E_LOC_NONE | E_RTYPE_RVAL; diff --git a/src/cc65/ppexpr.c b/src/cc65/ppexpr.c index 788fb27d5..dd129ced9 100644 --- a/src/cc65/ppexpr.c +++ b/src/cc65/ppexpr.c @@ -114,6 +114,7 @@ static void PPhiePrimary (PPExpr* Expr) switch (CurTok.Tok) { case TOK_ICONST: case TOK_CCONST: + case TOK_WCCONST: /* Character and integer constants */ Expr->IVal = CurTok.IVal; /* According to the C standard, all signed types act as intmax_t diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index 11d356281..ebdcdb33e 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -412,6 +412,15 @@ static void CharConst (void) { int C; + if (CurC == 'L') { + /* Wide character constant */ + NextTok.Tok = TOK_WCCONST; + NextChar (); + } else { + /* Narrow character constant */ + NextTok.Tok = TOK_CCONST; + } + /* Skip the quote */ NextChar (); @@ -426,9 +435,6 @@ static void CharConst (void) NextChar (); } - /* Setup values and attributes */ - NextTok.Tok = TOK_CCONST; - /* Translate into target charset */ NextTok.IVal = SignExtendChar (TgtTranslateChar (C)); @@ -804,10 +810,15 @@ void NextToken (void) return; } - /* Check for wide character literals */ - if (CurC == 'L' && NextC == '\"') { - StringConst (); - return; + /* Check for wide character constants and literals */ + if (CurC == 'L') { + if (NextC == '\"') { + StringConst (); + return; + } else if (NextC == '\'') { + CharConst (); + return; + } } /* Check for keywords and identifiers */ diff --git a/src/cc65/scanner.h b/src/cc65/scanner.h index c47d660d5..7a67b10ed 100644 --- a/src/cc65/scanner.h +++ b/src/cc65/scanner.h @@ -182,10 +182,11 @@ typedef enum token_t { TOK_LAST_PUNC = TOK_DOUBLE_HASH, /* Primary expressions */ - TOK_SCONST, TOK_ICONST, TOK_CCONST, + TOK_WCCONST, TOK_FCONST, + TOK_SCONST, TOK_WCSCONST, TOK_IDENT, TOK_A, From 47ee543fe00783ddaae2b1aabe9606bfe4b33832 Mon Sep 17 00:00:00 2001 From: acqn Date: Sun, 21 Aug 2022 00:11:19 +0800 Subject: [PATCH 5/5] Added testcase for some part of PR #1833. --- test/val/pr1833.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 test/val/pr1833.c diff --git a/test/val/pr1833.c b/test/val/pr1833.c new file mode 100644 index 000000000..bdc820811 --- /dev/null +++ b/test/val/pr1833.c @@ -0,0 +1,13 @@ +/* Test for PR #1833 fixes */ + +#define char 1 + +#if char && !int && L'A' - L'B' == 'A' - 'B' && L'A' == 'A' +#else +#error +#endif + +int main(void) +{ + return 0; +}