C style character translation in ca65

This commit is contained in:
Gorilla Sapiens
2025-05-06 03:02:18 +00:00
parent 75d43ef88e
commit 1b85ab6985
2 changed files with 104 additions and 44 deletions

View File

@@ -90,22 +90,7 @@ read_loop:
bne :+ bne :+
inc ptr4+1 inc ptr4+1
; The next code line: : cmp #'\n' ; #'\n' should get translated properly
;
; .byte $c9, "\n"
;
; corresponds to a CMP #imm with the target-specific newline value as its operand.
; This works because (with the 'string_escapes' feature enabled), the "\n" string
; assembles to the target-specific value for the newline character.
;
; It would be better if we could just write:
;
; cmp #'\n'
;
; Unfortunately, ca65 doesn't currently handle escape characters in character
; constants. In the longer term, fixing that would be the preferred solution.
: .byte $c9, "\n" ; cmp #'\n'
beq done beq done
bne read_loop bne read_loop

View File

@@ -46,6 +46,7 @@
#include "check.h" #include "check.h"
#include "filestat.h" #include "filestat.h"
#include "fname.h" #include "fname.h"
#include "tgttrans.h"
#include "xmalloc.h" #include "xmalloc.h"
/* ca65 */ /* ca65 */
@@ -788,14 +789,33 @@ static void ReadIdent (void)
static void ReadStringConst (int StringTerm) static void ReadStringConst (int StringTerm)
/* Read a string constant into SVal. */ /* Read a string constant into SVal. */
{ {
int NeedNext;
/* Skip the leading string terminator */ /* Skip the leading string terminator */
NextChar (); NextChar ();
/* Read the string */ /* Read the string */
while (1) { while (1) {
int Cooked = 1;
NeedNext = 1;
if (StringTerm == 0 && SB_GetLen(&CurTok.SVal) == 1) {
if (C == '\'') {
break;
}
else if (MissingCharTerm) {
NeedNext = 0;
break;
}
else {
Error ("Illegal character constant");
}
}
if (C == StringTerm) { if (C == StringTerm) {
break; break;
} }
if (C == '\n' || C == EOF) { if (C == '\n' || C == EOF) {
Error ("Newline in string constant"); Error ("Newline in string constant");
break; break;
@@ -808,20 +828,74 @@ static void ReadStringConst (int StringTerm)
case EOF: case EOF:
Error ("Unterminated escape sequence in string constant"); Error ("Unterminated escape sequence in string constant");
break; break;
case '\\': case '?':
case '\'': C = '\?';
case '"':
break; break;
case 't': case 'a':
C = '\x09'; C = '\a';
break;
case 'b':
C = '\b';
break;
case 'e':
C = '\x1B'; /* see comments in cc65/scanner.c */
break;
case 'f':
C = '\f';
break; break;
case 'r': case 'r':
C = '\x0D'; C = '\r';
break; break;
case 'n': case 'n':
C = '\x0A'; C = '\n';
break; break;
case 't':
C = '\t';
break;
case 'v':
C = '\v';
break;
case '\\':
C = '\\'; /* unnecessary but more readable */
break;
case '\'':
C = '\''; /* unnecessary but more readable */
if (StringTerm == 0) {
/* Special case used by character constants
** when LooseStringTerm is not set. This will
** cause '\' to be a valid character constant.
*/
C = '\\';
NeedNext = 0;
}
break;
case '\"':
C = '\"'; /* unnecessary but more readable */
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
{ /* brace needed for scoping */
int Count = 1;
int Final = DigitVal(C);
Cooked = 0;
NextChar ();
while (IsODigit (C) && Count++ < 3) {
Final = (Final << 3) | DigitVal(C);
NextChar();
}
if (C >= 256)
Error ("Octal character constant out of range");
}
break;
case 'X':
case 'x': case 'x':
Cooked = 0;
NextChar (); NextChar ();
if (IsXDigit (C)) { if (IsXDigit (C)) {
char high_nibble = DigitVal (C) << 4; char high_nibble = DigitVal (C) << 4;
@@ -839,14 +913,19 @@ static void ReadStringConst (int StringTerm)
} }
/* Append the char to the string */ /* Append the char to the string */
SB_AppendChar (&CurTok.SVal, C); SB_AppendCharCooked (&CurTok.SVal, C, Cooked);
if (NeedNext) {
/* Skip the character */ /* Skip the character */
NextChar (); NextChar ();
NeedNext = 1;
}
} }
if (NeedNext) {
/* Skip the trailing terminator */ /* Skip the trailing terminator */
NextChar (); NextChar ();
}
/* Terminate the string */ /* Terminate the string */
SB_Terminate (&CurTok.SVal); SB_Terminate (&CurTok.SVal);
@@ -1465,12 +1544,13 @@ CharAgain:
return; return;
case '\'': case '\'':
if (LooseStringTerm) {
/* Hack: If we allow ' as terminating character for strings, read /* Hack: If we allow ' as terminating character for strings, read
** the following stuff as a string, and check for a one character ** the following stuff as a string, and check for a one character
** string later. ** string later.
*/ */
if (LooseStringTerm) {
ReadStringConst ('\''); ReadStringConst ('\'');
TgtTranslateStrBuf(&CurTok.SVal);
if (SB_GetLen (&CurTok.SVal) == 1) { if (SB_GetLen (&CurTok.SVal) == 1) {
CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0); CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
CurTok.Tok = TOK_CHARCON; CurTok.Tok = TOK_CHARCON;
@@ -1478,22 +1558,17 @@ CharAgain:
CurTok.Tok = TOK_STRCON; CurTok.Tok = TOK_STRCON;
} }
} else { } else {
/* Always a character constant */ /* Always a character constant
NextChar (); ** Hack: Pass 0 to ReadStringConst for special handling.
if (C == EOF || IsControl (C)) { */
ReadStringConst(0);
TgtTranslateStrBuf(&CurTok.SVal);
if (SB_GetLen(&CurTok.SVal) != 1) {
Error ("Illegal character constant"); Error ("Illegal character constant");
goto CharAgain; goto CharAgain;
} }
CurTok.IVal = C; CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
CurTok.Tok = TOK_CHARCON; CurTok.Tok = TOK_CHARCON;
NextChar ();
if (C != '\'') {
if (!MissingCharTerm) {
Error ("Illegal character constant");
}
} else {
NextChar ();
}
} }
return; return;