C style character translation in ca65

This commit is contained in:
Gorilla Sapiens
2025-05-06 03:02:18 +00:00
parent 75d43ef88e
commit 1b85ab6985
2 changed files with 104 additions and 44 deletions

View File

@@ -90,22 +90,7 @@ read_loop:
bne :+
inc ptr4+1
; The next code line:
;
; .byte $c9, "\n"
;
; corresponds to a CMP #imm with the target-specific newline value as its operand.
; This works because (with the 'string_escapes' feature enabled), the "\n" string
; assembles to the target-specific value for the newline character.
;
; It would be better if we could just write:
;
; cmp #'\n'
;
; Unfortunately, ca65 doesn't currently handle escape characters in character
; constants. In the longer term, fixing that would be the preferred solution.
: .byte $c9, "\n" ; cmp #'\n'
: cmp #'\n' ; #'\n' should get translated properly
beq done
bne read_loop

View File

@@ -46,6 +46,7 @@
#include "check.h"
#include "filestat.h"
#include "fname.h"
#include "tgttrans.h"
#include "xmalloc.h"
/* ca65 */
@@ -788,14 +789,33 @@ static void ReadIdent (void)
static void ReadStringConst (int StringTerm)
/* Read a string constant into SVal. */
{
int NeedNext;
/* Skip the leading string terminator */
NextChar ();
/* Read the string */
while (1) {
int Cooked = 1;
NeedNext = 1;
if (StringTerm == 0 && SB_GetLen(&CurTok.SVal) == 1) {
if (C == '\'') {
break;
}
else if (MissingCharTerm) {
NeedNext = 0;
break;
}
else {
Error ("Illegal character constant");
}
}
if (C == StringTerm) {
break;
}
if (C == '\n' || C == EOF) {
Error ("Newline in string constant");
break;
@@ -808,20 +828,74 @@ static void ReadStringConst (int StringTerm)
case EOF:
Error ("Unterminated escape sequence in string constant");
break;
case '\\':
case '\'':
case '"':
case '?':
C = '\?';
break;
case 't':
C = '\x09';
case 'a':
C = '\a';
break;
case 'b':
C = '\b';
break;
case 'e':
C = '\x1B'; /* see comments in cc65/scanner.c */
break;
case 'f':
C = '\f';
break;
case 'r':
C = '\x0D';
C = '\r';
break;
case 'n':
C = '\x0A';
C = '\n';
break;
case 't':
C = '\t';
break;
case 'v':
C = '\v';
break;
case '\\':
C = '\\'; /* unnecessary but more readable */
break;
case '\'':
C = '\''; /* unnecessary but more readable */
if (StringTerm == 0) {
/* special case used by character constants
** when LooseStringTerm not set. this will
** cause '\' to be a valid character constant
*/
C = '\\';
NeedNext = 0;
}
break;
case '\"':
C = '\"'; /* unnecessary but more readable */
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
{ /* brace needed for scoping */
int Count = 1;
int Final = DigitVal(C);
Cooked = 0;
NextChar ();
while (IsODigit (C) && Count++ < 3) {
Final = (Final << 3) | DigitVal(C);
NextChar();
}
if (C >= 256)
Error ("Octal character constant out of range");
}
break;
case 'X':
case 'x':
Cooked = 0;
NextChar ();
if (IsXDigit (C)) {
char high_nibble = DigitVal (C) << 4;
@@ -839,14 +913,19 @@ static void ReadStringConst (int StringTerm)
}
/* Append the char to the string */
SB_AppendChar (&CurTok.SVal, C);
SB_AppendCharCooked (&CurTok.SVal, C, Cooked);
/* Skip the character */
NextChar ();
if (NeedNext) {
/* Skip the character */
NextChar ();
NeedNext = 1;
}
}
/* Skip the trailing terminator */
NextChar ();
if (NeedNext) {
/* Skip the trailing terminator */
NextChar ();
}
/* Terminate the string */
SB_Terminate (&CurTok.SVal);
@@ -1465,12 +1544,13 @@ CharAgain:
return;
case '\'':
/* Hack: If we allow ' as terminating character for strings, read
** the following stuff as a string, and check for a one character
** string later.
*/
if (LooseStringTerm) {
/* Hack: If we allow ' as terminating character for strings, read
** the following stuff as a string, and check for a one character
** string later.
*/
ReadStringConst ('\'');
TgtTranslateStrBuf(&CurTok.SVal);
if (SB_GetLen (&CurTok.SVal) == 1) {
CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
CurTok.Tok = TOK_CHARCON;
@@ -1478,22 +1558,17 @@ CharAgain:
CurTok.Tok = TOK_STRCON;
}
} else {
/* Always a character constant */
NextChar ();
if (C == EOF || IsControl (C)) {
/* Always a character constant
** Hack: Pass 0 to ReadStringConst for special handling.
*/
ReadStringConst(0);
TgtTranslateStrBuf(&CurTok.SVal);
if (SB_GetLen(&CurTok.SVal) != 1) {
Error ("Illegal character constant");
goto CharAgain;
}
CurTok.IVal = C;
CurTok.IVal = SB_AtUnchecked (&CurTok.SVal, 0);
CurTok.Tok = TOK_CHARCON;
NextChar ();
if (C != '\'') {
if (!MissingCharTerm) {
Error ("Illegal character constant");
}
} else {
NextChar ();
}
}
return;