conform to 6.4.4.4 for hex and octal escapes

fixes problem noted in #2610
This commit is contained in:
Gorilla Sapiens
2025-02-25 02:09:30 -08:00
parent 2085646e57
commit 6a17aedd81
9 changed files with 228 additions and 40 deletions

View File

@@ -92,7 +92,7 @@ static Collection LPStack = STATIC_COLLECTION_INITIALIZER;
static Literal* NewLiteral (const void* Buf, unsigned Len)
static Literal* NewLiteral (const StrBuf* S)
/* Create a new literal and return it */
{
/* Allocate memory */
@@ -103,7 +103,7 @@ static Literal* NewLiteral (const void* Buf, unsigned Len)
L->RefCount = 0;
L->Output = 0;
SB_Init (&L->Data);
SB_AppendBuf (&L->Data, Buf, Len);
SB_Append (&L->Data, S);
/* Return the new literal */
return L;
@@ -162,7 +162,7 @@ void ReleaseLiteral (Literal* L)
void TranslateLiteral (Literal* L)
/* Translate a literal into the target charset. The translation works on the
** literal's string buffer as a whole, so that characters flagged as raw in
** the buffer's Cooked array are left untouched.
*/
{
    /* Translate exactly once. A preceding plain buffer translation would map
    ** every byte (raw ones included) and the characters would then be mapped
    ** a second time.
    */
    TgtTranslateStrBuf (&L->Data);
}
@@ -468,18 +468,18 @@ void OutputGlobalLiteralPool (void)
Literal* AddLiteral (const char* S)
/* Add a literal string to the literal pool. Return the literal. */
{
return AddLiteralBuf (S, strlen (S) + 1);
StrBuf SB;
SB_InitFromString(&SB, S);
return AddLiteralStr(&SB);
}
Literal* AddLiteralBuf (const void* Buf, unsigned Len)
/* Add a buffer containing a literal string to the literal pool. Return the
** literal.
*/
Literal* AddLiteralStr (const StrBuf* S)
/* Add a literal string to the literal pool. Return the literal. */
{
/* Create a new literal */
Literal* L = NewLiteral (Buf, Len);
Literal* L = NewLiteral (S);
/* Add the literal to the correct pool */
if (IS_Get (&WritableStrings)) {
@@ -491,11 +491,3 @@ Literal* AddLiteralBuf (const void* Buf, unsigned Len)
/* Return the new literal */
return L;
}
Literal* AddLiteralStr (const StrBuf* S)
/* Add a literal string to the literal pool. Return the literal. */
{
return AddLiteralBuf (SB_GetConstBuf (S), SB_GetLen (S));
}

View File

@@ -125,11 +125,6 @@ void OutputGlobalLiteralPool (void);
Literal* AddLiteral (const char* S);
/* Add a literal string to the literal pool. Return the literal. */
Literal* AddLiteralBuf (const void* Buf, unsigned Len);
/* Add a buffer containing a literal string to the literal pool. Return the
** literal.
*/
Literal* AddLiteralStr (const StrBuf* S);
/* Add a literal string to the literal pool. Return the literal. */

View File

@@ -163,6 +163,12 @@ static const struct Keyword {
typedef uint32_t scan_t;
/* ParseChar return values: the parsed character code together with a flag
** telling whether the character is still subject to charset translation.
*/
typedef struct {
int Val; /* Character code as returned by SignExtendChar */
int Cooked; /* Zero for hex and octal escapes (raw, must not be translated).
            ** For all other characters ParseChar stores Val here, so a
            ** character with the value zero is flagged as raw, too.
            */
} parsedchar_t;
/*****************************************************************************/
/* code */
/*****************************************************************************/
@@ -326,13 +332,16 @@ static void SetTok (int tok)
static int ParseChar (void)
static parsedchar_t ParseChar (void)
/* Parse a character token. Converts escape chars into character codes. */
{
parsedchar_t Result;
int C;
int HadError;
int Count;
Result.Cooked = 1;
/* Check for escape chars */
if (CurC == '\\') {
NextChar ();
@@ -373,6 +382,7 @@ static int ParseChar (void)
case 'x':
case 'X':
/* Hex character constant */
Result.Cooked = 0;
if (!IsXDigit (NextC)) {
Error ("\\x used with no following hex digits");
C = ' ';
@@ -401,6 +411,7 @@ static int ParseChar (void)
case '6':
case '7':
/* Octal constant */
Result.Cooked = 0;
Count = 1;
C = HexVal (CurC);
while (IsODigit (NextC) && Count++ < 3) {
@@ -423,7 +434,12 @@ static int ParseChar (void)
NextChar ();
/* Do correct sign extension */
return SignExtendChar (C);
Result.Val = SignExtendChar(C);
if (Result.Cooked) {
Result.Cooked = Result.Val;
}
return Result;
}
@@ -431,7 +447,7 @@ static int ParseChar (void)
static void CharConst (void)
/* Parse a character constant token */
{
int C;
parsedchar_t C;
if (CurC == 'L') {
/* Wide character constant */
@@ -457,7 +473,8 @@ static void CharConst (void)
}
/* Translate into target charset */
NextTok.IVal = SignExtendChar (C);
NextTok.IVal = SignExtendChar (C.Val);
NextTok.Cooked = C.Cooked;
/* Character constants have type int */
NextTok.Type = type_int;
@@ -468,6 +485,9 @@ static void CharConst (void)
static void StringConst (void)
/* Parse a quoted string token */
{
/* result from ParseChar */
parsedchar_t ParsedChar;
/* String buffer */
StrBuf S = AUTO_STRBUF_INITIALIZER;
@@ -494,7 +514,8 @@ static void StringConst (void)
Error ("Unexpected newline");
break;
}
SB_AppendChar (&S, ParseChar ());
ParsedChar = ParseChar ();
SB_AppendCharCooked(&S, ParsedChar.Val, ParsedChar.Cooked);
}
/* Skip closing quote char if there was one */
@@ -689,6 +710,7 @@ static void NumericConst (void)
/* Set the value and the token */
NextTok.IVal = IVal;
NextTok.Cooked = 0;
NextTok.Tok = TOK_ICONST;
} else {
@@ -805,7 +827,12 @@ static void GetNextInputToken (void)
if (NextTok.Tok == TOK_SCONST || NextTok.Tok == TOK_WCSCONST) {
TranslateLiteral (NextTok.SVal);
} else if (NextTok.Tok == TOK_CCONST || NextTok.Tok == TOK_WCCONST) {
NextTok.IVal = SignExtendChar (TgtTranslateChar (NextTok.IVal));
if (NextTok.Cooked) {
NextTok.IVal = SignExtendChar (TgtTranslateChar (NextTok.IVal));
}
else {
NextTok.IVal = SignExtendChar (NextTok.IVal);
}
}
}

View File

@@ -213,6 +213,7 @@ typedef struct Token Token;
struct Token {
token_t Tok; /* The token itself */
long IVal; /* The integer attribute */
int Cooked; /* The "cooked" flag for char constants */
Double FVal; /* The float attribute */
struct Literal* SVal; /* String literal is any */
ident Ident; /* Identifier if IDENT */

View File

@@ -82,6 +82,7 @@ StrBuf* SB_InitFromString (StrBuf* B, const char* S)
B->Len = strlen (S);
B->Index = 0;
B->Buf = (char*) S;
B->Cooked = (char*) S;
return B;
}
@@ -92,6 +93,7 @@ void SB_Done (StrBuf* B)
{
if (B->Allocated) {
xfree (B->Buf);
xfree (B->Cooked);
}
}
@@ -146,10 +148,12 @@ void SB_Realloc (StrBuf* B, unsigned NewSize)
*/
if (B->Allocated) {
/* Just reallocate the block */
B->Buf = xrealloc (B->Buf, NewAllocated);
B->Buf = xrealloc (B->Buf, NewAllocated);
B->Cooked = xrealloc (B->Cooked, NewAllocated);
} else {
/* Allocate a new block and copy */
B->Buf = memcpy (xmalloc (NewAllocated), B->Buf, B->Len);
B->Buf = memcpy (xmalloc (NewAllocated), B->Buf, B->Len);
B->Cooked = memcpy (xmalloc (NewAllocated), B->Cooked, B->Len);
}
/* Remember the new block size */
@@ -178,10 +182,12 @@ static void SB_CheapRealloc (StrBuf* B, unsigned NewSize)
/* Free the old buffer if there is one */
if (B->Allocated) {
xfree (B->Buf);
xfree (B->Cooked);
}
/* Allocate a fresh block */
B->Buf = xmalloc (NewAllocated);
B->Buf = xmalloc (NewAllocated);
B->Cooked = xmalloc (NewAllocated);
/* Remember the new block size */
B->Allocated = NewAllocated;
@@ -222,6 +228,7 @@ void SB_Terminate (StrBuf* B)
SB_Realloc (B, NewLen);
}
B->Buf[B->Len] = '\0';
B->Cooked[B->Len] = '\0';
}
@@ -234,6 +241,22 @@ void SB_CopyBuf (StrBuf* Target, const char* Buf, unsigned Size)
SB_CheapRealloc (Target, Size);
}
memcpy (Target->Buf, Buf, Size);
memcpy (Target->Cooked, Buf, Size); /* nothing raw */
}
Target->Len = Size;
}
void SB_CopyBufCooked (StrBuf* Target, const char* Buf, const char* Cooked, unsigned Size)
/* Replace the contents of Target by Size characters taken from Buf and the
** Size matching flags taken from Cooked.
*/
{
    /* Nothing to copy: the target just becomes empty */
    if (Size == 0) {
        Target->Len = 0;
        return;
    }

    /* Get a block that is large enough. The old contents are not needed. */
    if (Size > Target->Allocated) {
        SB_CheapRealloc (Target, Size);
    }

    /* Copy the characters and their flags */
    memcpy (Target->Buf, Buf, Size);
    memcpy (Target->Cooked, Cooked, Size);

    /* Remember the new length */
    Target->Len = Size;
}
@@ -254,7 +277,7 @@ void SB_CopyStr (StrBuf* Target, const char* S)
void SB_Copy (StrBuf* Target, const StrBuf* Source)
/* Copy Source to Target, discarding the old contents of Target. The
** characters, the parallel Cooked flags and the read index are copied.
*/
{
    /* One copy that transfers data and flags together is sufficient. A plain
    ** SB_CopyBuf before it would only be overwritten again.
    */
    SB_CopyBufCooked (Target, Source->Buf, Source->Cooked, Source->Len);
    Target->Index = Source->Index;
}
#endif
@@ -269,6 +292,21 @@ void SB_AppendChar (StrBuf* B, int C)
SB_Realloc (B, NewLen);
}
B->Buf[B->Len] = (char) C;
B->Cooked[B->Len] = (char) C;
B->Len = NewLen;
}
void SB_AppendCharCooked (StrBuf* B, int C, int Cooked)
/* Append the character C to the string buffer. If Cooked is zero, the
** character is recorded as raw by storing 0 in the parallel Cooked array,
** otherwise the Cooked array receives a copy of C.
*/
{
    unsigned Needed = B->Len + 1;

    /* Make sure there is room for one more character */
    if (B->Allocated < Needed) {
        SB_Realloc (B, Needed);
    }

    /* Store the character and its flag at the current end of the string */
    B->Buf[B->Len] = (char) C;
    if (Cooked) {
        B->Cooked[B->Len] = (char) C;
    } else {
        B->Cooked[B->Len] = (char) 0;
    }

    /* The string is one character longer now */
    B->Len = Needed;
}
@@ -282,6 +320,7 @@ void SB_AppendBuf (StrBuf* B, const char* S, unsigned Size)
SB_Realloc (B, NewLen);
}
memcpy (B->Buf + B->Len, S, Size);
memcpy (B->Cooked + B->Len, S, Size);
B->Len = NewLen;
}
@@ -301,7 +340,13 @@ void SB_AppendStr (StrBuf* B, const char* S)
void SB_Append (StrBuf* Target, const StrBuf* Source)
/* Append the contents of Source to Target. The Cooked flags of Source are
** appended along with its characters, so raw characters stay raw.
*/
{
    unsigned NewLen = Target->Len + Source->Len;

    /* Grow the target if the combined string doesn't fit */
    if (NewLen > Target->Allocated) {
        SB_Realloc (Target, NewLen);
    }

    /* Append exactly once. SB_AppendBuf is not used here because it would
    ** fill the Cooked array from the character data instead of from the
    ** flags of Source.
    */
    memcpy (Target->Buf + Target->Len, Source->Buf, Source->Len);
    memcpy (Target->Cooked + Target->Len, Source->Cooked, Source->Len);
    Target->Len = NewLen;
}
#endif

View File

@@ -53,10 +53,17 @@
/*****************************************************************************/
/* We want to track whether a character is "raw" or not. */
/* "Raw" characters must NOT be translated when a string is translated. */
/* We do this by keeping a second array, called "Cooked", parallel to "Buf". */
/* Think of "cooked" as the inverse of "raw". */
/* If Cooked[n] is 0, the character Buf[n] is raw and must not be translated. */
/* This was done to keep LIT_STRBUF_INITIALIZER sane: a literal can use the same string for Buf and Cooked. */
typedef struct StrBuf StrBuf;
struct StrBuf {
char* Buf; /* Pointer to buffer */
char* Cooked; /* Pointer to cooked buffer */
unsigned Len; /* Length of the string */
unsigned Index; /* Used for reading (Get and friends) */
unsigned Allocated; /* Size of allocated memory */
@@ -66,13 +73,13 @@ struct StrBuf {
extern const StrBuf EmptyStrBuf;
/* Initializer for static string bufs. One value per StrBuf member: Buf,
** Cooked, Len, Index, Allocated.
*/
#define STATIC_STRBUF_INITIALIZER { 0, 0, 0, 0, 0 }
/* Initializer for auto string bufs */
#define AUTO_STRBUF_INITIALIZER { 0, 0, 0, 0, 0 }
/* Initialize with a string literal. Buf and Cooked both refer to the literal,
** so every character counts as cooked (beware: str appears three times in
** the expansion!)
*/
#define LIT_STRBUF_INITIALIZER(str) { (char*)str, (char*)str, sizeof(str)-1, 0, 0 }
@@ -164,6 +171,16 @@ INLINE char* SB_GetBuf (StrBuf* B)
# define SB_GetBuf(B) (B)->Buf
#endif
#if defined(HAVE_INLINE)
INLINE char* SB_GetCooked (StrBuf* B)
/* Return a pointer to the Cooked array that runs parallel to the character
** buffer. An entry of 0 marks the corresponding character as raw, which
** means that it must not be translated.
*/
{
return B->Cooked;
}
#else
# define SB_GetCooked(B) (B)->Cooked
#endif
#if defined(HAVE_INLINE)
INLINE char SB_At (const StrBuf* B, unsigned Index)
/* Get a character from the buffer */
@@ -310,6 +327,9 @@ void SB_Terminate (StrBuf* B);
void SB_CopyBuf (StrBuf* Target, const char* Buf, unsigned Size);
/* Copy Buf to Target, discarding the old contents of Target */
void SB_CopyBufCooked (StrBuf* Target, const char* Buf, const char *Cooked, unsigned Size);
/* Copy Buf and Cooked to Target, discarding the old contents of Target */
#if defined(HAVE_INLINE)
INLINE void SB_CopyStr (StrBuf* Target, const char* S)
/* Copy S to Target, discarding the old contents of Target */
@@ -325,7 +345,7 @@ void SB_CopyStr (StrBuf* Target, const char* S);
INLINE void SB_Copy (StrBuf* Target, const StrBuf* Source)
/* Copy Source to Target, discarding the old contents of Target. The
** characters, the parallel Cooked flags and the read index are copied.
*/
{
    /* One copy that transfers data and flags together is sufficient. A plain
    ** SB_CopyBuf before it would only be overwritten again.
    */
    SB_CopyBufCooked (Target, Source->Buf, Source->Cooked, Source->Len);
    Target->Index = Source->Index;
}
#else
@@ -336,6 +356,9 @@ void SB_Copy (StrBuf* Target, const StrBuf* Source);
void SB_AppendChar (StrBuf* B, int C);
/* Append a character to a string buffer */
void SB_AppendCharCooked (StrBuf* B, int C, int Cooked);
/* Append a character to a string buffer, raw if Cooked == 0 */
void SB_AppendBuf (StrBuf* B, const char* S, unsigned Size);
/* Append a character buffer to the end of the string buffer */
@@ -354,7 +377,13 @@ void SB_AppendStr (StrBuf* B, const char* S);
INLINE void SB_Append (StrBuf* Target, const StrBuf* Source)
/* Append the contents of Source to Target. The Cooked flags of Source are
** appended along with its characters, so raw characters stay raw.
*/
{
    unsigned NewLen = Target->Len + Source->Len;

    /* Grow the target if the combined string doesn't fit */
    if (NewLen > Target->Allocated) {
        SB_Realloc (Target, NewLen);
    }

    /* Append exactly once. SB_AppendBuf is not used here because it would
    ** fill the Cooked array from the character data instead of from the
    ** flags of Source.
    */
    memcpy (Target->Buf + Target->Len, Source->Buf, Source->Len);
    memcpy (Target->Cooked + Target->Len, Source->Cooked, Source->Len);
    Target->Len = NewLen;
}
#else
void SB_Append (StrBuf* Target, const StrBuf* Source);

View File

@@ -121,7 +121,19 @@ void TgtTranslateStrBuf (StrBuf* Buf)
** system character set.
*/
{
TgtTranslateBuf (SB_GetBuf (Buf), SB_GetLen (Buf));
unsigned char* B = (unsigned char*)SB_GetBuf(Buf);
unsigned char* Cooked = (unsigned char*)SB_GetCooked(Buf);
unsigned Len = SB_GetLen(Buf);
/* Translate */
while (Len--) {
if (*Cooked) {
*B = Tab[*B];
}
/* else { *B = *B; } */
++B;
++Cooked;
}
}
@@ -129,7 +141,7 @@ void TgtTranslateStrBuf (StrBuf* Buf)
void TgtTranslateSet (unsigned Index, unsigned char C)
/* Set the translation code for the given character. Index must be a valid
** index into the translation table, which is enforced with CHECK.
*/
{
    /* Compare against the number of table entries, not the size in bytes.
    ** A separate check against sizeof (Tab) would add nothing to this one.
    */
    CHECK (Index < (sizeof (Tab) / sizeof (Tab[0])));
    Tab[Index] = C;
}

72
test/val/bug2609.c Normal file
View File

@@ -0,0 +1,72 @@
/* Bug #2609 - charmap translation violates C specification 6.4.4.4 Character constant */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* While a charmap entry for 0x07 is active, only the simple escape '\a' is
** expected to follow the mapping. The octal and hex escapes spell out the
** value 7 directly and must come out unchanged.
*/
#pragma charmap (0x07, 0x62) /* map \a to b */
static_assert('\a' == 0x62);
static_assert('\07' == 0x07);
static_assert('\x07' == 0x07);
/* Mapping the same code again must change the value of '\a' only */
#pragma charmap (0x07, 0x63) /* map \a to c */
static_assert('\a' == 0x63);
static_assert('\07' == 0x07);
static_assert('\x07' == 0x07);
/* An identity mapping gives '\a' its original value back */
#pragma charmap (0x07, 0x07) /* map \a back to x07 */
static_assert('\a' == 0x07);
static_assert('\07' == 0x07);
static_assert('\x07' == 0x07);
/* No further charmap pragma follows in this file, so this mapping is the one
** in effect for the objects below and for main().
*/
#pragma charmap (0x07, 0x61) /* map \a to a */
/* s mixes an octal escape, a simple escape and a hex escape. t lists the
** bytes s is expected to contain, including the terminator.
*/
char *s = "\07\a\x07";
char t[] = { 7, 0x61, 7, 0 };
static_assert('\a' == 0x61);
static_assert('\07' == 0x07);
static_assert('\x07' == 0x07);
/* The same three constants stored in objects of type char and int. main()
** compares them against the expected values at run time.
*/
char c_back_a = '\a';
char c_hex_07 = '\x07';
char c_oct_07 = '\07';
int i_back_a = '\a';
int i_hex_07 = '\x07';
int i_oct_07 = '\07';
/* Compare a against b. On a mismatch print both values and make the enclosing
** function return EXIT_FAILURE. The macro expands to a complete if statement
** and is used without a trailing semicolon. The arguments are parenthesized,
** so that expressions with low precedence operators compare as intended.
** Beware: the arguments are evaluated a second time when a mismatch is
** reported.
*/
#define TEST(a,b) \
if ((a) != (b)) { printf("\n\n !FAIL! %s = %04x not %04x\n\n", #a, (a), (b)); return EXIT_FAILURE; }
static void DumpFour (const char* Bytes)
/* Print the first four bytes of Bytes in hex, followed by a newline */
{
    int k;
    for (k = 0; k < 4; ++k) {
        printf("%02x ", Bytes[k]);
    }
    printf("\n");
}

int main (void) {
    /* Character constants that were stored in char objects */
    TEST(c_back_a, 0x61)
    TEST(c_hex_07, 0x07)
    TEST(c_oct_07, 07)

    /* Character constants that were stored in int objects */
    TEST(i_back_a, 0x61)
    TEST(i_hex_07, 0x07)
    TEST(i_oct_07, 07)

    /* Character constants used directly in expressions */
    assert('\a' == 0x61);
    assert('\07' == 0x07);
    assert('\x07' == 0x07);

    /* The string must match the expected bytes, and its octal escape must
    ** differ from the translated '\a' that follows it.
    */
    if (strcmp(s,t) == 0 && s[0] != s[1]) {
        return EXIT_SUCCESS;
    }

    /* Show what we got and what we expected */
    printf("\n\n !FAIL! strcmp\n");
    DumpFour(s);
    DumpFour(t);
    printf("\n");
    return EXIT_FAILURE;
}

15
test/val/bug2610.c Normal file
View File

@@ -0,0 +1,15 @@
#include <stdio.h>
#if '\x0A' != 0x0A
#error "Suspicious character set translation"
#endif
int main()
{
    /* A hex escape must keep its numeric value, whatever the target charset
    ** maps the newline character to.
    */
    char c = '\x0A';

    if (c != 0x0A) {
        printf("Failed\n");
        return 1;
    }

    printf("Ok\n");
    return 0;
}