diff --git a/tccpp.c b/tccpp.c index 4d5169e..48e3501 100644 --- a/tccpp.c +++ b/tccpp.c @@ -2105,6 +2105,43 @@ static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long tcc_warning("unknown escape sequence: \'\\x%x\'", c); break; } + } else if (is_long && c >= 0x80) { + /* is_long is set and c is >= 0x80: treat c as the lead byte of a UTF-8 sequence and decode the whole sequence into a single value in n */ + + int cont; /* number of continuation bytes expected after the lead byte */ + int i; /* offset from p of the continuation byte being read */ + + /* classify the lead byte: 110xxxxx means 1 continuation byte, 1110xxxx means 2, 11110xxx means 3; n starts with the payload bits of the lead byte, and any other bit pattern is rejected */ + if ((c >> 5) == 0x6) { + cont = 1; n = c & 0x1f; + } else if ((c >> 4) == 0xe) { + cont = 2; n = c & 0xf; + } else if ((c >> 3) == 0x1e) { + cont = 3; n = c & 0x7; + } else { + goto invalid_utf8_sequence; + } + + /* every continuation byte must have the form 10xxxxxx; its low six bits are shifted into n. A zero byte fails this test too. NOTE(review): this presumably relies on the input ending with a byte that is not 10xxxxxx so that p[i] stays in bounds; confirm against the caller */ + for (i = 1; i <= cont; i++) { + if ((p[i] >> 6) != 2) + goto invalid_utf8_sequence; + n = (n << 6) | (p[i] & 0x3f); + } + + /* reject overlong forms: the decoded value must be too large for a shorter sequence. NOTE(review): only overlong forms are rejected here; surrogates (0xd800..0xdfff) and values above 0x10ffff still pass, confirm whether that is intended */ + if ((cont == 1 && n < 0x80) || + (cont == 2 && n < 0x800) || + (cont == 3 && n < 0x10000)) + goto invalid_utf8_sequence; + + /* success: consume the lead byte and all continuation bytes, replace c with the decoded value, and jump to add_char_nonext so the p++ after this if/else chain is skipped */ + p += 1 + cont; + c = n; + goto add_char_nonext; + + invalid_utf8_sequence: + tcc_warning("invalid UTF-8 sequence"); /* warning only: c still holds the raw lead byte (n may be partially updated), and control falls through to the p++ below, so exactly one byte is consumed */ } p++; add_char_nonext: