From 09986f5563e31e2c900a606438f1d60d008f3a14 Mon Sep 17 00:00:00 2001 From: Fabian Boehm Date: Sat, 2 Dec 2023 11:06:07 +0100 Subject: [PATCH] Encode all ENCODE_DIRECT codepoints with encode_direct --- src/common.cpp | 7 ++++--- tests/checks/basic.fish | 11 ++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index c419bc1..bda5fe6 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -355,9 +355,7 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) { } else { ret = std::mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state); // Determine whether to encode this character with our crazy scheme. - if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { - use_encode_direct = true; - } else if (wc == INTERNAL_SEPARATOR) { + if (fish_reserved_codepoint(wc)) { use_encode_direct = true; } else if (ret == static_cast<size_t>(-2)) { // Incomplete sequence. @@ -1317,6 +1315,9 @@ maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, boo } if (!errored && result_char_or_none.has_value()) { + if (fish_reserved_codepoint(*result_char_or_none)) { + return none(); + } result->push_back(*result_char_or_none); } if (errored) return none(); diff --git a/tests/checks/basic.fish b/tests/checks/basic.fish index c9a2d58..83bee8f 100644 --- a/tests/checks/basic.fish +++ b/tests/checks/basic.fish @@ -500,3 +500,12 @@ echo banana # This used to be a parse error - #7685. echo (echo hello\\) # CHECK: hello\ + +$fish -c 'echo \ufdd2"fart"' +# CHECKERR: fish: Invalid token '\ufdd2"fart"' +# CHECKERR: echo \ufdd2"fart" +# CHECKERR: ^~~~~~~~~~~^ + +echo (sh -c 'printf $\'\ufdd2foo\'') | string escape +# CHECK: \Xef\Xb7\X92foo +