From 8e8b58c707ed1d076491cad2dd2a201aa72e7aa1 Mon Sep 17 00:00:00 2001 From: rootvector2 Date: Sun, 14 Jun 2026 01:12:45 +0530 Subject: [PATCH] consume recovery bytes from saved buffer before src in xs_textdecoder_decode the illegal-sequence recovery loop read continuation bytes straight from src and advanced src for each one, ignoring the bytes carried over in the streaming buffer. completing a buffered partial lead with a short final chunk walked src past the view end and passed a negative length to the trailing c_memcpy. read the already-assembled utf8 bytes and advance buffer or src the same way the legal decode path does, in both passes. --- modules/data/text/decoder/textdecoder.c | 26 ++++++++++++++----- .../modules/data/text/decoder/decodestream.js | 9 +++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/modules/data/text/decoder/textdecoder.c b/modules/data/text/decoder/textdecoder.c index 56f2fbd284..6b5b9df311 100644 --- a/modules/data/text/decoder/textdecoder.c +++ b/modules/data/text/decoder/textdecoder.c @@ -187,10 +187,17 @@ void xs_textdecoder_decode(xsMachine *the) else if (first > 0xF4) // no valid next byte clen = 0; + const uint8_t *s = &utf8[1]; while (clen-- > 0) { - uint8_t c = c_read8(src); - if ((lower <= c) && (c <= upper)) - src++; + uint8_t c = *s++; + if ((lower <= c) && (c <= upper)) { + if (bufferLength) { + bufferLength--; + buffer++; + } + else + src++; + } else break; } @@ -321,10 +328,17 @@ void xs_textdecoder_decode(xsMachine *the) else if (first > 0xF4) // no valid next byte clen = 0; + const uint8_t *s = &utf8[1]; while (clen-- > 0) { - uint8_t c = c_read8(src); - if ((lower <= c) && (c <= upper)) - src++; + uint8_t c = *s++; + if ((lower <= c) && (c <= upper)) { + if (bufferLength) { + bufferLength--; + buffer++; + } + else + src++; + } else break; } diff --git a/tests/modules/data/text/decoder/decodestream.js b/tests/modules/data/text/decoder/decodestream.js index 9f707e07c0..418638c736 100644 --- 
a/tests/modules/data/text/decoder/decodestream.js +++ b/tests/modules/data/text/decoder/decodestream.js @@ -46,3 +46,12 @@ assert.sameValue("\uFFFD", decoder.decode()); assert.sameValue("", decoder.decode(Uint8Array.of(0xF0, 0x9F, 0x92), {stream: true})); assert.sameValue("\uFFFD", decoder.decode()); + +// illegal sequence spanning a buffered partial lead and a short final chunk: +// the recovery scan must consume from the buffered bytes before src and stop at the +// end of the input. The chunk is a view whose backing store holds continuation bytes +// (0x90) past its length, so the bytes seen are F0 80 90 90: the 0x80 is below the +// 0x90 lower bound for an F0 lead, so the F0, the 0x80 and both trailing 0x90 each decode to U+FFFD. +assert.sameValue("", decoder.decode(Uint8Array.of(0xF0, 0x80), {stream: true})); +assert.sameValue("\uFFFD\uFFFD\uFFFD\uFFFD", decoder.decode(new Uint8Array(8).fill(0x90).subarray(0, 2), {stream: true})); +assert.sameValue("", decoder.decode());