Merge "adb: win32: handle incomplete UTF-8 in console output, other fixes"
This commit is contained in:
commit
85c65dab42
3 changed files with 210 additions and 32 deletions
|
|
@ -27,6 +27,7 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
// Include this before open/unlink are defined as macros below.
|
// Include this before open/unlink are defined as macros below.
|
||||||
#include <android-base/utf8.h>
|
#include <android-base/utf8.h>
|
||||||
|
|
@ -288,6 +289,8 @@ extern int adb_chmod(const char *, int);
|
||||||
|
|
||||||
extern int adb_vfprintf(FILE *stream, const char *format, va_list ap)
|
extern int adb_vfprintf(FILE *stream, const char *format, va_list ap)
|
||||||
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 0)));
|
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 0)));
|
||||||
|
extern int adb_vprintf(const char *format, va_list ap)
|
||||||
|
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 1, 0)));
|
||||||
extern int adb_fprintf(FILE *stream, const char *format, ...)
|
extern int adb_fprintf(FILE *stream, const char *format, ...)
|
||||||
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 3)));
|
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 3)));
|
||||||
extern int adb_printf(const char *format, ...)
|
extern int adb_printf(const char *format, ...)
|
||||||
|
|
@ -295,6 +298,8 @@ extern int adb_printf(const char *format, ...)
|
||||||
|
|
||||||
extern int adb_fputs(const char* buf, FILE* stream);
|
extern int adb_fputs(const char* buf, FILE* stream);
|
||||||
extern int adb_fputc(int ch, FILE* stream);
|
extern int adb_fputc(int ch, FILE* stream);
|
||||||
|
extern int adb_putchar(int ch);
|
||||||
|
extern int adb_puts(const char* buf);
|
||||||
extern size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb,
|
extern size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb,
|
||||||
FILE* stream);
|
FILE* stream);
|
||||||
|
|
||||||
|
|
@ -321,13 +326,20 @@ inline void seekdir(DIR*, long) {
|
||||||
#define chmod adb_chmod
|
#define chmod adb_chmod
|
||||||
|
|
||||||
#define vfprintf adb_vfprintf
|
#define vfprintf adb_vfprintf
|
||||||
|
#define vprintf adb_vprintf
|
||||||
#define fprintf adb_fprintf
|
#define fprintf adb_fprintf
|
||||||
#define printf adb_printf
|
#define printf adb_printf
|
||||||
#define fputs adb_fputs
|
#define fputs adb_fputs
|
||||||
#define fputc adb_fputc
|
#define fputc adb_fputc
|
||||||
|
// putc may be a macro, so if so, undefine it, so that we can redefine it.
|
||||||
|
#undef putc
|
||||||
|
#define putc(c, s) adb_fputc(c, s)
|
||||||
|
#define putchar adb_putchar
|
||||||
|
#define puts adb_puts
|
||||||
#define fwrite adb_fwrite
|
#define fwrite adb_fwrite
|
||||||
|
|
||||||
#define fopen adb_fopen
|
#define fopen adb_fopen
|
||||||
|
#define freopen freopen_utf8_not_yet_implemented
|
||||||
|
|
||||||
#define getenv adb_getenv
|
#define getenv adb_getenv
|
||||||
#define putenv putenv_utf8_not_yet_implemented
|
#define putenv putenv_utf8_not_yet_implemented
|
||||||
|
|
@ -386,6 +398,12 @@ public:
|
||||||
// but does not check if the handle != INVALID_HANDLE_VALUE.
|
// but does not check if the handle != INVALID_HANDLE_VALUE.
|
||||||
typedef std::unique_ptr<HANDLE, handle_deleter> unique_handle;
|
typedef std::unique_ptr<HANDLE, handle_deleter> unique_handle;
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
size_t ParseCompleteUTF8(const char* first, const char* last, std::vector<char>* remaining_bytes);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
#else /* !_WIN32 a.k.a. Unix */
|
#else /* !_WIN32 a.k.a. Unix */
|
||||||
|
|
||||||
#include "fdevent.h"
|
#include "fdevent.h"
|
||||||
|
|
|
||||||
|
|
@ -2451,12 +2451,15 @@ static void _fh_socketpair_hook( FH fh, int events, EventHook hook )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static adb_mutex_t g_console_output_buffer_lock;
|
||||||
|
|
||||||
void
|
void
|
||||||
adb_sysdeps_init( void )
|
adb_sysdeps_init( void )
|
||||||
{
|
{
|
||||||
#define ADB_MUTEX(x) InitializeCriticalSection( & x );
|
#define ADB_MUTEX(x) InitializeCriticalSection( & x );
|
||||||
#include "mutex_list.h"
|
#include "mutex_list.h"
|
||||||
InitializeCriticalSection( &_win32_lock );
|
InitializeCriticalSection( &_win32_lock );
|
||||||
|
InitializeCriticalSection( &g_console_output_buffer_lock );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**************************************************************************/
|
/**************************************************************************/
|
||||||
|
|
@ -2525,6 +2528,8 @@ static HANDLE _get_console_handle(int fd, DWORD* mode=nullptr) {
|
||||||
|
|
||||||
// Returns a console handle if |stream| is a console, otherwise returns nullptr.
|
// Returns a console handle if |stream| is a console, otherwise returns nullptr.
|
||||||
static HANDLE _get_console_handle(FILE* const stream) {
|
static HANDLE _get_console_handle(FILE* const stream) {
|
||||||
|
// Save and restore errno to make it easier for callers to prevent from overwriting errno.
|
||||||
|
android::base::ErrnoRestorer er;
|
||||||
const int fd = fileno(stream);
|
const int fd = fileno(stream);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
@ -3296,6 +3301,9 @@ static HANDLE _console_handle; // when set, console mode should be restored
|
||||||
|
|
||||||
void stdin_raw_init() {
|
void stdin_raw_init() {
|
||||||
const HANDLE in = _get_console_handle(STDIN_FILENO, &_old_console_mode);
|
const HANDLE in = _get_console_handle(STDIN_FILENO, &_old_console_mode);
|
||||||
|
if (in == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Disable ENABLE_PROCESSED_INPUT so that Ctrl-C is read instead of
|
// Disable ENABLE_PROCESSED_INPUT so that Ctrl-C is read instead of
|
||||||
// calling the process Ctrl-C routine (configured by
|
// calling the process Ctrl-C routine (configured by
|
||||||
|
|
@ -3617,16 +3625,98 @@ int adb_chmod(const char* path, int mode) {
|
||||||
return _wchmod(path_wide.c_str(), mode);
|
return _wchmod(path_wide.c_str(), mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Internal helper function to write UTF-8 bytes to a console. Returns -1
|
// From libutils/Unicode.cpp, get the length of a UTF-8 sequence given the lead byte.
|
||||||
// on error.
|
static inline size_t utf8_codepoint_len(uint8_t ch) {
|
||||||
static int _console_write_utf8(const char* buf, size_t size, FILE* stream,
|
return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1;
|
||||||
HANDLE console) {
|
}
|
||||||
std::wstring output;
|
|
||||||
|
|
||||||
// Try to convert from data that might be UTF-8 to UTF-16, ignoring errors.
|
namespace internal {
|
||||||
// Data might not be UTF-8 if the user cat's random data, runs dmesg, etc.
|
|
||||||
|
// Given a sequence of UTF-8 bytes (denoted by the range [first, last)), return the number of bytes
|
||||||
|
// (from the beginning) that are complete UTF-8 sequences and append the remaining bytes to
|
||||||
|
// remaining_bytes.
|
||||||
|
size_t ParseCompleteUTF8(const char* const first, const char* const last,
|
||||||
|
std::vector<char>* const remaining_bytes) {
|
||||||
|
// Walk backwards from the end of the sequence looking for the beginning of a UTF-8 sequence.
|
||||||
|
// Current_after points one byte past the current byte to be examined.
|
||||||
|
for (const char* current_after = last; current_after != first; --current_after) {
|
||||||
|
const char* const current = current_after - 1;
|
||||||
|
const char ch = *current;
|
||||||
|
const char kHighBit = 0x80u;
|
||||||
|
const char kTwoHighestBits = 0xC0u;
|
||||||
|
if ((ch & kHighBit) == 0) { // high bit not set
|
||||||
|
// The buffer ends with a one-byte UTF-8 sequence, possibly followed by invalid trailing
|
||||||
|
// bytes with no leading byte, so return the entire buffer.
|
||||||
|
break;
|
||||||
|
} else if ((ch & kTwoHighestBits) == kTwoHighestBits) { // top two highest bits set
|
||||||
|
// Lead byte in UTF-8 sequence, so check if we have all the bytes in the sequence.
|
||||||
|
const size_t bytes_available = last - current;
|
||||||
|
if (bytes_available < utf8_codepoint_len(ch)) {
|
||||||
|
// We don't have all the bytes in the UTF-8 sequence, so return all the bytes
|
||||||
|
// preceding the current incomplete UTF-8 sequence and append the remaining bytes
|
||||||
|
// to remaining_bytes.
|
||||||
|
remaining_bytes->insert(remaining_bytes->end(), current, last);
|
||||||
|
return current - first;
|
||||||
|
} else {
|
||||||
|
// The buffer ends with a complete UTF-8 sequence, possibly followed by invalid
|
||||||
|
// trailing bytes with no lead byte, so return the entire buffer.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Trailing byte, so keep going backwards looking for the lead byte.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the size of the entire buffer. It is possible that we walked backward past invalid
|
||||||
|
// trailing bytes with no lead byte, in which case we want to return all those invalid bytes
|
||||||
|
// so that they can be processed.
|
||||||
|
return last - first;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bytes that have not yet been output to the console because they are incomplete UTF-8 sequences.
|
||||||
|
// Note that we use only one buffer even though stderr and stdout are logically separate streams.
|
||||||
|
// This matches the behavior of Linux.
|
||||||
|
// Protected by g_console_output_buffer_lock.
|
||||||
|
static auto& g_console_output_buffer = *new std::vector<char>();
|
||||||
|
|
||||||
|
// Internal helper function to write UTF-8 bytes to a console. Returns -1 on error.
|
||||||
|
static int _console_write_utf8(const char* const buf, const size_t buf_size, FILE* stream,
|
||||||
|
HANDLE console) {
|
||||||
|
const int saved_errno = errno;
|
||||||
|
std::vector<char> combined_buffer;
|
||||||
|
|
||||||
|
// Complete UTF-8 sequences that should be immediately written to the console.
|
||||||
|
const char* utf8;
|
||||||
|
size_t utf8_size;
|
||||||
|
|
||||||
|
adb_mutex_lock(&g_console_output_buffer_lock);
|
||||||
|
if (g_console_output_buffer.empty()) {
|
||||||
|
// If g_console_output_buffer doesn't have a buffered up incomplete UTF-8 sequence (the
|
||||||
|
// common case with plain ASCII), parse buf directly.
|
||||||
|
utf8 = buf;
|
||||||
|
utf8_size = internal::ParseCompleteUTF8(buf, buf + buf_size, &g_console_output_buffer);
|
||||||
|
} else {
|
||||||
|
// If g_console_output_buffer has a buffered up incomplete UTF-8 sequence, move it to
|
||||||
|
// combined_buffer (and effectively clear g_console_output_buffer) and append buf to
|
||||||
|
// combined_buffer, then parse it all together.
|
||||||
|
combined_buffer.swap(g_console_output_buffer);
|
||||||
|
combined_buffer.insert(combined_buffer.end(), buf, buf + buf_size);
|
||||||
|
|
||||||
|
utf8 = combined_buffer.data();
|
||||||
|
utf8_size = internal::ParseCompleteUTF8(utf8, utf8 + combined_buffer.size(),
|
||||||
|
&g_console_output_buffer);
|
||||||
|
}
|
||||||
|
adb_mutex_unlock(&g_console_output_buffer_lock);
|
||||||
|
|
||||||
|
std::wstring utf16;
|
||||||
|
|
||||||
|
// Try to convert from data that might be UTF-8 to UTF-16, ignoring errors (just like Linux
|
||||||
|
// which does not return an error on bad UTF-8). Data might not be UTF-8 if the user cat's
|
||||||
|
// random data, runs dmesg (which might have non-UTF-8), etc.
|
||||||
// This could throw std::bad_alloc.
|
// This could throw std::bad_alloc.
|
||||||
(void)android::base::UTF8ToWide(buf, size, &output);
|
(void)android::base::UTF8ToWide(utf8, utf8_size, &utf16);
|
||||||
|
|
||||||
// Note that this does not do \n => \r\n translation because that
|
// Note that this does not do \n => \r\n translation because that
|
||||||
// doesn't seem necessary for the Windows console. For the Windows
|
// doesn't seem necessary for the Windows console. For the Windows
|
||||||
|
|
@ -3639,16 +3729,16 @@ static int _console_write_utf8(const char* buf, size_t size, FILE* stream,
|
||||||
|
|
||||||
// Write UTF-16 to the console.
|
// Write UTF-16 to the console.
|
||||||
DWORD written = 0;
|
DWORD written = 0;
|
||||||
if (!WriteConsoleW(console, output.c_str(), output.length(), &written,
|
if (!WriteConsoleW(console, utf16.c_str(), utf16.length(), &written, NULL)) {
|
||||||
NULL)) {
|
|
||||||
errno = EIO;
|
errno = EIO;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is the number of UTF-16 chars written, which might be different
|
// Return the size of the original buffer passed in, signifying that we consumed it all, even
|
||||||
// than the number of UTF-8 chars passed in. It doesn't seem practical to
|
// if nothing was displayed, in the case of being passed an incomplete UTF-8 sequence. This
|
||||||
// get this count correct.
|
// matches the Linux behavior.
|
||||||
return written;
|
errno = saved_errno;
|
||||||
|
return buf_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function prototype because attributes cannot be placed on func definitions.
|
// Function prototype because attributes cannot be placed on func definitions.
|
||||||
|
|
@ -3660,14 +3750,21 @@ static int _console_vfprintf(const HANDLE console, FILE* stream,
|
||||||
// Returns -1 on error.
|
// Returns -1 on error.
|
||||||
static int _console_vfprintf(const HANDLE console, FILE* stream,
|
static int _console_vfprintf(const HANDLE console, FILE* stream,
|
||||||
const char *format, va_list ap) {
|
const char *format, va_list ap) {
|
||||||
|
const int saved_errno = errno;
|
||||||
std::string output_utf8;
|
std::string output_utf8;
|
||||||
|
|
||||||
// Format the string.
|
// Format the string.
|
||||||
// This could throw std::bad_alloc.
|
// This could throw std::bad_alloc.
|
||||||
android::base::StringAppendV(&output_utf8, format, ap);
|
android::base::StringAppendV(&output_utf8, format, ap);
|
||||||
|
|
||||||
return _console_write_utf8(output_utf8.c_str(), output_utf8.length(),
|
const int result = _console_write_utf8(output_utf8.c_str(), output_utf8.length(), stream,
|
||||||
stream, console);
|
console);
|
||||||
|
if (result != -1) {
|
||||||
|
errno = saved_errno;
|
||||||
|
} else {
|
||||||
|
// If -1 was returned, errno has been set.
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Version of vfprintf() that takes UTF-8 and can write Unicode to a
|
// Version of vfprintf() that takes UTF-8 and can write Unicode to a
|
||||||
|
|
@ -3689,6 +3786,11 @@ int adb_vfprintf(FILE *stream, const char *format, va_list ap) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Version of vprintf() that takes UTF-8 and can write Unicode to a Windows console.
|
||||||
|
int adb_vprintf(const char *format, va_list ap) {
|
||||||
|
return adb_vfprintf(stdout, format, ap);
|
||||||
|
}
|
||||||
|
|
||||||
// Version of fprintf() that takes UTF-8 and can write Unicode to a
|
// Version of fprintf() that takes UTF-8 and can write Unicode to a
|
||||||
// Windows console.
|
// Windows console.
|
||||||
int adb_fprintf(FILE *stream, const char *format, ...) {
|
int adb_fprintf(FILE *stream, const char *format, ...) {
|
||||||
|
|
@ -3716,6 +3818,7 @@ int adb_printf(const char *format, ...) {
|
||||||
int adb_fputs(const char* buf, FILE* stream) {
|
int adb_fputs(const char* buf, FILE* stream) {
|
||||||
// adb_fprintf returns -1 on error, which is conveniently the same as EOF
|
// adb_fprintf returns -1 on error, which is conveniently the same as EOF
|
||||||
// which fputs (and hence adb_fputs) should return on error.
|
// which fputs (and hence adb_fputs) should return on error.
|
||||||
|
static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed");
|
||||||
return adb_fprintf(stream, "%s", buf);
|
return adb_fprintf(stream, "%s", buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3723,32 +3826,32 @@ int adb_fputs(const char* buf, FILE* stream) {
|
||||||
// Windows console.
|
// Windows console.
|
||||||
int adb_fputc(int ch, FILE* stream) {
|
int adb_fputc(int ch, FILE* stream) {
|
||||||
const int result = adb_fprintf(stream, "%c", ch);
|
const int result = adb_fprintf(stream, "%c", ch);
|
||||||
if (result <= 0) {
|
if (result == -1) {
|
||||||
// If there was an error, or if nothing was printed (which should be an
|
|
||||||
// error), return an error, which fprintf signifies with EOF.
|
|
||||||
return EOF;
|
return EOF;
|
||||||
}
|
}
|
||||||
// For success, fputc returns the char, cast to unsigned char, then to int.
|
// For success, fputc returns the char, cast to unsigned char, then to int.
|
||||||
return static_cast<unsigned char>(ch);
|
return static_cast<unsigned char>(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Version of putchar() that takes UTF-8 and can write Unicode to a Windows console.
|
||||||
|
int adb_putchar(int ch) {
|
||||||
|
return adb_fputc(ch, stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Version of puts() that takes UTF-8 and can write Unicode to a Windows console.
|
||||||
|
int adb_puts(const char* buf) {
|
||||||
|
// adb_printf returns -1 on error, which is conveniently the same as EOF
|
||||||
|
// which puts (and hence adb_puts) should return on error.
|
||||||
|
static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed");
|
||||||
|
return adb_printf("%s\n", buf);
|
||||||
|
}
|
||||||
|
|
||||||
// Internal function to write UTF-8 to a Win32 console. Returns the number of
|
// Internal function to write UTF-8 to a Win32 console. Returns the number of
|
||||||
// items (of length size) written. On error, returns a short item count or 0.
|
// items (of length size) written. On error, returns a short item count or 0.
|
||||||
static size_t _console_fwrite(const void* ptr, size_t size, size_t nmemb,
|
static size_t _console_fwrite(const void* ptr, size_t size, size_t nmemb,
|
||||||
FILE* stream, HANDLE console) {
|
FILE* stream, HANDLE console) {
|
||||||
// TODO: Note that a Unicode character could be several UTF-8 bytes. But
|
const int result = _console_write_utf8(reinterpret_cast<const char*>(ptr), size * nmemb, stream,
|
||||||
// if we're passed only some of the bytes of a character (for example, from
|
console);
|
||||||
// the network socket for adb shell), we won't be able to convert the char
|
|
||||||
// to a complete UTF-16 char (or surrogate pair), so the output won't look
|
|
||||||
// right.
|
|
||||||
//
|
|
||||||
// To fix this, see libutils/Unicode.cpp for hints on decoding UTF-8.
|
|
||||||
//
|
|
||||||
// For now we ignore this problem because the alternative is that we'd have
|
|
||||||
// to parse UTF-8 and buffer things up (doable). At least this is better
|
|
||||||
// than what we had before -- always incorrect multi-byte UTF-8 output.
|
|
||||||
int result = _console_write_utf8(reinterpret_cast<const char*>(ptr),
|
|
||||||
size * nmemb, stream, console);
|
|
||||||
if (result == -1) {
|
if (result == -1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -137,3 +137,60 @@ TEST(sysdeps_win32, unix_isatty) {
|
||||||
// Make sure an invalid FD is handled correctly.
|
// Make sure an invalid FD is handled correctly.
|
||||||
EXPECT_EQ(0, unix_isatty(-1));
|
EXPECT_EQ(0, unix_isatty(-1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestParseCompleteUTF8(const char* buf, const size_t buf_size,
|
||||||
|
const size_t expected_complete_bytes,
|
||||||
|
const std::vector<char>& expected_remaining_bytes) {
|
||||||
|
std::vector<char> remaining_bytes;
|
||||||
|
const size_t complete_bytes = internal::ParseCompleteUTF8(buf, buf + buf_size,
|
||||||
|
&remaining_bytes);
|
||||||
|
EXPECT_EQ(expected_complete_bytes, complete_bytes);
|
||||||
|
EXPECT_EQ(expected_remaining_bytes, remaining_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(sysdeps_win32, ParseCompleteUTF8) {
|
||||||
|
const std::vector<std::vector<char>> multi_byte_sequences = {
|
||||||
|
{ '\xc2', '\xa9' }, // 2 byte UTF-8 sequence
|
||||||
|
{ '\xe1', '\xb4', '\xa8' }, // 3 byte UTF-8 sequence
|
||||||
|
{ '\xf0', '\x9f', '\x98', '\x80' }, // 4 byte UTF-8 sequence
|
||||||
|
};
|
||||||
|
std::vector<std::vector<char>> all_sequences = {
|
||||||
|
{}, // 0 bytes
|
||||||
|
{ '\0' }, // NULL byte
|
||||||
|
{ 'a' }, // 1 byte UTF-8 sequence
|
||||||
|
};
|
||||||
|
all_sequences.insert(all_sequences.end(), multi_byte_sequences.begin(),
|
||||||
|
multi_byte_sequences.end());
|
||||||
|
|
||||||
|
// Vary a prefix of bytes in front of the sequence that we're actually interested in parsing.
|
||||||
|
for (const auto& prefix : all_sequences) {
|
||||||
|
// Parse (prefix + one byte of the sequence at a time)
|
||||||
|
for (const auto& seq : multi_byte_sequences) {
|
||||||
|
std::vector<char> buffer(prefix);
|
||||||
|
|
||||||
|
// For every byte of the sequence except the last
|
||||||
|
for (size_t i = 0; i < seq.size() - 1; ++i) {
|
||||||
|
buffer.push_back(seq[i]);
|
||||||
|
|
||||||
|
// When parsing an incomplete UTF-8 sequence, the amount of the buffer preceding
|
||||||
|
// the start of the incomplete UTF-8 sequence is valid. The remaining bytes are the
|
||||||
|
// bytes of the incomplete UTF-8 sequence.
|
||||||
|
TestParseCompleteUTF8(buffer.data(), buffer.size(), prefix.size(),
|
||||||
|
std::vector<char>(seq.begin(), seq.begin() + i + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the last byte of the sequence
|
||||||
|
buffer.push_back(seq.back());
|
||||||
|
TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse (prefix (aka sequence) + invalid trailing bytes) to verify that the invalid
|
||||||
|
// trailing bytes are immediately "returned" to prevent them from being stuck in some
|
||||||
|
// buffer.
|
||||||
|
std::vector<char> buffer(prefix);
|
||||||
|
for (size_t i = 0; i < 8; ++i) {
|
||||||
|
buffer.push_back(0x80); // trailing byte
|
||||||
|
TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue