Merge "Remove unused utf8_length()." am: 52dac2f8b7 am: 0ee0648569 am: a7f8cdbfff
Original change: https://android-review.googlesource.com/c/platform/system/core/+/1474916 Change-Id: I0979c1e0aff9016266efdce738dc92934bd37c3e
This commit is contained in:
commit
6add13eba2
2 changed files with 0 additions and 61 deletions
|
|
@ -359,49 +359,6 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le
|
|||
// UTF-8
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
ssize_t utf8_length(const char *src)
|
||||
{
|
||||
const char *cur = src;
|
||||
size_t ret = 0;
|
||||
while (*cur != '\0') {
|
||||
const char first_char = *cur++;
|
||||
if ((first_char & 0x80) == 0) { // ASCII
|
||||
ret += 1;
|
||||
continue;
|
||||
}
|
||||
// (UTF-8's character must not be like 10xxxxxx,
|
||||
// but 110xxxxx, 1110xxxx, ... or 1111110x)
|
||||
if ((first_char & 0x40) == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t mask, to_ignore_mask;
|
||||
size_t num_to_read = 0;
|
||||
char32_t utf32 = 0;
|
||||
for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
|
||||
num_to_read < 5 && (first_char & mask);
|
||||
num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
|
||||
if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx
|
||||
return -1;
|
||||
}
|
||||
// 0x3F == 00111111
|
||||
utf32 = (utf32 << 6) + (*cur++ & 0x3F);
|
||||
}
|
||||
// "first_char" must be (110xxxxx - 11110xxx)
|
||||
if (num_to_read == 5) {
|
||||
return -1;
|
||||
}
|
||||
to_ignore_mask |= mask;
|
||||
utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
|
||||
if (utf32 > kUnicodeMaxCodepoint) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret += num_to_read;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
|
||||
{
|
||||
if (src == nullptr || src_len == 0) {
|
||||
|
|
|
|||
|
|
@ -110,24 +110,6 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);
|
|||
*/
|
||||
void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len);
|
||||
|
||||
/**
|
||||
* Returns the length in bytes of "src" when "src" is a valid UTF-8 string.
|
||||
* Returns 0 if src is NULL or 0-length string. Returns -1 when the source
|
||||
* is an invalid string.
|
||||
*
|
||||
* This function should be used to determine whether "src" is valid UTF-8
|
||||
* characters with valid unicode codepoints. "src" must be nul-terminated.
|
||||
*
|
||||
* If you are going to use other utf8_to_... functions defined in this header
|
||||
* with string which may not be valid UTF-8 with valid codepoint (from 0 to
|
||||
* 0x10FFFF), you should use this function before calling others, since the
|
||||
* other functions do not check whether the string is valid UTF-8 or not.
|
||||
*
|
||||
* If you do not care whether "src" is valid UTF-8 or not, you should use
|
||||
* strlen() as usual, which should be much faster.
|
||||
*/
|
||||
ssize_t utf8_length(const char *src);
|
||||
|
||||
/**
|
||||
* Returns the UTF-16 length of UTF-8 string "src". Returns -1 in case
|
||||
* it's invalid utf8. No buffer over-read occurs because of bound checks. Using overreadIsFatal you
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue