393. UTF-8 Validation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class Solution {
    /**
     * Checks whether {@code data} is a structurally valid sequence of UTF-8
     * encoded characters, each 1 to 4 bytes long.
     *
     * <p>Only the lowest 8 bits of every element are examined. A character is
     * accepted when its lead byte matches one of the patterns
     * {@code 0xxxxxxx}, {@code 110xxxxx}, {@code 1110xxxx} or {@code 11110xxx}
     * and is followed by the matching number of {@code 10xxxxxx} continuation
     * bytes. No further checks (such as rejecting overlong encodings) are made.
     *
     * @param data the bytes to validate, one byte per array element
     * @return {@code true} if the whole array decomposes into well-formed
     *         characters (an empty array is valid), {@code false} otherwise
     * @throws NullPointerException if {@code data} is {@code null}
     */
    public boolean validUtf8(int[] data) {
        final int total = data.length;
        int pos = 0;

        while (pos < total) {
            final int trailing = trailingByteCount(data[pos]);

            // Unknown lead byte, or the character would run past the end of the input.
            if (trailing < 0 || pos + trailing >= total) {
                return false;
            }

            // Every continuation byte must have the form 10xxxxxx.
            for (int offset = 1; offset <= trailing; offset++) {
                if ((data[pos + offset] & 0xC0) != 0x80) {
                    return false;
                }
            }

            pos += trailing + 1;
        }

        return true;
    }

    /**
     * Classifies a lead byte by its high-order bit pattern.
     *
     * @param lead the candidate lead byte; only its lowest 8 bits are used
     * @return the number of continuation bytes that must follow (0 to 3),
     *         or -1 if the byte cannot start a character
     */
    private static int trailingByteCount(int lead) {
        final int low = lead & 0xFF;

        if (low < 0x80) {
            return 0; // 0xxxxxxx: one byte
        }
        if ((low & 0xE0) == 0xC0) {
            return 1; // 110xxxxx: two bytes
        }
        if ((low & 0xF0) == 0xE0) {
            return 2; // 1110xxxx: three bytes
        }
        if ((low & 0xF8) == 0xF0) {
            return 3; // 11110xxx: four bytes
        }
        return -1; // 10xxxxxx or 11111xxx: not a valid lead byte
    }
}

References

393. UTF-8 Validation