-
Notifications
You must be signed in to change notification settings - Fork 55
/
unicode.h
83 lines (73 loc) · 1.84 KB
/
unicode.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <Arduino.h>
#ifndef PORTALCALENDAR_UTF8ITERATOR_H
#define PORTALCALENDAR_UTF8ITERATOR_H
/**
* A very simple UTF8 iterator based on https://github.com/nemtrif/utfcpp
*
* Only handles code points in the basic multilingual plane, and does not validate fully against invalid sequences.
*/
class Utf8Iterator
{
public:
Utf8Iterator(String str)
{
_str = str;
_pos = 0;
}
uint16_t next()
{
if (_pos >= _str.length()) {
return 0;
}
uint8_t len = sequenceLength(_str[_pos]);
uint16_t cp = _str[_pos];
switch (len) {
case 1:
++_pos;
return cp;
case 2:
cp = ((cp << 6) & 0x7FF) + (_str[++_pos] & 0x3F);
++_pos;
return cp;
case 3:
cp = ((cp << 12) & 0xFFFF) + ((_str[++_pos] << 6) & 0xFFF);
cp += _str[++_pos] & 0x3F;
++_pos;
return cp;
case 4:
// Code points over U+FFFF are not supported
_pos += 4;
return 0xFFFD;
default:
return 0;
}
}
unsigned int getCurrentPosition()
{
return _pos;
}
private:
unsigned int _pos;
String _str;
static uint8_t sequenceLength(uint8_t lead)
{
if (lead < 0x80) {
return 1;
} else if ((lead >> 5) == 0x6) {
return 2;
} else if ((lead >> 4) == 0xE) {
return 3;
} else if ((lead >> 3) == 0x1E) {
return 4;
} else {
return 0;
}
}
};
/**
 * Tells whether cp is a horizontal whitespace code point: U+0009 (tab),
 * U+0020 (space) or U+00A0 (no-break space).
 *
 * Declared inline because it is defined in a header: without it, including
 * the header from more than one translation unit produces duplicate symbols.
 */
inline bool isSpaceCodePoint(uint16_t cp)
{
    return cp == 0x9 || cp == 0x20 || cp == 0xA0;
}
/**
 * Tells whether cp is the line feed code point, U+000A.
 *
 * Declared inline because it is defined in a header: without it, including
 * the header from more than one translation unit produces duplicate symbols.
 */
inline bool isNewlineCodePoint(uint16_t cp)
{
    return cp == 0xA;
}
#endif // PORTALCALENDAR_UTF8ITERATOR_H