]>
Commit | Line | Data |
---|---|---|
0bb51450 OM |
1 | /**************************************************************************** |
2 | ** | |
3 | ** Copyright (C) 2017 Intel Corporation | |
4 | ** | |
5 | ** Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | ** of this software and associated documentation files (the "Software"), to deal | |
7 | ** in the Software without restriction, including without limitation the rights | |
8 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 | ** copies of the Software, and to permit persons to whom the Software is | |
10 | ** furnished to do so, subject to the following conditions: | |
11 | ** | |
12 | ** The above copyright notice and this permission notice shall be included in | |
13 | ** all copies or substantial portions of the Software. | |
14 | ** | |
15 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
21 | ** THE SOFTWARE. | |
22 | ** | |
23 | ****************************************************************************/ | |
24 | ||
25 | #ifndef CBOR_UTF8_H | |
26 | #define CBOR_UTF8_H | |
27 | ||
28 | #include "compilersupport_p.h" | |
29 | ||
30 | #include <stdint.h> | |
31 | ||
/**
 * Decodes a single UTF-8 encoded code point from the bytes in [*buffer, end).
 *
 * @param buffer  In/out cursor. On success it is advanced past every byte of
 *                the decoded sequence. On failure it may have been advanced
 *                past the bytes examined so far (the lead byte and any
 *                continuation bytes read up to and including the offending
 *                one), so callers should not resume decoding from it.
 * @param end     One past the last readable byte.
 *
 * @return The decoded code point (at most 0x10FFFF), or ~0U when the input is
 *         empty, truncated, or not well-formed UTF-8: a stray continuation
 *         byte, an invalid lead byte, an overlong encoding, an encoded
 *         surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
 */
static inline uint32_t get_utf8(const uint8_t **buffer, const uint8_t *end)
{
    int charsNeeded;
    int i;
    uint32_t uc, min_uc;
    uint8_t b;
    ptrdiff_t n = end - *buffer;

    /* Nothing to read. "<=" also rejects an inverted range (end before
     * *buffer) instead of reading a byte outside of it. */
    if (n <= 0)
        return ~0U;

    uc = *(*buffer)++;
    if (uc < 0x80) {
        /* single-byte UTF-8 */
        return uc;
    }

    /* multi-byte UTF-8, decode it */

    /* 0x80..0xBF are continuation bytes and cannot start a sequence;
     * 0xC0 and 0xC1 could only start an overlong two-byte encoding. */
    if (unlikely(uc <= 0xC1))
        return ~0U;
    if (uc < 0xE0) {
        /* two-byte UTF-8 */
        charsNeeded = 2;
        min_uc = 0x80;
        uc &= 0x1f;
    } else if (uc < 0xF0) {
        /* three-byte UTF-8 */
        charsNeeded = 3;
        min_uc = 0x800;
        uc &= 0x0f;
    } else if (uc < 0xF5) {
        /* four-byte UTF-8 */
        charsNeeded = 4;
        min_uc = 0x10000;
        uc &= 0x07;
    } else {
        /* 0xF5..0xFF never appear in well-formed UTF-8 */
        return ~0U;
    }

    /* n was measured before the lead byte was consumed, so it is compared
     * against the full sequence length. */
    if (n < charsNeeded)
        return ~0U;

    /* continuation bytes: each must look like 10xxxxxx and adds six bits */
    for (i = 1; i < charsNeeded; ++i) {
        b = *(*buffer)++;
        if ((b & 0xc0) != 0x80)
            return ~0U;
        uc <<= 6;
        uc |= b & 0x3f;
    }

    /* overlong sequence? surrogate pair? out of range? */
    if (uc < min_uc || uc - 0xd800U < 2048U || uc > 0x10ffff)
        return ~0U;

    return uc;
}
103 | ||
104 | #endif /* CBOR_UTF8_H */ |