rapidyaml  0.9.0
parse and emit YAML, and do it fast
utf.hpp
Go to the documentation of this file.
1 #ifndef C4_UTF_HPP_
2 #define C4_UTF_HPP_
3 
4 #include "c4/language.hpp"
5 #include "c4/substr_fwd.hpp"
6 #include <stddef.h>
7 #include <stdint.h>
8 
9 /** @file utf.hpp utilities for UTF and Byte Order Mark */
10 
11 namespace c4 {
12 
13 /** @defgroup doc_utf UTF utilities
14  * @{ */
15 
16 
17 /** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
18  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
19 C4CORE_EXPORT substr skip_bom(substr s);
20 /** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark
21  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
22 C4CORE_EXPORT csubstr skip_bom(csubstr s);
23 
24 
25 /** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
26  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
27 C4CORE_EXPORT substr get_bom(substr s);
28 /** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
29  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
30 C4CORE_EXPORT csubstr get_bom(csubstr s);
31 
32 
33 /** return the position of the first character not belonging to the
34  * Byte Order Mark, or 0 if there is no Byte Order Mark.
35  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
36 C4CORE_EXPORT size_t first_non_bom(csubstr s);
37 
38 
39 /** decode the given @p code_point, writing into the output string in
40  * @p out.
41  *
42  * @param out the output string. must have at least 4 bytes (this is
43  * asserted), and must not be a null string.
44  *
45  * @param code_point must have length in ]0,8], and must not begin
46  * with any of `U+`,`\\x`,`\\u`,`\\U`,`0` (asserted)
47  *
48  * @return the part of @p out that was written, which will always be
49  * at most 4 bytes.
50  */
51 C4CORE_EXPORT substr decode_code_point(substr out, csubstr code_point);
52 
53 /** decode the given code point @p code, writing into the output string @p
54  * buf, of size @p buflen
55  *
56  * @param buf the output string. must have at least 4 bytes (this is
57  * asserted), and must not be null
58  *
59  * @param buflen the length of the output string. must be at least 4
60  *
61  * @param code the code point must have length in ]0,8], and must not begin
62  * with any of `U+`,`\\x`,`\\u`,`\\U`,`0` (asserted)
63  *
64  * @return the number of bytes written into @p buf, which will always be
65  * at most 4.
66  */
67 size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, uint32_t code);
68 
69 /** @} */
70 
71 } // namespace c4
72 
73 #endif // C4_UTF_HPP_
substr skip_bom(substr s)
skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
substr get_bom(substr s)
get the Byte Order Mark, or an empty string if there is no Byte Order Mark
size_t first_non_bom(csubstr s)
return the position of the first character not belonging to the Byte Order Mark, or 0 if there is no ...
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
Definition: common.cpp:12