rapidyaml  0.12.1
parse and emit YAML, and do it fast
utf.hpp
Go to the documentation of this file.
1 #ifndef C4_UTF_HPP_
2 #define C4_UTF_HPP_
3 
4 #include "c4/language.hpp"
5 #include "c4/substr_fwd.hpp"
6 #include "c4/export.hpp"
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 /** @file utf.hpp utilities for UTF and Byte Order Mark */
11 
12 namespace c4 {
13 
14 /** @defgroup doc_utf UTF utilities
15  * @{ */
16 
17 
18 /** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
19  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
20 C4CORE_EXPORT substr skip_bom(substr s);
21 /** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark
22  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
23 C4CORE_EXPORT csubstr skip_bom(csubstr s);
24 
25 
26 /** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
27  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
28 C4CORE_EXPORT substr get_bom(substr s);
29 /** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
30  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
31 C4CORE_EXPORT csubstr get_bom(csubstr s);
32 
33 
34 /** return the position of the first character not belonging to the
35  * Byte Order Mark, or 0 if there is no Byte Order Mark.
36  * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
37 C4CORE_EXPORT size_t first_non_bom(csubstr s);
38 
39 
40 /** decode the given @p code_point, writing into the output string in
41  * @p out.
42  *
43  * @param out the output string. must have at least 4 bytes (this is
44  * asserted), and must not be a null string.
45  *
46  * @param code_point must have length in ]0,8], and must not begin
47  * with any of @verbatim `U+`,`\x`,`\u`,`\U`,`0` @endverbatim (asserted)
48  *
49  * @return the part of @p out that was written, which will always be
50  * at most 4 bytes.
51  */
52 C4CORE_EXPORT substr decode_code_point(substr out, csubstr code_point);
53 
54 /** decode the given @p code point, writing into the output string @p
55  * buf, of size @p buflen
56  *
57  * @param buf the output string. must have at least 4 bytes (this is
58  * asserted), and must not be null
59  *
60  * @param buflen the length of the output string. must be at least 4
61  *
62  * @param code the code point to decode, given as its numeric (integer)
63  * value rather than as a string
64  *
65  * @return the number of bytes written, which will always be
66  * at most 4.
67  */
68 C4CORE_EXPORT size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, uint32_t code);
69 
70 /** @} */
71 
72 } // namespace c4
73 
74 #endif // C4_UTF_HPP_
substr skip_bom(substr s)
skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
substr get_bom(substr s)
get the Byte Order Mark, or an empty string if there is no Byte Order Mark
size_t first_non_bom(csubstr s)
return the position of the first character not belonging to the Byte Order Mark, or 0 if there is no ...
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14