rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
utf.hpp
Go to the documentation of this file.
1#ifndef C4_UTF_HPP_
2#define C4_UTF_HPP_
3
4#include "c4/language.hpp"
5#include "c4/substr_fwd.hpp"
6#include "c4/export.hpp"
7#include <stddef.h>
8#include <stdint.h>
9
10/** @file utf.hpp utilities for UTF and Byte Order Mark */
11
12namespace c4 {
13
14/** @defgroup doc_utf UTF utilities
15 * @{ */
16
17
18/** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
19 * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
20C4CORE_EXPORT substr skip_bom(substr s);
21/** skip the Byte Order Mark, or get the full string if there is no Byte Order Mark
22 * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
23C4CORE_EXPORT csubstr skip_bom(csubstr s);
24
25
26/** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
27 * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
28C4CORE_EXPORT substr get_bom(substr s);
29/** get the Byte Order Mark, or an empty string if there is no Byte Order Mark
30 * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
31C4CORE_EXPORT csubstr get_bom(csubstr s);
32
33
34/** return the position of the first character not belonging to the
35 * Byte Order Mark, or 0 if there is no Byte Order Mark.
36 * @see Implements the Byte Order Marks as described in https://en.wikipedia.org/wiki/Byte_order_mark#Byte-order_marks_by_encoding */
37C4CORE_EXPORT size_t first_non_bom(csubstr s);
38
39
40/** decode the given @p code_point, writing into the output string in
41 * @p out.
42 *
43 * @param out the output string. must have at least 4 bytes (this is
44 * asserted), and must not be a null string.
45 *
46 * @param code_point: must have length in ]0,8], and must not begin
47 * with any of @verbatim `U+`,`\x`,`\u`,`\U`,`0` @endverbatim (asserted)
48 *
49 * @return the part of @p out that was written, which will always be
50 * at most 4 bytes.
51 */
52C4CORE_EXPORT substr decode_code_point(substr out, csubstr code_point);
53
54/** decode the given code point @p code, writing into the output string
55 * @p buf, of size @p buflen
56 *
57 * @param buf the output string. must have at least 4 bytes (this is
58 * asserted), and must not be null
59 *
60 * @param buflen the length of the output string. must be at least 4
61 *
62 * @param code the numeric value of the code point. Unlike the string
63 * overload, there is no textual length or prefix (@verbatim `U+`,`\x`,`\u`,`\U`,`0` @endverbatim) to check.
64 *
65 * @return the number of written characters, which will always be
66 * at most 4 bytes.
67 */
68C4CORE_EXPORT size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, uint32_t code);
69
70/** @} */
71
72} // namespace c4
73
74#endif // C4_UTF_HPP_
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
substr skip_bom(substr s)
skip the Byte Order Mark, or get the full string if there is no Byte Order Mark.
substr get_bom(substr s)
get the Byte Order Mark, or an empty string if there is no Byte Order Mark
size_t first_non_bom(csubstr s)
return the position of the first character not belonging to the Byte Order Mark, or 0 if there is no Byte Order Mark.
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14