rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
escape_scalar.hpp
Go to the documentation of this file.
1#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
2#define _C4_YML_ESCAPE_SCALAR_HPP_
3
4#ifndef _C4_YML_COMMON_HPP_
5#include "c4/yml/common.hpp"
6#endif
7
8namespace c4 {
9namespace yml {
10
11
12/** Iterate through a scalar and escape special characters in it. This
13 * function takes a callback (which accepts a single parameter of
14 * csubstr type) and, while processing, calls this callback as
15 * appropriate, passing ranges of the scalar and/or escaped
16 * characters.
17 *
18 * @param fn a sink function receiving a csubstr
19 * @param scalar the scalar to be escaped
20 * @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n`
21 *
22 * Example usage:
23 *
24 * ```c++
25 * // escape to stdout
26 * void escape_scalar(FILE *file, csubstr scalar)
27 * {
28 * auto print_ = [](csubstr repl){
29 * fwrite(repl.len, 1, repl.str, file);
30 * };
31 * escape_scalar_fn(std::ref(print_), scalar);
32 * }
33 *
34 * // escape to a different buffer and return the required buffer size
35 * size_t escape_scalar(substr buffer, csubstr scalar)
36 * {
37 * C4_ASSERT(!buffer.overlaps(scalar));
38 * size_t pos = 0;
39 * auto _append = [&](csubstr repl){
40 * if(repl.len && (pos + repl.len <= buffer.len))
41 * memcpy(buffer.str + pos, repl.str, repl.len);
42 * pos += repl.len;
43 * };
44 * escape_scalar_fn(std::ref(_append), scalar);
45 * return pos;
46 * }
47 * ```
48 */
49template<class Fn>
50C4_NO_INLINE void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
51{
52 size_t prev = 0; // the last position that was flushed
53 size_t skip = 0; // how much to add to prev
54 csubstr repl; // replacement string
55 bool newl = false; // to add a newline
56 // cast to u8 to avoid having to deal with negative
57 // signed chars (which are present in some platforms)
58 uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
59 // NOLINTBEGIN(*-goto,bugprone-use-after-move,hicpp-invalid-access-moved)
60 for(size_t i = 0; i < scalar.len; ++i)
61 {
62 switch(s[i])
63 {
64 case UINT8_C(0x0a): // \n
65 repl = "\\n";
66 skip = 1;
67 if(keep_newlines)
68 newl = true;
69 goto flush_now;
70 case UINT8_C(0x5c): // '\\'
71 repl = "\\\\";
72 skip = 1;
73 goto flush_now;
74 case UINT8_C(0x09): // \t
75 repl = "\\t";
76 skip = 1;
77 goto flush_now;
78 case UINT8_C(0x0d): // \r
79 repl = "\\r";
80 skip = 1;
81 goto flush_now;
82 case UINT8_C(0x00): // \0
83 repl = "\\0";
84 skip = 1;
85 goto flush_now;
86 case UINT8_C(0x0c): // \f (form feed)
87 repl = "\\f";
88 skip = 1;
89 goto flush_now;
90 case UINT8_C(0x08): // \b (backspace)
91 repl = "\\b";
92 skip = 1;
93 goto flush_now;
94 case UINT8_C(0x07): // \a (bell)
95 repl = "\\a";
96 skip = 1;
97 goto flush_now;
98 case UINT8_C(0x0b): // \v (vertical tab)
99 repl = "\\v";
100 skip = 1;
101 goto flush_now;
102 case UINT8_C(0x1b): // \e (escape)
103 repl = "\\e";
104 skip = 1;
105 goto flush_now;
106 case UINT8_C(0xc2): // AKA -0x3e
107 if(i+1 < scalar.len)
108 {
109 if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60
110 {
111 repl = "\\_";
112 skip = 2;
113 goto flush_now;
114 }
115 else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b
116 {
117 repl = "\\N";
118 skip = 2;
119 goto flush_now;
120 }
121 }
122 continue;
123 case UINT8_C(0xe2): // AKA -0x1e
124 if(i+2 < scalar.len)
125 {
126 if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
127 {
128 if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58
129 {
130 repl = "\\L";
131 skip = 3;
132 goto flush_now;
133 }
134 else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57
135 {
136 repl = "\\P";
137 skip = 3;
138 goto flush_now;
139 }
140 }
141 }
142 continue;
143 default:
144 continue;
145 }
146 flush_now:
147 std::forward<Fn>(fn)(scalar.range(prev, i));
148 std::forward<Fn>(fn)(repl);
149 if(newl)
150 {
151 std::forward<Fn>(fn)("\n");
152 newl = false;
153 }
154 prev = i + skip;
155 }
156 // flush the rest
157 if(scalar.len > prev)
158 std::forward<Fn>(fn)(scalar.sub(prev));
159 // NOLINTEND(*-goto,bugprone-use-after-move,hicpp-invalid-access-moved)
160}
161
162
163C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
164
165/** Adjust a position in a scalar, increasing it to account for any
166 * escaped characters.
167 *
168 * @note This is a utility/debugging function, so it is provided in
169 * this optional header. For this reason, we inline it to obey to the
170 * One Definition Rule. But then we set the noinline attribute to
171 * ensure they are not inlined in calling code. */
172inline C4_NO_INLINE size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
173{
174 // cast to u8 to avoid having to deal with negative
175 // signed chars (which are present in some platforms)
176 uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
177 const size_t newbump = keep_newlines ? 2 : 1;
178 size_t ret = 0;
179 size_t excess = pos > scalar.len ? pos - scalar.len : 0;
180 pos = pos < scalar.len ? pos : scalar.len;
181 for(size_t i = 0; i < pos; ++i)
182 {
183 ++ret;
184 switch(s[i])
185 {
186 case UINT8_C(0x5c): // '\\'
187 case UINT8_C(0x09): // \t
188 case UINT8_C(0x0d): // \r
189 case UINT8_C(0x00): // \0
190 case UINT8_C(0x0c): // \f (form feed)
191 case UINT8_C(0x08): // \b (backspace)
192 case UINT8_C(0x07): // \a (bell)
193 case UINT8_C(0x0b): // \v (vertical tab)
194 case UINT8_C(0x1b): // \e (escape)
195 ++ret; // add the backslash
196 break;
197 case UINT8_C(0x0a): // \n
198 ret += newbump;
199 break;
200 case UINT8_C(0xc2): // AKA -0x3e
201 if(i+1 < scalar.len)
202 {
203 if(s[i+1] == UINT8_C(0xa0) // AKA -0x60 -> \_
204 ||
205 s[i+1] == UINT8_C(0x85)) // AKA -0x7b -> \N
206 {
207 ++ret;
208 ++i; // skip the next entry
209 }
210 }
211 break;
212 case UINT8_C(0xe2): // AKA -0x1e
213 if(i+2 < scalar.len)
214 {
215 if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
216 {
217 if(s[i+2] == UINT8_C(0xa8) // AKA -0x58 -> \L
218 ||
219 s[i+2] == UINT8_C(0xa9)) // AKA -0x57 -> \P
220 {
221 ++ret;
222 i += 2; // skip the next two entries
223 }
224 }
225 }
226 break;
227 default:
228 break;
229 }
230 }
231 return ret + excess;
232}
233
234
235/** Escape a scalar to an existing buffer, using @ref escape_scalar_fn
236 *
237 * @note This is a utility/debugging function, so it is provided in
238 * this optional header. For this reason, we inline it to obey to the
239 * One Definition Rule. But then we set the noinline attribute to
240 * ensure they are not inlined in calling code. */
241inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
242{
243 size_t pos = 0;
244 C4_ASSERT(!buffer.overlaps(scalar));
245 auto append_ = [&pos, &buffer](csubstr repl){
246 if(repl.len && (pos + repl.len <= buffer.len))
247 memcpy(buffer.str + pos, repl.str, repl.len);
248 pos += repl.len;
249 };
250 escape_scalar_fn(append_, scalar, keep_newlines);
251 return pos;
252}
253
254
255/** formatting helper to escape a scalar with @ref escape_scalar_fn() */
257{
258 escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {}
261};
262
263/** formatting implementation to escape a scalar with @ref escape_scalar() */
264inline C4_NO_INLINE size_t to_chars(substr buf, escaped_scalar e)
265{
266 return escape_scalar(buf, e.scalar, e.keep_newlines);
267}
268/** dumping implementation to escape a scalar with @ref escape_scalar_fn() */
269template<class SinkPfn>
270C4_NO_INLINE size_t dump(SinkPfn &&sinkfn, substr buf, escaped_scalar const& e)
271{
272 (void)buf;
273 C4_ASSERT(!buf.overlaps(e.scalar));
274 escape_scalar_fn(std::forward<SinkPfn>(sinkfn), e.scalar, e.keep_newlines);
275 return 0;
276}
277
278C4_SUPPRESS_WARNING_GCC_POP
279
280} // namespace yml
281} // namespace c4
282
283#endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */
Common utilities and infrastructure used by ryml.
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
Escape a scalar to an existing buffer, using escape_scalar_fn.
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
Iterate through a scalar and escape special characters in it.
size_t dump(SinkPfn &&sinkfn, substr buf, escaped_scalar const &e)
dumping implementation to escape a scalar with escape_scalar_fn()
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:520
size_t len
the length of the substring
Definition substr.hpp:218
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition substr.hpp:494
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:503
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
formatting helper to escape a scalar with escape_scalar_fn()
escaped_scalar(csubstr s, bool keep_newl=false)