rapidyaml  0.12.0
parse and emit YAML, and do it fast
escape_scalar.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_ESCAPE_SCALAR_HPP_
2 #define _C4_YML_ESCAPE_SCALAR_HPP_
3 
4 #ifndef _C4_YML_COMMON_HPP_
5 #include "c4/yml/common.hpp"
6 #endif
7 
8 namespace c4 {
9 namespace yml {
10 
11 
12 /** Iterate through a scalar and escape special characters in it. This
13  * function takes a callback (which accepts a single parameter of
14  * csubstr type) and, while processing, calls this callback as
15  * appropriate, passing ranges of the scalar and/or escaped
16  * characters.
17  *
18  * @param fn a sink function receiving a csubstr
19  * @param scalar the scalar to be escaped
20  * @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n`
21  *
22  * Example usage:
23  *
24  * ```c++
25  * // escape to stdout
26  * void escape_scalar(FILE *file, csubstr scalar)
27  * {
28  * auto print_ = [](csubstr repl){
29  * fwrite(repl.len, 1, repl.str, file);
30  * };
31  * escape_scalar_fn(std::ref(print_), scalar);
32  * }
33  *
34  * // escape to a different buffer and return the required buffer size
35  * size_t escape_scalar(substr buffer, csubstr scalar)
36  * {
37  * C4_ASSERT(!buffer.overlaps(scalar));
38  * size_t pos = 0;
39  * auto _append = [&](csubstr repl){
40  * if(repl.len && (pos + repl.len <= buffer.len))
41  * memcpy(buffer.str + pos, repl.str, repl.len);
42  * pos += repl.len;
43  * };
44  * escape_scalar_fn(std::ref(_append), scalar);
45  * return pos;
46  * }
47  * ```
48  */
49 template<class Fn>
50 C4_NO_INLINE void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
51 {
52  size_t prev = 0; // the last position that was flushed
53  size_t skip = 0; // how much to add to prev
54  csubstr repl; // replacement string
55  bool newl = false; // to add a newline
56  // cast to u8 to avoid having to deal with negative
57  // signed chars (which are present in some platforms)
58  uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
59  // NOLINTBEGIN(*-goto,bugprone-use-after-move,hicpp-invalid-access-moved)
60  for(size_t i = 0; i < scalar.len; ++i)
61  {
62  switch(s[i])
63  {
64  case UINT8_C(0x0a): // \n
65  repl = "\\n";
66  skip = 1;
67  if(keep_newlines)
68  newl = true;
69  goto flush_now;
70  case UINT8_C(0x5c): // '\\'
71  repl = "\\\\";
72  skip = 1;
73  goto flush_now;
74  case UINT8_C(0x09): // \t
75  repl = "\\t";
76  skip = 1;
77  goto flush_now;
78  case UINT8_C(0x0d): // \r
79  repl = "\\r";
80  skip = 1;
81  goto flush_now;
82  case UINT8_C(0x00): // \0
83  repl = "\\0";
84  skip = 1;
85  goto flush_now;
86  case UINT8_C(0x0c): // \f (form feed)
87  repl = "\\f";
88  skip = 1;
89  goto flush_now;
90  case UINT8_C(0x08): // \b (backspace)
91  repl = "\\b";
92  skip = 1;
93  goto flush_now;
94  case UINT8_C(0x07): // \a (bell)
95  repl = "\\a";
96  skip = 1;
97  goto flush_now;
98  case UINT8_C(0x0b): // \v (vertical tab)
99  repl = "\\v";
100  skip = 1;
101  goto flush_now;
102  case UINT8_C(0x1b): // \e (escape)
103  repl = "\\e";
104  skip = 1;
105  goto flush_now;
106  case UINT8_C(0xc2): // AKA -0x3e
107  if(i+1 < scalar.len)
108  {
109  if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60
110  {
111  repl = "\\_";
112  skip = 2;
113  goto flush_now;
114  }
115  else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b
116  {
117  repl = "\\N";
118  skip = 2;
119  goto flush_now;
120  }
121  }
122  continue;
123  case UINT8_C(0xe2): // AKA -0x1e
124  if(i+2 < scalar.len)
125  {
126  if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
127  {
128  if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58
129  {
130  repl = "\\L";
131  skip = 3;
132  goto flush_now;
133  }
134  else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57
135  {
136  repl = "\\P";
137  skip = 3;
138  goto flush_now;
139  }
140  }
141  }
142  continue;
143  default:
144  continue;
145  }
146  flush_now:
147  std::forward<Fn>(fn)(scalar.range(prev, i));
148  std::forward<Fn>(fn)(repl);
149  if(newl)
150  {
151  std::forward<Fn>(fn)("\n");
152  newl = false;
153  }
154  prev = i + skip;
155  }
156  // flush the rest
157  if(scalar.len > prev)
158  std::forward<Fn>(fn)(scalar.sub(prev));
159  // NOLINTEND(*-goto,bugprone-use-after-move,hicpp-invalid-access-moved)
160 }
161 
162 
163 C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
164 
165 /** Adjust a position in a scalar, increasing it to account for any
166  * escaped characters.
167  *
168  * @note This is a utility/debugging function, so it is provided in
169  * this optional header. For this reason, we inline it to obey to the
170  * One Definition Rule. But then we set the noinline attribute to
171  * ensure they are not inlined in calling code. */
172 inline C4_NO_INLINE size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
173 {
174  // cast to u8 to avoid having to deal with negative
175  // signed chars (which are present in some platforms)
176  uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
177  const size_t newbump = keep_newlines ? 2 : 1;
178  size_t ret = 0;
179  size_t excess = pos > scalar.len ? pos - scalar.len : 0;
180  pos = pos < scalar.len ? pos : scalar.len;
181  for(size_t i = 0; i < pos; ++i)
182  {
183  ++ret;
184  switch(s[i])
185  {
186  case UINT8_C(0x5c): // '\\'
187  case UINT8_C(0x09): // \t
188  case UINT8_C(0x0d): // \r
189  case UINT8_C(0x00): // \0
190  case UINT8_C(0x0c): // \f (form feed)
191  case UINT8_C(0x08): // \b (backspace)
192  case UINT8_C(0x07): // \a (bell)
193  case UINT8_C(0x0b): // \v (vertical tab)
194  case UINT8_C(0x1b): // \e (escape)
195  ++ret; // add the backslash
196  break;
197  case UINT8_C(0x0a): // \n
198  ret += newbump;
199  break;
200  case UINT8_C(0xc2): // AKA -0x3e
201  if(i+1 < scalar.len)
202  {
203  if(s[i+1] == UINT8_C(0xa0) // AKA -0x60 -> \_
204  ||
205  s[i+1] == UINT8_C(0x85)) // AKA -0x7b -> \N
206  {
207  ++ret;
208  ++i; // skip the next entry
209  }
210  }
211  break;
212  case UINT8_C(0xe2): // AKA -0x1e
213  if(i+2 < scalar.len)
214  {
215  if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
216  {
217  if(s[i+2] == UINT8_C(0xa8) // AKA -0x58 -> \L
218  ||
219  s[i+2] == UINT8_C(0xa9)) // AKA -0x57 -> \P
220  {
221  ++ret;
222  i += 2; // skip the next two entries
223  }
224  }
225  }
226  break;
227  default:
228  break;
229  }
230  }
231  return ret + excess;
232 }
233 
234 
235 /** Escape a scalar to an existing buffer, using @ref escape_scalar_fn
236  *
237  * @note This is a utility/debugging function, so it is provided in
238  * this optional header. For this reason, we inline it to obey to the
239  * One Definition Rule. But then we set the noinline attribute to
240  * ensure they are not inlined in calling code. */
241 inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
242 {
243  size_t pos = 0;
244  auto append_ = [&pos, &buffer](csubstr repl){
245  if(repl.len && (pos + repl.len <= buffer.len))
246  memcpy(buffer.str + pos, repl.str, repl.len);
247  pos += repl.len;
248  };
249  escape_scalar_fn(append_, scalar, keep_newlines);
250  return pos;
251 }
252 C4_SUPPRESS_WARNING_GCC_POP
253 
254 
255 /** formatting helper to escape a scalar with @ref escape_scalar() */
257 {
258  escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {}
259  csubstr scalar;
261 };
262 
263 /** formatting implementation to escape a scalar with @ref escape_scalar() */
264 inline C4_NO_INLINE size_t to_chars(substr buf, escaped_scalar e)
265 {
266  return escape_scalar(buf, e.scalar, e.keep_newlines);
267 }
268 /** dumping implementation to escape a scalar with @ref escape_scalar_fn() */
269 template<class SinkPfn>
270 C4_NO_INLINE size_t dump(SinkPfn &&sinkfn, substr buf, escaped_scalar const& e)
271 {
272  (void)buf;
273  C4_ASSERT(!buf.overlaps(e.scalar));
274  escape_scalar_fn(std::forward<SinkPfn>(sinkfn), e.scalar, e.keep_newlines);
275  return 0;
276 }
277 
278 } // namespace yml
279 } // namespace c4
280 
281 #endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */
Common utilities and infrastructure used by ryml.
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
Escape a scalar to an existing buffer, using escape_scalar_fn.
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
Iterate through a scalar and escape special characters in it.
size_t dump(SinkPfn &&sinkfn, substr buf, escaped_scalar const &e)
dumping implementation to escape a scalar with escape_scalar_fn()
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
formatting helper to escape a scalar with escape_scalar()
escaped_scalar(csubstr s, bool keep_newl=false)