rapidyaml  0.11.1
parse and emit YAML, and do it fast
escape_scalar.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_ESCAPE_SCALAR_HPP_
2 #define _C4_YML_ESCAPE_SCALAR_HPP_
3 
4 #ifndef _C4_YML_COMMON_HPP_
5 #include "c4/yml/common.hpp"
6 #endif
7 
8 namespace c4 {
9 namespace yml {
10 
11 
12 /** Iterate through a scalar and escape special characters in it. This
13  * function takes a callback (which accepts a single parameter of
14  * csubstr type) and, while processing, calls this callback as
15  * appropriate, passing ranges of the scalar and/or escaped
16  * characters.
17  *
18  * @param fn a sink function receiving a csubstr
19  * @param scalar the scalar to be escaped
20  * @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n`
21  *
22  * Example usage:
23  *
24  * ```c++
25  * // escape to stdout
26  * void escape_scalar(FILE *file, csubstr scalar)
27  * {
28  * auto print_ = [](csubstr repl){
29  * fwrite(repl.len, 1, repl.str, file);
30  * };
31  * escape_scalar_fn(std::ref(print_), scalar);
32  * }
33  *
34  * // escape to a different buffer and return the required buffer size
35  * size_t escape_scalar(substr buffer, csubstr scalar)
36  * {
37  * C4_ASSERT(!buffer.overlaps(scalar));
38  * size_t pos = 0;
39  * auto _append = [&](csubstr repl){
40  * if(repl.len && (pos + repl.len <= buffer.len))
41  * memcpy(buffer.str + pos, repl.str, repl.len);
42  * pos += repl.len;
43  * };
44  * escape_scalar_fn(std::ref(_append), scalar);
45  * return pos;
46  * }
47  * ```
48  */
49 template<class Fn>
50 void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
51 {
52  size_t prev = 0; // the last position that was flushed
53  size_t skip = 0; // how much to add to prev
54  csubstr repl; // replacement string
55  bool newl = false; // to add a newline
56  // cast to u8 to avoid having to deal with negative
57  // signed chars (which are present some platforms)
58  uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
59  // NOLINTBEGIN(*-goto)
60  for(size_t i = 0; i < scalar.len; ++i)
61  {
62  switch(s[i])
63  {
64  case UINT8_C(0x0a): // \n
65  repl = "\\n";
66  skip = 1;
67  if(keep_newlines)
68  newl = true;
69  goto flush_now;
70  case UINT8_C(0x5c): // '\\'
71  repl = "\\\\";
72  skip = 1;
73  goto flush_now;
74  case UINT8_C(0x09): // \t
75  repl = "\\t";
76  skip = 1;
77  goto flush_now;
78  case UINT8_C(0x0d): // \r
79  repl = "\\r";
80  skip = 1;
81  goto flush_now;
82  case UINT8_C(0x00): // \0
83  repl = "\\0";
84  skip = 1;
85  goto flush_now;
86  case UINT8_C(0x0c): // \f (form feed)
87  repl = "\\f";
88  skip = 1;
89  goto flush_now;
90  case UINT8_C(0x08): // \b (backspace)
91  repl = "\\b";
92  skip = 1;
93  goto flush_now;
94  case UINT8_C(0x07): // \a (bell)
95  repl = "\\a";
96  skip = 1;
97  goto flush_now;
98  case UINT8_C(0x0b): // \v (vertical tab)
99  repl = "\\v";
100  skip = 1;
101  goto flush_now;
102  case UINT8_C(0x1b): // \e (escape)
103  repl = "\\e";
104  skip = 1;
105  goto flush_now;
106  case UINT8_C(0xc2): // AKA -0x3e
107  if(i+1 < scalar.len)
108  {
109  if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60
110  {
111  repl = "\\_";
112  skip = 2;
113  goto flush_now;
114  }
115  else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b
116  {
117  repl = "\\N";
118  skip = 2;
119  goto flush_now;
120  }
121  }
122  continue;
123  case UINT8_C(0xe2): // AKA -0x1e
124  if(i+2 < scalar.len)
125  {
126  if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
127  {
128  if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58
129  {
130  repl = "\\L";
131  skip = 3;
132  goto flush_now;
133  }
134  else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57
135  {
136  repl = "\\P";
137  skip = 3;
138  goto flush_now;
139  }
140  }
141  }
142  continue;
143  default:
144  continue;
145  }
146  flush_now:
147  std::forward<Fn>(fn)(scalar.range(prev, i));
148  std::forward<Fn>(fn)(repl);
149  if(newl)
150  {
151  std::forward<Fn>(fn)("\n");
152  newl = false;
153  }
154  prev = i + skip;
155  }
156  // flush the rest
157  if(scalar.len > prev)
158  std::forward<Fn>(fn)(scalar.sub(prev));
159  // NOLINTEND(*-goto)
160 }
161 
162 
163 C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
164 /** Escape a scalar to an existing buffer, using @ref escape_scalar_fn
165  *
166  * @note This is a utility/debugging functions, so it is provided in this
167  * (optional) header. For this reason, we inline it to obey to the
168  * One-Definition Rule. But then we set the noinline attribute to
169  * ensure they are not inlined in calling code. */
170 inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
171 {
172  size_t pos = 0;
173  auto _append = [&pos, &buffer](csubstr repl){
174  if(repl.len && (pos + repl.len <= buffer.len))
175  memcpy(buffer.str + pos, repl.str, repl.len);
176  pos += repl.len;
177  };
178  escape_scalar_fn(_append, scalar, keep_newlines);
179  return pos;
180 }
181 C4_SUPPRESS_WARNING_GCC_POP
182 
183 
184 /** formatting helper to escape a scalar with @ref escape_scalar()x */
186 {
187  escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {}
188  csubstr scalar;
190 };
191 
192 /** formatting implementation to escape a scalar with @ref escape_scalar()x */
193 inline size_t to_chars(substr buf, escaped_scalar e)
194 {
195  return escape_scalar(buf, e.scalar, e.keep_newlines);
196 }
197 
198 
199 } // namespace yml
200 } // namespace c4
201 
202 #endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */
Common utilities and infrastructure used by ryml.
size_t to_chars(substr buf, escaped_scalar e)
Formatting implementation to escape a scalar with escape_scalar().
size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
Escape a scalar to an existing buffer, using escape_scalar_fn.
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
Iterate through a scalar and escape special characters in it.
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
Formatting helper to escape a scalar with escape_scalar().
escaped_scalar(csubstr s, bool keep_newl=false)