rapidyaml  0.13.0
parse and emit YAML, and do it fast
node_type.cpp
Go to the documentation of this file.
1 #ifndef _C4_YML_NODE_TYPE_HPP_
2 #include "c4/yml/node_type.hpp"
3 #endif
4 #ifndef _C4_YML_ERROR_HPP_
5 #include "c4/yml/error.hpp"
6 #endif
7 
8 
9 namespace c4 {
10 namespace yml {
11 
12 const char* NodeType::type_str(NodeType_e ty) noexcept
13 {
14  switch(ty & _TYMASK)
15  {
16  case KEYVAL:
17  return "KEYVAL";
18  case KEY:
19  return "KEY";
20  case VAL:
21  return "VAL";
22  case MAP:
23  return "MAP";
24  case SEQ:
25  return "SEQ";
26  case KEYMAP:
27  return "KEYMAP";
28  case KEYSEQ:
29  return "KEYSEQ";
30  case DOCSEQ:
31  return "DOCSEQ";
32  case DOCMAP:
33  return "DOCMAP";
34  case DOCVAL:
35  return "DOCVAL";
36  case DOC:
37  return "DOC";
38  case STREAM:
39  return "STREAM";
40  case NOTYPE:
41  return "NOTYPE";
42  default:
43  if((ty & KEYVAL) == KEYVAL)
44  return "KEYVAL***";
45  if((ty & KEYMAP) == KEYMAP)
46  return "KEYMAP***";
47  if((ty & KEYSEQ) == KEYSEQ)
48  return "KEYSEQ***";
49  if((ty & DOCSEQ) == DOCSEQ)
50  return "DOCSEQ***";
51  if((ty & DOCMAP) == DOCMAP)
52  return "DOCMAP***";
53  if((ty & DOCVAL) == DOCVAL)
54  return "DOCVAL***";
55  if(ty & KEY)
56  return "KEY***";
57  if(ty & VAL)
58  return "VAL***";
59  if(ty & MAP)
60  return "MAP***";
61  if(ty & SEQ)
62  return "SEQ***";
63  if(ty & DOC)
64  return "DOC***";
65  return "(unk)";
66  }
67 }
68 
69 size_t NodeType::type_str(substr buf, NodeType_e flags) noexcept
70 {
71  size_t pos = 0;
72  bool gotone = false;
73 
74  #define _prflag(fl, txt) \
75  do { \
76  if((flags & (fl)) == (fl)) \
77  { \
78  if(gotone) \
79  { \
80  if(pos + 1 < buf.len) \
81  buf[pos] = '|'; \
82  ++pos; \
83  } \
84  csubstr fltxt = txt; \
85  if(pos + fltxt.len <= buf.len) \
86  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
87  pos += fltxt.len; \
88  gotone = true; \
89  flags = (flags & ~(fl)); /*remove the flag*/ \
90  } \
91  } while(0)
92 
93  _prflag(STREAM, "STREAM");
94  _prflag(DOC, "DOC");
95  // key properties
96  _prflag(KEY, "KEY");
97  _prflag(KEYNIL, "KNIL");
98  _prflag(KEYTAG, "KTAG");
99  _prflag(KEYANCH, "KANCH");
100  _prflag(KEYREF, "KREF");
101  _prflag(KEY_LITERAL, "KLITERAL");
102  _prflag(KEY_FOLDED, "KFOLDED");
103  _prflag(KEY_SQUO, "KSQUO");
104  _prflag(KEY_DQUO, "KDQUO");
105  _prflag(KEY_PLAIN, "KPLAIN");
106  _prflag(KEY_UNFILT, "KUNFILT");
107  // val properties
108  _prflag(VAL, "VAL");
109  _prflag(VALNIL, "VNIL");
110  _prflag(VALTAG, "VTAG");
111  _prflag(VALANCH, "VANCH");
112  _prflag(VALREF, "VREF");
113  _prflag(VAL_UNFILT, "VUNFILT");
114  _prflag(VAL_LITERAL, "VLITERAL");
115  _prflag(VAL_FOLDED, "VFOLDED");
116  _prflag(VAL_SQUO, "VSQUO");
117  _prflag(VAL_DQUO, "VDQUO");
118  _prflag(VAL_PLAIN, "VPLAIN");
119  _prflag(VAL_UNFILT, "VUNFILT");
120  // container properties
121  _prflag(MAP, "MAP");
122  _prflag(SEQ, "SEQ");
123  _prflag(FLOW_SL, "FLOWSL");
124  _prflag(FLOW_ML, "FLOWML");
125  _prflag(BLOCK, "BLCK");
126  if(pos == 0)
127  _prflag(NOTYPE, "NOTYPE");
128 
129  #undef _prflag
130 
131  return pos;
132 }
133 
134 
135 //-----------------------------------------------------------------------------
136 
137 // see https://www.yaml.info/learn/quote.html#noplain
138 bool scalar_style_query_squo(csubstr s) noexcept
139 {
140  // cannot have leading whitespace after a newline
141  for(size_t i = 0; i < s.len; ++i)
142  {
143  if(s.str[i] == '\n' && i + 1 < s.len)
144  {
145  char next = s.str[i + 1];
146  if(next == ' ' || next == '\t')
147  return false;
148  }
149  }
150  return true;
151 }
152 
153 namespace {
// true when `c` is a space, a tab, a line feed or a carriage return
bool _is_wsnl(char c) noexcept
{
    switch(c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
158 bool _is_valid_bulk(csubstr s, size_t i)
159 {
160  C4_ASSERT(i >= 1 && i+1 < s.len);
161  C4_ASSERT(s.str[i] == ':' || s.str[i] == '#');
162  switch(s.str[i])
163  {
164  case ':': return !_is_wsnl(s.str[i+1]);
165  case '#': return !_is_wsnl(s.str[i-1]);
166  }
167  C4_UNREACHABLE(); // LCOV_EXCL_LINE
168 }
169 } // namespace
170 // see https://www.yaml.info/learn/quote.html#noplain
171 bool scalar_style_query_plain_flow(csubstr s) noexcept
172 {
173  if(!s.len)
174  return !s.str;
175  // first
176  switch(s.str[0])
177  {
178  case ' ': case '\n': case '\t': case '\r':
179  case '!': case '&': case '*': case ',':
180  case '"': case '\'': case '|': case '>':
181  case '{': case '}': case '[': case ']':
182  case '#': case '`': case '%': case '@':
183  return false;
184  case '-': case ':': case '?':
185  if(s.len == 1 || (s.str[1] == ' ' || s.str[1] == '\t'))
186  return false;
187  break;
188  }
189  // bulk
190  for(size_t i = 1; i + 1 < s.len; ++i)
191  {
192  switch(s.str[i])
193  {
194  case ',': case '{': case '}': case '[': case ']':
195  return false;
196  case ':': case '#':
197  if(!_is_valid_bulk(s, i))
198  return false;
199  break;
200  }
201  }
202  // last
203  if(s.len > 1)
204  {
205  switch(s.back())
206  {
207  case ' ': case '\n': case '\t': case '\r':
208  case ',':
209  case '{': case '}':
210  case '[': case ']':
211  case '#':
212  case ':':
213  return false;
214  }
215  }
216  return true;
217 }
218 
219 bool scalar_style_query_plain_block(csubstr s) noexcept
220 {
221  if(!s.len)
222  return !s.str;
223  // first
224  switch(s.str[0])
225  {
226  case ' ': case '\n': case '\t': case '\r':
227  case '!': case '&': case '*': case ',':
228  case '"': case '\'': case '|': case '>':
229  case '{': case '}': case '[': case ']':
230  case '#': case '`': case '%': case '@':
231  return false;
232  case '-': case ':': case '?':
233  if (s.len == 1 || (s.str[1] == ' ' || s.str[1] == '\t'))
234  return false;
235  break;
236  }
237  // bulk
238  for(size_t i = 1; i + 1 < s.len; ++i)
239  {
240  switch(s.str[i])
241  {
242  case ':': case '#':
243  if(!_is_valid_bulk(s, i))
244  return false;
245  break;
246  }
247  }
248  // last
249  if(s.len > 1)
250  {
251  switch(s.back())
252  {
253  case ' ': case '\n': case '\t': case '\r':
254  case '#':
255  case ':':
256  return false;
257  }
258  }
259  return true;
260 }
261 
263 {
264  if(s.len)
265  {
267  return SCALAR_PLAIN;
268  else if(scalar_style_query_squo(s))
269  return SCALAR_SQUO;
270  return SCALAR_DQUO;
271  }
272  return s.str ? SCALAR_SQUO : SCALAR_PLAIN;
273 }
274 
276 {
277  if(s.len)
278  {
280  return SCALAR_PLAIN;
281  _RYML_ASSERT_BASIC(scalar_style_query_squo(s)
282  && "if this assertion fires, please submit an issue!");
283  return SCALAR_SQUO;
284  }
285  return s.str ? SCALAR_SQUO : SCALAR_PLAIN;
286 }
287 
288 
289 bool scalar_is_null(csubstr s) noexcept
290 {
291  return s.str == nullptr ||
292  (s.len == 1 && (s.str[0] == '~')) ||
293  (s.len == 4 && ((0 == memcmp("null", s.str, 4))
294  || (0 == memcmp("Null", s.str, 4))
295  || (0 == memcmp("NULL", s.str, 4))));
296 }
297 
298 
299 //-----------------------------------------------------------------------------
300 
301 namespace {
302 
303 #define rest_is(c1, c2) ((s.str[1] == (c1)) && (s.str[2] == (c2)))
304 bool is_inf_or_nan(csubstr s) noexcept
305 {
306  _RYML_ASSERT_BASIC(!s.begins_with("-."));
307  _RYML_ASSERT_BASIC(!s.begins_with("+."));
308  _RYML_ASSERT_BASIC(!s.begins_with("."));
309  _RYML_ASSERT_BASIC(s.len == 3);
310  switch(s.str[0])
311  {
312  case 'i': return rest_is('n', 'f');
313  case 'I': return rest_is('n', 'f') || rest_is('N', 'F');
314  case 'n': return rest_is('a', 'n');
315  case 'N': return rest_is('a', 'n') || rest_is('A', 'N') || rest_is('a', 'N');
316  }
317  return false;
318 }
319 bool is_inf(csubstr s) noexcept
320 {
321  _RYML_ASSERT_BASIC(!s.begins_with("-."));
322  _RYML_ASSERT_BASIC(!s.begins_with("+."));
323  _RYML_ASSERT_BASIC(!s.begins_with("."));
324  _RYML_ASSERT_BASIC(s.len == 3);
325  switch(s.str[0])
326  {
327  case 'i': return rest_is('n', 'f');
328  case 'I': return rest_is('n', 'f') || rest_is('N', 'F');
329  }
330  return false;
331 }
332 #undef rest_is
333 
334 bool json_is_plain_number(csubstr s) noexcept
335 {
336  return s.is_number()
337  &&
338  (
339  // quote integral numbers if they have a leading 0
340  // https://github.com/biojppm/rapidyaml/issues/291
341  (!(s.len > 1 && s.begins_with('0')))
342  // do not quote reals with leading 0
343  // https://github.com/biojppm/rapidyaml/issues/313
344  || (s.find('.') != csubstr::npos)
345  );
346 }
347 bool json_is_special_scalar(csubstr s) noexcept
348 {
349  if(s.len == 4)
350  return 0 == memcmp("true", s.str, 4)
351  || 0 == memcmp("null", s.str, 4)
352  || (s[0] == '.' && is_inf_or_nan(s.sub(1)));
353  else if(s.len == 5)
354  return 0 == memcmp("false", s.str, 5)
355  || ((s[0] == '-' || s[0] == '+') && s[1] == '.' && is_inf(s.sub(2)));
356  return false;
357 }
358 } // namespace
360 {
361  // do not quote numbers or special scalars
362  return json_is_plain_number(s) || json_is_special_scalar(s) ? SCALAR_PLAIN : SCALAR_DQUO;
363 }
364 
365 } // namespace yml
366 } // namespace c4
Error utilities used by ryml.
bool scalar_style_query_plain_block(csubstr s) noexcept
query whether a scalar can be encoded using plain style while in block mode.
Definition: node_type.cpp:219
bool scalar_is_null(csubstr s) noexcept
YAML-sense query of nullity.
Definition: node_type.cpp:289
bool scalar_style_query_squo(csubstr s) noexcept
query whether a scalar can be encoded using single quotes.
Definition: node_type.cpp:138
NodeType_e scalar_style_choose_flow(csubstr s) noexcept
choose a YAML scalar style based on the scalar's contents, while in flow mode.
Definition: node_type.cpp:262
NodeType_e scalar_style_choose_block(csubstr s) noexcept
choose a YAML scalar style based on the scalar's contents, while in block mode.
Definition: node_type.cpp:275
bool scalar_style_query_plain_flow(csubstr s) noexcept
query whether a scalar can be encoded using plain style while in flow mode.
Definition: node_type.cpp:171
NodeType_e
a bit mask for marking node types and styles
Definition: node_type.hpp:34
NodeType_e scalar_style_choose_json(csubstr s) noexcept
choose a json scalar style based on the scalar's contents
Definition: node_type.cpp:359
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:46
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:36
@ KEY_DQUO
mark key scalar as double quoted "
Definition: node_type.hpp:69
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:44
@ VALNIL
the val is null (eg {a : } results in a null val)
Definition: node_type.hpp:50
@ MAP
a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
Definition: node_type.hpp:39
@ STREAM
a stream: a seq of docs
Definition: node_type.hpp:42
@ KEY
is member of a map
Definition: node_type.hpp:37
@ VAL_FOLDED
mark val scalar as multiline, block folded >
Definition: node_type.hpp:66
@ KEYTAG
the key has a tag
Definition: node_type.hpp:47
@ SCALAR_SQUO
Definition: node_type.hpp:87
@ FLOW_SL
mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,...
Definition: node_type.hpp:60
@ FLOW_ML
mark container with multi-line flow style (seqs as '[ val1, val2 ], maps as '{ key: val,...
Definition: node_type.hpp:61
@ VAL_UNFILT
the val scalar was left unfiltered; the parser was set not to filter.
Definition: node_type.hpp:56
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:38
@ VALTAG
the val has a tag
Definition: node_type.hpp:48
@ _TYMASK
all the bits up to here
Definition: node_type.hpp:51
@ SEQ
a seq: a parent of VAL/SEQ/MAP nodes
Definition: node_type.hpp:40
@ SCALAR_DQUO
Definition: node_type.hpp:88
@ VAL_SQUO
mark val scalar as single quoted '
Definition: node_type.hpp:68
@ VAL_PLAIN
mark val scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:72
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:43
@ BLOCK
mark container with block style (seqs as '- val ', maps as 'key: val')
Definition: node_type.hpp:62
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:45
@ VAL_DQUO
mark val scalar as double quoted "
Definition: node_type.hpp:70
@ KEY_UNFILT
the key scalar was left unfiltered; the parser was set not to filter.
Definition: node_type.hpp:55
@ KEY_SQUO
mark key scalar as single quoted '
Definition: node_type.hpp:67
@ VAL_LITERAL
mark val scalar as multiline, block literal |
Definition: node_type.hpp:64
@ KEY_LITERAL
mark key scalar as multiline, block literal |
Definition: node_type.hpp:63
@ KEY_PLAIN
mark key scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:71
@ SCALAR_PLAIN
Definition: node_type.hpp:89
@ KEY_FOLDED
mark key scalar as multiline, block folded >
Definition: node_type.hpp:65
@ KEYNIL
the key is null (eg { : b} results in a null key)
Definition: node_type.hpp:49
@ DOC
a document
Definition: node_type.hpp:41
@ npos
a null string position
Definition: common.hpp:258
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define rest_is(c1, c2)
Definition: node_type.cpp:303
#define _prflag(fl, txt)
const char * type_str() const noexcept
return a preset string based on the node type
Definition: node_type.hpp:154