rapidyaml  0.13.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
6 #endif
7 #ifndef _C4_CHARCONV_HPP_
8 #include "c4/charconv.hpp"
9 #endif
10 #ifndef C4_UTF_HPP_
11 #include "c4/utf.hpp"
12 #endif
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
15 #endif
16 #ifndef _C4_YML_TAG_HPP_
17 #include "c4/yml/tag.hpp"
18 #endif
19 #ifndef _C4_YML_NODE_TYPE_HPP_
20 #include "c4/yml/node_type.hpp"
21 #endif
22 
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
25 #endif
26 
27 #ifdef RYML_DBG
28 #ifndef C4_DUMP_HPP_
29 #include <c4/dump.hpp>
30 #endif
31 #define _c4err(...) \
32  do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33 #else
34 #define _c4err(...) \
35  this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36 #endif
37 #define _c4assert(...) \
38  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39 
40 
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
45 #else
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
49 #endif
50 
51 // helper to export cases to the YAML test suite
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
55 #else
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
58 namespace c4 {
59 namespace yml {
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
62 } // namespace yml
63 } // namespace c4
64 #endif
65 
66 
67 // scaffold:
68 #define _c4dbgnextline() \
69  do { \
70  _c4dbgq("\n-----------"); \
71  _c4dbgt("handling line={}, offset={}B", \
72  m_evt_handler->m_curr->pos.line, \
73  m_evt_handler->m_curr->pos.offset); \
74  } while(0)
75 
76 
77 C4_SUPPRESS_WARNING_MSVC_PUSH
78 C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
79 C4_SUPPRESS_WARNING_MSVC(4702) // unreachable code
80 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81 C4_SUPPRESS_WARNING_GCC_CLANG("-Wtype-limits") // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
82 C4_SUPPRESS_WARNING_GCC_CLANG("-Wformat-nonliteral")
83 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
84 #if defined(__GNUC__) && (__GNUC__ >= 6)
85 C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
86 #endif
87 #if defined(__GNUC__) && (__GNUC__ >= 7)
88 C4_SUPPRESS_WARNING_GCC("-Wduplicated-branches")
89 #endif
90 
91 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
92 
93 namespace c4 {
94 namespace yml {
95 
96 namespace { // NOLINT
97 
98 C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
99 {
100  // avoids reassigning the ptr in substr
101  subject.len = pos != npos ? pos : subject.len;
102 }
103 C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
104 {
105  // avoids reassigning the ptr in substr
106  subject.len = pos != npos ? pos : subject.len;
107 }
108 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
109 {
110  // avoids reassigning the ptr in substr
111  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
112  subject.len = pos;
113 }
114 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
115 {
116  // avoids reassigning the ptr in substr
117  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
118  subject.len = pos;
119 }
120 
121 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
122 {
123  _RYML_ASSERT_BASIC(s.len > 0);
124  _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
125  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
126 }
127 
128 C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
129 {
130  return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))));
131 }
132 
133 inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
134 {
135  _RYML_ASSERT_BASIC(s.begins_with('-'));
136  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
137  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
138  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
139  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
140 }
141 
142 inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
143 {
144  _RYML_ASSERT_BASIC(s.begins_with('.'));
145  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
146  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
147  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
148  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
149 }
150 
151 inline bool _is_doc_token(csubstr s) noexcept
152 {
153  if(s.len >= 3)
154  {
155  switch(s.str[0])
156  {
157  case '-':
158  //return _is_doc_begin_token(s); // this was failing with gcc -O2
159  return (s.str[1] == '-' && s.str[2] == '-')
160  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
161  case '.':
162  //return _is_doc_end_token(s); // this was failing with gcc -O2
163  return (s.str[1] == '.' && s.str[2] == '.')
164  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
165  }
166  }
167  return false;
168 }
169 
170 inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
171 {
172  _RYML_ASSERT_BASIC(s.len);
173  switch(s.str[0])
174  {
175  case 'f':
176  return s.begins_with("false") ? 5u : 0u;
177  case 't':
178  return s.begins_with("true") ? 4u : 0u;
179  case 'n':
180  return s.begins_with("null") ? 4u : 0u;
181  }
182  return 0u;
183 }
184 
185 
186 //-----------------------------------------------------------------------------
187 
188 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
189 {
190  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
191 }
192 
193 //! look for the next newline chars, and jump to the right of those
194 inline substr _from_next_line(substr rem)
195 {
196  size_t nlpos = rem.first_of("\r\n");
197  if(nlpos == csubstr::npos)
198  return {};
199  const char nl = rem[nlpos];
200  rem = rem.right_of(nlpos);
201  if(rem.empty())
202  return {};
203  if(_extend_from_combined_newline(nl, rem.front()))
204  rem = rem.sub(1);
205  return rem;
206 }
207 
208 
209 //-----------------------------------------------------------------------------
210 
211 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
212 {
213  _RYML_ASSERT_BASIC(r[*i] == '\n');
214  size_t numnl_following = 0;
215  ++(*i);
216  for( ; *i < r.len; ++(*i))
217  {
218  if(r.str[*i] == '\n')
219  ++numnl_following;
220  // skip leading whitespace
221  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
222  ;
223  else
224  break;
225  }
226  return numnl_following;
227 }
228 
229 /** @p i is set to the first non whitespace character after the line
230  * @return the number of empty lines after the initial position */
231 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
232 {
233  _RYML_ASSERT_BASIC(r[*i] == '\n');
234  size_t numnl_following = 0;
235  ++(*i);
236  if(indentation == 0)
237  {
238  for( ; *i < r.len; ++(*i))
239  {
240  const char c = r.str[*i];
241  if(c == '\n')
242  ++numnl_following;
243  // skip leading whitespace
244  else if(c != ' ' && c != '\t' && c != '\r')
245  break;
246  }
247  }
248  else
249  {
250  for( ; *i < r.len; ++(*i))
251  {
252  char c = r.str[*i];
253  if(c == '\n')
254  {
255  ++numnl_following;
256  // skip the indentation after the newline
257  size_t stop = *i + indentation;
258  for( ; *i < r.len; ++(*i))
259  {
260  c = r.str[*i];
261  if(c != ' ' && c != '\r')
262  break;
263  _RYML_ASSERT_BASIC(*i < stop); // LCOV_EXCL_LINE
264  }
265  C4_UNUSED(stop);
266  }
267  // skip leading whitespace
268  else if(c != ' ' && c != '\t' && c != '\r')
269  {
270  break;
271  }
272  }
273  }
274  return numnl_following;
275 }
276 
277 } // anon namespace
278 
279 
280 //-----------------------------------------------------------------------------
281 //-----------------------------------------------------------------------------
282 //-----------------------------------------------------------------------------
283 
284 template<class EventHandler>
286 {
287  _free();
288  _clr();
289 }
290 
291 template<class EventHandler>
293  : m_options(opts)
294  , m_evt_handler(evt_handler)
295  , m_pending_anchors()
296  , m_pending_tags()
297  , m_has_directives_yaml(false)
298  , m_has_directives(false)
299  , m_doc_empty(true)
300  , m_prev_colon(npos)
301  , m_prev_val_end(npos)
302  , m_encoding(NOBOM)
303  , m_newline_offsets()
304  , m_newline_offsets_size(0)
305  , m_newline_offsets_capacity(0)
306 {
307  _RYML_CHECK_BASIC(evt_handler);
308 }
309 
310 template<class EventHandler>
312  : m_options(that.m_options)
313  , m_evt_handler(that.m_evt_handler)
314  , m_pending_anchors(that.m_pending_anchors)
315  , m_pending_tags(that.m_pending_tags)
316  , m_has_directives_yaml(that.m_has_directives_yaml)
317  , m_has_directives(that.m_has_directives)
318  , m_doc_empty(that.m_doc_empty)
319  , m_prev_colon(npos)
320  , m_prev_val_end(npos)
321  , m_encoding(NOBOM)
322  , m_newline_offsets(that.m_newline_offsets)
323  , m_newline_offsets_size(that.m_newline_offsets_size)
324  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
325 {
326  that._clr();
327 }
328 
329 template<class EventHandler>
331  : m_options(that.m_options)
332  , m_evt_handler(that.m_evt_handler)
333  , m_pending_anchors(that.m_pending_anchors)
334  , m_pending_tags(that.m_pending_tags)
335  , m_has_directives_yaml(that.m_has_directives_yaml)
336  , m_has_directives(that.m_has_directives)
337  , m_doc_empty(that.m_doc_empty)
338  , m_prev_colon(npos)
339  , m_prev_val_end(npos)
340  , m_encoding(NOBOM)
341  , m_newline_offsets()
342  , m_newline_offsets_size()
343  , m_newline_offsets_capacity()
344 {
345  if(that.m_newline_offsets_capacity)
346  {
347  _resize_locations(that.m_newline_offsets_capacity);
348  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350  m_newline_offsets_size = that.m_newline_offsets_size;
351  }
352 }
353 
354 template<class EventHandler>
356 {
357  _free();
358  m_options = (that.m_options);
359  m_evt_handler = that.m_evt_handler;
360  m_pending_anchors = that.m_pending_anchors;
361  m_pending_tags = that.m_pending_tags;
362  m_has_directives_yaml = that.m_has_directives_yaml;
363  m_has_directives = that.m_has_directives;
364  m_doc_empty = that.m_doc_empty;
365  m_prev_colon = that.m_prev_colon;
366  m_prev_val_end = that.m_prev_val_end;
367  m_encoding = that.m_encoding;
368  m_newline_offsets = (that.m_newline_offsets);
369  m_newline_offsets_size = (that.m_newline_offsets_size);
370  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
371  that._clr();
372  return *this;
373 }
374 
375 template<class EventHandler>
377 {
378  if(&that != this)
379  {
380  _free();
381  m_options = (that.m_options);
382  m_evt_handler = that.m_evt_handler;
383  m_pending_anchors = that.m_pending_anchors;
384  m_pending_tags = that.m_pending_tags;
385  m_has_directives_yaml = that.m_has_directives_yaml;
386  m_has_directives = that.m_has_directives;
387  m_doc_empty = that.m_doc_empty;
388  m_prev_colon = that.m_prev_colon;
389  m_prev_val_end = that.m_prev_val_end;
390  m_encoding = that.m_encoding;
391  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392  _resize_locations(that.m_newline_offsets_capacity);
393  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
395  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
396  m_newline_offsets_size = that.m_newline_offsets_size;
397  }
398  return *this;
399 }
400 
401 template<class EventHandler>
403 {
404  m_options = {};
405  m_evt_handler = {};
406  m_pending_anchors = {};
407  m_pending_tags = {};
408  m_has_directives_yaml = false;
409  m_has_directives = false;
410  m_doc_empty = true;
411  m_prev_colon = npos;
412  m_prev_val_end = npos;
413  m_encoding = NOBOM;
414  m_newline_offsets = {};
415  m_newline_offsets_size = {};
416  m_newline_offsets_capacity = {};
417 }
418 
419 template<class EventHandler>
420 void ParseEngine<EventHandler>::_free()
421 {
422  if(m_newline_offsets)
423  {
424  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
425  m_newline_offsets = nullptr;
426  m_newline_offsets_size = 0u;
427  m_newline_offsets_capacity = 0u;
428  }
429 }
430 
431 
432 //-----------------------------------------------------------------------------
433 
434 template<class EventHandler>
435 void ParseEngine<EventHandler>::_reset()
436 {
437  m_pending_anchors = {};
438  m_pending_tags = {};
439  m_has_directives_yaml = false;
440  m_has_directives = false;
441  m_doc_empty = true;
442  m_prev_colon = npos;
443  m_prev_val_end = npos;
444  m_bom_len = 0;
445  m_encoding = NOBOM;
446  m_bom_line = 0;
447  if(m_options.locations())
448  {
449  _prepare_locations();
450  }
451 }
452 
453 
454 //-----------------------------------------------------------------------------
455 
456 template<class EventHandler>
457 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
458 {
459  _c4dbgp("relocate to new arena");
460  const char *pb = prev_arena.str;
461  const char *pe = prev_arena.str + prev_arena.len;
462  #define _ryml_relocate(s) \
463  if((s).str >= pb && (s).str <= pe) \
464  { \
465  (s).str = next_arena.str + ((s).str - pb); \
466  }
467  for(ParserState &st : m_evt_handler->m_stack)
468  {
469  _ryml_relocate(st.line_contents.rem);
470  _ryml_relocate(st.line_contents.full);
471  }
472  _ryml_relocate(m_evt_handler->m_src);
473  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
474  {
475  _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
476  _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
477  }
478  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
479  {
480  _ryml_relocate(m_pending_anchors.annotations[i].str);
481  _ryml_relocate(m_pending_anchors.annotations[i].orig);
482  }
483  {
484  TagDirectives &tds = m_evt_handler->tag_directives();
485  for(size_t i = 0, sz = tds.size(); i < sz; ++i)
486  {
487  _ryml_relocate(tds.m_directives[i].handle);
488  _ryml_relocate(tds.m_directives[i].prefix);
489  }
490  }
491  {
492  TagCache &tch = m_evt_handler->tag_cache();
493  for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
494  {
495  _ryml_relocate(tch.m_entries[i].tag);
496  _ryml_relocate(tch.m_entries[i].resolved);
497  }
498  }
499  if(other)
500  {
501  _ryml_relocate(*other);
502  }
503  #undef _ryml_relocate
504 }
505 
506 /** @cond dev */
507 template<class EventHandler>
508 substr ParseEngine<EventHandler>::_alloc_arena(size_t len, substr *other)
509 {
510  csubstr prev = m_evt_handler->arena();
511  substr out = m_evt_handler->alloc_arena(len);
512  substr curr = m_evt_handler->arena();
513  if(curr.str != prev.str)
514  _relocate_arena(prev, curr, other);
515  return out;
516 }
517 /** @endcond */
518 
519 
520 //-----------------------------------------------------------------------------
521 
522 #ifdef RYML_DBG
523 template<class EventHandler>
524 template<class DumpFn>
525 C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
526 {
527  ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
528  LineContents const& C4_RESTRICT lc = st->line_contents;
529  csubstr contents = lc.full.first(lc.num_cols);
530  if(contents.len)
531  {
532  // print the yaml src line
533  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
534  csubstr m_file = m_evt_handler->m_curr->pos.name;
535  if(m_file.len)
536  {
537  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
538  offs += m_file.len + 1;
539  }
540  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
541  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
542  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
543  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
544  // highlight the remaining portion of the previous line
545  size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
546  size_t lastcol = firstcol + lc.rem.len;
547  size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
548  size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
549  for(size_t i = 0; i < offs + firstcol_adj; ++i)
550  std::forward<DumpFn>(dumpfn)(" ");
551  std::forward<DumpFn>(dumpfn)("^");
552  for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
553  std::forward<DumpFn>(dumpfn)("~");
554  _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
555  }
556  else
557  {
558  std::forward<DumpFn>(dumpfn)("\n");
559  }
560  // next line: print the state flags
561  {
562  char flagbuf_[128];
563  _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
564  }
565 }
566 
567 template<class EventHandler>
568 void ParseEngine<EventHandler>::_print_state_stack(substr buf) const
569 {
570  if(_dbg_enabled())
571  {
572  for(ParserState const& s : m_evt_handler->m_stack)
573  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
574  }
575 }
576 
577 template<class EventHandler>
578 void ParseEngine<EventHandler>::_print_state_stack() const
579 {
580  char buf[128];
581  _print_state_stack(buf);
582 }
583 #endif
584 
585 
586 //-----------------------------------------------------------------------------
587 
588 template<class EventHandler>
589 template<class ...Args>
590 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
591 {
592  m_evt_handler->cancel_parse();
593  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
594 }
595 
596 template<class EventHandler>
597 template<class ...Args>
598 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
599 {
600  m_evt_handler->cancel_parse();
601  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
602 }
603 
604 
605 //-----------------------------------------------------------------------------
606 #ifdef RYML_DBG
607 template<class EventHandler>
608 template<class ...Args>
609 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
610 {
611  if(_dbg_enabled())
612  {
613  _dbg_printf(fmt, args...);
614  _dbg_dumper("\n");
615  _fmt_msg(_dbg_dumper);
616  }
617 }
618 #endif
619 
620 
621 //-----------------------------------------------------------------------------
622 template<class EventHandler>
623 bool ParseEngine<EventHandler>::_finished_file() const
624 {
625  bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
626  if(ret)
627  {
628  _c4dbgp("finished file!!!");
629  }
630  return ret;
631 }
632 
633 template<class EventHandler>
634 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
635 {
636  return m_evt_handler->m_curr->line_contents.rem.empty();
637 }
638 
639 
640 //-----------------------------------------------------------------------------
641 
642 template<class EventHandler>
643 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
644 {
645  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
646  {
647  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
648  if(pos == npos)
649  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
650  _c4dbgpf("skip {} whitespace characters", pos);
651  _line_progressed(pos);
652  }
653 }
654 
655 template<class EventHandler>
656 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
657 {
658  if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
659  {
660  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
661  if(pos == npos)
662  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
663  _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
664  _line_progressed(pos);
665  }
666 }
667 
668 template<class EventHandler>
669 template<size_t N>
670 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
671 {
672  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
673  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
674  if(pos == npos)
675  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
676  _c4dbgpf("skip {} characters", pos);
677  _line_progressed(pos);
678 }
679 
680 template<class EventHandler>
681 void ParseEngine<EventHandler>::_skip_comment()
682 {
683  LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
684  const size_t col = m_evt_handler->m_curr->pos.col - 1u;
685  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
686  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
687  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
688  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
689  // raise an error if the comment is not preceded by whitespace
690  if(lc.rem.str != lc.full.str) // not at line beginning
691  {
692  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
693  const char prev = lc.full.str[col - 1u];
694  if(C4_UNLIKELY(prev != ' ' && prev != '\t'))
695  _c4err("comment not preceded by whitespace");
696  }
697  _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
698  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
699 }
700 
701 template<class EventHandler>
702 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
703 {
704  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
705  if(pos != npos)
706  {
707  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
708  {
709  _line_progressed(pos);
710  _skip_comment();
711  }
712  }
713 }
714 
715 template<class EventHandler>
716 void ParseEngine<EventHandler>::_maybe_skip_comment()
717 {
718  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
719  if(pos != npos)
720  {
721  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
722  {
723  _line_progressed(pos);
724  _skip_comment();
725  }
726  }
727  else
728  {
729  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
730  }
731 }
732 
733 template<class EventHandler>
734 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
735 {
736  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
737  if(pos != npos)
738  {
739  if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
740  {
741  // bump pos to skip the colon as well, and check the colon
742  // is followed by space or tab
743  if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
744  {
745  const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
746  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
747  ++pos;
748  else
749  return false;
750  }
751  _line_progressed(pos);
752  return true;
753  }
754  }
755  else
756  {
757  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
758  }
759  return false;
760 }
761 
762 
763 //-----------------------------------------------------------------------------
764 
765 template<class EventHandler>
766 csubstr ParseEngine<EventHandler>::_scan_anchor()
767 {
768  csubstr s = m_evt_handler->m_curr->line_contents.rem;
769  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
770  csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
771  _line_progressed(1u + anchor.len);
772  _maybe_skipchars(' ');
773  return anchor;
774 }
775 
776 template<class EventHandler>
777 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
778 {
779  csubstr s = m_evt_handler->m_curr->line_contents.rem;
780  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
781  _set_first(s, s.first_of(" ,]\t"));
782  _line_progressed(s.len);
783  return s;
784 }
785 
786 template<class EventHandler>
787 csubstr ParseEngine<EventHandler>::_scan_ref_map()
788 {
789  csubstr s = m_evt_handler->m_curr->line_contents.rem;
790  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
791  _set_first(s, s.first_of(" ,}\t"));
792  _line_progressed(s.len);
793  return s;
794 }
795 
796 template<class EventHandler>
797 csubstr ParseEngine<EventHandler>::_scan_tag()
798 {
799  csubstr t = m_evt_handler->m_curr->line_contents.rem;
800  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
801  if(!t.begins_with("!<"))
802  {
803  _c4dbgp("begins with '!'");
804  _set_first(t, t.first_of(" ,]}\t"));
805  if(C4_UNLIKELY(t.first_of("[{") != npos))
806  _c4err("invalid tag");
807  _line_progressed(t.len);
808  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
809  t = _resolve_tag(t);
810  }
811  else
812  {
813  _c4dbgp("begins with '!<'");
814  size_t pos = t.find('>');
815  if(C4_UNLIKELY(pos == npos))
816  _c4err("invalid tag");
817  _set_first_strict(t, pos+1);
818  _line_progressed(t.len);
819  t = t.sub(1);
820  }
821  _maybe_skip_whitespace_tokens();
822  return t;
823 }
824 
825 template<class EventHandler>
826 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
827 {
828  csubstr t = m_evt_handler->m_curr->line_contents.rem;
829  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
830  if(!t.begins_with("!<"))
831  {
832  _c4dbgp("begins with '!'");
833  _set_first(t, t.first_of(" ,\t"));
834  if(C4_UNLIKELY(t.first_of("[{") != npos))
835  _c4err("invalid tag");
836  _line_progressed(t.len);
837  *orig = t;
838  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
839  t = _resolve_tag(t);
840  }
841  else
842  {
843  _c4dbgp("begins with '!<'");
844  size_t pos = t.find('>');
845  if(C4_UNLIKELY(pos == npos))
846  _c4err("invalid tag");
847  _set_first_strict(t, pos+1);
848  _line_progressed(t.len);
849  *orig = t;
850  t = t.sub(1);
851  }
852  _maybe_skip_whitespace_tokens();
853  return t;
854 }
855 
856 
857 //-----------------------------------------------------------------------------
858 
859 template<class EventHandler>
860 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
861 {
862  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
863  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
864  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
865  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
866  if(s.len > 1)
867  {
868  switch(s.str[1])
869  {
870  case ' ':
871  case ',':
872  case '}':
873  case ']':
874  case '\t':
875  if(s.str[0] == ':')
876  {
877  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
878  return false;
879  }
880  else
881  {
882  _c4err("invalid scalar");
883  }
884  break;
885  case '{':
886  case '[':
887  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
888  break;
889  default:
890  break;
891  }
892  }
893  else
894  {
895  if(s.str[0] == '-')
896  _c4err("invalid scalar");
897  return false;
898  }
899  return true;
900 }
901 
902 template<class EventHandler>
903 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
904 {
905  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
906  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
907  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
908  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
909  if(s.len > 1)
910  {
911  switch(s.str[1])
912  {
913  case ' ':
914  case '\t':
915  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
916  return false;
917  case '{':
918  case '}':
919  case '[':
920  case ']':
921  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
922  break;
923  default:
924  break;
925  }
926  }
927  else
928  {
929  return false;
930  }
931  return true;
932 }
933 
934 
935 template<class EventHandler>
936 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
937 {
938  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
939  // it's not a scalar if it starts with any of these characters:
940  switch(s.str[0])
941  {
942  // these are all legal tokens which mean no scalar is starting:
943  case '[':
944  case ']':
945  case '{':
946  case '}':
947  case '&':
948  case '*':
949  case '!':
950  case '|':
951  case '>':
952  case '#':
953  case ',':
954  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
955  return false;
956  // '-' and ':' are illegal at the beginning if not followed by a scalar character
957  case '-':
958  case ':':
959  _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
960  return _is_valid_start_scalar_plain_flow_check_block_token(s);
961  case '?':
962  _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
963  return _is_valid_start_scalar_plain_flow_check_qmrk(s);
964  // everything else is a legal starting character
965  default:
966  return true;
967  }
968 }
969 
970 
971 template<class EventHandler>
972 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
973 {
974  _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs), true));
975  if(s.len > offs + 1)
976  {
977  _c4dbgp("newl[PLAIN]: buffer continues");
978  csubstr next_line = s.sub(offs + 1);
979  size_t next_line_indentation = next_line.first_not_of(' ');
980  if(next_line_indentation != npos)
981  {
982  _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
983  next_line = next_line.first(next_line.first_of("\n\r"));
984  _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
985  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
986  if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
987  {
988  _c4dbgp("newl[PLAIN]: larger indentation");
989  next_line = next_line.sub(next_line_indentation);
990  }
991  else if(C4_UNLIKELY(next_line.len && next_line.triml(' ').len))
992  {
993  _c4dbgp("newl[PLAIN]: err, smaller indentation");
994  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
995  _line_ended();
996  _scan_line();
997  if(m_evt_handler->m_curr->line_contents.indentation != npos)
998  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
999  _c4err("parse error"); // cannot reduce indentation here
1000  }
1001  _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1002  if(next_line.len)
1003  {
1004  size_t fno = next_line.first_not_of(" \t");
1005  if(fno != csubstr::npos)
1006  {
1007  _c4assert(fno < next_line.len);
1008  switch(next_line.str[fno])
1009  {
1010  case ',': case ']': case '#':
1011  _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1012  return false;
1013  case ':': // cannot be succeeded by whitespace
1014  _c4dbgp("newl[PLAIN]: found :");
1015  if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1016  {
1017  _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1018  return false;
1019  }
1020  break;
1021  }
1022  }
1023  }
1024  }
1025  }
1026  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1027  _line_ended();
1028  _scan_line();
1029  return true;
1030 }
1031 
1032 template<class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1034 {
1035  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1036  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1037  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1038  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1039  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1040 
1041  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1042  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1043 
1044  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1045  return false;
1046 
1047  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1048  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1049 
1050  _c4dbgp("scanning seqflow scalar...");
1051 
1052  bool needs_filter = false;
1053  size_t col = 0; // zero-based column
1054  size_t offs = 0; // offset
1055  for( ; offs < s.len; ++offs, ++col)
1056  {
1057  const char c = s.str[offs];
1058  switch(c)
1059  {
1060  case ',':
1061  case ']':
1062  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1063  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1064  goto ended_scalar;
1065  case '\n':
1066  _c4dbgpf("found '\\n' at col={}", col);
1067  if(!_scan_scalar_plain_handle_newline(s, offs))
1068  goto ended_scalar;
1069  col = (size_t)-1; // so that col is 0 in the next loop iteration
1070  needs_filter = true;
1071  break;
1072  case '\r':
1073  --col; // don't count \r when calling _line_progressed()
1074  needs_filter = true;
1075  break;
1076  case ':':
1077  _c4dbgp("found suspicious ':'");
1078  if(s.len > offs + 1)
1079  {
1080  char next = s.str[offs + 1];
1081  _c4dbgpf("next char is '{}'", _c4prc(next));
1082  if(next == '\r')
1083  {
1084  csubstr after = s.sub(offs + 1).triml('\r');
1085  if(after.len)
1086  {
1087  next = after.str[0];
1088  _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1089  }
1090  }
1091  // no else here.
1092  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1093  {
1094  _c4dbgp("map starting!");
1095  goto ended_scalar;
1096  }
1097  else
1098  {
1099  _c4dbgp("':' nothing to see here");
1100  }
1101  }
1102  else
1103  {
1104  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1105  _line_progressed(col);
1106  _c4err("missing termination: '{}'", c); // noreturn
1107  }
1108  break;
1109  case '#':
1110  {
1111  _c4dbgp("found suspicious '#'");
1112  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1113  char prev = s.str[offs - 1];
1114  if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
1115  {
1116  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1117  goto ended_scalar;
1118  }
1119  }
1120  break;
1121  case '[':
1122  case '{':
1123  case '}':
1124  _line_progressed(col); // advance to report the proper position in the error
1125  _c4err("invalid character: '{}'", c); // noreturn
1126  case '-':
1127  case '.':
1128  _c4dbgpf("doc token character: '{}', offs={}", c, offs);
1129  if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1130  {
1131  _c4dbgp("at line beginning");
1132  if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1133  {
1134  _c4err("parse error"); // no return
1135  }
1136  }
1137  default:
1138  ;
1139  }
1140  }
1141 
1142 ended_scalar:
1143 
1144  _line_progressed(col);
1145  _set_first(s, offs);
1146  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1147  sc->needs_filter = needs_filter;
1148 
1149  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1150 
1151  return true;
1152 }
1153 
1154 template<class EventHandler>
1155 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1156 {
1157  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1158  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1159  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1160  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1161  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1162 
1163  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1164  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1165 
1166  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1167  return false;
1168 
1169  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1170  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1171 
1172  _c4dbgp("scanning mapflow scalar...");
1173 
1174  bool needs_filter = false;
1175  size_t col = 0; // zero-based column
1176  size_t offs = 0; // offset
1177  for( ; offs < s.len; ++offs, ++col)
1178  {
1179  const char c = s.str[offs];
1180  switch(c)
1181  {
1182  case ',':
1183  case '}':
1184  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1185  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1186  goto ended_scalar;
1187  case '\n':
1188  _c4dbgpf("found '\\n' at col={}", col);
1189  if(!_scan_scalar_plain_handle_newline(s, offs))
1190  goto ended_scalar;
1191  col = (size_t)-1; // so that col is 0 in the next loop iteration
1192  needs_filter = true;
1193  break;
1194  case '\r':
1195  --col; // don't count \r when calling _line_progressed()
1196  needs_filter = true;
1197  break;
1198  case ':':
1199  _c4dbgpf("found ':'", c);
1200  if(s.len == offs+1)
1201  break;
1202  {
1203  const char next = s.str[offs+1];
1204  _c4dbgpf("next='{}'", c);
1205  if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
1206  {
1207  _c4dbgpf("found terminating character: '{}'", c);
1208  goto ended_scalar;
1209  }
1210  }
1211  break;
1212  case '{':
1213  case '[':
1214  _line_progressed(col);
1215  _c4err("invalid character: '{}'", c); // noreturn
1216  break;
1217  case ']':
1218  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1219  goto ended_scalar;
1220  default:
1221  ;
1222  }
1223  }
1224 
1225 ended_scalar:
1226 
1227  _line_progressed(col);
1228  s = s.first(offs);
1229  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1230  sc->needs_filter = needs_filter;
1231 
1232  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1233 
1234  return sc->scalar.len > 0u;
1235 }
1236 
1237 template<class EventHandler>
1238 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1239 {
1240  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1241  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1242  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1243  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1244 
1245  substr s = m_evt_handler->m_curr->line_contents.rem;
1246  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1247  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1248 
1249  _c4dbgp("seq_json: scanning scalar...");
1250 
1251  switch(s.str[0])
1252  {
1253  case ']':
1254  case '{':
1255  case ',':
1256  _c4dbgp("seq_json: not a scalar.");
1257  return false;
1258  }
1259 
1260  {
1261  const size_t len = _begins_with_special_json_scalar(s);
1262  if(len)
1263  {
1264  char c = s.len > len ? s.str[len] : ',';
1265  if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1266  {
1267  sc->scalar = s.first(len);
1268  sc->needs_filter = false;
1269  _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1270  _line_progressed(len);
1271  return true;
1272  }
1273  else
1274  {
1275  return false;
1276  }
1277  }
1278  }
1279 
1280  // must be a number or special scalar
1281  size_t i = 0;
1282  for( ; i < s.len; ++i)
1283  {
1284  const char c = s.str[i];
1285  switch(c)
1286  {
1287  case ',':
1288  case ']':
1289  case ' ':
1290  case '\t':
1291  _c4dbgpf("seq_json: found terminating character: '{}'", c);
1292  goto ended_scalar;
1293  default:
1294  ;
1295  }
1296  }
1297 
1298 ended_scalar:
1299 
1300  if(C4_LIKELY(i > 0))
1301  {
1302  _line_progressed(i);
1303  sc->scalar = s.first(i);
1304  sc->needs_filter = false;
1305  _c4dbgpf("seq_json: scalar was {}", _prs(sc->scalar, /*escape*/true));
1306  }
1307 
1308  return true;
1309 }
1310 
1311 template<class EventHandler>
1312 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1313 {
1314  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1315  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1316  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1317  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1318  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1319 
1320  substr s = m_evt_handler->m_curr->line_contents.rem;
1321  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1322  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1323 
1324  _c4dbgp("scanning scalar...");
1325 
1326  {
1327  const size_t len = _begins_with_special_json_scalar(s);
1328  if(len)
1329  {
1330  char c = s.len > len ? s.str[len] : ',';
1331  _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1332  if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1333  {
1334  sc->scalar = s.first(len);
1335  sc->needs_filter = false;
1336  _c4dbgpf("special json scalar: '{}'", _prs(sc->scalar));
1337  _line_progressed(len);
1338  return true;
1339  }
1340  else
1341  {
1342  return false;
1343  }
1344  }
1345  }
1346 
1347  // must be a number
1348  size_t i = 0;
1349  for( ; i < s.len; ++i)
1350  {
1351  const char c = s.str[i];
1352  switch(c)
1353  {
1354  case ',':
1355  case '}':
1356  case ' ':
1357  case '\t':
1358  _c4dbgpf("found terminating character: '{}'", c);
1359  goto ended_scalar;
1360  default:
1361  ;
1362  }
1363  }
1364 
1365 ended_scalar:
1366 
1367  if(C4_LIKELY(i > 0))
1368  {
1369  _line_progressed(i);
1370  sc->scalar = s.first(i);
1371  sc->needs_filter = false;
1372  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1373  return true;
1374  }
1375 
1376  return false;
1377 }
1378 
1379 template<class EventHandler>
1380 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1381 {
1382  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1383  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1384 }
1385 
1386 template<class EventHandler>
1387 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1388 {
1389  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1390  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1391 }
1392 
1393 template<class EventHandler>
1394 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1395 {
1396  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1397  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1398  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1399 
1400  substr s = m_evt_handler->m_curr->line_contents.rem;
1401  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1402  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1403 
1404  switch(s.str[0])
1405  {
1406  case '-':
1407  if(_is_blck_token(s))
1408  {
1409  return false;
1410  }
1411  else if(_is_doc_begin(s))
1412  {
1413  _c4dbgp("token is doc start");
1414  return false;
1415  }
1416  break;
1417  case ':':
1418  case '?':
1419  if(_is_blck_token(s))
1420  return false;
1421  break;
1422  case '[':
1423  case '{':
1424  case '&':
1425  case '*':
1426  case '!':
1427  case '\t':
1428  case ',':
1429  case '%':
1430  return false;
1431  case '.':
1432  if(_is_doc_end(s))
1433  {
1434  _c4dbgp("token is doc end");
1435  return false;
1436  }
1437  break;
1438  }
1439 
1440  _c4dbgpf("plain scalar! indentation={}", indentation);
1441 
1442  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1443  const size_t start_line = m_evt_handler->m_curr->pos.line;
1444 
1445  bool needs_filter = false;
1446  while(true)
1447  {
1448  _c4dbgpf("plain scalar line: {}", _prs(s));
1449  for(size_t i = 0; i < s.len; ++i)
1450  {
1451  const char curr = s.str[i];
1452  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1453  switch(curr)
1454  {
1455  case ':':
1456  _c4dbgpf("[{}]: got suspicious ':'", i);
1457  // are there more characters?
1458  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1459  {
1460  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1461  _line_progressed(i);
1462  // ': ' is accepted only on the first line
1463  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1464  {
1465  _c4dbgp("start line. scalar ends here");
1466  goto ended_scalar;
1467  }
1468  else
1469  {
1470  _c4err("multiline scalars cannot be used as implicit keys");
1471  }
1472  }
1473  else
1474  {
1475  size_t j = i;
1476  while(j + 1 < s.len && s.str[j+1] == ':')
1477  {
1478  _c4dbgp("skip colon");
1479  ++j;
1480  }
1481  i = j > i ? j-1 : i;
1482  _c4dbgp("nothing to see here");
1483  }
1484  break;
1485  case '#':
1486  _c4dbgp("got suspicious '#'");
1487  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1488  {
1489  _c4dbgp("comment! scalar ends here");
1490  _line_progressed(i);
1491  goto ended_scalar;
1492  }
1493  else
1494  {
1495  _c4dbgp("nothing to see here");
1496  }
1497  break;
1498  }
1499  }
1500  _line_progressed(s.len);
1501  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1502  next_peeked = next_peeked.trimr("\n\r");
1503  const size_t next_indentation = next_peeked.first_not_of(' ');
1504  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1505  if(next_indentation < indentation)
1506  {
1507  _c4dbgp("smaller indentation! scalar ended");
1508  goto ended_scalar;
1509  }
1510  else if(next_indentation == 0 && next_peeked.len > 0)
1511  {
1512  const char first = next_peeked.str[0];
1513  switch(first)
1514  {
1515  case '-':
1516  _c4dbgpf("doc begin? peeked={}", _prs(next_peeked, size_t(3)));
1517  if(_is_doc_begin_token(next_peeked))
1518  {
1519  _c4dbgp("doc begin! scalar ended");
1520  goto ended_scalar;
1521  }
1522  break;
1523  case '.':
1524  _c4dbgpf("doc end? peeked={}", _prs(next_peeked, size_t(3)));
1525  if(_is_doc_end_token(next_peeked))
1526  {
1527  _c4dbgp("doc end! scalar ended");
1528  goto ended_scalar;
1529  }
1530  break;
1531  }
1532  }
1533  // load with next line
1534  _c4dbgp("next line!");
1535  if(!_finished_file())
1536  {
1537  _c4dbgp("next line!");
1538  _line_ended();
1539  _scan_line();
1540  }
1541  else
1542  {
1543  _c4dbgp("file finished!");
1544  goto ended_scalar;
1545  }
1546  s = m_evt_handler->m_curr->line_contents.rem;
1547  needs_filter = true;
1548  }
1549 
1550 ended_scalar:
1551 
1552  sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1553  sc->needs_filter = needs_filter;
1554 
1555  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1556 
1557  return true;
1558 }
1559 
1560 template<class EventHandler>
1561 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1562 {
1563  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1564  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1565  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1566  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1567  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1568  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1569  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1570 }
1571 
1572 template<class EventHandler>
1573 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1574 {
1575  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1576  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1577  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1578  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1579  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1580  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1581 }
1582 
1583 template<class EventHandler>
1584 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1585 {
1586  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1587  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1588 }
1589 
1590 
1591 //-----------------------------------------------------------------------------
1592 
1593 template<class EventHandler>
1594 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1595 {
1596  substr rem{}; // declare here because of the goto
1597  size_t nlpos{}; // declare here because of the goto
1598  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1599  if(pos >= _buf().len)
1600  goto next_is_empty;
1601 
1602  // look for the next newline chars, and jump to the right of those
1603  rem = _from_next_line(_buf().sub(pos));
1604  if(rem.empty())
1605  goto next_is_empty;
1606 
1607  // now get everything up to and including the following newline chars
1608  nlpos = rem.first_of("\r\n");
1609  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1610  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1611  rem = rem.left_of(nlpos, /*include_pos*/true);
1612 
1613  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1614  return rem;
1615 
1616 next_is_empty:
1617  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1618  return {};
1619 }
1620 
1621 //-----------------------------------------------------------------------------
1622 
1623 template<class EventHandler>
1624 void ParseEngine<EventHandler>::_scan_line()
1625 {
1626  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1627  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1628  else
1629  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1630 }
1631 
1632 template<class EventHandler>
1633 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1634 {
1635  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1636  m_evt_handler->m_curr->pos.line,
1637  m_evt_handler->m_curr->line_contents.full.len,
1638  ahead, m_evt_handler->m_curr->pos.col,
1639  m_evt_handler->m_curr->pos.col+ahead,
1640  m_evt_handler->m_curr->pos.offset,
1641  m_evt_handler->m_curr->pos.offset+ahead);
1642  m_evt_handler->m_curr->pos.offset += ahead;
1643  m_evt_handler->m_curr->pos.col += ahead;
1644  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1645  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1646 }
1647 
1648 template<class EventHandler>
1649 void ParseEngine<EventHandler>::_line_ended()
1650 {
1651  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1652  m_evt_handler->m_curr->pos.line,
1653  m_evt_handler->m_curr->line_contents.full.len,
1654  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1655  m_evt_handler->m_curr->pos.col, 1);
1656  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1657  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1658  ++m_evt_handler->m_curr->pos.line;
1659  m_evt_handler->m_curr->pos.col = 1;
1660 }
1661 
1662 template<class EventHandler>
1663 void ParseEngine<EventHandler>::_line_ended_undo()
1664 {
1665  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1666  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1667  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1668  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1669  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1670  m_evt_handler->m_curr->pos.offset -= delta;
1671  --m_evt_handler->m_curr->pos.line;
1672  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1673  // don't forget to undo also the changes to the remainder of the line
1674  //_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1675  m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1676 }
1677 
1678 
1679 //-----------------------------------------------------------------------------
1680 template<class EventHandler>
1681 void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1682 {
1683  m_evt_handler->m_curr->indref = indentation;
1684  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1685 }
1686 
1687 template<class EventHandler>
1688 void ParseEngine<EventHandler>::_save_indentation()
1689 {
1690  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1691  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1692  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1693 }
1694 
1695 template<class EventHandler>
1696 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1697 {
1698  _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1699  m_prev_val_end = m_evt_handler->m_curr->pos.line;
1700 }
1701 
1702 
1703 //-----------------------------------------------------------------------------
1704 
1705 template<class EventHandler>
1706 void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1707 {
1708  _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1709  m_evt_handler->actually_val_is_first_key_of_new_map_block();
1710  addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1711  _set_indentation(orig_indent);
1712  _maybe_skip_whitespace_tokens();
1713 }
1714 
1715 template<class EventHandler>
1716 void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1717 {
1718  // this is called AFTER ending the flow container,
1719  // so now we're at the parent container's scope
1720  if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1721  {
1722  _c4dbgp("flow container: end as vanilla block map key!");
1723  if(C4_UNLIKELY(multiline))
1724  _c4err("multiline key is invalid");
1725  if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1726  _c4err("could not find ':' colon after key");
1727  _maybe_skip_whitespace_tokens();
1728  addrem_flags(RVAL, RKEY|RKCL|RNXT);
1729  }
1730  else if(has_none(RFLOW))
1731  {
1732  _c4dbgp("end_flow_container: now not in flow!");
1733  if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1734  {
1735  if(C4_UNLIKELY(multiline))
1736  _c4err("multiline key is invalid");
1737  _flow_container_was_a_key(orig_indent);
1738  }
1739  else
1740  {
1741  _c4dbgp("end_flow_container: end map as key!");
1742  }
1743  }
1744  else if(has_any(RSEQ))
1745  {
1746  _c4dbgp("end_flow_container: now in a flow seq");
1747  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1748  _mark_seqflow_val_end();
1749  }
1750 }
1751 
1752 template<class EventHandler>
1753 void ParseEngine<EventHandler>::_end_map_flow()
1754 {
1755  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1756  size_t orig_indent = m_evt_handler->m_curr->indref;
1757  _c4dbgpf("mapflow: end, multiline={}", multiline);
1758  m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1759  _end_flow_container(orig_indent, multiline);
1760 }
1761 
1762 template<class EventHandler>
1763 void ParseEngine<EventHandler>::_end_seq_flow()
1764 {
1765  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1766  size_t orig_indent = m_evt_handler->m_curr->indref;
1767  _c4dbgpf("seqflow: end, multiline={}", multiline);
1768  m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1769  _end_flow_container(orig_indent, multiline);
1770 }
1771 
1772 template<class EventHandler>
1773 void ParseEngine<EventHandler>::_end_map_blck()
1774 {
1775  _c4dbgp("mapblck: end");
1776  if(has_any(RKCL|RVAL))
1777  {
1778  _c4dbgp("mapblck: set missing val");
1779  _handle_annotations_before_blck_val_scalar();
1780  m_evt_handler->set_val_scalar_plain_empty();
1781  }
1782  else if(has_any(QMRK))
1783  {
1784  _c4dbgp("mapblck: set missing keyval");
1785  _handle_annotations_before_blck_key_scalar();
1786  m_evt_handler->set_key_scalar_plain_empty();
1787  _handle_annotations_before_blck_val_scalar();
1788  m_evt_handler->set_val_scalar_plain_empty();
1789  }
1790  m_evt_handler->end_map_block();
1791 }
1792 
1793 template<class EventHandler>
1794 void ParseEngine<EventHandler>::_end_seq_blck()
1795 {
1796  if(has_any(RVAL))
1797  {
1798  _c4dbgp("seqblck: set missing val");
1799  _handle_annotations_before_blck_val_scalar();
1800  m_evt_handler->set_val_scalar_plain_empty();
1801  }
1802  m_evt_handler->end_seq_block();
1803 }
1804 
1805 template<class EventHandler>
1806 void ParseEngine<EventHandler>::_end2_map()
1807 {
1808  _c4dbgp("map: end");
1809  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1810  if(has_any(RBLCK))
1811  {
1812  _end_map_blck();
1813  }
1814  else
1815  {
1816  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1817  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1818  m_evt_handler->_pop();
1819  }
1820 }
1821 
1822 template<class EventHandler>
1823 void ParseEngine<EventHandler>::_end2_seq()
1824 {
1825  _c4dbgp("seq: end");
1826  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1827  if(has_any(RBLCK))
1828  {
1829  _end_seq_blck();
1830  }
1831  else
1832  {
1833  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1834  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1835  m_evt_handler->_pop();
1836  }
1837 }
1838 
1839 template<class EventHandler>
1840 void ParseEngine<EventHandler>::_begin2_doc()
1841 {
1842  _c4dbgp("begin_doc");
1843  m_has_directives_yaml = false;
1844  m_has_directives = false;
1845  m_doc_empty = true;
1846  add_flags(RDOC);
1847  m_evt_handler->begin_doc();
1848  m_evt_handler->m_curr->indref = 0; // ?
1849 }
1850 
1851 template<class EventHandler>
1852 void ParseEngine<EventHandler>::_begin2_doc_expl()
1853 {
1854  _c4dbgp("begin_doc_expl");
1855  m_has_directives_yaml = false;
1856  m_has_directives = false;
1857  m_doc_empty = true;
1858  add_flags(RDOC);
1859  m_evt_handler->begin_doc_expl();
1860  m_evt_handler->m_curr->indref = 0; // ?
1861 }
1862 
1863 template<class EventHandler>
1864 void ParseEngine<EventHandler>::_end2_doc()
1865 {
1866  _c4dbgp("doc: end");
1867  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1868  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1869  {
1870  _c4dbgp("doc was empty; add empty val");
1871  _handle_annotations_before_blck_val_scalar();
1872  m_evt_handler->set_val_scalar_plain_empty();
1873  }
1874  m_evt_handler->end_doc();
1875  m_bom_len = 0;
1876 }
1877 
1878 template<class EventHandler>
1879 void ParseEngine<EventHandler>::_end2_doc_expl()
1880 {
1881  _c4dbgp("doc: end");
1882  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1883  {
1884  _c4dbgp("doc: no children; add empty val");
1885  _handle_annotations_before_blck_val_scalar();
1886  m_evt_handler->set_val_scalar_plain_empty();
1887  }
1888  m_evt_handler->end_doc_expl();
1889  m_bom_len = 0;
1890 }
1891 
1892 template<class EventHandler>
1893 void ParseEngine<EventHandler>::_maybe_begin_doc()
1894 {
1895  if(has_none(RDOC))
1896  {
1897  _c4dbgp("doc must be started");
1898  _begin2_doc();
1899  }
1900 }
1901 template<class EventHandler>
1902 void ParseEngine<EventHandler>::_maybe_end_doc()
1903 {
1904  if(has_any(RDOC))
1905  {
1906  _c4dbgp("doc must be finished");
1907  _end2_doc();
1908  }
1909  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1910  {
1911  _c4dbgp("no doc to finish, but pending annotations");
1912  m_evt_handler->begin_doc();
1913  _handle_annotations_before_blck_val_scalar();
1914  m_evt_handler->set_val_scalar_plain_empty();
1915  m_evt_handler->end_doc();
1916  }
1917 }
1918 
1919 template<class EventHandler>
1920 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1921 {
1922  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1923  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1924  _c4dbgp("root is RDOC");
1925  if(m_evt_handler->m_curr->level != 0)
1926  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1927  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1928 }
1929 
1930 /** Check whether the current parse tokens are trailing on the
1931  * previous doc, and raise an error if they are */
1932 template<class EventHandler>
1933 void ParseEngine<EventHandler>::_check_trailing_doc_token()
1934 {
1935  const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1936  const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1937  const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1938  _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1939  if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
1940  _c4err("parse error");
1941 }
1942 
1943 template<class EventHandler>
1944 void ParseEngine<EventHandler>::_end_doc_suddenly()
1945 {
1946  _c4dbgp("end doc suddenly");
1947  _end_doc_suddenly__pop();
1948  _end2_doc_expl();
1949  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1950 }
1951 
1952 template<class EventHandler>
1953 void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1954 {
1955  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1956  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1957  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
1958  {
1959  _c4err("parse error");
1960  }
1961 }
1962 
1963 template<class EventHandler>
1964 void ParseEngine<EventHandler>::_start_doc_suddenly()
1965 {
1966  _c4dbgp("start doc suddenly");
1967  _end_doc_suddenly__pop();
1968  _end2_doc();
1969  _begin2_doc_expl();
1970 }
1971 
1972 template<class EventHandler>
1973 void ParseEngine<EventHandler>::_end_stream()
1974 {
1975  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1976  if(C4_UNLIKELY(has_all(RSEQ|RFLOW)))
1977  _c4err("missing terminating ]");
1978  else if(C4_UNLIKELY(has_all(RMAP|RFLOW)))
1979  _c4err("missing terminating }");
1980  if(m_evt_handler->m_stack.size() > 1)
1981  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1982  if(has_all(RDOC))
1983  {
1984  _end2_doc();
1985  }
1986  else if(has_all(RTOP|RUNK))
1987  {
1988  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1989  {
1990  if(m_doc_empty)
1991  {
1992  m_evt_handler->begin_doc();
1993  _handle_annotations_before_blck_val_scalar();
1994  m_evt_handler->set_val_scalar_plain_empty();
1995  m_evt_handler->end_doc();
1996  }
1997  }
1998  }
1999  m_evt_handler->end_stream();
2000  if(C4_UNLIKELY(m_has_directives))
2001  _c4err("directives cannot be used without a document");
2002 }
2003 
2004 
2005 template<class EventHandler>
2006 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
2007 {
2008  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2009  while(m_evt_handler->m_curr != popto)
2010  {
2011  if(has_any(RSEQ))
2012  {
2013  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2014  _end2_seq();
2015  }
2016  else if(has_any(RMAP))
2017  {
2018  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2019  _end2_map();
2020  }
2021  else
2022  {
2023  break;
2024  }
2025  }
2026  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2027 }
2028 
2029 template<class EventHandler>
2030 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2031 {
2032  // search the stack frame to jump to based on its indentation
2033  using state_type = typename EventHandler::state;
2034  state_type const* popto = nullptr;
2035  auto &stack = m_evt_handler->m_stack;
2036  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2037  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2038  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2039  #ifdef RYML_DBG
2040  _print_state_stack();
2041  #endif
2042  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2043  {
2044  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2045  if(s->indref == ind)
2046  {
2047  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2048  popto = s;
2049  break;
2050  }
2051  }
2052  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2053  {
2054  _c4err("parse error: incorrect indentation?");
2055  }
2056  _handle_indentation_pop(popto);
2057 }
2058 
/** Pop out of a block map after the indentation decreased.
 *
 * Searches the handler stack for the frame to pop to, based on the
 * indentation of the current line, and pops to it with
 * _handle_indentation_pop(). The search walks from the frame just below
 * the current one towards the stack bottom, never visiting the bottom
 * frame itself. A parse error is raised when no candidate is found, or
 * when the candidate is not strictly below the current frame (by
 * address or level). */
2059 template<class EventHandler>
2060 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2061 {
2062  // search the stack frame to jump to based on its indentation
2063  using state_type = typename EventHandler::state;
2064  auto &stack = m_evt_handler->m_stack;
2065  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2066  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2067  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2068  state_type const* popto = nullptr;
2069  #ifdef RYML_DBG
2070  char flagbuf_[128];
2071  _print_state_stack(flagbuf_);
2072  #endif
 // NOTE(review): the initial `m_curr-1` forms a pointer before the
 // stack start if m_curr is the bottom frame; presumably this function
 // is never called in that situation -- confirm against callers.
2073  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2074  {
2075  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
 // a frame indented less than the current line ends the search
2076  if(s->indref < ind)
2077  {
2078  break;
2079  }
2080  else if(s->indref == ind)
2081  {
2082  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
 // keep the candidate already found rather than replacing it
 // with a frame that has RTOP set but neither RMAP nor RSEQ
2083  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2084  {
2085  break;
2086  }
 // the search continues after this assignment, so a later frame
 // (closer to the stack bottom) with the same indentation
 // replaces this candidate
2087  popto = s;
 // a block seq frame at this indentation is the final answer
 // when the current line starts with a '-' block token
2088  if(has_all(RSEQ|RBLCK, s))
2089  {
2090  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2091  const size_t first = rem.first_not_of(' ');
2092  _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2093  rem = rem.right_of(first, true);
2094  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2095  if(rem.begins_with('-') && _is_blck_token(rem))
2096  {
2097  _c4dbgp("parent was indentless seq");
2098  break;
2099  }
2100  }
2101  }
2102  }
 // the candidate must exist and be strictly below the current frame
2103  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2104  {
2105  _c4err("parse error: incorrect indentation?");
2106  }
2107  _handle_indentation_pop(popto);
2108 }
2109 
2110 
2111 //-----------------------------------------------------------------------------
2112 template<class EventHandler>
2113 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2114 {
2115  if(C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY)))
2116  {
2117  _c4err("multiline quoted keys are invalid");
2118  }
2119  else // check contextual indentation
2120  {
2121  const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2122  _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2123  if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2124  {
2125  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2126  m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2127  m_evt_handler->m_curr->pos);
2128  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2129  _c4dbgpf("trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem, true));
2130  if(C4_UNLIKELY(!!trimmed.len))
2131  {
2132  _c4err("bad indentation");
2133  }
2134  }
2135  }
2136 }
2137 
2138 
2139 //-----------------------------------------------------------------------------
/** Scan a single-quoted scalar.
 *
 * The current position must be at the opening quote. Lines are
 * consumed until the closing quote is found; a quote immediately
 * followed by another quote is an escaped quote and does not close
 * the scalar. Reaching the end of the file without finding the closing
 * quote is an error, as is a document token at the beginning of a line
 * inside the scalar.
 *
 * @return the scanned span together with a flag telling whether it
 * needs filtering. The flag is set when an escaped quote was seen or
 * when the scalar continued past the end of a line. */
2140 template<class EventHandler>
2141 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2142 {
2143  // quoted scalars can spread over multiple lines!
2144  // nice explanation here: http://yaml-multiline.info/
2145
2146  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2147
2148  // a span to the end of the file, skipping the opening quote
2149  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2150  _line_progressed(1); // advance over the opening quote
2151  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2152
2153  bool needs_filter = false;
2154  size_t pos = npos; // find the pos of the matching quote
2155  while( ! _finished_file())
2156  {
2157  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2158  _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2159  if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2160  _c4err("token can not appear at line begin");
2161  for(size_t i = 0; i < line.len; ++i)
2162  {
2163  const char curr = line.str[i];
2164  if(curr == '\'') // single quotes are escaped with two single quotes
2165  {
 // '~' stands in for "no next char" at the end of the line; any
 // value other than a quote would do
2166  const char next = i+1 < line.len ? line.str[i+1] : '~';
2167  if(next != '\'') // so just look for the first quote
2168  { // without another after it
2169  _line_progressed(i + 1); // progress beyond the quote
 // the index within the line is converted to an index within s
2170  pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2171  goto found_close;
2172  }
2173  else
2174  {
2175  needs_filter = true; // needs filter to remove escaped quotes
2176  ++i; // skip the escaped quote
2177  }
2178  }
2179  }
2180
 // no closing quote on this line: the scalar continues on the next
 // line, so it will need filtering
2181  needs_filter = true;
2182  _line_progressed(line.len);
2183  _line_ended();
2184  _scan_line();
2185  _check_valid_newline_in_quoted_scalar();
2186  }
2187
 // only reached when the loop ran out of input
2188  _c4err("reached end of file while looking for closing quote");
2189
2190 found_close:
2191
2192  _c4dbgpf("found closing quote at: {}", pos);
2193  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
 // NOTE(review): pos is a size_t, so the next check is always true
2194  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2195  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2196  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
 // presumably shortens s to its first pos characters, ie up to but
 // excluding the closing quote -- confirm against _set_first_strict
2197  _set_first_strict(s, pos);
2198
2199  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2200
2201  return ScannedScalar { s, needs_filter };
2202 }
2203 
2204 
2205 //-----------------------------------------------------------------------------
/** Scan a double-quoted scalar.
 *
 * The current position must be at the opening quote. Lines are
 * consumed until an unescaped closing quote is found. Reaching the end
 * of the file without finding it is an error, as is a document token
 * at the beginning of a line inside the scalar.
 *
 * @return the scanned span together with a flag telling whether it
 * needs filtering. The flag is set when a backslash was seen or when
 * the scalar continued past the end of a line. */
2206 template<class EventHandler>
2207 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2208 {
2209  // quoted scalars can spread over multiple lines!
2210  // nice explanation here: http://yaml-multiline.info/
2211
2212  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2213
2214  // a span to the end of the file, skipping the opening quote
2215  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2216  _line_progressed(1); // advance over the opening quote
2217  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2218
2219  bool needs_filter = false;
2220  size_t pos = npos; // find the pos of the matching quote
2221  while( ! _finished_file())
2222  {
2223  #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2224  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2225  #endif
2226  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2227  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2228  if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2229  _c4err("token can not appear at line begin");
2230  for(size_t i = 0; i < rem.len; ++i)
2231  {
2232  const char curr = rem.str[i];
2233  // every \ is an escape
2234  if(curr == '\\')
2235  {
 // '~' stands in for "no next char" at the end of the line
2236  const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2237  needs_filter = true;
 // step over an escaped quote or backslash, so that it is
 // neither taken for the closing quote nor for the start of
 // another escape
2238  if(next == '"' || next == '\\')
2239  ++i;
2240  }
2241  else if(curr == '"')
2242  {
2243  _line_progressed(i + 1); // progress beyond the quote
 // the index within the line is converted to an index within s
2244  pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2245  goto found_close;
2246  }
2247  }
2248
2249  // leading whitespace also needs filtering
2250  needs_filter = true;
2251  _line_progressed(rem.len);
2252  _line_ended();
2253  _scan_line();
2254  _check_valid_newline_in_quoted_scalar();
2255  }
2256
 // only reached when the loop ran out of input
2257  _c4err("reached end of file while looking for closing quote");
2258
2259 found_close:
2260
2261  _c4dbgpf("found closing quote at: {}", pos);
2262  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
 // NOTE(review): pos is a size_t, so the next check is always true
2263  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2264  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2265  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
 // presumably shortens s to its first pos characters, ie up to but
 // excluding the closing quote -- confirm against _set_first_strict
2266  _set_first_strict(s, pos);
2267
2268  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2269
2270  return ScannedScalar{s, needs_filter};
2271 }
2272 
2273 
2274 //-----------------------------------------------------------------------------
/** Scan a block scalar (literal `|` or folded `>`).
 *
 * The remainder of the current line must start with the block
 * indicator. The header is parsed first: an optional chomping
 * indicator (`-` strips, `+` keeps, the default clips) and an optional
 * single-digit indentation indicator, which is added to the
 * indentation reference of the current state. Then every following
 * line belonging to the block is accumulated, as full lines, into a
 * raw span; no filtering is done here.
 *
 * @param sb receives the raw block span, the block indentation and the
 *        chomping mode
 * @param indref the indentation reference against which the block
 *        lines are compared when no indentation indicator was given
 *        (must not be npos) */
2275 template<class EventHandler>
2276 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2277 {
2278  _c4dbgpf("blck: indref={}", indref);
2279  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2280
2281  // nice explanation here: http://yaml-multiline.info/
2282  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2283  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2284
2285  _c4dbgpf("blck: specs={}", _prs(s));
2286
2287  // parse the spec
2288  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2289  size_t indentation = npos; // have to find out if no spec is given
2290  if(s.len > 1)
2291  {
2292  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
2293  csubstr t = s.sub(1);
2294  _c4dbgpf("blck: spec is multichar: '{}'", t);
2295  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2296  size_t pos = t.first_of("-+");
2297  _c4dbgpf("blck: spec chomp char at {}", pos);
2298  if(pos != npos)
2299  {
2300  if(t[pos] == '-')
2301  chomp = CHOMP_STRIP;
2302  else if(t[pos] == '+')
2303  chomp = CHOMP_KEEP;
 // drop the chomp char from t: when it comes first, keep what
 // follows it; otherwise keep what precedes it
2304  if(pos == 0)
2305  t = t.sub(1);
2306  else
2307  t = t.first(pos);
2308  }
2309  // from here to the end, only digits are considered
2310  pos = t.first_not_of("0123456789");
2311  csubstr digits = t.first(pos);
2312  if( ! digits.empty())
2313  {
 // the indentation indicator is limited to a single nonzero digit
2314  if(C4_UNLIKELY(digits.len > 1))
2315  _c4err("parse error: invalid indentation");
2316  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2317  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2318  _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2319  if(C4_UNLIKELY( ! indentation))
2320  _c4err("parse error: null indentation");
 // NOTE(review): the debug message below computes the sum with the
 // `indref` parameter, while the code adds m_curr->indref
2321  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2322  indentation += m_evt_handler->m_curr->indref;
2323  }
2324  else
2325  {
 // without digits, whatever is left must start with whitespace
 // and, past that whitespace, with a '#'
2326  if(C4_UNLIKELY(t.len && (!t.begins_with_any(" \t") || !t.sub(pos).triml(" \t").begins_with('#'))))
2327  _c4err("parse error: invalid token");
2328  }
2329  }
2330
2331  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2332
2333  // finish the current line
2334  _line_progressed(s.len);
2335  _line_ended();
2336  _scan_line();
2337
2338  // start with a zero-length block, already pointing at the right place
2339  substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2340  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2341
2342  // read every full line into a raw block,
2343  // from which newlines are to be stripped as needed.
2344  //
2345  // If no explicit indentation was given, pick it from the first
2346  // non-empty line. See
2347  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2348  size_t num_lines = 0;
2349  size_t first = m_evt_handler->m_curr->pos.line;
 // lower bound for the block indentation, gathered from the
 // whitespace-only lines seen before the first non-empty line
2350  size_t provisional_indentation = npos;
2351  LineContents lc;
2352  while(( ! _finished_file()))
2353  {
2354  // peek next line, but do not advance immediately
2355  lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2356  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2357  C4_DONT_OPTIMIZE(lc.rem);
2358  #endif
2359  _c4dbgpf("blck: peeking at {}", _prs(lc.rem.trimr("\r\n"), true));
2360  // evaluate termination conditions
 // case 1: the block indentation is already known
2361  if(indentation != npos)
2362  {
2363  _c4dbgpf("blck: indentation={}", indentation);
2364  // stop when the line is deindented and not empty
2365  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2366  {
2367  if(raw_block.len)
2368  {
2369  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2370  }
2371  else
2372  {
2373  _c4err("indentation decreased without any scalar");
2374  }
2375  break;
2376  }
 // at zero indentation, a document token also ends the block
2377  else if(indentation == 0)
2378  {
2379  _c4dbgpf("blck: noindent. lc.rem={}", _prs(lc.rem));
2380  if(_is_doc_token(lc.rem))
2381  {
2382  _c4dbgp("blck: stop. indentation=0 and doc ended");
2383  break;
2384  }
2385  }
2386  }
 // case 2: the block indentation is still to be determined
2387  else
2388  {
2389  const size_t fns = lc.rem.first_not_of(' ');
2390  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2391  if(fns != npos) // non-empty line
2392  {
2393  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
 // a tab at the start of the first non-empty line is an error
2394  if(C4_UNLIKELY(lc.full.begins_with('\t')))
2395  _c4err("parse error");
 // no whitespace-only line came before this one
2396  if(provisional_indentation == npos)
2397  {
2398  if(lc.indentation < indref)
2399  {
2400  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2401  if(raw_block.len == 0)
2402  {
2403  _c4dbgp("blck: was empty, undo next line");
2404  _line_ended_undo();
2405  }
2406  break;
2407  }
2408  else if(lc.indentation == m_evt_handler->m_curr->indref)
2409  {
2410  if(has_any(RSEQ|RMAP))
2411  {
2412  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2413  break;
2414  }
2415  }
2416  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2417  indentation = lc.indentation;
2418  }
 // whitespace-only lines came before this one
2419  else
2420  {
2421  if(lc.indentation >= provisional_indentation)
2422  {
2423  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2424  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2425  indentation = lc.indentation;
2426  }
2427  else
2428  {
2429  if(lc.indentation >= indref)
2430  _c4err("parse error: first non-empty block line should have at least the original indentation");
2431  _c4dbgp("blck: finished");
2432  break;
2433  }
2434  }
2435  }
2436  else // empty line
2437  {
2438  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
 // the provisional indentation only ever grows
2439  if(provisional_indentation != npos)
2440  {
2441  if(lc.rem.len >= provisional_indentation)
2442  {
2443  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2444  provisional_indentation = lc.rem.len;
2445  }
2446  }
2447  else
2448  {
 // has_any() yields a bool here, ie 0 or 1
2449  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2450  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2451  if(provisional_indentation == npos)
2452  {
2453  provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2454  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2455  }
2456  if(provisional_indentation < indref)
2457  {
2458  provisional_indentation = indref;
2459  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2460  }
2461  }
2462  }
2463  }
2464  // advance now that we know the folded scalar continues
2465  m_evt_handler->m_curr->line_contents = lc;
2466  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
 // the block grows by the full length of the line
2467  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2468  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2469  _line_ended();
2470  ++num_lines;
2471  }
2472  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2473  C4_UNUSED(num_lines);
2474  C4_UNUSED(first);
2475
 // no non-empty line fixed the indentation: fall back to the
 // provisional value (which may itself still be npos)
2476  if(indentation == npos)
2477  {
2478  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2479  indentation = provisional_indentation;
2480  }
2481
 // undo the end-of-line step done for the last line appended
2482  if(num_lines)
2483  _line_ended_undo();
2484
2485  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2486
2487  sb->scalar = raw_block;
2488  sb->indentation = indentation;
2489  sb->chomp = chomp;
2490 }
2491 
2492 
2493 //-----------------------------------------------------------------------------
2494 //-----------------------------------------------------------------------------
2495 //-----------------------------------------------------------------------------
2496 /** @cond dev */
2497 
// Tracing macro for the whitespace filter helpers that follow. It is
// compiled out by default; change the `#if 0` to `#if 1` to forward the
// messages to _c4dbgpf, prefixed with the processor's read and write
// positions. The enabled form requires at least one argument after the
// format string.
2498 // a debugging scaffold:
2499 #if 0
2500 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2501 #else
2502 #define _c4dbgfws(...)
2503 #endif
2504 
2505 template<class EventHandler>
2506 template<class FilterProcessor>
2507 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2508 {
2509  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2510  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2511 
2512  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2513  if(first_pos != npos)
2514  {
2515  const char first_char = proc.src[first_pos];
2516  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2517  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2518  {
2519  _c4dbgfws("whitespace is trailing on line", "");
2520  proc.skip(first_pos - proc.rpos);
2521  }
2522  else // a legit whitespace
2523  {
2524  proc.copy();
2525  _c4dbgfws("legit whitespace. sofar={}", _prs(proc.sofar()));
2526  }
2527  return true;
2528  }
2529  _c4dbgfws("whitespace is trailing on line", "");
2530  return false;
2531 }
2532 
2533 template<class EventHandler>
2534 template<class FilterProcessor>
2535 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2536 {
2537  if(!_filter_ws_handle_to_first_non_space(proc))
2538  {
2539  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2540  proc.copy(proc.src.len - proc.rpos);
2541  }
2542 }
2543 
2544 template<class EventHandler>
2545 template<class FilterProcessor>
2546 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2547 {
2548  if(!_filter_ws_handle_to_first_non_space(proc))
2549  {
2550  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2551  proc.skip(proc.src.len - proc.rpos);
2552  }
2553 }
2554 
2555 #undef _c4dbgfws
2556 
2557 
2558 //-----------------------------------------------------------------------------
2559 //-----------------------------------------------------------------------------
2560 //-----------------------------------------------------------------------------
2561 /* plain scalars */
2562 
// Tracing macro for the plain scalar filter functions that follow. It
// is compiled out by default; change the `#if 0` to `#if 1` to forward
// the messages to _c4dbgpf, prefixed with the processor's read and
// write positions. The enabled form requires at least one argument
// after the format string.
2563 // a debugging scaffold:
2564 #if 0
2565 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2566 #else
2567 #define _c4dbgfps(fmt, ...)
2568 #endif
2569 
2570 template<class EventHandler>
2571 template<class FilterProcessor>
2572 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2573 {
2574  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2575 
2576  _c4dbgfps("found newline. sofar={}", _prs(proc.sofar()));
2577  size_t ii = proc.rpos;
2578  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2579  if(numnl_following)
2580  {
2581  proc.set('\n', numnl_following);
2582  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2583  }
2584  else
2585  {
2586  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2587  if(ret != npos)
2588  {
2589  proc.set(' ');
2590  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2591  }
2592  else
2593  {
2594  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2595  ii = proc.src.len;
2596  }
2597  }
2598  proc.rpos = ii;
2599 }
2600 
2601 template<class EventHandler>
2602 template<class FilterProcessor>
2603 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2604 {
2605  _RYML_ASSERT_PARSE_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2606  _c4dbgfps("before={}", _prs(proc.src));
2607 
2608  while(proc.has_more_chars())
2609  {
2610  const char curr = proc.curr();
2611  _c4dbgfps("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2612  switch(curr)
2613  {
2614  case ' ':
2615  _RYML_WITH_TAB_TOKENS(case '\t':)
2616  _c4dbgfps("whitespace", curr);
2617  _filter_ws_skip_trailing(proc);
2618  break;
2619  case '\n':
2620  _c4dbgfps("newline", curr);
2621  _filter_nl_plain(proc, /*indentation*/indentation);
2622  break;
2623  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2624  _c4dbgfps("carriage return, ignore", curr);
2625  proc.skip();
2626  break;
2627  default:
2628  proc.copy();
2629  break;
2630  }
2631  }
2632 
2633  _c4dbgfps("after={}", _prs(proc.sofar()));
2634 
2635  return proc.result();
2636 }
2637 
2638 #undef _c4dbgfps
2639 
2640 
2641 template<class EventHandler>
2642 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2643 {
2644  FilterProcessorSrcDst proc(scalar, dst);
2645  return _filter_plain(proc, indentation);
2646 }
2647 
2648 template<class EventHandler>
2649 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2650 {
2651  FilterProcessorInplaceEndExtending proc(dst, cap);
2652  return _filter_plain(proc, indentation);
2653 }
2654 
2655 
2656 //-----------------------------------------------------------------------------
2657 //-----------------------------------------------------------------------------
2658 //-----------------------------------------------------------------------------
2659 /* single quoted */
2660 
2661 // a debugging scaffold:
2662 #if 0
2663 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2664 #else
2665 #define _c4dbgfsq(fmt, ...)
2666 #endif
2667 
2668 template<class EventHandler>
2669 template<class FilterProcessor>
2670 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2671 {
2672  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2673 
2674  _c4dbgfsq("found newline. sofar={}", _prs(proc.sofar()));
2675  size_t ii = proc.rpos;
2676  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2677  if(numnl_following)
2678  {
2679  proc.set('\n', numnl_following);
2680  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2681  }
2682  else
2683  {
2684  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2685  if(ret != npos)
2686  {
2687  proc.set(' ');
2688  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2689  }
2690  else
2691  {
2692  proc.set(' ');
2693  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2694  }
2695  }
2696  proc.rpos = ii;
2697 }
2698 
2699 template<class EventHandler>
2700 template<class FilterProcessor>
2701 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2702 {
2703  _c4dbgfsq("before={}", _prs(proc.src));
2704 
2705  // from the YAML spec for double-quoted scalars:
2706  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2707  while(proc.has_more_chars())
2708  {
2709  const char curr = proc.curr();
2710  _c4dbgfsq("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2711  switch(curr)
2712  {
2713  case ' ':
2714  case '\t':
2715  _c4dbgfsq("whitespace", curr);
2716  _filter_ws_copy_trailing(proc);
2717  break;
2718  case '\n':
2719  _c4dbgfsq("newline", curr);
2720  _filter_nl_squoted(proc);
2721  break;
2722  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2723  _c4dbgfsq("skip cr", curr);
2724  proc.skip();
2725  break;
2726  case '\'':
2727  _c4dbgfsq("squote", curr);
2728  if(proc.next() == '\'')
2729  {
2730  _c4dbgfsq("two consecutive squotes", curr);
2731  proc.skip();
2732  proc.copy();
2733  }
2734  else
2735  {
2736  _c4err("filter error");
2737  }
2738  break;
2739  default:
2740  proc.copy();
2741  break;
2742  }
2743  }
2744 
2745  _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2746 
2747  return proc.result();
2748 }
2749 
2750 #undef _c4dbgfsq
2751 
2752 template<class EventHandler>
2753 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2754 {
2755  FilterProcessorSrcDst proc(scalar, dst);
2756  return _filter_squoted(proc);
2757 }
2758 
2759 template<class EventHandler>
2760 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2761 {
2762  FilterProcessorInplaceEndExtending proc(dst, cap);
2763  return _filter_squoted(proc);
2764 }
2765 
2766 
2767 //-----------------------------------------------------------------------------
2768 //-----------------------------------------------------------------------------
2769 //-----------------------------------------------------------------------------
2770 /* double quoted */
2771 
2772 // a debugging scaffold:
2773 #if 0
2774 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2775 #else
2776 #define _c4dbgfdq(...)
2777 #endif
2778 
2779 template<class EventHandler>
2780 template<class FilterProcessor>
2781 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2782 {
2783  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2784 
2785  _c4dbgfdq("found newline. sofar={}", _prs(proc.sofar()));
2786  size_t ii = proc.rpos;
2787  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2788  if(numnl_following)
2789  {
2790  proc.set('\n', numnl_following);
2791  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2792  }
2793  else
2794  {
2795  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2796  if(ret != npos)
2797  {
2798  proc.set(' ');
2799  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2800  }
2801  else
2802  {
2803  proc.set(' ');
2804  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2805  }
2806  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2807  {
2808  _c4dbgfdq("backslash at [{}]", ii);
2809  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2810  if(next == ' ' || next == '\t')
2811  {
2812  _c4dbgfdq("extend skip to backslash", "");
2813  ++ii;
2814  }
2815  }
2816  }
2817  proc.rpos = ii;
2818 }
2819 
2820 template<class EventHandler>
2821 template<class FilterProcessor>
2822 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2823 {
2824  const size_t szp1 = sz + 1u;
2825  if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2826  _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2827  char readbuf[8];
2828  csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2829  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2830  uint32_t codepoint_val = {};
2831  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2832  _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2833  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2834  if(C4_UNLIKELY(numbytes == 0))
2835  _c4err("failed to decode code point={}", proc.rpos);
2836  _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2837  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2838  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2839 }
2840 
2841 template<class EventHandler>
2842 template<class FilterProcessor>
2843 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2844 {
2845  char next = proc.next();
2846  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2847  if(next == '\r')
2848  {
2849  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2850  {
2851  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2852  next = '\n';
2853  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2854  }
2855  }
2856 
2857  if(next == '\n')
2858  {
2859  size_t ii = proc.rpos + 2;
2860  for( ; ii < proc.src.len; ++ii)
2861  {
2862  // skip leading whitespace
2863  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2864  ;
2865  else
2866  break;
2867  }
2868  proc.skip(ii - proc.rpos);
2869  }
2870  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2871  {
2872  // escapes for json compatibility
2873  proc.translate_esc(next);
2874  _c4dbgfdq("here, used '{}'", _c4prc(next));
2875  }
2876  else if(next == '\r')
2877  {
2878  proc.skip();
2879  }
2880  else if(next == 'n')
2881  {
2882  proc.translate_esc('\n');
2883  }
2884  else if(next == 'r')
2885  {
2886  proc.translate_esc('\r');
2887  }
2888  else if(next == 't')
2889  {
2890  proc.translate_esc('\t');
2891  }
2892  else if(next == '\\')
2893  {
2894  proc.translate_esc('\\');
2895  }
2896  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2897  {
2898  _filter_dquoted_backslash_decode(proc, 2u);
2899  }
2900  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2901  {
2902  _filter_dquoted_backslash_decode(proc, 4u);
2903  }
2904  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2905  {
2906  _filter_dquoted_backslash_decode(proc, 8u);
2907  }
2908  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2909  else if(next == '0')
2910  {
2911  proc.translate_esc('\0');
2912  }
2913  else if(next == 'b') // backspace
2914  {
2915  proc.translate_esc('\b');
2916  }
2917  else if(next == 'f') // form feed
2918  {
2919  proc.translate_esc('\f');
2920  }
2921  else if(next == 'a') // bell character
2922  {
2923  proc.translate_esc('\a');
2924  }
2925  else if(next == 'v') // vertical tab
2926  {
2927  proc.translate_esc('\v');
2928  }
2929  else if(next == 'e') // escape character
2930  {
2931  proc.translate_esc('\x1b');
2932  }
2933  else if(next == '_') // unicode non breaking space \u00a0
2934  {
2935  // https://www.compart.com/en/unicode/U+00a0
2936  const char payload[] = {
2937  _RYML_CHCONST(-0x3e, 0xc2),
2938  _RYML_CHCONST(-0x60, 0xa0),
2939  };
2940  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2941  }
2942  else if(next == 'N') // unicode next line \u0085
2943  {
2944  // https://www.compart.com/en/unicode/U+0085
2945  const char payload[] = {
2946  _RYML_CHCONST(-0x3e, 0xc2),
2947  _RYML_CHCONST(-0x7b, 0x85),
2948  };
2949  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2950  }
2951  else if(next == 'L') // unicode line separator \u2028
2952  {
2953  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2954  const char payload[] = {
2955  _RYML_CHCONST(-0x1e, 0xe2),
2956  _RYML_CHCONST(-0x80, 0x80),
2957  _RYML_CHCONST(-0x58, 0xa8),
2958  };
2959  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2960  }
2961  else if(next == 'P') // unicode paragraph separator \u2029
2962  {
2963  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2964  const char payload[] = {
2965  _RYML_CHCONST(-0x1e, 0xe2),
2966  _RYML_CHCONST(-0x80, 0x80),
2967  _RYML_CHCONST(-0x57, 0xa9),
2968  };
2969  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2970  }
2971  else if(next == '\0')
2972  {
2973  proc.skip();
2974  }
2975  else
2976  {
2977  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2978  }
2979  _c4dbgfdq("backslash...sofar={}", _prs(proc.sofar()));
2980 }
2981 
2982 
2983 template<class EventHandler>
2984 template<class FilterProcessor>
2985 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2986 {
2987  _c4dbgfdq("before={}", _prs(proc.src));
2988  // from the YAML spec for double-quoted scalars:
2989  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2990  while(proc.has_more_chars())
2991  {
2992  const char curr = proc.curr();
2993  _c4dbgfdq("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2994  switch(curr)
2995  {
2996  case ' ':
2997  case '\t':
2998  {
2999  _c4dbgfdq("whitespace", curr);
3000  _filter_ws_copy_trailing(proc);
3001  break;
3002  }
3003  case '\n':
3004  {
3005  _c4dbgfdq("newline", curr);
3006  _filter_nl_dquoted(proc);
3007  break;
3008  }
3009  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3010  {
3011  _c4dbgfdq("carriage return, ignore", curr);
3012  proc.skip();
3013  break;
3014  }
3015  case '\\':
3016  {
3017  _filter_dquoted_backslash(proc);
3018  break;
3019  }
3020  default:
3021  {
3022  proc.copy();
3023  break;
3024  }
3025  }
3026  }
3027  _c4dbgfdq("after={}", _prs(proc.sofar()));
3028  return proc.result();
3029 }
3030 
3031 #undef _c4dbgfdq
3032 
3033 
3034 template<class EventHandler>
3035 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
3036 {
3037  FilterProcessorSrcDst proc(scalar, dst);
3038  return _filter_dquoted(proc);
3039 }
3040 
3041 template<class EventHandler>
3042 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
3043 {
3044  FilterProcessorInplaceMidExtending proc(dst, cap);
3045  return _filter_dquoted(proc);
3046 }
3047 
3048 
3049 //-----------------------------------------------------------------------------
3050 //-----------------------------------------------------------------------------
3051 //-----------------------------------------------------------------------------
3052 // block filtering helpers
3053 
3054 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
3055 {
3056  if(indentation + 1 > s.len)
3057  return npos;
3058  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
3059  {
3060  if(s.str[i] == '\n')
3061  {
3062  csubstr rem = s.sub(i + 1);
3063  size_t first = rem.first_not_of(' ');
3064  first = (first != npos) ? first : rem.len;
3065  if(first > indentation)
3066  return i;
3067  }
3068  }
3069  return npos;
3070 }
3071 
3072 template<class EventHandler>
3073 template<class FilterProcessor>
3074 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3075 {
3076  _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3077  _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3078 
3079  // a debugging scaffold:
3080  #if 0
3081  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3082  #else
3083  #define _c4dbgchomp(...)
3084  #endif
3085 
3086  // advance to the last line having spaces beyond the indentation
3087  {
3088  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3089  if(last != npos)
3090  {
3091  _c4dbgchomp("found newline and larger indentation. last={}", last);
3092  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3093  _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3094  // remove indentation spaces, copy the rest
3095  while((proc.rpos < last) && proc.has_more_chars())
3096  {
3097  const char curr = proc.curr();
3098  _c4dbgchomp("curr='{}'", _c4prc(curr));
3099  switch(curr)
3100  {
3101  case '\n':
3102  {
3103  _c4dbgchomp("newline! remlen={}", proc.rem().len);
3104  proc.copy();
3105  // are there spaces after the newline?
3106  csubstr at_next_line = proc.rem();
3107  if(at_next_line.begins_with(' '))
3108  {
3109  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3110  // there are spaces.
3111  size_t first_non_space = at_next_line.first_not_of(' ');
3112  _c4dbgchomp("first_non_space={}", first_non_space);
3113  if(first_non_space == npos)
3114  {
3115  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3116  first_non_space = at_next_line.len;
3117  }
3118  if(first_non_space <= indentation)
3119  {
3120  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3121  proc.skip(first_non_space);
3122  }
3123  else
3124  {
3125  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3126  proc.skip(indentation);
3127  // copy the spaces after the indentation
3128  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3129  proc.copy(first_non_space - indentation);
3130  }
3131  }
3132  break;
3133  }
3134  case '\r':
3135  proc.skip();
3136  break;
3137  }
3138  }
3139  }
3140  }
3141 
3142  // from now on, we only have line ends (or indentation spaces)
3143  switch(chomp)
3144  {
3145  case CHOMP_CLIP:
3146  {
3147  bool had_one = false;
3148  while(proc.has_more_chars())
3149  {
3150  const char curr = proc.curr();
3151  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3152  switch(curr)
3153  {
3154  case '\n':
3155  {
3156  _c4dbgchomp("copy newline!", curr);
3157  proc.copy();
3158  proc.set_at_end();
3159  had_one = true;
3160  break;
3161  }
3162  case ' ':
3163  case '\r':
3164  _c4dbgchomp("skip!", curr);
3165  proc.skip();
3166  break;
3167  }
3168  }
3169  if(!had_one) // there were no newline characters. add one.
3170  {
3171  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3172  proc.set('\n');
3173  }
3174  break;
3175  }
3176  case CHOMP_KEEP:
3177  {
3178  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3179  while(proc.has_more_chars())
3180  {
3181  const char curr = proc.curr();
3182  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3183  switch(curr)
3184  {
3185  case '\n':
3186  _c4dbgchomp("copy newline!", curr);
3187  proc.copy();
3188  break;
3189  case ' ':
3190  case '\r':
3191  _c4dbgchomp("skip!", curr);
3192  proc.skip();
3193  break;
3194  }
3195  }
3196  break;
3197  }
3198  case CHOMP_STRIP:
3199  {
3200  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3201  // nothing to do!
3202  break;
3203  }
3204  }
3205 
3206  #undef _c4dbgchomp
3207 }
3208 
3209 
3210 // a debugging scaffold:
3211 #if 0
3212 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3213 #else
3214 #define _c4dbgfb(...)
3215 #endif
3216 
3217 template<class EventHandler>
3218 template<class FilterProcessor>
3219 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3220 {
3221  csubstr rem = proc.rem(); // remaining
3222  if(rem.len)
3223  {
3224  size_t first = rem.first_not_of(' ');
3225  if(first != npos)
3226  {
3227  _c4dbgfb("{} spaces follow before next nonws character", first);
3228  if(first < indentation)
3229  {
3230  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3231  proc.skip(first);
3232  }
3233  else
3234  {
3235  _c4dbgfb("skip {} spaces from indentation", indentation);
3236  proc.skip(indentation);
3237  }
3238  }
3239  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3240  else
3241  {
3242  _c4dbgfb("all spaces to the end: {} spaces", first);
3243  first = rem.len;
3244  if(first)
3245  {
3246  if(first < indentation)
3247  {
3248  _c4dbgfb("skip everything", first);
3249  proc.skip(proc.src.len - proc.rpos);
3250  }
3251  else
3252  {
3253  _c4dbgfb("skip {} spaces from indentation", indentation);
3254  proc.skip(indentation);
3255  }
3256  }
3257  }
3258  #endif
3259  }
3260 }
3261 
3262 template<class EventHandler>
3263 template<class FilterProcessor>
3264 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3265 {
3266  csubstr contents = proc.src.trimr(" \n\r");
3267  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3268  if(!contents.len)
3269  {
3270  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3271  if(chomp == CHOMP_KEEP && proc.src.len)
3272  {
3273  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3274  while(proc.has_more_chars())
3275  {
3276  const char curr = proc.curr();
3277  if(curr == '\n')
3278  proc.copy();
3279  else
3280  proc.skip();
3281  }
3282  if(!proc.wpos)
3283  {
3284  proc.set('\n');
3285  }
3286  }
3287  }
3288  return contents.len;
3289 }
3290 
3291 template<class EventHandler>
3292 template<class FilterProcessor>
3293 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3294 {
3295  _c4dbgfb("contents_len={}", contents_len);
3296 
3297  _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3298 
3299  // extend contents to just before the first newline at the end,
3300  // in case it is preceded by spaces
3301  size_t firstnewl = proc.src.first_of('\n', contents_len);
3302  if(firstnewl != npos)
3303  {
3304  contents_len = firstnewl;
3305  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3306  }
3307  else
3308  {
3309  contents_len = proc.src.len;
3310  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3311  }
3312 
3313  return contents_len;
3314 }
3315 
3316 #undef _c4dbgfb
3317 
3318 
3319 //-----------------------------------------------------------------------------
3320 //-----------------------------------------------------------------------------
3321 //-----------------------------------------------------------------------------
3322 
3323 // a debugging scaffold:
3324 #if 0
3325 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3326 #else
3327 #define _c4dbgfbl(...)
3328 #endif
3329 
3330 template<class EventHandler>
3331 template<class FilterProcessor>
3332 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3333 {
3334  _c4dbgfbl("indentation={} before={}", indentation, _prs(proc.src));
3335 
3336  size_t contents_len = _handle_all_whitespace(proc, chomp);
3337  if(!contents_len)
3338  return proc.result();
3339 
3340  contents_len = _extend_to_chomp(proc, contents_len);
3341 
3342  _c4dbgfbl("to filter={}", _prs(proc.src.first(contents_len)));
3343 
3344  _filter_block_indentation(proc, indentation);
3345 
3346  // now filter the bulk
3347  while(proc.has_more_chars(/*maxpos*/contents_len))
3348  {
3349  const char curr = proc.curr();
3350  _c4dbgfbl("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3351  switch(curr)
3352  {
3353  case '\n':
3354  {
3355  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3356  proc.copy(); // copy the newline
3357  _filter_block_indentation(proc, indentation);
3358  break;
3359  }
3360  case '\r':
3361  proc.skip();
3362  break;
3363  default:
3364  proc.copy();
3365  break;
3366  }
3367  }
3368 
3369  _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3370 
3371  _filter_chomp(proc, chomp, indentation);
3372 
3373  _c4dbgfbl("final={}", _prs(proc.sofar()));
3374 
3375  return proc.result();
3376 }
3377 
3378 #undef _c4dbgfbl
3379 
3380 template<class EventHandler>
3381 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3382 {
3383  FilterProcessorSrcDst proc(scalar, dst);
3384  return _filter_block_literal(proc, indentation, chomp);
3385 }
3386 
3387 template<class EventHandler>
3388 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3389 {
3390  FilterProcessorInplaceEndExtending proc(scalar, cap);
3391  return _filter_block_literal(proc, indentation, chomp);
3392 }
3393 
3394 
3395 //-----------------------------------------------------------------------------
3396 //-----------------------------------------------------------------------------
3397 //-----------------------------------------------------------------------------
3398 
3399 // a debugging scaffold:
3400 #if 0
3401 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3402 #else
3403 #define _c4dbgfbf(...)
3404 #endif
3405 
3406 
3407 template<class EventHandler>
3408 template<class FilterProcessor>
3409 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3410 {
3411  _filter_block_indentation(proc, indentation);
3412  while(proc.has_more_chars(len))
3413  {
3414  const char curr = proc.curr();
3415  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3416  switch(curr)
3417  {
3418  case '\n':
3419  _c4dbgfbf("newline.", curr);
3420  proc.copy();
3421  _filter_block_indentation(proc, indentation);
3422  break;
3423  case '\r':
3424  proc.skip();
3425  break;
3426  case ' ':
3427  case '\t':
3428  {
3429  size_t first = proc.rem().first_not_of(" \t");
3430  _c4dbgfbf("space. first={}", first);
3431  if(first == npos)
3432  first = proc.rem().len;
3433  _c4dbgfbf("... indentation increased to {}", first);
3434  _filter_block_folded_indented_block(proc, indentation, len, first);
3435  break;
3436  }
3437  default:
3438  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3439  return;
3440  }
3441  }
3442 }
3443 
// Process one newline of a run of consecutive newlines in a folded
// block scalar, according to its ordinal in the run:
//  - 1st newline: it is skipped and a space is written in its place;
//    the write position of that space is recorded.
//  - 2nd newline: it is skipped and the space written for the 1st
//    newline is overwritten with '\n'.
//  - any later newline: it is copied.
//
// proc: the filter processor, positioned at the newline.
// num_newl: 1-based ordinal of this newline within the run.
// wpos_at_first_newl: write position recorded for the 1st newline
//   (asserted to differ from npos when num_newl is 2).
// Returns the write position of the 1st newline: freshly recorded
// when num_newl is 1, otherwise the value received.
//
// NOTE: this code lives in its own function as a workaround for a
// miscompilation; see the note in _filter_block_folded_newlines().
// Do not inline or restructure it without re-testing.
3444 template<class EventHandler>
3445 template<class FilterProcessor>
3446 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3447 {
3448  switch(num_newl)
3449  {
3450  case 1u:
3451  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space goes, so a 2nd newline can overwrite it
3452  wpos_at_first_newl = proc.wpos;
3453  proc.skip();
3454  proc.set(' ');
3455  break;
3456  case 2u:
3457  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// the space written for the 1st newline must be the last character written
3458  _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
3459  _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
3460  _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3461  proc.skip();
3462  proc.set_at(wpos_at_first_newl, '\n');
3463  _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
3464  break;
3465  default:
3466  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3467  proc.copy();
3468  break;
3469  }
3470  return wpos_at_first_newl;
3471 }
3472 
// Process a run of newlines in the bulk of a folded block scalar.
// The current character is asserted to be '\n'.
//
// Each newline is passed to _filter_block_folded_newlines_compress()
// with its ordinal in the run, and the indentation following it is
// removed. A space or tab after the indentation starts a more-indented
// block: the space written for the first newline (if any) is turned
// back into '\n', one more '\n' is written if the run had more than
// one newline, the block is handed to
// _filter_block_folded_indented_block(), and the newline count starts
// over. Carriage returns are dropped. Any other character ends the
// function.
//
// proc: the filter processor holding the read/write state.
// indentation: the indentation of the block scalar.
// len: the limit passed to proc.has_more_chars().
3473 template<class EventHandler>
3474 template<class FilterProcessor>
3475 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3476 {
3477  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
3478  size_t num_newl = 0;
3479  size_t wpos_at_first_newl = npos;
3480  while(proc.has_more_chars(len))
3481  {
3482  const char curr = proc.curr();
3483  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3484  switch(curr)
3485  {
3486  case '\n':
3487  {
3488  _c4dbgfbf("newline. sofar={}", num_newl);
3489  // NOTE: vs2022-32bit-release builds were giving wrong
3490  // results in this block, if it was written either as
3491  // a switch(num_newl) or as its equivalent if-form.
3492  //
3493  // For this reason, we're using a dedicated function
3494  // (**_compress), which seems to work around the issue.
3495  //
3496  // The manifested problem was that somewhere between the
3497  // assignment to curr and this point, proc.wpos (the
3498  // write-position of the processor) jumped to npos, which
3499  // made the write wrap-around! To make things worse,
3500  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3501  // problem go away!
3502  //
3503  // The only way to make the problem appear with prints
3504  // enabled was by disabling all prints in this function
3505  // (including in the block which was moved to the compress
3506  // function) and then selectively enabling only some of
3507  // those prints.
3508  //
3509  // This may be due to some bug in the cl-x86 optimizer; or
3510  // it may be triggered by some UB which may be
3511  // inadvertently present in this function or in the filter
3512  // processor. This is despite our best efforts to weed out
3513  // any such UB problem: neither clang-tidy, nor any of the
3514  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3515  // in this code.
3516  //
3517  // In the end, moving this block to a separate function
3518  // was the only way to bury the problem. But it may
3519  // resurface again, as The Undead, rising from the
3520  // grave to haunt us with his terrible presence.
3521  //
3522  // We may have to revisit this. With a stake, and lots of
3523  // garlic.
3524  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3525  _filter_block_indentation(proc, indentation);
3526  break;
3527  }
3528  case ' ':
3529  case '\t':
3530  {
// blanks beyond the indentation: a more-indented block starts here
3531  size_t first = proc.rem().first_not_of(" \t");
3532  _c4dbgfbf("space. first={}", first);
3533  if(first == npos)
3534  first = proc.rem().len;
3535  _c4dbgfbf("... indentation increased to {}", first);
3536  if(num_newl)
3537  {
3538  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3539  proc.set_at(wpos_at_first_newl, '\n');
3540  }
3541  if(num_newl > 1u)
3542  {
3543  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3544  proc.set('\n');
3545  }
3546  _filter_block_folded_indented_block(proc, indentation, len, first);
// the run of newlines was consumed: start counting afresh
3547  num_newl = 0;
3548  wpos_at_first_newl = npos;
3549  break;
3550  }
3551  case '\r':
3552  proc.skip();
3553  break;
3554  default:
3555  _c4dbgfbf("not space, not newline. stop.", 0);
3556  return;
3557  }
3558  }
3559 }
3560 
3561 
3562 template<class EventHandler>
3563 template<class FilterProcessor>
3564 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3565 {
3566  _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3567  if(curr_indentation)
3568  proc.copy(curr_indentation);
3569  while(proc.has_more_chars(len))
3570  {
3571  const char curr = proc.curr();
3572  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3573  switch(curr)
3574  {
3575  case '\n':
3576  {
3577  proc.copy();
3578  _filter_block_indentation(proc, indentation);
3579  csubstr rem = proc.rem();
3580  const size_t first = rem.first_not_of(' ');
3581  _c4dbgfbf("newline. firstns={}", first);
3582  if(first == 0)
3583  {
3584  const char c = rem[first];
3585  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3586  if(c == '\n' || c == '\r')
3587  {
3588  ;
3589  }
3590  else
3591  {
3592  _c4dbgfbf("done with indented block", first);
3593  goto endloop;
3594  }
3595  }
3596  else if(first != npos)
3597  {
3598  proc.copy(first);
3599  _c4dbgfbf("copy all {} spaces", first);
3600  }
3601  break;
3602  }
3603  break;
3604  case '\r':
3605  proc.skip();
3606  break;
3607  default:
3608  proc.copy();
3609  break;
3610  }
3611  }
3612  endloop:
3613  return;
3614 }
3615 
3616 
3617 template<class EventHandler>
3618 template<class FilterProcessor>
3619 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3620 {
3621  _c4dbgfbf("indentation={} before={}", indentation, _prs(proc.src));
3622 
3623  size_t contents_len = _handle_all_whitespace(proc, chomp);
3624  if(!contents_len)
3625  return proc.result();
3626 
3627  contents_len = _extend_to_chomp(proc, contents_len);
3628 
3629  _c4dbgfbf("to filter={}", _prs(proc.src.first(contents_len)));
3630 
3631  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3632 
3633  // now filter the bulk
3634  while(proc.has_more_chars(/*maxpos*/contents_len))
3635  {
3636  const char curr = proc.curr();
3637  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3638  switch(curr)
3639  {
3640  case '\n':
3641  {
3642  _c4dbgfbf("found newline", curr);
3643  _filter_block_folded_newlines(proc, indentation, contents_len);
3644  break;
3645  }
3646  case '\r':
3647  proc.skip();
3648  break;
3649  default:
3650  proc.copy();
3651  break;
3652  }
3653  }
3654 
3655  _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3656 
3657  _filter_chomp(proc, chomp, indentation);
3658 
3659  _c4dbgfbf("final={}", proc.sofar().len, _prs(proc.sofar()));
3660 
3661  return proc.result();
3662 }
3663 
3664 #undef _c4dbgfbf
3665 
3666 template<class EventHandler>
3667 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3668 {
3669  FilterProcessorSrcDst proc(scalar, dst);
3670  return _filter_block_folded(proc, indentation, chomp);
3671 }
3672 
3673 template<class EventHandler>
3674 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3675 {
3676  FilterProcessorInplaceEndExtending proc(scalar, cap);
3677  return _filter_block_folded(proc, indentation, chomp);
3678 }
3679 
3680 
3681 //-----------------------------------------------------------------------------
3682 //-----------------------------------------------------------------------------
3683 //-----------------------------------------------------------------------------
3684 
3685 template<class EventHandler>
3686 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3687 {
3688  _c4dbgpf("filtering plain scalar: s={}", _prs(s));
3689  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3690  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3691  _c4dbgpf("filtering plain scalar: success! s={}", _prs(r.get()));
3692  return r.get();
3693 }
3694 
3695 //-----------------------------------------------------------------------------
3696 
3697 template<class EventHandler>
3698 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3699 {
3700  _c4dbgpf("filtering squo scalar: s={}", _prs(s));
3701  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3702  _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3703  _c4dbgpf("filtering squo scalar: success! s={}", _prs(r.get()));
3704  return r.get();
3705 }
3706 
3707 
3708 //-----------------------------------------------------------------------------
3709 
3710 template<class EventHandler>
3711 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3712 {
3713  _c4dbgpf("filtering dquo scalar: s={}", _prs(s));
3714  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3715  if(C4_LIKELY(r.valid()))
3716  {
3717  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(r.get()));
3718  return r.get();
3719  }
3720  else
3721  {
3722  const size_t len = r.required_len();
3723  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3724  substr dst = _alloc_arena(len, &s);
3725  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3726  if(dst.str)
3727  {
3728  _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3729  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3730  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3731  _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3732  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3733  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(rsd.get()));
3734  return rsd.get();
3735  }
3736  return dst;
3737  }
3738 }
3739 
3740 
3741 //-----------------------------------------------------------------------------
3742 
3743 template<class EventHandler>
3744 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3745 {
3746  if(s.is_sub(_buf()))
3747  {
3748  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3749  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3750  if(s.len)
3751  memmove(s.str - 1, s.str, s.len);
3752  --s.str;
3753  s.str[s.len] = '\n';
3754  ++s.len;
3755  return s;
3756  }
3757  else
3758  {
3759  substr dst = _alloc_arena(s.len + 1, &s);
3760  if(s.len)
3761  memcpy(dst.str, s.str, s.len);
3762  dst[s.len] = '\n';
3763  return dst;
3764  }
3765 }
3766 
3767 template<class EventHandler>
3768 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3769 {
3770  _c4dbgpf("filtering block literal scalar: s={}", _prs(s));
3771  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3772  csubstr result;
3773  if(C4_LIKELY(r.valid()))
3774  {
3775  result = r.get();
3776  }
3777  else
3778  {
3779  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3780  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3781  // this can only happen when adding a single newline in clip mode.
3782  // so we shift left the scalar by one place
3783  result = _move_scalar_left_and_add_newline(s);
3784  }
3785  _c4dbgpf("filtering block literal scalar: success! s={}", _prs(result));
3786  return result;
3787 }
3788 
3789 
3790 //-----------------------------------------------------------------------------
3791 template<class EventHandler>
3792 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3793 {
3794  _c4dbgpf("filtering block folded scalar: s={}", _prs(s));
3795  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3796  csubstr result;
3797  if(C4_LIKELY(r.valid()))
3798  {
3799  result = r.get();
3800  }
3801  else
3802  {
3803  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3804  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3805  // this can only happen when adding a single newline in clip mode.
3806  // so we shift left the scalar by one place
3807  result = _move_scalar_left_and_add_newline(s);
3808  }
3809  _c4dbgpf("filtering block folded scalar: success! s={}", _prs(result));
3810  return result;
3811 }
3812 
3813 
3814 //-----------------------------------------------------------------------------
3815 
3816 template<class EventHandler>
3817 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3818 {
3819  if(sc.needs_filter)
3820  {
3821  if(m_options.scalar_filtering())
3822  {
3823  return _filter_scalar_plain(sc.scalar, indentation);
3824  }
3825  else
3826  {
3827  _c4dbgp("plain scalar left unfiltered");
3828  m_evt_handler->mark_key_scalar_unfiltered();
3829  }
3830  }
3831  else
3832  {
3833  _c4dbgp("plain scalar doesn't need filtering");
3834  }
3835  return sc.scalar;
3836 }
3837 
3838 template<class EventHandler>
3839 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3840 {
3841  if(sc.needs_filter)
3842  {
3843  if(m_options.scalar_filtering())
3844  {
3845  return _filter_scalar_plain(sc.scalar, indentation);
3846  }
3847  else
3848  {
3849  _c4dbgp("plain scalar left unfiltered");
3850  m_evt_handler->mark_val_scalar_unfiltered();
3851  }
3852  }
3853  else
3854  {
3855  _c4dbgp("plain scalar doesn't need filtering");
3856  }
3857  return sc.scalar;
3858 }
3859 
3860 
3861 //-----------------------------------------------------------------------------
3862 
3863 template<class EventHandler>
3864 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3865 {
3866  if(sc.needs_filter)
3867  {
3868  if(m_options.scalar_filtering())
3869  {
3870  return _filter_scalar_squot(sc.scalar);
3871  }
3872  else
3873  {
3874  _c4dbgp("squo key scalar left unfiltered");
3875  m_evt_handler->mark_key_scalar_unfiltered();
3876  }
3877  }
3878  else
3879  {
3880  _c4dbgp("squo key scalar doesn't need filtering");
3881  }
3882  return sc.scalar;
3883 }
3884 
3885 template<class EventHandler>
3886 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3887 {
3888  if(sc.needs_filter)
3889  {
3890  if(m_options.scalar_filtering())
3891  {
3892  return _filter_scalar_squot(sc.scalar);
3893  }
3894  else
3895  {
3896  _c4dbgp("squo val scalar left unfiltered");
3897  m_evt_handler->mark_val_scalar_unfiltered();
3898  }
3899  }
3900  else
3901  {
3902  _c4dbgp("squo val scalar doesn't need filtering");
3903  }
3904  return sc.scalar;
3905 }
3906 
3907 
3908 //-----------------------------------------------------------------------------
3909 
3910 template<class EventHandler>
3911 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3912 {
3913  if(sc.needs_filter)
3914  {
3915  if(m_options.scalar_filtering())
3916  {
3917  return _filter_scalar_dquot(sc.scalar);
3918  }
3919  else
3920  {
3921  _c4dbgp("dquo scalar left unfiltered");
3922  m_evt_handler->mark_key_scalar_unfiltered();
3923  }
3924  }
3925  else
3926  {
3927  _c4dbgp("dquo scalar doesn't need filtering");
3928  }
3929  return sc.scalar;
3930 }
3931 
3932 template<class EventHandler>
3933 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3934 {
3935  if(sc.needs_filter)
3936  {
3937  if(m_options.scalar_filtering())
3938  {
3939  return _filter_scalar_dquot(sc.scalar);
3940  }
3941  else
3942  {
3943  _c4dbgp("dquo scalar left unfiltered");
3944  m_evt_handler->mark_val_scalar_unfiltered();
3945  }
3946  }
3947  else
3948  {
3949  _c4dbgp("dquo scalar doesn't need filtering");
3950  }
3951  return sc.scalar;
3952 }
3953 
3954 
3955 //-----------------------------------------------------------------------------
3956 
3957 template<class EventHandler>
3958 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3959 {
3960  if(m_options.scalar_filtering())
3961  {
3962  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3963  }
3964  else
3965  {
3966  _c4dbgp("literal scalar left unfiltered");
3967  m_evt_handler->mark_key_scalar_unfiltered();
3968  }
3969  return sb.scalar;
3970 }
3971 
3972 template<class EventHandler>
3973 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3974 {
3975  if(m_options.scalar_filtering())
3976  {
3977  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3978  }
3979  else
3980  {
3981  _c4dbgp("literal scalar left unfiltered");
3982  m_evt_handler->mark_val_scalar_unfiltered();
3983  }
3984  return sb.scalar;
3985 }
3986 
3987 
3988 //-----------------------------------------------------------------------------
3989 
3990 template<class EventHandler>
3991 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3992 {
3993  if(m_options.scalar_filtering())
3994  {
3995  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3996  }
3997  else
3998  {
3999  _c4dbgp("folded scalar left unfiltered");
4000  m_evt_handler->mark_key_scalar_unfiltered();
4001  }
4002  return sb.scalar;
4003 }
4004 
4005 template<class EventHandler>
4006 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
4007 {
4008  if(m_options.scalar_filtering())
4009  {
4010  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4011  }
4012  else
4013  {
4014  _c4dbgp("folded scalar left unfiltered");
4015  m_evt_handler->mark_val_scalar_unfiltered();
4016  }
4017  return sb.scalar;
4018 }
4019 
4020 
4021 //-----------------------------------------------------------------------------
4022 //-----------------------------------------------------------------------------
4023 //-----------------------------------------------------------------------------
4024 
4025 #ifdef RYML_DBG // !!! <----------------------------------
4026 
4027 template<class EventHandler>
4028 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4029 {
4030  ParserState *s = m_evt_handler->m_curr;
4031  char buf1_[64], buf2_[64], buf3_[64];
4032  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4033  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4034  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4035  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4036  s->flags |= on;
4037 }
4038 
4039 template<class EventHandler>
4040 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4041 {
4042  ParserState *s = m_evt_handler->m_curr;
4043  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4044  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4045  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4046  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4047  csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4048  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4049  _RYML_ASSERT_BASIC((on & off) == ParserFlag_t(0));
4050  s->flags &= ~off;
4051  s->flags |= on;
4052 }
4053 
4054 template<class EventHandler>
4055 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4056 {
4057  ParserState *s = m_evt_handler->m_curr;
4058  char buf1_[64], buf2_[64], buf3_[64];
4059  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4060  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4061  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4062  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4063  s->flags &= ~off;
4064 }
4065 
4066 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4067 {
4068  size_t pos = 0;
4069  bool gotone = false;
4070 
4071  #define _prflag(fl) \
4072  if((flags & fl) == (fl)) \
4073  { \
4074  if(gotone) \
4075  { \
4076  if(pos + 1 < buf.len) \
4077  buf[pos] = '|'; \
4078  ++pos; \
4079  } \
4080  csubstr fltxt = #fl; \
4081  if(pos + fltxt.len <= buf.len) \
4082  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4083  pos += fltxt.len; \
4084  gotone = true; \
4085  }
4086 
4087  _prflag(RTOP);
4088  _prflag(RUNK);
4089  _prflag(RMAP);
4090  _prflag(RSEQ);
4091  _prflag(RFLOW);
4092  _prflag(RBLCK);
4093  _prflag(QMRK);
4094  _prflag(RKEY);
4095  _prflag(RVAL);
4096  _prflag(RKCL);
4097  _prflag(RNXT);
4098  _prflag(SSCL);
4099  _prflag(QSCL);
4100  _prflag(RSET);
4101  _prflag(RDOC);
4102  _prflag(NDOC);
4103  _prflag(USTY);
4104  _prflag(RSEQIMAP);
4105 
4106  #undef _prflag
4107 
4108  if(pos == 0)
4109  if(buf.len > 0)
4110  buf[pos++] = '0';
4111 
4112  _RYML_CHECK_BASIC(pos <= buf.len);
4113 
4114  return buf.first(pos);
4115 }
4116 
4117 #endif // RYML_DBG !!! <----------------------------------
4118 
4119 
4120 //-----------------------------------------------------------------------------
4121 //-----------------------------------------------------------------------------
4122 //-----------------------------------------------------------------------------
4123 
4124 template<class EventHandler>
4125 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
4126 {
4127  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4128  return _buf().sub(loc.offset);
4129 }
4130 
4131 template<class EventHandler>
4132 Location ParseEngine<EventHandler>::val_location(const char *val) const
4133 {
4134  if(C4_UNLIKELY(val == nullptr))
4135  return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4136  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4137  // NOTE: if any of these checks fails, the parser needs to be
4138  // instantiated with locations enabled.
4139  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4140  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4141  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4142  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4143  // NOTE: the pointer needs to belong to the buffer that was used to parse.
4144  csubstr src = _buf();
4145  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4146  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4147  // ok. search the first stored newline after the given ptr
4148  using lineptr_type = size_t const* C4_RESTRICT;
4149  lineptr_type lineptr = nullptr;
4150  size_t offset = (size_t)(val - src.begin());
4151  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4152  {
4153  // just do a linear search if the size is small.
4154  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4155  {
4156  if(*curr > offset)
4157  {
4158  lineptr = curr;
4159  break;
4160  }
4161  }
4162  }
4163  else
4164  {
4165  // do a bisection search if the size is not small.
4166  //
4167  // We could use std::lower_bound but this is simple enough and
4168  // spares the costly include of <algorithm>.
4169  size_t count = m_newline_offsets_size;
4170  lineptr = m_newline_offsets;
4171  while(count)
4172  {
4173  size_t step = count >> 1;
4174  lineptr_type it = lineptr + step;
4175  if(*it < offset)
4176  {
4177  lineptr = ++it;
4178  count -= step + 1;
4179  }
4180  else
4181  {
4182  count = step;
4183  }
4184  }
4185  }
4186  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4187  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4188  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4189  Location loc;
4190  loc.name = m_evt_handler->m_curr->pos.name;
4191  loc.offset = offset;
4192  loc.line = (size_t)(lineptr - m_newline_offsets);
4193  if(lineptr > m_newline_offsets)
4194  loc.col = (offset - *(lineptr-1) - 1u);
4195  else
4196  loc.col = offset;
4197  return loc;
4198 }
4199 
4200 template<class EventHandler>
4201 void ParseEngine<EventHandler>::_prepare_locations()
4202 {
4203  csubstr src = _buf();
4204  size_t numnewlines = 1u + src.count('\n');
4205  _resize_locations(numnewlines);
4206  m_newline_offsets_size = 0;
4207  for(size_t i = 0; i < src.len; i++)
4208  if(src.str[i] == '\n')
4209  m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4210  m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4211  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4212 }
4213 
4214 template<class EventHandler>
4215 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4216 {
4217  numnewlines = numnewlines >= 16 ? numnewlines : 16;
4218  if(numnewlines > m_newline_offsets_capacity)
4219  {
4220  if(m_newline_offsets)
4221  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4222  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4223  m_newline_offsets_capacity = numnewlines;
4224  }
4225 }
4226 
4227 template<class EventHandler>
4228 bool ParseEngine<EventHandler>::_locations_dirty() const
4229 {
4230  return !m_newline_offsets_size;
4231 }
4232 
4233 
4234 //-----------------------------------------------------------------------------
4235 //-----------------------------------------------------------------------------
4236 //-----------------------------------------------------------------------------
4237 
/** Consume leading whitespace and a trailing comment from the
 * remainder of the current line (the caller is a flow-context handler,
 * going by the function name).
 *
 * When the remainder is not empty: if it starts with a space or a tab,
 * _skipchars(" \t") is called; then, if the remainder begins with '#',
 * _line_progressed() is called with the full remaining length. */
4238 template<class EventHandler>
4239 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4240 {
4241  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
 // (this is why the full member path is spelled out in every access below)
4242  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4243  {
4244  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4245  {
4246  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4247  _skipchars(" \t");
4248  }
4249  // comments
 // the remainder is read again here, after the _skipchars() call above
4250  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4251  {
4252  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4253  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4254  }
4255  }
4256 }
4257 
4258 
4259 template<class EventHandler>
4260 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4261 {
4262  _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4263  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4264  if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4265  {
4266  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4267  _c4dbgpf("flow: after indentation={}", _prs(trimmed));
4268  if(trimmed.len && trimmed.triml(" \t").len)
4269  {
4270  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4271  _c4err("bad indentation");
4272  }
4273  }
4274 }
4275 
4276 template<class EventHandler>
4277 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4278 {
4279  const size_t mark = m_evt_handler->m_curr->pos.offset;
4280  const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4281  _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4282  if(firstpos != npos)
4283  {
4284  _c4dbgp("block: non empty line");
4285  _line_progressed(firstpos);
4286  return mark;
4287  }
4288  else
4289  {
4290  _c4dbgp("block: rest of line is whitespace");
4291  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4292  return npos;
4293  }
4294 }
4295 
4296 template<class EventHandler>
4297 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4298 {
4299  _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4300  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4301  if(end_mark != start_mark)
4302  {
4303  csubstr leading = _buf().range(start_mark, end_mark);
4304  _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading, true));
4305  size_t pos = leading.find('\t');
4306  if(pos != npos)
4307  {
4308  size_t fno = leading.first_not_of(" \t");
4309  if(fno == npos || pos < fno)
4310  _c4err("invalid tab character to the left");
4311  }
4312  (void)leading;
4313  }
4314 }
4315 
4316 
4317 //-----------------------------------------------------------------------------
4318 
4319 
4320 template<class EventHandler>
4321 void ParseEngine<EventHandler>::_handle_colon()
4322 {
4323  size_t curr = m_evt_handler->m_curr->pos.line;
4324  if(C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon))
4325  {
4326  _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4327  _c4err("two colons on same line");
4328  }
4329  _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4330  m_prev_colon = curr;
4331 }
4332 
4333 template<class EventHandler>
4334 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4335 {
4336  _c4dbgpf("store annotation[{}]: {}", dst->num_entries, _prs(str));
4337  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4338  dst->annotations[dst->num_entries].str = str;
4339  dst->annotations[dst->num_entries].indentation = {};
4340  dst->annotations[dst->num_entries].line = {};
4341  dst->annotations[dst->num_entries].orig = {};
4342  ++dst->num_entries;
4343 }
4344 
4345 template<class EventHandler>
4346 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4347 {
4348  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4349  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4350  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4351  {
4352  _c4err("parse error");
4353  }
4354  dst->annotations[dst->num_entries].str = str;
4355  dst->annotations[dst->num_entries].indentation = indentation;
4356  dst->annotations[dst->num_entries].line = line;
4357  dst->annotations[dst->num_entries].orig = {};
4358  ++dst->num_entries;
4359 }
4360 
4361 template<class EventHandler>
4362 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4363 {
4364  _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4365  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4366  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4367  {
4368  _c4err("parse error");
4369  }
4370  dst->annotations[dst->num_entries].str = str;
4371  dst->annotations[dst->num_entries].indentation = indentation;
4372  dst->annotations[dst->num_entries].line = line;
4373  dst->annotations[dst->num_entries].orig = orig;
4374  ++dst->num_entries;
4375 }
4376 
4377 template<class EventHandler>
4378 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4379 {
4380  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4381 }
4382 
4383 template<class EventHandler>
4384 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4385 {
4386  if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4387  return false;
4388  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4389  if(m_pending_tags.num_entries)
4390  {
4391  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4392  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4393  {
4394  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4395  _clear_annotations(&m_pending_tags);
4396  }
4397  else
4398  {
4399  _c4err("too many tags");
4400  }
4401  }
4402  if(m_pending_anchors.num_entries)
4403  {
4404  _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4405  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4406  {
4407  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4408  _clear_annotations(&m_pending_anchors);
4409  }
4410  else
4411  {
4412  _c4err("too many anchors");
4413  }
4414  }
4415  m_evt_handler->set_key_scalar_plain_empty();
4416  m_evt_handler->set_val_scalar_plain_empty();
4417  return true;
4418 }
4419 
4420 template<class EventHandler>
4421 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4422 {
4423  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4424  if(m_pending_tags.num_entries)
4425  {
4426  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4427  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4428  {
4429  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4430  _clear_annotations(&m_pending_tags);
4431  }
4432  else
4433  {
4434  _c4err("too many tags"); // LCOV_EXCL_LINE
4435  }
4436  }
4437  if(m_pending_anchors.num_entries)
4438  {
4439  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4440  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4441  {
4442  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4443  _clear_annotations(&m_pending_anchors);
4444  }
4445  else
4446  {
4447  _c4err("too many anchors"); // LCOV_EXCL_LINE
4448  }
4449  }
4450 }
4451 
4452 template<class EventHandler>
4453 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4454 {
4455  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4456  if(m_pending_tags.num_entries)
4457  {
4458  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4459  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4460  {
4461  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4462  _clear_annotations(&m_pending_tags);
4463  }
4464  else
4465  {
4466  _c4err("too many tags");
4467  }
4468  }
4469  if(m_pending_anchors.num_entries)
4470  {
4471  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4472  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4473  {
4474  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4475  _clear_annotations(&m_pending_anchors);
4476  }
4477  else
4478  {
4479  _c4err("too many anchors");
4480  }
4481  }
4482 }
4483 
4484 template<class EventHandler>
4485 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4486 {
4487  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4488  if(m_pending_tags.num_entries == 2)
4489  {
4490  _c4dbgp("2 tags, setting entry 0");
4491  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4492  }
4493  else if(m_pending_tags.num_entries == 1)
4494  {
4495  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4496  if(m_pending_tags.annotations[0].line < current_line)
4497  {
4498  _c4dbgp("...tag is for the map. setting it.");
4499  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4500  _clear_annotations(&m_pending_tags);
4501  }
4502  }
4503  //
4504  if(m_pending_anchors.num_entries == 2)
4505  {
4506  _c4dbgp("2 anchors, setting entry 0");
4507  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4508  }
4509  else if(m_pending_anchors.num_entries == 1)
4510  {
4511  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4512  if(m_pending_anchors.annotations[0].line < current_line)
4513  {
4514  _c4dbgp("...anchor is for the map. setting it.");
4515  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4516  _clear_annotations(&m_pending_anchors);
4517  }
4518  }
4519 }
4520 
4521 template<class EventHandler>
4522 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4523 {
4524  _c4dbgp("annotations_before_start_mapblck_as_key");
4525  switch(m_pending_tags.num_entries)
4526  {
4527  case 1u:
4528  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4529  if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4530  {
4531  _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4532  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4533  _clear_annotations(&m_pending_tags);
4534  }
4535  break;
4536  case 2u:
4537  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4538  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4539  break;
4540  }
4541  switch(m_pending_anchors.num_entries)
4542  {
4543  case 1u:
4544  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4545  if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4546  {
4547  _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4548  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4549  _clear_annotations(&m_pending_anchors);
4550  }
4551  break;
4552  case 2u:
4553  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4554  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4555  break;
4556  }
4557 }
4558 
4559 template<class EventHandler>
4560 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4561 {
4562  _c4dbgp("annotations_after_start_mapblck");
4563  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4564  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4565  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4566  {
4567  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4568  switch(m_pending_tags.num_entries)
4569  {
4570  case 1u:
4571  _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4572  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4573  _clear_annotations(&m_pending_tags);
4574  break;
4575  case 2u:
4576  _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4577  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4578  _clear_annotations(&m_pending_tags);
4579  break;
4580  }
4581  switch(m_pending_anchors.num_entries)
4582  {
4583  case 1u:
4584  _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4585  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4586  _clear_annotations(&m_pending_anchors);
4587  break;
4588  case 2u:
4589  _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4590  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4591  _clear_annotations(&m_pending_anchors);
4592  break;
4593  }
4594  }
4595  _set_indentation(key_indentation);
4596 }
4597 
4598 template<class EventHandler>
4599 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4600 {
4601  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4602  // select the left-most annotation on the max line
4603  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4604  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4605  {
4606  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4607  if(ann.line > curr->line)
4608  curr = &ann;
4609  else if(ann.indentation < curr->indentation)
4610  curr = &ann;
4611  }
4612  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4613  {
4614  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4615  if(ann.line > curr->line)
4616  curr = &ann;
4617  else if(ann.indentation < curr->indentation)
4618  curr = &ann;
4619  }
4620  return curr->line < val_line ? val_indentation : curr->indentation;
4621 }
4622 
4623 template<class EventHandler>
4624 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4625 {
4626  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4627  m_evt_handler->set_key_ref(alias);
4628  else
4629  _c4err("aliases cannot have anchors or tags");
4630 }
4631 
4632 template<class EventHandler>
4633 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4634 {
4635  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4636  m_evt_handler->set_val_ref(alias);
4637  else
4638  _c4err("aliases cannot have anchors or tags");
4639 }
4640 
4641 template<class EventHandler>
4642 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4643 {
4644  _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4645  _c4assert(tag.is_sub(_buf()));
4646  TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4647  if(ret)
4648  {
4649  _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4650  return ret.resolved;
4651  }
4652  _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4653  size_t bufsz = 0;
4654  substr buf = m_evt_handler->arena_rem();
4655  TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4656  csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4657  m_evt_handler->m_curr->pos,
4658  m_evt_handler->m_stack.m_callbacks);
4659  _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4660  _c4assert((bufsz > buf.len) == (!ttag.str));
4661  _c4assert(!!bufsz == (ttag.len == bufsz));
4662  // try again if the arena size was not enough
4663  if(!ttag.str)
4664  {
4665  _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4666  _c4assert(ttag.len == bufsz);
4667  buf = _alloc_arena(bufsz, &tag);
4668  if(buf.str) // the alloc may fail eg with the ints handler
4669  {
4670  ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4671  m_evt_handler->m_curr->pos,
4672  m_evt_handler->m_stack.m_callbacks);
4673  }
4674  _c4assert(ttag.len == bufsz);
4675  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4676  }
4677  else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4678  {
4679  _c4dbgp("tag required arena. update size");
4680  _c4assert(ttag.len == bufsz);
4681  _c4assert(ttag.is_sub(buf));
4682  (void)_alloc_arena(bufsz);
4683  }
4684  C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4685  if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4686  {
4687  _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4688  // is the resolved tag not in any of those buffers?
4689  if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4690  {
4691  _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4692  buf = _alloc_arena(ttag.len, &tag);
4693  if(buf.str) // the alloc may fail eg with the ints handler
4694  memcpy(buf.str, ttag.str, ttag.len);
4695  ttag.str = buf.str; // keep the current len!
4696  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4697  }
4698  }
4699  C4_SUPPRESS_WARNING_MSVC_POP
4700  _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4701  _c4assert(ttag.len > 0);
4702  // cache the hard-earned result!
4703  m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4704  return ttag;
4705 }
4706 
4707 template<class EventHandler>
4708 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4709 {
4710  _c4assert(directive->begins_with("%YAML"));
4711  size_t version_start = directive->first_not_of(" \t", 5);
4712  if(version_start != npos)
4713  {
4714  csubstr digits = "0123456789";
4715  size_t major_end = directive->first_not_of(digits, version_start);
4716  if(major_end != npos && directive->str[major_end] == '.') // single dot
4717  {
4718  size_t minor_end = directive->first_not_of(digits, major_end + 1);
4719  if(minor_end == npos)
4720  minor_end = directive->len;
4721  _set_first_strict(*directive, minor_end);
4722  *version = directive->range(version_start, minor_end);
4723  _c4dbgpf("%YAML: version={} full={}", *version, _prs(*directive, true));
4724  return true;
4725  }
4726  }
4727  return false;
4728 }
4729 
4730 template<class EventHandler>
4731 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4732 {
4733  _c4assert(directive->begins_with("%TAG"));
4734  csubstr whitespace = " \t";
4735  size_t handle_start = directive->first_not_of(whitespace, 4);
4736  if(handle_start != npos && directive->str[handle_start] == '!')
4737  {
4738  size_t handle_end = directive->first_of(whitespace, handle_start);
4739  if(handle_end != npos)
4740  {
4741  size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4742  if(prefix_start != npos)
4743  {
4744  size_t prefix_end = directive->first_of(whitespace, prefix_start);
4745  if(prefix_end == npos)
4746  prefix_end = directive->len;
4747  _set_first_strict(*directive, prefix_end);
4748  *handle = directive->range(handle_start, handle_end);
4749  *prefix = directive->range(prefix_start, prefix_end);
4750  _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive, true));
4751  if(is_valid_tag_handle(*handle))
4752  return true;
4753  }
4754  }
4755  }
4756  return false;
4757 }
4758 
4759 template<class EventHandler>
4760 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4761 {
4762  _c4dbgpf("handle_directive: rem={}", _prs(directive, true));
4763  _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4764  _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4765  const char *err = nullptr;
4766  csubstr rem;
4767  size_t pos;
4768  auto isdirective = [](csubstr str, csubstr dir) {
4769  if(str.begins_with(dir))
4770  {
4771  csubstr rest = str.sub(dir.len);
4772  return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4773  }
4774  return false;
4775  };
4776  if(isdirective(directive, "%TAG"))
4777  {
4778  csubstr handle;
4779  csubstr prefix;
4780  if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4781  {
4782  err = "invalid %TAG directive";
4783  goto directive_error; // NOLINT
4784  }
4785  m_evt_handler->add_directive_tag(handle, prefix);
4786  }
4787  else if(isdirective(directive, "%YAML"))
4788  {
4789  csubstr version;
4790  if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &version)))
4791  {
4792  err = "invalid %YAML directive";
4793  goto directive_error; // NOLINT
4794  }
4795  if(C4_UNLIKELY(m_has_directives_yaml))
4796  {
4797  err = "multiple %YAML directives";
4798  goto directive_error; // NOLINT
4799  }
4800  m_has_directives_yaml = true;
4801  m_evt_handler->add_directive_yaml(version);
4802  }
4803  m_has_directives = true;
4804  rem = m_evt_handler->m_curr->line_contents.rem;
4805  pos = rem.first_not_of(" \t", directive.len);
4806  pos = pos != npos ? pos : rem.len;
4807  _line_progressed(pos);
4808  rem = rem.sub(pos);
4809  _c4dbgpf("handle_directive: rest={}", _prs(rem));
4810  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
4811  {
4812  err = "invalid tokens after directive";
4813  goto directive_error; // NOLINT
4814  }
4815 directive_error:
4816  if(C4_UNLIKELY(err != nullptr))
4817  _c4err(err);
4818 }
4819 
4820 template<class EventHandler>
4821 bool ParseEngine<EventHandler>::_handle_bom()
4822 {
4823  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4824  if(rem.len)
4825  {
4826  const csubstr rest = rem.sub(1);
4827  // https://yaml.org/spec/1.2.2/#52-character-encodings
4828  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4829  if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4830  {
4831  _c4dbgp("byte order mark: UTF32BE");
4832  _handle_bom(UTF32BE);
4833  _line_progressed(4);
4834  m_bom_len = 4;
4835  return true;
4836  }
4837  else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4838  {
4839  _c4dbgp("byte order mark: UTF32LE");
4840  _handle_bom(UTF32LE);
4841  _line_progressed(4);
4842  m_bom_len = 4;
4843  return true;
4844  }
4845  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4846  {
4847  _c4dbgp("byte order mark: UTF16BE");
4848  _handle_bom(UTF16BE);
4849  _line_progressed(2);
4850  m_bom_len = 2;
4851  return true;
4852  }
4853  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4854  {
4855  _c4dbgp("byte order mark: UTF16LE");
4856  _handle_bom(UTF16LE);
4857  _line_progressed(2);
4858  m_bom_len = 2;
4859  return true;
4860  }
4861  else if(rem.begins_with("\xef\xbb\xbf"))
4862  {
4863  _c4dbgp("byte order mark: UTF8");
4864  _handle_bom(UTF8);
4865  _line_progressed(3);
4866  m_bom_len = 3;
4867  return true;
4868  }
4869  #undef _rymlisascii
4870  }
4871  return false;
4872 }
4873 
4874 template<class EventHandler>
4875 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4876 {
4877  if(m_encoding == NOBOM)
4878  {
4879  if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4880  m_encoding = enc;
4881  else
4882  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4883  }
4884  else if(enc != m_encoding)
4885  {
4886  _c4err("byte order mark can only be set once");
4887  }
4888 }
4889 
4890 
4891 //-----------------------------------------------------------------------------
4892 
4893 template<class EventHandler>
4894 void ParseEngine<EventHandler>::_handle_seq_json()
4895 {
4896 seqjson_start:
4897  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4898 
4899  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4900  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4901  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4902  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4903  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4904 
4905  _handle_flow_skip_whitespace();
4906  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4907  if(!rem.len)
4908  goto seqjson_again;
4909 
4910  if(has_any(RVAL))
4911  {
4912  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4913  const char first = rem.str[0];
4914  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4915  switch(first)
4916  {
4917  case '"':
4918  {
4919  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4920  ScannedScalar sc = _scan_scalar_dquot();
4921  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4922  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4923  addrem_flags(RNXT, RVAL);
4924  break;
4925  }
4926  case '[':
4927  {
4928  _c4dbgp("seqjson[RVAL]: start child seqjson");
4929  addrem_flags(RNXT, RVAL);
4930  m_evt_handler->begin_seq_val_flow();
4931  addrem_flags(RVAL, RNXT);
4932  _line_progressed(1);
4933  break;
4934  }
4935  case '{':
4936  {
4937  _c4dbgp("seqjson[RVAL]: start child mapjson");
4938  addrem_flags(RNXT, RVAL);
4939  m_evt_handler->begin_map_val_flow();
4940  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4941  _line_progressed(1);
4942  goto seqjson_finish;
4943  }
4944  case ']': // this happens on a trailing comma like ", ]"
4945  {
4946  _c4dbgp("seqjson[RVAL]: end!");
4947  rem_flags(RSEQ);
4948  _end_seq_flow();
4949  _line_progressed(1);
4950  if(!has_all(RSEQ|RFLOW))
4951  goto seqjson_finish;
4952  break;
4953  }
4954  default:
4955  {
4956  ScannedScalar sc;
4957  if(_scan_scalar_seq_json(&sc))
4958  {
4959  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4960  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4961  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4962  addrem_flags(RNXT, RVAL);
4963  }
4964  else
4965  {
4966  _c4err("parse error");
4967  }
4968  }
4969  }
4970  }
4971  else // RNXT
4972  {
4973  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4974  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4975  const char first = rem.str[0];
4976  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4977  switch(first)
4978  {
4979  case ',':
4980  {
4981  _c4dbgp("seqjson[RNXT]: expect next val");
4982  addrem_flags(RVAL, RNXT);
4983  m_evt_handler->add_sibling();
4984  _line_progressed(1);
4985  break;
4986  }
4987  case ']':
4988  {
4989  _c4dbgp("seqjson[RNXT]: end!");
4990  _end_seq_flow();
4991  _line_progressed(1);
4992  goto seqjson_finish;
4993  }
4994  default:
4995  _c4err("parse error");
4996  }
4997  }
4998 
4999  seqjson_again:
5000  _c4dbgt("seqjson: go again", 0);
5001  if(_finished_line())
5002  {
5003  if(C4_LIKELY(!_finished_file()))
5004  {
5005  _line_ended();
5006  _scan_line();
5007  _c4dbgnextline();
5008  }
5009  else
5010  {
5011  _c4err("missing terminating ]");
5012  }
5013  }
5014  goto seqjson_start;
5015 
5016  seqjson_finish:
5017  _c4dbgp("seqjson: finish");
5018 }
5019 
5020 
5021 //-----------------------------------------------------------------------------
5022 
/** Parse the contents of a JSON map (`{...}`).
 *
 * Runs as a loop over four mutually exclusive states (asserted on every
 * iteration):
 *  - RKEY: a double-quoted key is expected; `}` (which happens after a
 *    trailing comma) ends the map.
 *  - RKCL: the `:` after the key is expected.
 *  - RVAL: a value is expected: a double-quoted scalar, a plain scalar,
 *    `[` (starts a sequence and leaves this function) or `{` (starts a
 *    nested map and keeps going in this function).
 *  - RNXT: a `,` (add a sibling, expect a key) or a `}` (end the map and
 *    leave this function) is expected.
 * Anything else is a parse error. When the current line is exhausted the
 * next one is scanned; reaching the end of the file before the closing
 * `}` is an error. */
5023 template<class EventHandler>
5024 void ParseEngine<EventHandler>::_handle_map_json()
5025 {
5026 mapjson_start:
5027  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5028 
  // state invariants: a flow map, with exactly one of RKEY/RKCL/RVAL/RNXT set
5029  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5030  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5031  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5032  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
5033  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5034 
5035  _handle_flow_skip_whitespace();
5036  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
  // nothing left on this line: go fetch the next one
5037  if(!rem.len)
5038  goto mapjson_again;
5039 
  // RKEY: expecting a key
5040  if(has_any(RKEY))
5041  {
5042  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5043  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5044  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5045  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5046  const char first = rem.str[0];
5047  _c4dbgpf("mapjson[RKEY]: '{}'", first);
5048  switch(first)
5049  {
5050  case '"':
5051  {
5052  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5053  ScannedScalar sc = _scan_scalar_dquot();
5054  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5055  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
  // the key is done; the colon comes next
5056  addrem_flags(RKCL, RKEY);
5057  break;
5058  }
5059  case '}': // this happens on a trailing comma like ", }"
5060  {
5061  _c4dbgp("mapjson[RKEY]: end!");
5062  _end_map_flow();
5063  _line_progressed(1);
5064  goto mapjson_finish;
5065  }
5066  default:
5067  _c4err("parse error");
5068  }
5069  }
  // RVAL: expecting a value
5070  else if(has_any(RVAL))
5071  {
5072  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5073  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5074  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5075  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5076  const char first = rem.str[0];
5077  _c4dbgpf("mapjson[RVAL]: '{}'", first);
5078  switch(first)
5079  {
5080  case '"':
5081  {
5082  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5083  ScannedScalar sc = _scan_scalar_dquot();
5084  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5085  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5086  addrem_flags(RNXT, RVAL);
5087  break;
5088  }
5089  case '[':
5090  {
5091  _c4dbgp("mapjson[RVAL]: start val seqjson");
  // set RNXT before beginning the child sequence, then switch the
  // flags to a sequence expecting a value and leave this function
5092  addrem_flags(RNXT, RVAL);
5093  m_evt_handler->begin_seq_val_flow();
5094  _set_indentation(m_evt_handler->m_parent->indref);
5095  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5096  _line_progressed(1);
5097  goto mapjson_finish;
5098  }
5099  case '{':
5100  {
5101  _c4dbgp("mapjson[RVAL]: start val mapjson");
  // set RNXT before beginning the child map, then expect a key
5102  addrem_flags(RNXT, RVAL);
5103  m_evt_handler->begin_map_val_flow();
5104  _set_indentation(m_evt_handler->m_parent->indref);
5105  addrem_flags(RKEY, RNXT);
5106  _line_progressed(1);
5107  // keep going in this function
5108  break;
5109  }
5110  default:
5111  {
5112  ScannedScalar sc;
5113  if(_scan_scalar_map_json(&sc))
5114  {
5115  _c4dbgp("mapjson[RVAL]: plain scalar.");
5116  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5117  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5118  addrem_flags(RNXT, RVAL);
5119  }
5120  else
5121  {
5122  _c4err("parse error");
5123  }
5124  break;
5125  }
5126  }
5127  }
5128  else if(has_any(RKCL)) // read the key colon
5129  {
5130  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5131  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5132  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5133  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5134  const char first = rem.str[0];
5135  _c4dbgpf("mapjson[RKCL]: '{}'", first);
5136  if(first == ':')
5137  {
5138  _c4dbgp("mapjson[RKCL]: found the colon");
5139  addrem_flags(RVAL, RKCL);
5140  _line_progressed(1);
5141  }
5142  else
5143  {
5144  _c4err("parse error");
5145  }
5146  }
  // RNXT: expecting ',' or '}'
5147  else if(has_any(RNXT))
5148  {
5149  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5150  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5151  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5152  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5153  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
5154  if(rem.begins_with(','))
5155  {
5156  _c4dbgp("mapjson[RNXT]: expect next keyval");
5157  m_evt_handler->add_sibling();
5158  addrem_flags(RKEY, RNXT);
5159  _line_progressed(1);
5160  }
5161  else if(rem.begins_with('}'))
5162  {
5163  _c4dbgp("mapjson[RNXT]: end!");
5164  _end_map_flow();
5165  _line_progressed(1);
5166  goto mapjson_finish;
5167  }
5168  else
5169  {
5170  _c4err("parse error"); // LCOV_EXCL_LINE
5171  }
5172  }
5173 
  // loop tail: scan the next line when this one is exhausted; running
  // out of input here means the closing '}' never came
5174  mapjson_again:
5175  _c4dbgt("mapjson: go again", 0);
5176  if(_finished_line())
5177  {
5178  if(C4_LIKELY(!_finished_file()))
5179  {
5180  _line_ended();
5181  _scan_line();
5182  _c4dbgnextline();
5183  }
5184  else
5185  {
5186  _c4err("missing terminating }");
5187  }
5188  }
5189  goto mapjson_start;
5190 
5191  mapjson_finish:
5192  _c4dbgp("mapjson: finish");
5193 }
5194 
5195 
5196 //-----------------------------------------------------------------------------
5197 
5198 template<class EventHandler>
5199 void ParseEngine<EventHandler>::_handle_seq_imap()
5200 {
5201 seqimap_start:
5202  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5203 
5204  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5205  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5206  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
5207  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5208  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5209 
5210  _handle_flow_skip_whitespace();
5211  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5212  if(!rem.len)
5213  goto seqimap_again;
5214 
5215  if(has_any(RVAL))
5216  {
5217  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5218  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5219  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5220  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5221  const char first = rem.str[0];
5222  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5223  ScannedScalar sc;
5224  if(first == '\'')
5225  {
5226  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5227  sc = _scan_scalar_squot();
5228  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5229  _handle_annotations_before_blck_val_scalar();
5230  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5231  _end_map_flow();
5232  goto seqimap_finish;
5233  }
5234  else if(first == '"')
5235  {
5236  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5237  sc = _scan_scalar_dquot();
5238  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5239  _handle_annotations_before_blck_val_scalar();
5240  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5241  _end_map_flow();
5242  goto seqimap_finish;
5243  }
5244  // block scalars (ie | and >) cannot appear in flow containers
5245  else if(_scan_scalar_plain_map_flow(&sc))
5246  {
5247  _c4dbgp("seqimap[RVAL]: it's a scalar.");
5248  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5249  _handle_annotations_before_blck_val_scalar();
5250  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5251  _end_map_flow();
5252  goto seqimap_finish;
5253  }
5254  else if(first == '[')
5255  {
5256  _c4dbgp("seqimap[RVAL]: start child seqflow");
5257  addrem_flags(RNXT, RVAL);
5258  _handle_annotations_before_blck_val_scalar();
5259  m_evt_handler->begin_seq_val_flow();
5260  addrem_flags(RVAL, RNXT|RSEQIMAP);
5261  _set_indentation(m_evt_handler->m_parent->indref);
5262  _line_progressed(1);
5263  goto seqimap_finish;
5264  }
5265  else if(first == '{')
5266  {
5267  _c4dbgp("seqimap[RVAL]: start child mapflow");
5268  addrem_flags(RNXT, RVAL);
5269  _handle_annotations_before_blck_val_scalar();
5270  m_evt_handler->begin_map_val_flow();
5271  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5272  _set_indentation(m_evt_handler->m_parent->indref);
5273  _line_progressed(1);
5274  goto seqimap_finish;
5275  }
5276  else if(first == ',' || first == ']')
5277  {
5278  _c4dbgp("seqimap[RVAL]: finish without val.");
5279  _handle_annotations_before_blck_val_scalar();
5280  m_evt_handler->set_val_scalar_plain_empty();
5281  _end_map_flow();
5282  goto seqimap_finish;
5283  }
5284  else if(first == '*')
5285  {
5286  csubstr ref = _scan_ref_seq();
5287  _c4dbgpf("seqimap[RVAL]: ref! {}", _prs(ref));
5288  _handle_valref(ref);
5289  addrem_flags(RNXT, RVAL);
5290  }
5291  else if(first == '&')
5292  {
5293  csubstr anchor = _scan_anchor();
5294  _c4dbgpf("seqimap[RVAL]: anchor! {}", _prs(anchor));
5295  _add_annotation(&m_pending_anchors, anchor);
5296  }
5297  else if(first == '!')
5298  {
5299  csubstr tag = _scan_tag();
5300  _c4dbgpf("seqimap[RVAL]: tag! {}", _prs(tag));
5301  _add_annotation(&m_pending_tags, tag);
5302  }
5303  else
5304  {
5305  _c4err("parse error"); // LCOV_EXCL_LINE
5306  }
5307  }
5308  else if(has_any(RNXT))
5309  {
5310  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5311  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5312  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5313  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5314  const char first = rem.str[0];
5315  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
5316  if(first == ',' || first == ']')
5317  {
5318  // we may get here because a map or a seq started and we
5319  // return later
5320  _c4dbgp("seqimap: done");
5321  _end_map_flow();
5322  goto seqimap_finish;
5323  }
5324  else
5325  {
5326  _c4err("parse error"); // LCOV_EXCL_LINE
5327  }
5328  }
5329  else if(has_any(QMRK))
5330  {
5331  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5332  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5333  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5334  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5335  const char first = rem.str[0];
5336  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5337  ScannedScalar sc;
5338  if(first == '\'')
5339  {
5340  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5341  sc = _scan_scalar_squot();
5342  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5343  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5344  addrem_flags(RKCL, QMRK);
5345  goto seqimap_again;
5346  }
5347  else if(first == '"')
5348  {
5349  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5350  sc = _scan_scalar_dquot();
5351  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5352  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5353  addrem_flags(RKCL, QMRK);
5354  goto seqimap_again;
5355  }
5356  // block scalars (ie | and >) cannot appear in flow containers
5357  else if(_scan_scalar_plain_map_flow(&sc))
5358  {
5359  _c4dbgp("seqimap[QMRK]: it's a scalar.");
5360  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5361  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5362  addrem_flags(RKCL, QMRK);
5363  goto seqimap_again;
5364  }
5365  else if(first == '[')
5366  {
5367  _c4dbgp("seqimap[QMRK]: start child seqflow");
5368  addrem_flags(RKCL, QMRK);
5369  m_evt_handler->begin_seq_key_flow();
5370  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5371  _set_indentation(m_evt_handler->m_parent->indref);
5372  _line_progressed(1);
5373  goto seqimap_finish;
5374  }
5375  else if(first == '{')
5376  {
5377  _c4dbgp("seqimap[QMRK]: start child mapflow");
5378  addrem_flags(RKCL, QMRK);
5379  m_evt_handler->begin_map_key_flow();
5380  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5381  _set_indentation(m_evt_handler->m_parent->indref);
5382  _line_progressed(1);
5383  goto seqimap_finish;
5384  }
5385  else if(first == ',' || first == ']')
5386  {
5387  _c4dbgp("seqimap[QMRK]: finish without key.");
5388  m_evt_handler->set_key_scalar_plain_empty();
5389  m_evt_handler->set_val_scalar_plain_empty();
5390  _end_map_flow();
5391  goto seqimap_finish;
5392  }
5393  else if(first == '&')
5394  {
5395  csubstr anchor = _scan_anchor();
5396  _c4dbgp("seqimap[QMRK]: anchor!");
5397  m_evt_handler->set_key_anchor(anchor);
5398  }
5399  else if(first == '*')
5400  {
5401  csubstr ref = _scan_ref_seq();
5402  _c4dbgp("seqimap[QMRK]: ref!");
5403  _handle_keyref(ref);
5404  addrem_flags(RKCL, QMRK);
5405  }
5406  else
5407  {
5408  _c4err("parse error"); // LCOV_EXCL_LINE
5409  }
5410  }
5411  else if(has_any(RKCL))
5412  {
5413  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5414  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5415  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5416  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5417  const char first = rem.str[0];
5418  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5419  if(first == ':')
5420  {
5421  _c4dbgp("seqimap[RKCL]: found ':'");
5422  addrem_flags(RVAL, RKCL);
5423  _line_progressed(1);
5424  goto seqimap_again;
5425  }
5426  else if(first == ',' || first == ']')
5427  {
5428  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5429  m_evt_handler->set_val_scalar_plain_empty();
5430  _end_map_flow();
5431  goto seqimap_finish;
5432  }
5433  else
5434  {
5435  _c4err("parse error"); // LCOV_EXCL_LINE
5436  }
5437  }
5438 
5439  seqimap_again:
5440  _c4dbgt("seqimap: go again", 0);
5441  if(_finished_line())
5442  {
5443  if(C4_LIKELY(!_finished_file()))
5444  {
5445  _line_ended();
5446  _scan_line();
5447  _c4dbgnextline();
5448  }
5449  else
5450  {
5451  _c4err("parse error");
5452  }
5453  }
5454  goto seqimap_start;
5455 
5456  seqimap_finish:
5457  _c4dbgp("seqimap: finish");
5458 }
5459 
5460 
5461 //-----------------------------------------------------------------------------
5462 
// Handle the contents of a flow sequence, ie the tokens found after an
// opening '['.
//
// This is a goto-driven loop. Each pass starts at `seqflow_start`,
// looks at the first character of the remaining line, and dispatches on
// the current state flag. Exactly one of these flags is set (asserted
// below):
//
//   RVAL: a value is expected. Accepts quoted/plain scalars, nested
//         '[' or '{', '*' refs, '&' anchors, '!' tags, ':' (empty key
//         of an implicit map), '?' (explicit key of an implicit map),
//         and ']' or ',' (the latter only when annotations are pending).
//   RNXT: a value was just read. Accepts ',', ']', or a ':' on the same
//         line where the previous value ended.
//
// The function leaves through `seqflow_finish` when the sequence is
// closed with ']', or when a branch removes RSEQ from the flags (child
// flow map, or implicit map inside the sequence). Anything else loops
// via `seqflow_again`, which loads the next line when the current one
// is exhausted and reports an error if the file ends before ']'.
5463 template<class EventHandler>
5464 void ParseEngine<EventHandler>::_handle_seq_flow()
5465 {
5466 seqflow_start:
5467  _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5468 
      // state invariants: reading a flow seq, with exactly one of RVAL/RNXT set
5469  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5470  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5471  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5472  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5473  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5474  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5475 
5476  if(m_evt_handler->m_curr->at_line_beginning())
5477  {
5478  _handle_flow_line_beginning();
5479  }
5480 
5481  _handle_flow_skip_whitespace();
      // nothing left to read on this line: go fetch the next one
5482  if(!m_evt_handler->m_curr->line_contents.rem.len)
5483  goto seqflow_again;
5484 
5485  if(has_any(RVAL))
5486  {
5487  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5488  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5489  ScannedScalar sc;
      // the three scalar branches share one sequence: scan, filter,
      // flush pending annotations, set the val, switch RVAL->RNXT and
      // mark where the val ended (consulted by the RNXT ':' branch)
5490  if(first == '\'')
5491  {
5492  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5493  sc = _scan_scalar_squot();
5494  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5495  _handle_annotations_before_blck_val_scalar();
5496  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5497  addrem_flags(RNXT, RVAL);
5498  _mark_seqflow_val_end();
5499  }
5500  else if(first == '"')
5501  {
5502  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5503  sc = _scan_scalar_dquot();
5504  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5505  _handle_annotations_before_blck_val_scalar();
5506  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5507  addrem_flags(RNXT, RVAL);
5508  _mark_seqflow_val_end();
5509  }
5510  // block scalars (ie | and >) cannot appear in flow containers
5511  else if(_scan_scalar_plain_seq_flow(&sc))
5512  {
5513  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5514  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5515  _handle_annotations_before_blck_val_scalar();
5516  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5517  addrem_flags(RNXT, RVAL);
5518  _mark_seqflow_val_end();
5519  }
5520  else if(first == '[')
5521  {
      // nested flow seq. RNXT is set before begin_seq_val_flow() and
      // RVAL is restored afterwards; presumably the first change is
      // what the enclosing seq sees once the child ends, and the second
      // applies to the child state -- TODO confirm against the handler.
      // There is no goto here: the child is a flow seq too, so the
      // loop in this same function continues with it.
5522  _c4dbgp("seqflow[RVAL]: start child seqflow");
5523  addrem_flags(RNXT, RVAL);
5524  _handle_annotations_before_blck_val_scalar();
5525  m_evt_handler->begin_seq_val_flow();
5526  _set_indentation(m_evt_handler->m_parent->indref);
5527  addrem_flags(RVAL, RNXT);
5528  _line_progressed(1);
5529  }
5530  else if(first == '{')
5531  {
      // nested flow map: RSEQ is removed and RMAP|RKEY set, so this
      // function's invariants no longer hold and it must return
5532  _c4dbgp("seqflow[RVAL]: start child mapflow");
5533  addrem_flags(RNXT, RVAL);
5534  _handle_annotations_before_blck_val_scalar();
5535  m_evt_handler->begin_map_val_flow();
5536  _set_indentation(m_evt_handler->m_parent->indref);
5537  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5538  _line_progressed(1);
5539  goto seqflow_finish;
5540  }
5541  else if(first == ']') // this happens on cases such as [] or [.., ]
5542  {
5543  _c4dbgp("seqflow[RVAL]: end!");
      // NOTE(review): bitwise '|' here vs logical '||' in the ','
      // branch below; both are nonzero iff either count is nonzero
5544  if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5545  {
      // annotations were read but no value followed: flush them and
      // set an empty plain scalar as the val
5546  _c4dbgp("seqflow[RVAL]: add pending annotations");
5547  _handle_annotations_before_blck_val_scalar();
5548  m_evt_handler->set_val_scalar_plain_empty();
5549  }
5550  _line_progressed(1);
5551  _end_seq_flow();
5552  goto seqflow_finish;
5553  }
5554  else if(first == '*')
5555  {
5556  csubstr ref = _scan_ref_seq();
5557  _c4dbgpf("seqflow[RVAL]: ref! {}", _prs(ref));
5558  _handle_valref(ref);
5559  addrem_flags(RNXT, RVAL);
5560  }
5561  else if(first == '&')
5562  {
      // anchors and tags are only queued here; the state stays in RVAL
      // and they are flushed by whichever branch sets the val
5563  csubstr anchor = _scan_anchor();
5564  _c4dbgpf("seqflow[RVAL]: anchor! {}", _prs(anchor));
5565  _add_annotation(&m_pending_anchors, anchor);
5566  }
5567  else if(first == '!')
5568  {
5569  csubstr tag = _scan_tag();
5570  _c4dbgpf("seqflow[RVAL]: tag! {}", _prs(tag));
5571  _add_annotation(&m_pending_tags, tag);
5572  }
5573  else if(first == ':')
5574  {
      // a ':' where a val was expected: open a map inside the seq
      // with an empty key, and switch from RSEQ to RSEQIMAP|RVAL
5575  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5576  addrem_flags(RNXT, RVAL);
5577  m_evt_handler->begin_map_val_flow();
5578  _set_indentation(m_evt_handler->m_parent->indref);
5579  _handle_annotations_before_blck_key_scalar();
5580  m_evt_handler->set_key_scalar_plain_empty();
5581  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5582  _line_progressed(1);
5583  goto seqflow_finish;
5584  }
5585  else if(first == '?')
5586  {
      // explicit key: open a map inside the seq and switch from RSEQ
      // to RSEQIMAP|QMRK
5587  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5588  addrem_flags(RNXT, RVAL);
5589  m_evt_handler->begin_map_val_flow();
5590  _set_indentation(m_evt_handler->m_parent->indref);
5591  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5592  _line_progressed(1);
5593  _maybe_skip_whitespace_tokens();
5594  goto seqflow_finish;
5595  }
5596  else if(first == ',')
5597  {
      // a comma where a val was expected is accepted only when
      // annotations are pending. The comma is not consumed here: the
      // state moves to RNXT and the next pass consumes it.
5598  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5599  {
5600  _c4dbgp("seqflow[RVAL]: add pending annotations");
5601  _handle_annotations_before_blck_val_scalar();
5602  m_evt_handler->set_val_scalar_plain_empty();
5603  addrem_flags(RNXT, RVAL);
5604  _mark_seqflow_val_end();
5605  }
5606  else
5607  {
5608  _c4err("parse error");
5609  }
5610  }
5611  else
5612  {
5613  _c4err("parse error");
5614  }
5615  }
5616  else // RNXT
5617  {
5618  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5619  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5620  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5621  if(first == ',')
5622  {
5623  _c4dbgp("seqflow[RNXT]: expect next val");
5624  addrem_flags(RVAL, RNXT);
5625  m_evt_handler->add_sibling();
5626  _line_progressed(1);
      // a '#' immediately after the comma (no separating whitespace)
      // is rejected
5627  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5628  {
5629  _c4err("parse error: invalid comment after comma");
5630  }
5631  _mark_seqflow_val_end();
5632  }
5633  else if(first == ']')
5634  {
5635  _c4dbgp("seqflow[RNXT]: end!");
5636  _line_progressed(1);
5637  _end_seq_flow();
5638  goto seqflow_finish;
5639  }
5640  else if(first == ':')
5641  {
      // a ':' after a val turns that val into the first key of a new
      // map, but only when the ':' is on the line recorded in
      // m_prev_val_end; otherwise it is an error
5642  _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5643  if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5644  {
5645  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5646  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5647  _set_indentation(m_evt_handler->m_parent->indref);
5648  _line_progressed(1);
5649  addrem_flags(RSEQIMAP|RVAL, RNXT);
5650  goto seqflow_finish;
5651  }
5652  else
5653  {
5654  _c4err("parse error");
5655  }
5656  }
5657  else
5658  {
5659  _c4err("parse error");
5660  }
5661  }
5662 
      // loop tail: when the line is exhausted load the next one; running
      // out of input inside the sequence is an error
5663  seqflow_again:
5664  _c4dbgt("seqflow: go again", 0);
5665  if(_finished_line())
5666  {
5667  if(C4_LIKELY(!_finished_file()))
5668  {
5669  _line_ended();
5670  _scan_line();
5671  _c4dbgnextline();
5672  }
5673  else
5674  {
5675  _c4err("missing terminating ]");
5676  }
5677  }
5678  goto seqflow_start;
5679 
5680  seqflow_finish:
5681  _c4dbgp("seqflow: finish");
5682 }
5683 
5684 
5685 //-----------------------------------------------------------------------------
5686 
// Handle the contents of a flow map, ie the tokens found after an
// opening '{'.
//
// This is a goto-driven loop. Each pass starts at `mapflow_start`,
// looks at the first character of the remaining line, and dispatches on
// the current state flag. Exactly one of these flags is set (asserted
// below):
//
//   RKEY: a key is expected (scalar, '?', ':' for an empty key,
//         annotations, '*' ref, or a '[' / '{' container key).
//   RKCL: a key was read; expects ':'. A '}' or ',' here sets an empty
//         val for that key.
//   RVAL: a value is expected.
//   RNXT: a value was read; expects ',' or '}'.
//   QMRK: a '?' was read; an explicit key is expected.
//
// The function leaves through `mapflow_finish` when the map is closed
// with '}', or when a branch replaces RMAP with RSEQ (a flow seq used
// as key or val). Nested flow maps keep RMAP set and therefore continue
// in this same loop. Anything else loops via `mapflow_again`, which
// loads the next line when the current one is exhausted and reports an
// error if the file ends before '}'.
5687 template<class EventHandler>
5688 void ParseEngine<EventHandler>::_handle_map_flow()
5689 {
5690 mapflow_start:
5691  _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5692 
      // state invariants: reading a flow map, with exactly one of the
      // five state flags set
5693  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5694  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5695  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5696  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5697 
5698  if(m_evt_handler->m_curr->at_line_beginning())
5699  {
5700  _handle_flow_line_beginning();
5701  }
5702 
5703  _handle_flow_skip_whitespace();
      // nothing left to read on this line: go fetch the next one
5704  if(!m_evt_handler->m_curr->line_contents.rem.len)
5705  goto mapflow_again;
5706 
5707  if(has_any(RKEY))
5708  {
5709  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5710  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5711  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5712  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5713  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5714  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5715  ScannedScalar sc;
      // the three scalar branches share one sequence: scan, filter,
      // flush pending annotations, set the key, then wait for the
      // colon (RKCL)
5716  if(first == '\'')
5717  {
5718  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5719  sc = _scan_scalar_squot();
5720  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5721  _handle_annotations_before_blck_key_scalar();
5722  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5723  addrem_flags(RKCL, RKEY|QMRK);
5724  }
5725  else if(first == '"')
5726  {
5727  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5728  sc = _scan_scalar_dquot();
5729  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5730  _handle_annotations_before_blck_key_scalar();
5731  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5732  addrem_flags(RKCL, RKEY|QMRK);
5733  }
5734  // block scalars (ie | and >) cannot appear in flow containers
5735  else if(_scan_scalar_plain_map_flow(&sc))
5736  {
5737  _c4dbgp("mapflow[RKEY]: plain scalar");
5738  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5739  _handle_annotations_before_blck_key_scalar();
5740  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5741  addrem_flags(RKCL, RKEY|QMRK);
5742  }
5743  else if(first == '?')
5744  {
      // explicit key marker: consume it and read the key in QMRK state
5745  _c4dbgp("mapflow[RKEY]: explicit key");
5746  _handle_annotations_before_blck_key_scalar();
5747  addrem_flags(QMRK, RKEY);
5748  _line_progressed(1);
5749  _maybe_skip_whitespace_tokens();
5750  }
5751  else if(first == ':')
5752  {
      // colon with no key before it: the key is an empty plain scalar,
      // and the state goes straight to RVAL
5753  _c4dbgp("mapflow[RKEY]: setting empty key");
5754  _handle_annotations_before_blck_key_scalar();
5755  m_evt_handler->set_key_scalar_plain_empty();
5756  addrem_flags(RVAL, RKEY|QMRK);
5757  _line_progressed(1);
5758  _maybe_skip_whitespace_tokens();
5759  }
5760  else if(first == ',')
5761  {
      // a comma where a key was expected is an error unless the helper
      // returns true. The comma is not consumed here: the state moves
      // to RNXT and the next pass consumes it.
5762  _c4dbgp("mapflow[RKEY]: comma!");
5763  if(!_handle_annotations_before_unexpected_flow_token_rkey())
5764  _c4err("unexpected comma");
5765  addrem_flags(RNXT, RKEY|QMRK);
5766  // keep going in this function
5767  }
5768  else if(first == '}') // this happens on a trailing comma like ", }"
5769  {
5770  _c4dbgp("mapflow[RKEY]: end!");
      // the helper's result is deliberately ignored here
5771  (void)_handle_annotations_before_unexpected_flow_token_rkey();
5772  _line_progressed(1);
5773  _end_map_flow();
5774  goto mapflow_finish;
5775  }
5776  else if(first == '&')
5777  {
      // anchors and tags are only queued here; the state stays in RKEY
5778  csubstr anchor = _scan_anchor();
5779  _c4dbgpf("mapflow[RKEY]: key anchor! {}", _prs(anchor));
5780  _add_annotation(&m_pending_anchors, anchor);
5781  }
5782  else if(first == '!')
5783  {
5784  csubstr tag = _scan_tag();
5785  _c4dbgpf("mapflow[RKEY]: tag! {}", _prs(tag));
5786  _add_annotation(&m_pending_tags, tag);
5787  }
5788  else if(first == '*')
5789  {
5790  csubstr ref = _scan_ref_map();
5791  _c4dbgpf("mapflow[RKEY]: key ref! {}", _prs(ref));
5792  _handle_keyref(ref);
5793  addrem_flags(RKCL, RKEY);
5794  }
5795  else if(first == '[')
5796  {
5797  // RYML's tree cannot store container keys, but that's
5798  // handled inside the tree event handler. Other handler
5799  // types may be able to handle it.
5800  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5801  _handle_annotations_before_blck_key_scalar();
      // RKCL is set before begin_seq_key_flow(); presumably this is the
      // state the map resumes in once the key container ends -- TODO
      // confirm against the handler. RMAP is then replaced by RSEQ, so
      // this function returns.
5802  addrem_flags(RKCL, RKEY);
5803  m_evt_handler->begin_seq_key_flow();
5804  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5805  _set_indentation(m_evt_handler->m_parent->indref);
5806  _line_progressed(1);
5807  goto mapflow_finish;
5808  }
5809  else if(first == '{')
5810  {
5811  // RYML's tree cannot store container keys, but that's
5812  // handled inside the tree event handler. Other handler
5813  // types may be able to handle it.
5814  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5815  _handle_annotations_before_blck_key_scalar();
5816  addrem_flags(RKCL, RKEY);
5817  m_evt_handler->begin_map_key_flow();
5818  addrem_flags(RKEY, RVAL|RKCL);
5819  _set_indentation(m_evt_handler->m_parent->indref);
5820  _line_progressed(1);
5821  // keep going in this function
5822  }
5823  else
5824  {
5825  _c4err("parse error"); // LCOV_EXCL_LINE
5826  }
5827  }
5828  else if(has_any(RKCL)) // read the key colon
5829  {
5830  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5831  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5832  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5833  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5834  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5835  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5836  if(first == ':')
5837  {
5838  _c4dbgp("mapflow[RKCL]: found the colon");
5839  addrem_flags(RVAL, RKCL);
5840  _line_progressed(1);
5841  }
5842  else if(first == '}')
5843  {
      // key without colon or val right before the closing brace: the
      // val is set to an empty plain scalar
5844  _c4dbgp("mapflow[RKCL]: end with missing val!");
5845  addrem_flags(RVAL, RKCL);
5846  m_evt_handler->set_val_scalar_plain_empty();
5847  _line_progressed(1);
5848  _end_map_flow();
5849  goto mapflow_finish;
5850  }
5851  else if(first == ',')
5852  {
      // key without colon or val before a comma: set an empty val and
      // start the next sibling right away (the comma is consumed here)
5853  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5854  m_evt_handler->set_val_scalar_plain_empty();
5855  m_evt_handler->add_sibling();
5856  addrem_flags(RKEY, RKCL);
5857  _line_progressed(1);
      // a '#' immediately after the comma (no separating whitespace)
      // is rejected
5858  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5859  {
5860  _c4err("parse error: invalid comment after comma");
5861  }
5862  }
5863  else
5864  {
5865  _c4err("parse error");
5866  }
5867  }
5868  else if(has_any(RVAL))
5869  {
5870  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5871  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5872  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5873  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5874  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5875  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5876  ScannedScalar sc;
      // the three scalar branches share one sequence: scan, filter,
      // flush pending annotations, set the val, switch RVAL->RNXT
5877  if(first == '\'')
5878  {
5879  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5880  sc = _scan_scalar_squot();
5881  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5882  _handle_annotations_before_blck_val_scalar();
5883  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5884  addrem_flags(RNXT, RVAL);
5885  }
5886  else if(first == '"')
5887  {
5888  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5889  sc = _scan_scalar_dquot();
5890  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5891  _handle_annotations_before_blck_val_scalar();
5892  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5893  addrem_flags(RNXT, RVAL);
5894  }
5895  // block scalars (ie | and >) cannot appear in flow containers
5896  else if(_scan_scalar_plain_map_flow(&sc))
5897  {
5898  _c4dbgp("mapflow[RVAL]: plain scalar.");
5899  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5900  _handle_annotations_before_blck_val_scalar();
5901  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5902  addrem_flags(RNXT, RVAL);
5903  }
5904  else if(first == '[')
5905  {
      // flow seq as val: RMAP is replaced by RSEQ, so this function
      // returns
5906  _c4dbgp("mapflow[RVAL]: start val seqflow");
5907  addrem_flags(RNXT, RVAL);
5908  _handle_annotations_before_blck_val_scalar();
5909  m_evt_handler->begin_seq_val_flow();
5910  _set_indentation(m_evt_handler->m_parent->indref);
5911  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5912  _line_progressed(1);
5913  goto mapflow_finish;
5914  }
5915  else if(first == '{')
5916  {
      // flow map as val: RMAP stays set, only RNXT->RKEY changes, so
      // the loop continues here with the child map
5917  _c4dbgp("mapflow[RVAL]: start val mapflow");
5918  addrem_flags(RNXT, RVAL);
5919  _handle_annotations_before_blck_val_scalar();
5920  m_evt_handler->begin_map_val_flow();
5921  _set_indentation(m_evt_handler->m_parent->indref);
5922  addrem_flags(RKEY, RNXT);
5923  _line_progressed(1);
5924  // keep going in this function
5925  }
5926  else if(first == '}')
5927  {
      // closing brace where a val was expected: the val is set to an
      // empty plain scalar before the map ends
5928  _c4dbgp("mapflow[RVAL]: end!");
5929  _handle_annotations_before_blck_val_scalar();
5930  m_evt_handler->set_val_scalar_plain_empty();
5931  _line_progressed(1);
5932  _end_map_flow();
5933  goto mapflow_finish;
5934  }
5935  else if(first == ',')
5936  {
      // the comma is not consumed here: the state moves to RNXT and
      // the next pass consumes it
5937  _c4dbgp("mapflow[RVAL]: empty val!");
5938  _handle_annotations_before_blck_val_scalar();
5939  m_evt_handler->set_val_scalar_plain_empty();
5940  addrem_flags(RNXT, RVAL);
5941  // keep going in this function
5942  }
5943  else if(first == '*')
5944  {
      // NOTE(review): the debug text says "key ref" but this is the
      // RVAL branch and the ref goes to _handle_valref()
5945  csubstr ref = _scan_ref_map();
5946  _c4dbgpf("mapflow[RVAL]: key ref! {}", _prs(ref));
5947  _handle_valref(ref);
5948  addrem_flags(RNXT, RVAL);
5949  }
5950  else if(first == '&')
5951  {
      // NOTE(review): the debug text says "key anchor" but this is the
      // RVAL branch
5952  csubstr anchor = _scan_anchor();
5953  _c4dbgpf("mapflow[RVAL]: key anchor! {}", _prs(anchor));
5954  _add_annotation(&m_pending_anchors, anchor);
5955  }
5956  else if(first == '!')
5957  {
5958  csubstr tag = _scan_tag();
5959  _c4dbgpf("mapflow[RVAL]: tag! {}", _prs(tag));
5960  _add_annotation(&m_pending_tags, tag);
5961  }
5962  else
5963  {
5964  _c4err("parse error");
5965  }
5966  }
5967  else if(has_any(RNXT))
5968  {
5969  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5970  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5971  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5972  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5973  _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5974  if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5975  {
5976  _c4dbgp("mapflow[RNXT]: expect next keyval");
5977  m_evt_handler->add_sibling();
5978  addrem_flags(RKEY, RNXT);
5979  _line_progressed(1);
      // a '#' immediately after the comma (no separating whitespace)
      // is rejected
5980  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5981  {
5982  _c4err("parse error: invalid comment after comma");
5983  }
5984  }
5985  else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5986  {
5987  _c4dbgp("mapflow[RNXT]: end!");
5988  _line_progressed(1);
5989  _end_map_flow();
5990  goto mapflow_finish;
5991  }
5992  else
5993  {
5994  _c4err("parse error");
5995  }
5996  }
5997  else if(has_any(QMRK))
5998  {
      // explicit-key state, entered from the RKEY '?' branch above.
      // It mirrors the RKEY branch, minus the '?' case.
5999  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6000  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6001  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6002  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6003  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6004  _c4dbgpf("mapflow[QMRK]: '{}'", first);
6005  ScannedScalar sc;
6006  if(first == '\'')
6007  {
6008  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
6009  sc = _scan_scalar_squot();
6010  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6011  _handle_annotations_before_blck_key_scalar();
6012  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6013  addrem_flags(RKCL, QMRK);
6014  }
6015  else if(first == '"')
6016  {
6017  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6018  sc = _scan_scalar_dquot();
6019  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6020  _handle_annotations_before_blck_key_scalar();
6021  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6022  addrem_flags(RKCL, QMRK);
6023  }
6024  // block scalars (ie | and >) cannot appear in flow containers
6025  else if(_scan_scalar_plain_map_flow(&sc))
6026  {
6027  _c4dbgp("mapflow[QMRK]: plain scalar");
6028  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6029  _handle_annotations_before_blck_key_scalar();
6030  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6031  addrem_flags(RKCL, QMRK);
6032  }
6033  else if(first == ':')
6034  {
      // '?' followed directly by ':': the key is an empty plain scalar
6035  _c4dbgp("mapflow[QMRK]: setting empty key");
6036  _handle_annotations_before_blck_key_scalar();
6037  m_evt_handler->set_key_scalar_plain_empty();
6038  addrem_flags(RVAL, QMRK);
6039  _line_progressed(1);
6040  _maybe_skip_whitespace_tokens();
6041  }
6042  else if(first == '}') // this happens on a trailing comma like ", }"
6043  {
      // '?' followed by the closing brace: both key and val are set to
      // empty plain scalars before the map ends
6044  _c4dbgp("mapflow[QMRK]: end!");
6045  _handle_annotations_before_blck_key_scalar();
6046  m_evt_handler->set_key_scalar_plain_empty();
6047  m_evt_handler->set_val_scalar_plain_empty();
      // NOTE(review): every other '}' branch in this function calls
      // _line_progressed(1) before _end_map_flow(); here the order is
      // reversed -- confirm this is intentional
6048  _end_map_flow();
6049  _line_progressed(1);
6050  goto mapflow_finish;
6051  }
6052  else if(first == ',')
6053  {
      // '?' followed by a comma: empty key and empty val. The comma is
      // not consumed here: the state moves to RNXT and the next pass
      // consumes it.
6054  _c4dbgp("mapflow[QMRK]: empty key+val!");
6055  _handle_annotations_before_blck_key_scalar();
6056  m_evt_handler->set_key_scalar_plain_empty();
6057  m_evt_handler->set_val_scalar_plain_empty();
6058  addrem_flags(RNXT, QMRK);
6059  }
6060  else if(first == '&')
6061  {
      // anchors and tags are only queued here; the state stays in QMRK
6062  csubstr anchor = _scan_anchor();
6063  _c4dbgpf("mapflow[QMRK]: key anchor! {}", _prs(anchor));
6064  _add_annotation(&m_pending_anchors, anchor);
6065  }
6066  else if(first == '*')
6067  {
6068  csubstr ref = _scan_ref_map();
6069  _c4dbgpf("mapflow[QMRK]: key ref! {}", _prs(ref));
6070  _handle_keyref(ref);
6071  addrem_flags(RKCL, QMRK);
6072  }
6073  else if(first == '[')
6074  {
6075  // RYML's tree cannot store container keys, but that's
6076  // handled inside the tree sink. Other sink types may be
6077  // able to handle it.
6078  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
6079  addrem_flags(RKCL, QMRK);
6080  _handle_annotations_before_blck_key_scalar();
6081  m_evt_handler->begin_seq_key_flow();
      // RMAP is replaced by RSEQ, so this function returns
6082  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6083  _set_indentation(m_evt_handler->m_parent->indref);
6084  _line_progressed(1);
6085  goto mapflow_finish;
6086  }
6087  else if(first == '{')
6088  {
6089  // RYML's tree cannot store container keys, but that's
6090  // handled inside the tree sink. Other sink types may be
6091  // able to handle it.
6092  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6093  addrem_flags(RKCL, QMRK);
6094  _handle_annotations_before_blck_key_scalar();
6095  m_evt_handler->begin_map_key_flow();
6096  _set_indentation(m_evt_handler->m_parent->indref);
6097  addrem_flags(RKEY, RKCL);
6098  _line_progressed(1);
6099  // keep going in this function
6100  }
6101  else if(first == '!')
6102  {
6103  csubstr tag = _scan_tag();
6104  _c4dbgpf("mapflow[QMRK]: tag! {}", _prs(tag));
6105  _add_annotation(&m_pending_tags, tag);
6106  }
6107  else
6108  {
6109  _c4err("parse error"); // LCOV_EXCL_LINE
6110  }
6111  }
6112 
      // loop tail: when the line is exhausted load the next one; running
      // out of input inside the map is an error
6113  mapflow_again:
6114  _c4dbgt("mapflow: go again", 0);
6115  if(_finished_line())
6116  {
6117  if(C4_LIKELY(!_finished_file()))
6118  {
6119  _line_ended();
6120  _scan_line();
6121  _c4dbgnextline();
6122  }
6123  else
6124  {
6125  _c4err("missing terminating }");
6126  }
6127  }
6128  goto mapflow_start;
6129 
6130  mapflow_finish:
6131  _c4dbgp("mapflow: finish");
6132 }
6133 
6134 
6135 //-----------------------------------------------------------------------------
6136 
6137 template<class EventHandler>
6138 void ParseEngine<EventHandler>::_handle_seq_block()
6139 {
6140 seqblck_start:
6141  _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6142 
6143  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6144  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6145  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6146  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6147 
6148  _maybe_skip_comment_strict();
6149  if(!m_evt_handler->m_curr->line_contents.rem.len)
6150  goto seqblck_again;
6151 
6152  if(has_any(RVAL))
6153  {
6154  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6155  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6156  if(m_evt_handler->m_curr->at_line_beginning())
6157  {
6158  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6159  if(m_evt_handler->m_curr->indentation_ge_extra())
6160  {
6161  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6162  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6163  if(!m_evt_handler->m_curr->line_contents.rem.len)
6164  goto seqblck_again;
6165  }
6166  else if(m_evt_handler->m_curr->indentation_lt_extra())
6167  {
6168  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6169  if(m_evt_handler->m_curr->indentation_eq())
6170  {
6171  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6172  _handle_annotations_before_blck_val_scalar();
6173  m_evt_handler->set_val_scalar_plain_empty();
6174  addrem_flags(RNXT, RVAL);
6175  goto seqblck_again;
6176  }
6177  else
6178  {
6179  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6180  _c4dbgp("seqblck[RVAL]: smaller indentation!");
6181  _handle_indentation_pop_from_block_seq();
6182  goto seqblck_finish;
6183  }
6184  }
6185  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6186  {
6187  _c4dbgp("seqblck[RVAL]: empty line!");
6188  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6189  goto seqblck_again;
6190  }
6191  }
6192  _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6193  const size_t startmark = _handle_block_skip_leading_whitespace();
6194  _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6195  if(startmark == npos)
6196  {
6197  _c4dbgp("seqblck[RVAL]: whitespace only");
6198  goto seqblck_again;
6199  }
6200  const size_t tabmark = _handle_block_get_whitespace_mark();
6201  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6202  _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6203  const size_t startline = m_evt_handler->m_curr->pos.line;
6204  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6205  ScannedScalar sc;
6206  if(first == '\'')
6207  {
6208  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6209  sc = _scan_scalar_squot();
6210  if(!_maybe_scan_following_colon())
6211  {
6212  _c4dbgp("seqblck[RVAL]: set as val");
6213  _handle_annotations_before_blck_val_scalar();
6214  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6215  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6216  addrem_flags(RNXT, RVAL);
6217  }
6218  else
6219  {
6220  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6221  _handle_block_check_leading_tabs(startmark);
6222  addrem_flags(RNXT, RVAL);
6223  _handle_annotations_before_start_mapblck(startline);
6224  _handle_colon();
6225  m_evt_handler->begin_map_val_block();
6226  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6227  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6228  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6229  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6230  _maybe_skip_whitespace_tokens();
6231  goto seqblck_finish;
6232  }
6233  }
6234  else if(first == '"')
6235  {
6236  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6237  sc = _scan_scalar_dquot();
6238  if(!_maybe_scan_following_colon())
6239  {
6240  _c4dbgp("seqblck[RVAL]: set as val");
6241  _handle_annotations_before_blck_val_scalar();
6242  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6243  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6244  addrem_flags(RNXT, RVAL);
6245  }
6246  else
6247  {
6248  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6249  addrem_flags(RNXT, RVAL);
6250  _handle_block_check_leading_tabs(startmark);
6251  _handle_annotations_before_start_mapblck(startline);
6252  _handle_colon();
6253  m_evt_handler->begin_map_val_block();
6254  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6255  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6256  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6257  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6258  _maybe_skip_whitespace_tokens();
6259  goto seqblck_finish;
6260  }
6261  }
6262  // block scalars can only appear as keys when in QMRK scope
6263  // (ie, after ? tokens), so no need to scan following colon in
6264  // here.
6265  else if(first == '|')
6266  {
6267  _c4dbgp("seqblck[RVAL]: block-literal scalar");
6268  ScannedBlock sb;
6269  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6270  _handle_annotations_before_blck_val_scalar();
6271  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6272  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6273  addrem_flags(RNXT, RVAL);
6274  }
6275  else if(first == '>')
6276  {
6277  _c4dbgp("seqblck[RVAL]: block-folded scalar");
6278  ScannedBlock sb;
6279  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6280  _handle_annotations_before_blck_val_scalar();
6281  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6282  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6283  addrem_flags(RNXT, RVAL);
6284  }
6285  else if(_scan_scalar_plain_seq_blck(&sc))
6286  {
6287  _c4dbgp("seqblck[RVAL]: plain scalar.");
6288  if(!_maybe_scan_following_colon())
6289  {
6290  _c4dbgp("seqblck[RVAL]: set as val");
6291  _handle_annotations_before_blck_val_scalar();
6292  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6293  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6294  addrem_flags(RNXT, RVAL);
6295  }
6296  else
6297  {
6298  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6299  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6300  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6301  _handle_block_check_leading_tabs(startmark, tabmark);
6302  addrem_flags(RNXT, RVAL);
6303  _handle_annotations_before_start_mapblck(startline);
6304  _handle_colon();
6305  m_evt_handler->begin_map_val_block();
6306  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6307  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6308  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6309  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6310  _maybe_skip_whitespace_tokens();
6311  goto seqblck_finish;
6312  }
6313  }
6314  else if(first == '[')
6315  {
6316  _c4dbgp("seqblck[RVAL]: start child seqflow");
6317  addrem_flags(RNXT, RVAL);
6318  _handle_annotations_before_blck_val_scalar();
6319  m_evt_handler->begin_seq_val_flow();
6320  addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6321  _line_progressed(1);
6322  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6323  goto seqblck_finish;
6324  }
6325  else if(first == '{')
6326  {
6327  _c4dbgp("seqblck[RVAL]: start child mapflow");
6328  addrem_flags(RNXT, RVAL);
6329  _handle_annotations_before_blck_val_scalar();
6330  m_evt_handler->begin_map_val_flow();
6331  addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6332  _line_progressed(1);
6333  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6334  goto seqblck_finish;
6335  }
6336  else if(first == '-')
6337  {
6338  _c4dbgp("seqblck[RVAL]: dash");
6339  _handle_block_check_leading_tabs(startmark);
6340  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6341  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6342  _c4dbgp("seqblck[RVAL]: start child seqblck");
6343  _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6344  addrem_flags(RNXT, RVAL);
6345  _handle_annotations_before_blck_val_scalar();
6346  m_evt_handler->begin_seq_val_block();
6347  addrem_flags(RVAL, RNXT);
6348  _set_indentation(startindent);
6349  // keep going on inside this function
6350  _line_progressed(1);
6351  }
6352  else if(first == ':')
6353  {
6354  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6355  addrem_flags(RNXT, RVAL);
6356  _handle_annotations_before_start_mapblck(startline);
6357  _handle_colon();
6358  m_evt_handler->begin_map_val_block();
6359  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6360  m_evt_handler->set_key_scalar_plain_empty();
6361  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6362  _line_progressed(1);
6363  _maybe_skip_whitespace_tokens();
6364  goto seqblck_finish;
6365  }
6366  else if(first == '&')
6367  {
6368  const csubstr anchor = _scan_anchor();
6369  _c4dbgpf("seqblck[RVAL]: anchor! {}", _prs(anchor));
6370  // we need to buffer the anchors, as there may be two
6371  // consecutive anchors in here
6372  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6373  }
6374  else if(first == '*')
6375  {
6376  csubstr ref = _scan_ref_seq();
6377  _c4dbgpf("seqblck[RVAL]: ref! {}", _prs(ref));
6378  if(!_maybe_scan_following_colon())
6379  {
6380  _c4dbgp("seqblck[RVAL]: set ref as val!");
6381  _handle_valref(ref);
6382  addrem_flags(RNXT, RVAL);
6383  }
6384  else
6385  {
6386  _c4dbgp("seqblck[RVAL]: ref is key of map");
6387  addrem_flags(RNXT, RVAL);
6388  _handle_annotations_before_start_mapblck(startline);
6389  m_evt_handler->begin_map_val_block();
6390  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6391  _handle_keyref(ref);
6392  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6393  _set_indentation(startindent);
6394  _maybe_skip_whitespace_tokens();
6395  goto seqblck_finish;
6396  }
6397  }
6398  else if(first == '!')
6399  {
6400  csubstr tag = _scan_tag();
6401  _c4dbgpf("seqblck[RVAL]: val tag! {}", _prs(tag));
6402  // we need to buffer the tags, as there may be two
6403  // consecutive tags in here
6404  _add_annotation(&m_pending_tags, tag, startindent, startline);
6405  }
6406  else if(first == '?')
6407  {
6408  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6409  addrem_flags(RNXT, RVAL);
6410  m_evt_handler->begin_map_val_block();
6411  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6412  _set_indentation(startindent);
6413  _line_progressed(1);
6414  _maybe_skipchars(' ');
6415  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6416  {
6417  _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6418  addrem_flags(RKCL, QMRK);
6419  m_evt_handler->begin_seq_key_block();
6420  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6421  _save_indentation();
6422  _line_progressed(1);
6423  _maybe_skipchars(' ');
6424  }
6425  goto seqblck_finish;
6426  }
6427  else
6428  {
6429  _c4err("parse error");
6430  }
6431  }
6432  else // RNXT
6433  {
6434  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6435  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6436  //
6437  // handle indentation
6438  //
6439  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6440  if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6441  {
6442  _c4dbgp("seqblck[RNXT]: at line begin");
6443  if(m_evt_handler->m_curr->indentation_ge())
6444  {
6445  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6446  _line_progressed(m_evt_handler->m_curr->indref);
6447  if(!m_evt_handler->m_curr->line_contents.rem.len)
6448  goto seqblck_again;
6449  }
6450  else if(m_evt_handler->m_curr->indentation_lt())
6451  {
6452  _c4dbgp("seqblck[RNXT]: smaller indentation!");
6453  _handle_indentation_pop_from_block_seq();
6454  if(has_all(RSEQ|RBLCK))
6455  {
6456  _c4dbgp("seqblck[RNXT]: still seqblck!");
6457  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6458  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6459  if(!m_evt_handler->m_curr->line_contents.rem.len)
6460  goto seqblck_again; // LCOV_EXCL_LINE
6461  }
6462  else
6463  {
6464  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6465  goto seqblck_finish;
6466  }
6467  }
6468  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6469  {
6470  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6471  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6472  if(!m_evt_handler->m_curr->line_contents.rem.len)
6473  goto seqblck_again; // LCOV_EXCL_LINE
6474  }
6475  }
6476  else
6477  {
6478  _c4dbgp("seqblck[RNXT]: NOT at line begin");
6479  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6480  {
6481  _c4err("parse error");
6482  }
6483  else
6484  {
6485  _skipchars(" \t");
6486  if(!m_evt_handler->m_curr->line_contents.rem.len)
6487  {
6488  _c4dbgp("seqblck[RNXT]: again");
6489  goto seqblck_again; // LCOV_EXCL_LINE
6490  }
6491  }
6492  }
6493  //
6494  // now handle the tokens
6495  //
6496  _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6497  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6498  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6499  if(first == '-')
6500  {
6501  if(m_evt_handler->m_curr->indref > 0
6502  || m_evt_handler->m_curr->line_contents.indentation > 0
6503  || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6504  {
6505  if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6506  {
6507  _c4dbgp("seqblck[RNXT]: expect next val");
6508  addrem_flags(RVAL, RNXT);
6509  m_evt_handler->add_sibling();
6510  _line_progressed(1);
6511  }
6512  else
6513  {
6514  _c4err("parse error");
6515  }
6516  }
6517  else
6518  {
6519  _c4dbgp("seqblck[RNXT]: start doc");
6520  _start_doc_suddenly();
6521  _line_progressed(3);
6522  _maybe_skip_whitespace_tokens();
6523  goto seqblck_finish;
6524  }
6525  }
6526  else if(first == ':')
6527  {
6528  // This happens for example in `- [a: b]: c` (after
6529  // terminating the seq, ie, after `]`). All other cases
6530  // (ie colon after scalars) are caught elsewhere (ie, in
6531  // RVAL state).
6532  if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP)))
6533  {
6534  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6535  m_evt_handler->end_seq_block();
6536  goto seqblck_finish;
6537  }
6538  else
6539  {
6540  _c4err("parse error");
6541  }
6542  }
6543  else if(first == '.')
6544  {
6545  _c4dbgp("seqblck[RNXT]: maybe doc?");
6546  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6547  {
6548  _c4dbgp("seqblck[RNXT]: end doc");
6549  _end_doc_suddenly();
6550  _line_progressed(3);
6551  _maybe_skip_whitespace_tokens();
6552  _check_doc_end_tokens();
6553  goto seqblck_finish;
6554  }
6555  else
6556  {
6557  _c4err("parse error");
6558  }
6559  }
6560  else
6561  {
6562  // may be an indentless sequence nested in a map...
6563  #ifdef RYML_DBG
6564  _print_state_stack();
6565  #endif
6566  if(m_evt_handler->m_parent
6567  && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6568  && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6569  {
6570  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6571  _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6572  _handle_indentation_pop(m_evt_handler->m_parent);
6573  _RYML_ASSERT_PARSE_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6574  m_evt_handler->add_sibling();
6575  addrem_flags(RKEY, RNXT);
6576  goto seqblck_finish;
6577  }
6578  else if(first == '\t')
6579  {
6580  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6581  if(pos == npos)
6582  {
6583  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6584  goto seqblck_again;
6585  }
6586  }
6587  _c4err("parse error");
6588  }
6589  }
6590 
6591  seqblck_again:
6592  _c4dbgt("seqblck: go again", 0);
6593  if(_finished_line())
6594  {
6595  m_bom_len = 0;
6596  _line_ended();
6597  _scan_line();
6598  if(_finished_file())
6599  {
6600  _c4dbgp("seqblck: finish!");
6601  _end_seq_blck();
6602  goto seqblck_finish;
6603  }
6604  _c4dbgnextline();
6605  }
6606  goto seqblck_start;
6607 
6608  seqblck_finish:
6609  _c4dbgp("seqblck: finish");
6610 }
6611 
6612 
6613 //-----------------------------------------------------------------------------
6614 
6615 template<class EventHandler>
6616 void ParseEngine<EventHandler>::_handle_map_block()
6617 {
6618 mapblck_start:
6619  _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6620 
6621  // states: RKEY -> RVAL -> RNXT
6622  // states: QMRK -> RKCL -> RVAL -> RNXT
6623  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6624  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6625  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6626  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6627 
6628  _maybe_skip_comment();
6629  if(!m_evt_handler->m_curr->line_contents.rem.len)
6630  goto mapblck_again;
6631 
6632  if(has_any(RKEY))
6633  {
6634  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6635  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6636  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6637  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6638  //
6639  // handle indentation
6640  //
6641  if(m_evt_handler->m_curr->at_line_beginning())
6642  {
6643  if(m_evt_handler->m_curr->indentation_eq())
6644  {
6645  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6646  _line_progressed(m_evt_handler->m_curr->indref);
6647  if(!m_evt_handler->m_curr->line_contents.rem.len)
6648  goto mapblck_again;
6649  }
6650  else if(m_evt_handler->m_curr->indentation_lt())
6651  {
6652  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6653  _handle_indentation_pop_from_block_map();
6654  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6655  if(has_all(RMAP|RBLCK))
6656  {
6657  _c4dbgp("mapblck[RKEY]: still mapblck!");
6658  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6659  if(!m_evt_handler->m_curr->line_contents.rem.len)
6660  goto mapblck_again;
6661  }
6662  else
6663  {
6664  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6665  goto mapblck_finish;
6666  }
6667  }
6668  else
6669  {
6670  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6671  _c4err("invalid indentation");
6672  }
6673  }
6674  //
6675  // now handle the tokens
6676  //
6677  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6678  const size_t startline = m_evt_handler->m_curr->pos.line;
6679  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6680  _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6681  ScannedScalar sc;
6682  if(first == '\'')
6683  {
6684  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6685  sc = _scan_scalar_squot();
6686  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6687  _handle_annotations_before_blck_key_scalar();
6688  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6689  addrem_flags(RVAL, RKEY);
6690  if(!_maybe_scan_following_colon())
6691  _c4err("could not find ':' colon after key");
6692  _handle_colon();
6693  _maybe_skip_whitespace_tokens();
6694  }
6695  else if(first == '"')
6696  {
6697  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6698  sc = _scan_scalar_dquot();
6699  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6700  _handle_annotations_before_blck_key_scalar();
6701  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6702  addrem_flags(RVAL, RKEY);
6703  if(!_maybe_scan_following_colon())
6704  _c4err("could not find ':' colon after key");
6705  _handle_colon();
6706  _maybe_skip_whitespace_tokens();
6707  }
6708  // block scalars (| and >) can not be used as keys unless they
6709  // appear in an explicit QMRK scope (ie, after the ? token),
6710  else if(C4_UNLIKELY(first == '|'))
6711  {
6712  _c4err("block map: literal keys must be enclosed in '?'");
6713  }
6714  else if(C4_UNLIKELY(first == '>'))
6715  {
6716  _c4err("block map: folded keys must be enclosed in '?'");
6717  }
6718  else if(_scan_scalar_plain_map_blck(&sc))
6719  {
6720  _c4dbgp("mapblck[RKEY]: plain scalar");
6721  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6722  _handle_annotations_before_blck_key_scalar();
6723  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6724  addrem_flags(RVAL, RKEY);
6725  if(!_maybe_scan_following_colon())
6726  _c4err("could not find ':' colon after key");
6727  _handle_colon();
6728  _maybe_skip_whitespace_tokens();
6729  }
6730  else if(first == '?')
6731  {
6732  _c4dbgp("mapblck[RKEY]: key token!");
6733  addrem_flags(QMRK, RKEY);
6734  _line_progressed(1);
6735  _maybe_skipchars(' ');
6736  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6737  {
6738  _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6739  addrem_flags(RKCL, QMRK);
6740  m_evt_handler->begin_seq_key_block();
6741  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6742  _save_indentation();
6743  _line_progressed(1);
6744  _maybe_skipchars(' ');
6745  goto mapblck_finish;
6746  }
6747  goto mapblck_again;
6748  }
6749  else if(first == ':')
6750  {
6751  _c4dbgp("mapblck[RKEY]: setting empty key");
6752  _handle_annotations_before_blck_key_scalar();
6753  m_evt_handler->set_key_scalar_plain_empty();
6754  addrem_flags(RVAL, RKEY);
6755  _line_progressed(1);
6756  _handle_colon();
6757  _maybe_skip_whitespace_tokens();
6758  }
6759  else if(first == '*')
6760  {
6761  csubstr ref = _scan_ref_map();
6762  _c4dbgpf("mapblck[RKEY]: key ref! {}", _prs(ref));
6763  _handle_keyref(ref);
6764  addrem_flags(RVAL, RKEY);
6765  if(!_maybe_scan_following_colon())
6766  _c4err("could not find ':' colon after key");
6767  _handle_colon();
6768  _maybe_skip_whitespace_tokens();
6769  }
6770  else if(first == '&')
6771  {
6772  csubstr anchor = _scan_anchor();
6773  _c4dbgpf("mapblck[RKEY]: key anchor! {}", _prs(anchor));
6774  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6775  }
6776  else if(first == '!')
6777  {
6778  csubstr tag = _scan_tag();
6779  _c4dbgpf("mapblck[RKEY]: key tag! {}", _prs(tag));
6780  _add_annotation(&m_pending_tags, tag, startindent, startline);
6781  }
6782  else if(first == '[')
6783  {
6784  // RYML's tree cannot store container keys, but that's
6785  // handled inside the tree handler. Other handlers may be
6786  // able to handle it.
6787  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6788  _handle_annotations_before_blck_key_scalar();
6789  m_evt_handler->begin_seq_key_flow();
6790  addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6791  _line_progressed(1);
6792  _set_indentation(startindent);
6793  goto mapblck_finish;
6794  }
6795  else if(first == '{')
6796  {
6797  // RYML's tree cannot store container keys, but that's
6798  // handled inside the tree handler. Other handlers may be
6799  // able to handle it.
6800  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6801  _handle_annotations_before_blck_key_scalar();
6802  m_evt_handler->begin_map_key_flow();
6803  addrem_flags(RFLOW|RKEY, RBLCK);
6804  _line_progressed(1);
6805  _set_indentation(startindent);
6806  goto mapblck_finish;
6807  }
6808  else if(first == '-')
6809  {
6810  _c4dbgp("mapblck[RKEY]: maybe doc?");
6811  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6812  {
6813  _c4dbgp("mapblck[RKEY]: end+start doc");
6814  _start_doc_suddenly();
6815  _line_progressed(3);
6816  _maybe_skip_whitespace_tokens();
6817  goto mapblck_finish;
6818  }
6819  else
6820  {
6821  _c4err("parse error");
6822  }
6823  }
6824  else if(first == '.')
6825  {
6826  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6827  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6828  {
6829  _c4dbgp("mapblck[RKEY]: end doc");
6830  _end_doc_suddenly();
6831  _line_progressed(3);
6832  _maybe_skip_whitespace_tokens();
6833  _check_doc_end_tokens();
6834  goto mapblck_finish;
6835  }
6836  else
6837  {
6838  _c4err("parse error"); // LCOV_EXCL_LINE
6839  }
6840  }
6841  else
6842  {
6843  _c4err("parse error");
6844  }
6845  }
6846  else if(has_any(RVAL))
6847  {
6848  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6849  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6850  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6851  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6852  //
6853  // handle indentation
6854  //
6855  if(m_evt_handler->m_curr->at_line_beginning())
6856  {
6857  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6858  m_evt_handler->m_curr->more_indented = false;
6859  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6860  if(m_evt_handler->m_curr->indentation_eq_extra())
6861  {
6862  _c4dbgp("mapblck[RVAL]: skip indentation!");
6863  _line_progressed(m_evt_handler->m_curr->indref + 1);
6864  if(!m_evt_handler->m_curr->line_contents.rem.len)
6865  goto mapblck_again;
6866  }
6867  else if(m_evt_handler->m_curr->indentation_gt_extra())
6868  {
6869  _c4dbgp("mapblck[RVAL]: more indented!");
6870  m_evt_handler->m_curr->more_indented = true;
6871  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6872  if(!m_evt_handler->m_curr->line_contents.rem.len)
6873  goto mapblck_again; // LCOV_EXCL_LINE
6874  }
6875  else if(m_evt_handler->m_curr->indentation_lt_extra())
6876  {
6877  if(m_evt_handler->m_curr->indentation_eq())
6878  {
6879  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6880  // watchout for indentless seqs
6881  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6882  {
6883  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6884  _handle_annotations_before_blck_val_scalar();
6885  m_evt_handler->set_val_scalar_plain_empty();
6886  addrem_flags(RNXT, RVAL);
6887  goto mapblck_again;
6888  }
6889  }
6890  else
6891  {
6892  _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6893  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6894  _handle_indentation_pop_from_block_map();
6895  if(has_all(RMAP|RBLCK))
6896  {
6897  _c4dbgp("mapblck[RVAL]: still mapblck!");
6898  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6899  if(has_any(RNXT))
6900  {
6901  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6902  m_evt_handler->add_sibling();
6903  addrem_flags(RKEY, RNXT);
6904  }
6905  goto mapblck_again;
6906  }
6907  else
6908  {
6909  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6910  goto mapblck_finish;
6911  }
6912  }
6913  }
6914  }
6915  const size_t startcol = _handle_block_skip_leading_whitespace();
6916  if(startcol == npos)
6917  {
6918  _c4dbgp("mapblck[RVAL]: whitespace only");
6919  goto mapblck_again; // LCOV_EXCL_LINE
6920  }
6921  const size_t tabmark = _handle_block_get_whitespace_mark();
6922  //
6923  // now handle the tokens
6924  //
6925  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6926  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6927  const size_t startline = m_evt_handler->m_curr->pos.line;
6928  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6929  _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6930  ScannedScalar sc;
6931  if(first == '\'')
6932  {
6933  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6934  sc = _scan_scalar_squot();
6935  if(!_maybe_scan_following_colon())
6936  {
6937  _c4dbgp("mapblck[RVAL]: set as val");
6938  _handle_annotations_before_blck_val_scalar();
6939  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6940  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6941  addrem_flags(RNXT, RVAL);
6942  }
6943  else
6944  {
6945  _c4assert(m_evt_handler->m_curr->indref != npos);
6946  _c4assert(startindent > m_evt_handler->m_curr->indref);
6947  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6948  _handle_block_check_leading_tabs(startcol);
6949  _handle_annotations_before_start_mapblck(startline);
6950  addrem_flags(RNXT, RVAL);
6951  _handle_colon();
6952  m_evt_handler->begin_map_val_block();
6953  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6954  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6955  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6956  _maybe_skip_whitespace_tokens();
6957  // keep the child state on RVAL
6958  addrem_flags(RVAL, RNXT);
6959  }
6960  }
6961  else if(first == '"')
6962  {
6963  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6964  sc = _scan_scalar_dquot();
6965  if(!_maybe_scan_following_colon())
6966  {
6967  _c4dbgp("mapblck[RVAL]: set as val");
6968  _handle_annotations_before_blck_val_scalar();
6969  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6970  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6971  addrem_flags(RNXT, RVAL);
6972  }
6973  else
6974  {
6975  _c4assert(m_evt_handler->m_curr->indref != npos);
6976  _c4assert(startindent > m_evt_handler->m_curr->indref);
6977  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6978  _handle_block_check_leading_tabs(startcol);
6979  _handle_annotations_before_start_mapblck(startline);
6980  addrem_flags(RNXT, RVAL);
6981  _handle_colon();
6982  m_evt_handler->begin_map_val_block();
6983  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6984  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6985  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6986  _maybe_skip_whitespace_tokens();
6987  // keep the child state on RVAL
6988  addrem_flags(RVAL, RNXT);
6989  }
6990  }
6991  // block scalars can only appear as keys when in QMRK scope
6992  // (ie, after ? tokens), so no need to scan following colon
6993  else if(first == '|')
6994  {
6995  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6996  ScannedBlock sb;
6997  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6998  _handle_annotations_before_blck_val_scalar();
6999  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7000  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7001  addrem_flags(RNXT, RVAL);
7002  }
7003  else if(first == '>')
7004  {
7005  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
7006  ScannedBlock sb;
7007  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7008  _handle_annotations_before_blck_val_scalar();
7009  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7010  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7011  addrem_flags(RNXT, RVAL);
7012  }
7013  else if(_scan_scalar_plain_map_blck(&sc))
7014  {
7015  _c4dbgp("mapblck[RVAL]: plain scalar.");
7016  if(!_maybe_scan_following_colon())
7017  {
7018  _c4dbgp("mapblck[RVAL]: set as val");
7019  _handle_annotations_before_blck_val_scalar();
7020  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7021  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7022  addrem_flags(RNXT, RVAL);
7023  }
7024  else
7025  {
7026  _c4assert(m_evt_handler->m_curr->indref != npos);
7027  _c4assert(startindent > m_evt_handler->m_curr->indref);
7028  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7029  _handle_block_check_leading_tabs(startcol, tabmark);
7030  addrem_flags(RNXT, RVAL);
7031  _handle_annotations_before_start_mapblck(startline);
7032  _handle_colon();
7033  m_evt_handler->begin_map_val_block();
7034  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7035  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7036  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7037  _maybe_skip_whitespace_tokens();
7038  // keep the child state on RVAL
7039  addrem_flags(RVAL, RNXT);
7040  }
7041  }
7042  else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7043  {
7044  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7045  _c4err("parse error");
7046  _c4dbgp("mapblck[RVAL]: start val seqblck");
7047  _handle_block_check_leading_tabs(startcol);
7048  addrem_flags(RNXT, RVAL);
7049  _handle_annotations_before_blck_val_scalar();
7050  m_evt_handler->begin_seq_val_block();
7051  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7052  _set_indentation(startindent);
7053  _line_progressed(1);
7054  _maybe_skip_whitespace_tokens();
7055  goto mapblck_finish;
7056  }
7057  else if(first == '[')
7058  {
7059  _c4dbgp("mapblck[RVAL]: start val seqflow");
7060  addrem_flags(RNXT, RVAL);
7061  _handle_annotations_before_blck_val_scalar();
7062  m_evt_handler->begin_seq_val_flow();
7063  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7064  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7065  _line_progressed(1);
7066  goto mapblck_finish;
7067  }
7068  else if(first == '{')
7069  {
7070  _c4dbgp("mapblck[RVAL]: start val mapflow");
7071  addrem_flags(RNXT, RVAL);
7072  _handle_annotations_before_blck_val_scalar();
7073  m_evt_handler->begin_map_val_flow();
7074  addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7075  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7076  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7077  _line_progressed(1);
7078  goto mapblck_finish;
7079  }
7080  else if(first == '*')
7081  {
7082  csubstr ref = _scan_ref_map();
7083  _c4dbgpf("mapblck[RVAL]: ref! {}", _prs(ref));
7084  if(_maybe_scan_following_colon())
7085  {
7086  _c4dbgp("mapblck[RVAL]: start child map, block");
7087  addrem_flags(RNXT, RVAL);
7088  _handle_annotations_before_blck_val_scalar();
7089  m_evt_handler->begin_map_val_block();
7090  _handle_keyref(ref);
7091  _set_indentation(startindent);
7092  // keep going in RVAL
7093  addrem_flags(RVAL, RNXT);
7094  }
7095  else
7096  {
7097  _c4dbgp("mapblck[RVAL]: was val ref");
7098  _handle_valref(ref);
7099  addrem_flags(RNXT, RVAL);
7100  }
7101  _maybe_skip_whitespace_tokens();
7102  }
7103  else if(first == '&')
7104  {
7105  csubstr anchor = _scan_anchor();
7106  _c4dbgpf("mapblck[RVAL]: anchor! {}", _prs(anchor));
7107  // we need to buffer the anchors, as there may be two
7108  // consecutive anchors in here
7109  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7110  }
7111  else if(first == '!')
7112  {
7113  csubstr tag = _scan_tag();
7114  _c4dbgpf("mapblck[RVAL]: tag! {}", _prs(tag));
7115  // we need to buffer the tags, as there may be two
7116  // consecutive tags in here
7117  _add_annotation(&m_pending_tags, tag, startindent, startline);
7118  }
7119  else if(first == '?')
7120  {
7121  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7122  _c4err("parse error");
7123  _c4dbgp("mapblck[RVAL]: start val mapblck");
7124  addrem_flags(RNXT, RVAL);
7125  _handle_annotations_before_blck_val_scalar();
7126  m_evt_handler->begin_map_val_block();
7127  addrem_flags(QMRK, RNXT);
7128  _set_indentation(startindent);
7129  _line_progressed(1);
7130  _maybe_skipchars(' ');
7131  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7132  {
7133  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7134  addrem_flags(RKCL, QMRK);
7135  m_evt_handler->begin_seq_key_block();
7136  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7137  _save_indentation();
7138  _line_progressed(1);
7139  _maybe_skipchars(' ');
7140  goto mapblck_finish;
7141  }
7142  goto mapblck_again;
7143  }
7144  else if(first == ':')
7145  {
7146  _c4dbgp("mapblck[RVAL]: start val mapblck");
7147  addrem_flags(RNXT, RVAL);
7148  _handle_annotations_before_start_mapblck(startline);
7149  _handle_colon();
7150  m_evt_handler->begin_map_val_block();
7151  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7152  m_evt_handler->set_key_scalar_plain_empty();
7153  // keep the child state on RVAL
7154  addrem_flags(RVAL, RNXT);
7155  _line_progressed(1);
7156  _maybe_skip_whitespace_tokens();
7157  goto mapblck_again;
7158  }
7159  else
7160  {
7161  _c4err("parse error"); // LCOV_EXCL_LINE
7162  }
7163  }
7164  else if(has_any(RNXT))
7165  {
7166  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7167  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7168  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7169  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7170  //
7171  // handle indentation
7172  //
7173  if(m_evt_handler->m_curr->at_line_beginning())
7174  {
7175  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7176  if(m_evt_handler->m_curr->indentation_eq())
7177  {
7178  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7179  _line_progressed(m_evt_handler->m_curr->indref);
7180  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7181  m_evt_handler->add_sibling();
7182  addrem_flags(RKEY, RNXT);
7183  goto mapblck_again;
7184  }
7185  else if(m_evt_handler->m_curr->indentation_lt())
7186  {
7187  _c4dbgp("mapblck[RNXT]: smaller indentation!");
7188  _handle_indentation_pop_from_block_map();
7189  if(has_all(RMAP|RBLCK))
7190  {
7191  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7192  if(!has_any(RKCL))
7193  {
7194  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7195  m_evt_handler->add_sibling();
7196  addrem_flags(RKEY, RNXT);
7197  }
7198  goto mapblck_again;
7199  }
7200  else
7201  {
7202  goto mapblck_finish;
7203  }
7204  }
7205  }
7206  else
7207  {
7208  _c4dbgp("mapblck[RNXT]: NOT at line begin");
7209  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7210  {
7211  _c4err("parse error");
7212  }
7213  else
7214  {
7215  _skipchars(" \t");
7216  if(!m_evt_handler->m_curr->line_contents.rem.len)
7217  {
7218  _c4dbgp("seqblck[RNXT]: again");
7219  goto mapblck_again; // LCOV_EXCL_LINE
7220  }
7221  }
7222  }
7223  //
7224  // handle tokens
7225  //
7226  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7227  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7228  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7229  if(first == ' ')
7230  {
7231  _c4dbgp("mapblck[RNXT]: skip spaces");
7232  _maybe_skip_whitespace_tokens();
7233  }
7234  else
7235  {
7236  _c4err("parse error");
7237  }
7238  }
7239  else if(has_any(QMRK))
7240  {
7241  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7242  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7243  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7244  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7245  if(_handle_map_block_qmrk())
7246  goto mapblck_again;
7247  else
7248  goto mapblck_finish;
7249  }
7250  else if(has_any(RKCL)) // read the key colon (after QMRK)
7251  {
7252  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7253  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7254  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7255  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7256  if(_handle_map_block_rkcl())
7257  goto mapblck_again;
7258  else
7259  goto mapblck_finish;
7260  }
7261 
7262  mapblck_again:
7263  _c4dbgt("mapblck: again", 0);
7264  if(_finished_line())
7265  {
7266  _line_ended();
7267  _scan_line();
7268  if(_finished_file())
7269  {
7270  _c4dbgp("mapblck: file finished!");
7271  _end_map_blck();
7272  goto mapblck_finish;
7273  }
7274  _c4dbgnextline();
7275  }
7276  goto mapblck_start;
7277 
7278  mapblck_finish:
7279  _c4dbgp("mapblck: finish");
7280 }
7281 
7282 
7283 //-----------------------------------------------------------------------------
7284 
7285 // return true if we should remain in map_block
7286 template<class EventHandler>
7287 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7288 {
7289  //
7290  // handle indentation
7291  //
7292  if(m_evt_handler->m_curr->at_line_beginning())
7293  {
7294  _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7295  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7296  if(m_evt_handler->m_curr->indentation_eq_extra())
7297  {
7298  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7299  _line_progressed(m_evt_handler->m_curr->indref + 1);
7300  if(!m_evt_handler->m_curr->line_contents.rem.len)
7301  return true; // go again
7302  }
7303  // indentation can be larger in QMRK state
7304  else if(m_evt_handler->m_curr->indentation_gt_extra())
7305  {
7306  _c4dbgp("mapblck[QMRK]: larger indentation !");
7307  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7308  if(!m_evt_handler->m_curr->line_contents.rem.len)
7309  return true; // go again
7310  }
7311  else
7312  {
7313  _c4dbgp("mapblck[QMRK]: smaller indentation!");
7314  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7315  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7316  if(m_evt_handler->m_curr->indentation_eq()
7317  // defend against docs or indentless seqs
7318  && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7319  {
7320  _c4dbgp("mapblck[QMRK]: QMRK finished!");
7321  _handle_annotations_before_blck_key_scalar();
7322  m_evt_handler->set_key_scalar_plain_empty();
7323  addrem_flags(RKCL, QMRK);
7324  return true; // go again
7325  }
7326  else if(m_evt_handler->m_curr->indentation_lt())
7327  {
7328  _c4dbgp("mapblck[QMRK]: indentation pop!");
7329  _handle_indentation_pop_from_block_map();
7330  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7331  if(has_all(RMAP|RBLCK))
7332  {
7333  _c4dbgp("mapblck[QMRK]: still mapblck!");
7334  return true; // go again
7335  }
7336  else
7337  {
7338  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7339  return false; // finish mapblck
7340  }
7341  }
7342  }
7343  }
7344  //
7345  // now handle the tokens
7346  //
7347  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7348  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7349  const size_t startline = m_evt_handler->m_curr->pos.line;
7350  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7351  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7352  ScannedScalar sc;
7353  if(first == '\'')
7354  {
7355  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7356  sc = _scan_scalar_squot();
7357  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7358  addrem_flags(RKCL, QMRK);
7359  if(!_maybe_scan_following_colon())
7360  {
7361  _c4dbgp("mapblck[QMRK]: set as key");
7362  _handle_annotations_before_blck_key_scalar();
7363  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7364  }
7365  else
7366  {
7367  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7368  _handle_annotations_before_start_mapblck_as_key();
7369  m_evt_handler->begin_map_key_block();
7370  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7371  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7372  _maybe_skip_whitespace_tokens();
7373  _set_indentation(startindent);
7374  // keep the child state on RVAL
7375  addrem_flags(RVAL, RKCL);
7376  }
7377  }
7378  else if(first == '"')
7379  {
7380  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7381  sc = _scan_scalar_dquot();
7382  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7383  addrem_flags(RKCL, QMRK);
7384  if(!_maybe_scan_following_colon())
7385  {
7386  _c4dbgp("mapblck[QMRK]: set as key");
7387  _handle_annotations_before_blck_key_scalar();
7388  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7389  }
7390  else
7391  {
7392  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7393  _handle_annotations_before_start_mapblck_as_key();
7394  m_evt_handler->begin_map_key_block();
7395  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7396  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7397  _maybe_skip_whitespace_tokens();
7398  _set_indentation(startindent);
7399  // keep the child state on RVAL
7400  addrem_flags(RVAL, RKCL);
7401  }
7402  }
7403  else if(first == '|')
7404  {
7405  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7406  ScannedBlock sb;
7407  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7408  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7409  _handle_annotations_before_blck_key_scalar();
7410  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7411  addrem_flags(RKCL, QMRK);
7412  }
7413  else if(first == '>')
7414  {
7415  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7416  ScannedBlock sb;
7417  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7418  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7419  _handle_annotations_before_blck_key_scalar();
7420  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7421  addrem_flags(RKCL, QMRK);
7422  }
7423  else if(_scan_scalar_plain_map_blck(&sc))
7424  {
7425  _c4dbgp("mapblck[QMRK]: plain scalar");
7426  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7427  addrem_flags(RKCL, QMRK);
7428  if(!_maybe_scan_following_colon())
7429  {
7430  _c4dbgp("mapblck[QMRK]: set as key");
7431  _handle_annotations_before_blck_key_scalar();
7432  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7433  }
7434  else
7435  {
7436  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7437  _handle_annotations_before_start_mapblck_as_key();
7438  m_evt_handler->begin_map_key_block();
7439  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7440  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7441  _maybe_skip_whitespace_tokens();
7442  _set_indentation(startindent);
7443  // keep the child state on RVAL
7444  addrem_flags(RVAL, RKCL);
7445  }
7446  }
7447  else if(first == ':')
7448  {
7449  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7450  addrem_flags(RKCL, QMRK);
7451  _handle_annotations_before_start_mapblck_as_key();
7452  m_evt_handler->begin_map_key_block();
7453  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7454  m_evt_handler->set_key_scalar_plain_empty();
7455  _line_progressed(1);
7456  _maybe_skip_whitespace_tokens();
7457  _set_indentation(startindent);
7458  // keep the child state on RVAL
7459  addrem_flags(RVAL, RKCL);
7460  }
7461  else if(first == '*')
7462  {
7463  csubstr ref = _scan_ref_map();
7464  _c4dbgpf("mapblck[QMRK]: key ref! {}", _prs(ref));
7465  addrem_flags(RKCL, QMRK);
7466  if(!_maybe_scan_following_colon())
7467  {
7468  _c4dbgp("mapblck[QMRK]: set ref as key");
7469  _handle_keyref(ref);
7470  }
7471  else
7472  {
7473  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7474  _handle_annotations_before_start_mapblck_as_key();
7475  m_evt_handler->begin_map_key_block();
7476  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7477  _handle_keyref(ref);
7478  _set_indentation(startindent);
7479  // keep the child state on RVAL
7480  addrem_flags(RVAL, RKCL|QMRK);
7481  }
7482  _maybe_skip_whitespace_tokens();
7483  }
7484  else if(first == '&')
7485  {
7486  csubstr anchor = _scan_anchor();
7487  _c4dbgpf("mapblck[QMRK]: key anchor! {}", _prs(anchor));
7488  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7489  }
7490  else if(first == '!')
7491  {
7492  csubstr tag = _scan_tag();
7493  _c4dbgpf("mapblck[QMRK]: key tag! {}", _prs(tag));
7494  _add_annotation(&m_pending_tags, tag, startindent, startline);
7495  }
7496  else if(first == '-')
7497  {
7498  _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7499  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7500  {
7501  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7502  addrem_flags(RKCL, QMRK);
7503  _handle_annotations_before_blck_key_scalar();
7504  m_evt_handler->begin_seq_key_block();
7505  addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7506  _set_indentation(startindent);
7507  _line_progressed(1);
7508  }
7509  else
7510  {
7511  _c4dbgp("mapblck[QMRK]: end+start doc");
7512  _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7513  _start_doc_suddenly();
7514  _line_progressed(3);
7515  }
7516  _maybe_skip_whitespace_tokens();
7517  return false; // finish mapblck
7518  }
7519  else if(first == '[')
7520  {
7521  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7522  addrem_flags(RKCL, QMRK);
7523  _handle_annotations_before_blck_key_scalar();
7524  m_evt_handler->begin_seq_key_flow();
7525  addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7526  _set_indentation(m_evt_handler->m_parent->indref + 1);
7527  _line_progressed(1);
7528  return false; // finish mapblck
7529  }
7530  else if(first == '{')
7531  {
7532  _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7533  addrem_flags(RKCL, QMRK);
7534  _handle_annotations_before_blck_key_scalar();
7535  m_evt_handler->begin_map_key_flow();
7536  addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7537  _set_indentation(m_evt_handler->m_parent->indref + 1);
7538  _line_progressed(1);
7539  return false; // finish mapblck
7540  }
7541  else if(first == '?')
7542  {
7543  _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7544  _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7545  _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7546  addrem_flags(RKCL, QMRK);
7547  _handle_annotations_before_blck_key_scalar();
7548  m_evt_handler->begin_map_key_block();
7549  addrem_flags(QMRK, RKCL);
7550  _set_indentation(startindent);
7551  // indentation_lt() should be handled elsewhere
7552  _line_progressed(1);
7553  _maybe_skipchars(' ');
7554  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7555  {
7556  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7557  addrem_flags(RKCL, QMRK);
7558  m_evt_handler->begin_seq_key_block();
7559  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7560  _save_indentation();
7561  _line_progressed(1);
7562  _maybe_skipchars(' ');
7563  return false;
7564  }
7565  }
7566  else
7567  {
7568  _c4err("parse error");
7569  }
7570  return true; // continue in mapblck
7571 }
7572 
7573 
7574 //-----------------------------------------------------------------------------
7575 
7576 // return true if we should remain in map_block
7577 template<class EventHandler>
7578 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7579 {
7580  //
7581  // handle indentation
7582  //
7583  if(m_evt_handler->m_curr->at_line_beginning())
7584  {
7585  if(m_evt_handler->m_curr->indentation_eq())
7586  {
7587  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7588  _line_progressed(m_evt_handler->m_curr->indref);
7589  if(!m_evt_handler->m_curr->line_contents.rem.len)
7590  return true; // continue in mapblck
7591  }
7592  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7593  {
7594  _c4err("invalid indentation");
7595  }
7596  }
7597  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7598  _c4dbgpf("mapblck[RKCL]: '{}'", first);
7599  if(first == ':')
7600  {
7601  _c4dbgp("mapblck[RKCL]: found the colon");
7602  _line_progressed(1);
7603  _maybe_skipchars(' ');
7604  #if defined(__GNUC__) && (__GNUC__ >= 12) \
7605  && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7606  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7607  #endif
7608  // sequence is valid after the RKCL ':'
7609  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7610  {
7611  addrem_flags(RVAL, RKCL);
7612  return true; // continue in mapblck
7613  }
7614  else
7615  {
7616  _c4dbgp("mapblck[RKCL]: start val seqblck");
7617  addrem_flags(RNXT, RKCL);
7618  m_evt_handler->begin_seq_val_block();
7619  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7620  _save_indentation();
7621  _line_progressed(1);
7622  _maybe_skipchars(' ');
7623  return false; // finish mapblck
7624  }
7625  }
7626  else if(first == '?')
7627  {
7628  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
7629  m_evt_handler->set_val_scalar_plain_empty();
7630  m_evt_handler->add_sibling();
7631  addrem_flags(QMRK, RKCL);
7632  _line_progressed(1);
7633  _maybe_skipchars(' ');
7634  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7635  {
7636  _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7637  addrem_flags(RKCL, QMRK);
7638  m_evt_handler->begin_seq_key_block();
7639  addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7640  _save_indentation();
7641  _line_progressed(1);
7642  _maybe_skipchars(' ');
7643  return false;
7644  }
7645  }
7646  else if(first == '-')
7647  {
7648  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7649  {
7650  _c4dbgp("mapblck[RKCL]: end+start doc");
7651  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7652  _start_doc_suddenly();
7653  _line_progressed(3);
7654  _maybe_skip_whitespace_tokens();
7655  return false; // finish mapblck
7656  }
7657  else
7658  {
7659  _c4err("parse error"); // LCOV_EXCL_LINE
7660  }
7661  }
7662  else if(first == '.')
7663  {
7664  _c4dbgp("mapblck[RKCL]: maybe end doc?");
7665  csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7666  if(rs == ".." || rs.begins_with(".. "))
7667  {
7668  _c4dbgp("mapblck[RKCL]: end+start doc");
7669  _end_doc_suddenly();
7670  _line_progressed(3);
7671  _maybe_skip_whitespace_tokens();
7672  _check_doc_end_tokens();
7673  return false; // finish mapblck
7674  }
7675  else
7676  {
7677  _c4err("parse error"); // LCOV_EXCL_LINE
7678  }
7679  }
7680  else/* if(m_was_inside_qmrk) */
7681  {
7682  _c4dbgp("mapblck[RKCL]: missing :");
7683  if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7684  _c4err("parse error"); // LCOV_EXCL_LINE
7685  m_evt_handler->set_val_scalar_plain_empty();
7686  m_evt_handler->add_sibling();
7687  addrem_flags(RKEY, RKCL);
7688  }
7689  return true;
7690 }
7691 
7692 
7693 //-----------------------------------------------------------------------------
7694 
7695 template<class EventHandler>
7696 void ParseEngine<EventHandler>::_handle_unk_json()
7697 {
7698  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7699 
7700  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7701  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7702 
7703  _maybe_skip_comment();
7704  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7705  if(!rem.len)
7706  return;
7707 
7708  size_t pos = rem.first_not_of(" \t");
7709  if(pos)
7710  {
7711  pos = pos != npos ? pos : rem.len;
7712  _c4dbgpf("skipping indentation of {}", pos);
7713  _line_progressed(pos);
7714  rem = m_evt_handler->m_curr->line_contents.rem;
7715  if(!rem.len)
7716  return;
7717  _c4dbgpf("rem is now {}", _prs(rem));
7718  }
7719 
7720  if(rem.begins_with('['))
7721  {
7722  _c4dbgp("it's a seq");
7723  _check_trailing_doc_token();
7724  _maybe_begin_doc();
7725  m_evt_handler->begin_seq_val_flow();
7726  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7727  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7728  m_doc_empty = false;
7729  _line_progressed(1);
7730  }
7731  else if(rem.begins_with('{'))
7732  {
7733  _c4dbgp("it's a map");
7734  _check_trailing_doc_token();
7735  _maybe_begin_doc();
7736  m_evt_handler->begin_map_val_flow();
7737  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7738  m_doc_empty = false;
7739  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7740  _line_progressed(1);
7741  }
7742  else if(_handle_bom())
7743  {
7744  _c4dbgp("byte order mark");
7745  }
7746  else
7747  {
7748  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7749  _maybe_skip_whitespace_tokens();
7750  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7751  if(!s.len)
7752  return;
7753  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7754  const char first = s.str[0];
7755  ScannedScalar sc;
7756  if(first == '"')
7757  {
7758  _c4dbgp("runk_json: scanning double-quoted scalar");
7759  _check_trailing_doc_token();
7760  _maybe_begin_doc();
7761  add_flags(RDOC);
7762  m_doc_empty = false;
7763  sc = _scan_scalar_dquot();
7764  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7765  if(!_maybe_scan_following_colon())
7766  {
7767  _c4dbgp("runk_json: set as val");
7768  _handle_annotations_before_blck_val_scalar();
7769  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7770  }
7771  else
7772  {
7773  _c4err("parse error");
7774  }
7775  }
7776  else if(_scan_scalar_plain_unk(&sc))
7777  {
7778  _c4dbgp("runk_json: got a plain scalar");
7779  _check_trailing_doc_token();
7780  _maybe_begin_doc();
7781  add_flags(RDOC);
7782  m_doc_empty = false;
7783  if(!_maybe_scan_following_colon())
7784  {
7785  _c4dbgp("runk_json: set as val");
7786  _handle_annotations_before_blck_val_scalar();
7787  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7788  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7789  }
7790  else
7791  {
7792  _c4err("parse error"); // LCOV_EXCL_LINE
7793  }
7794  }
7795  else
7796  {
7797  _c4err("parse error"); // LCOV_EXCL_LINE
7798  }
7799  }
7800 }
7801 
7802 
7803 //-----------------------------------------------------------------------------
7804 
7805 template<class EventHandler>
7806 void ParseEngine<EventHandler>::_handle_unk()
7807 {
7808  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7809 
7810  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7811  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7812 
7813  _maybe_skipchars(' ');
7814  _maybe_skip_comment();
7815 
7816  if(!m_evt_handler->m_curr->line_contents.rem.len)
7817  return;
7818 
7819  _c4dbgpf("runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
7820 
7821  if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7822  {
7823  _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7824  _c4dbgp("runk: check BOM");
7825  if(_handle_bom())
7826  {
7827  m_bom_line = m_evt_handler->m_curr->pos.line;
7828  _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7829  return;
7830  }
7831  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7832  _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7833  if(first == '-')
7834  {
7835  _c4dbgp("runk: rtop: suspecting doc");
7836  if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7837  {
7838  _c4dbgp("runk: rtop: begin doc");
7839  _maybe_end_doc();
7840  _begin2_doc_expl();
7841  _set_indentation(0);
7842  addrem_flags(RDOC|RUNK, NDOC);
7843  _line_progressed(3u);
7844  _maybe_skip_whitespace_tokens();
7845  return;
7846  }
7847  }
7848  else if(first == '.')
7849  {
7850  _c4dbgp("runk: rtop: suspecting doc end");
7851  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7852  {
7853  _c4dbgp("runk: rtop: end doc");
7854  if(has_any(RDOC))
7855  {
7856  _end2_doc_expl();
7857  }
7858  else
7859  {
7860  _c4dbgp("runk: rtop: ignore end doc");
7861  }
7862  addrem_flags(NDOC|RUNK, RDOC);
7863  _line_progressed(3u);
7864  _maybe_skip_whitespace_tokens();
7865  _check_doc_end_tokens();
7866  return;
7867  }
7868  }
7869  else if(first == '%')
7870  {
7871  _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
7872  if(C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC))))
7873  _c4err("need document footer before directives");
7874  _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7875  return;
7876  }
7877  }
7878 
7879  /* no else-if! */
7880 
7881  size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7882  size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7883  if(m_bom_len)
7884  {
7885  _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7886  if(m_evt_handler->m_curr->pos.line == m_bom_line)
7887  {
7888  _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7889  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7890  remindent -= m_bom_len;
7891  }
7892  else
7893  {
7894  m_bom_len = 0;
7895  }
7896  }
7897 
7898  size_t startcol = _handle_block_skip_leading_whitespace();
7899  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7900 
7901  if(first == '[')
7902  {
7903  _c4dbgp("runk: flow seq?");
7904  _handle_unk_begin_doc();
7905  if(C4_LIKELY( ! _annotations_require_key_container()))
7906  {
7907  _c4dbgp("runk: it's a seq, flow");
7908  _handle_annotations_before_blck_val_scalar();
7909  m_evt_handler->begin_seq_val_flow();
7910  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7911  _set_indentation(0);
7912  }
7913  else
7914  {
7915  _c4dbgp("runk: start new block map, set flow seq as key (!)");
7916  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7917  m_evt_handler->begin_map_val_block();
7918  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7919  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7920  m_evt_handler->begin_seq_key_flow();
7921  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7922  _set_indentation(0);
7923  }
7924  _line_progressed(1);
7925  }
7926  else if(first == '{')
7927  {
7928  _c4dbgp("runk: flow map?");
7929  _handle_unk_begin_doc();
7930  if(C4_LIKELY( ! _annotations_require_key_container()))
7931  {
7932  _c4dbgp("runk: it's a map, flow");
7933  _handle_annotations_before_blck_val_scalar();
7934  m_evt_handler->begin_map_val_flow();
7935  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7936  _set_indentation(0);
7937  }
7938  else
7939  {
7940  _c4dbgp("runk: start new block map, set flow map as key (!)");
7941  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7942  m_evt_handler->begin_map_val_block();
7943  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7944  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7945  m_evt_handler->begin_map_key_flow();
7946  addrem_flags(RMAP|RFLOW, RBLCK);
7947  _set_indentation(0);
7948  }
7949  _line_progressed(1);
7950  }
7951  else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7952  {
7953  _c4dbgp("runk: it's a seq, block");
7954  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7955  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7956  _handle_unk_begin_doc();
7957  _handle_annotations_before_blck_val_scalar();
7958  m_evt_handler->begin_seq_val_block();
7959  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7960  _set_indentation(startindent);
7961  _line_progressed(1);
7962  _maybe_skipchars(' ');
7963  }
7964  else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7965  {
7966  _c4dbgp("runk: it's a map + this key is complex");
7967  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7968  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7969  _handle_block_check_leading_tabs(startcol);
7970  _handle_unk_begin_doc();
7971  _handle_annotations_before_blck_val_scalar();
7972  m_evt_handler->begin_map_val_block();
7973  addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7974  _set_indentation(startindent);
7975  _line_progressed(1);
7976  _maybe_skipchars(' ');
7977  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7978  {
7979  _c4dbgp("runk: seqblck key starts after ?");
7980  addrem_flags(RKCL, QMRK);
7981  m_evt_handler->begin_seq_key_block();
7982  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7983  _save_indentation();
7984  _line_progressed(1);
7985  _maybe_skipchars(' ');
7986  }
7987  }
7988  else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7989  {
7990  if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7991  {
7992  _c4dbgp("runk: it's a map with an empty key");
7993  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7994  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7995  _handle_block_check_leading_tabs(startcol);
7996  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7997  _handle_unk_begin_doc();
7998  _handle_annotations_before_start_mapblck(startline);
7999  _handle_colon();
8000  m_evt_handler->begin_map_val_block();
8001  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8002  m_evt_handler->set_key_scalar_plain_empty();
8003  _set_indentation(startindent);
8004  }
8005  else
8006  {
8007  _c4err("block colon cannot occur on a new line unless ? is used");
8008  }
8009  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8010  _line_progressed(1);
8011  _maybe_skip_whitespace_tokens();
8012  }
8013  else if(first == '&')
8014  {
8015  csubstr anchor = _scan_anchor();
8016  _c4dbgpf("anchor! {}", _prs(anchor));
8017  const size_t line = m_evt_handler->m_curr->pos.line;
8018  _handle_unk_begin_doc();
8019  _add_annotation(&m_pending_anchors, anchor, remindent, line);
8020  _set_indentation(0);
8021  }
8022  else if(first == '*')
8023  {
8024  csubstr ref = _scan_ref_map();
8025  _c4dbgpf("runk: ref! {}", _prs(ref));
8026  _handle_unk_begin_doc();
8027  if(!_maybe_scan_following_colon())
8028  {
8029  _c4dbgp("runk: set val ref");
8030  _handle_valref(ref);
8031  }
8032  else
8033  {
8034  _c4dbgp("runk: start new block map, set ref as key");
8035  _handle_block_check_leading_tabs(startcol);
8036  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8037  _handle_annotations_before_start_mapblck(startline);
8038  m_evt_handler->begin_map_val_block();
8039  _handle_keyref(ref);
8040  _maybe_skip_whitespace_tokens();
8041  _set_indentation(0);
8042  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8043  }
8044  }
8045  else if(first == '!')
8046  {
8047  csubstr tag_orig;
8048  csubstr tag = _scan_tag(&tag_orig);
8049  _c4dbgpf("runk: val tag! {}", _prs(tag));
8050  // we need to buffer the tags, as there may be two
8051  // consecutive tags in here
8052  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8053  const size_t line = m_evt_handler->m_curr->pos.line;
8054  _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8055  }
8056  else
8057  {
8058  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8059  const size_t startscalar = _handle_block_get_whitespace_mark();
8060  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8061  auto beginmap = [&](size_t startindent_){
8062  if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8063  _c4err("multiline scalars cannot be used as implicit keys");
8064  _handle_block_check_leading_tabs(startcol, startscalar);
8065  _handle_annotations_before_start_mapblck(startline);
8066  _handle_colon();
8067  m_evt_handler->begin_map_val_block();
8068  _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8069  };
8070  auto after_beginmap = [&](size_t startindent_){
8071  _maybe_skip_whitespace_tokens();
8072  _set_indentation(startindent_);
8073  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8074  };
8075  if(first == '|')
8076  {
8077  _c4dbgp("runk: block-literal scalar");
8078  _handle_unk_begin_doc();
8079  ScannedBlock sb;
8080  _scan_block(&sb, startindent);
8081  _handle_annotations_before_blck_val_scalar();
8082  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8083  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8084  }
8085  else if(first == '>')
8086  {
8087  _c4dbgp("runk: block-folded scalar");
8088  _handle_unk_begin_doc();
8089  ScannedBlock sb;
8090  _scan_block(&sb, startindent);
8091  _handle_annotations_before_blck_val_scalar();
8092  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8093  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8094  }
8095  else if(first == '\'')
8096  {
8097  _c4dbgp("runk: single-quoted scalar");
8098  _handle_unk_begin_doc();
8099  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8100  size_t col = m_evt_handler->m_curr->pos.col;
8101  ScannedScalar sc = _scan_scalar_squot();
8102  if(!_maybe_scan_following_colon())
8103  {
8104  _c4dbgp("runk: set as val");
8105  _handle_annotations_before_blck_val_scalar();
8106  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8107  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8108  }
8109  else
8110  {
8111  _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8112  if(!firsttoken)
8113  startindent = _handle_unk_check_left_tokens(startindent, col);
8114  beginmap(startindent);
8115  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8116  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8117  after_beginmap(startindent);
8118  }
8119  }
8120  else if(first == '"')
8121  {
8122  _c4dbgp("runk: double-quoted scalar");
8123  _handle_unk_begin_doc();
8124  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8125  size_t col = m_evt_handler->m_curr->pos.col;
8126  ScannedScalar sc = _scan_scalar_dquot();
8127  if(!_maybe_scan_following_colon())
8128  {
8129  _c4dbgp("runk: set as val");
8130  _handle_annotations_before_blck_val_scalar();
8131  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8132  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8133  }
8134  else
8135  {
8136  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8137  if(!firsttoken)
8138  startindent = _handle_unk_check_left_tokens(startindent, col);
8139  beginmap(startindent);
8140  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8141  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8142  after_beginmap(startindent);
8143  }
8144  }
8145  else
8146  {
8147  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8148  size_t col = m_evt_handler->m_curr->pos.col;
8149  ScannedScalar sc;
8150  if(_scan_scalar_plain_unk(&sc))
8151  {
8152  _c4dbgp("runk: plain scalar");
8153  _handle_unk_begin_doc();
8154  if(!_maybe_scan_following_colon())
8155  {
8156  _c4dbgp("runk: set as val");
8157  _handle_annotations_before_blck_val_scalar();
8158  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8159  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8160  }
8161  else
8162  {
8163  _c4dbgp("runk: start new block map, set plain scalar as key");
8164  if(!firsttoken)
8165  startindent = _handle_unk_check_left_tokens(startindent, col);
8166  beginmap(startindent);
8167  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8168  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8169  after_beginmap(startindent);
8170  }
8171  }
8172  else
8173  {
8174  _c4err("parse error"); // LCOV_EXCL_LINE
8175  }
8176  }
8177  }
8178 }
8179 
8180 template<class EventHandler>
8181 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8182 {
8183  _c4dbgp("runk: begin doc");
8184  _check_trailing_doc_token();
8185  _maybe_begin_doc();
8186  add_flags(RDOC);
8187  m_doc_empty = false;
8188 }
8189 
8190 template<class EventHandler>
8191 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8192 {
8193  _c4assert(col >= 1);
8194  col -= 1;
8195  _c4assert(col >= m_bom_len);
8196  csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8197  size_t pos = 0;
8198  _c4dbgpf("runk: check left tokens: s={}", _prs(s, /*escape*/true));
8199  if(skip_annotations)
8200  {
8201  _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8202  _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8203  }
8204  size_t firstns = s.first_not_of(' ', pos);
8205  if(firstns == npos)
8206  firstns = s.len;
8207  _c4dbgpf("runk: check left tokens:\n"
8208  " tokens={} skipped={}\n"
8209  " bomlen={} first={} col={}\n"
8210  " (bomlen+first)={} vs {}=col\n"
8211  " startindent={} lineindent={}"
8212  , _prs(s, /*escape*/true), _prs(s.sub(firstns), /*escape*/true)
8213  , m_bom_len, firstns, col
8214  , m_bom_len+firstns, col,
8215  realindent, m_evt_handler->m_curr->line_contents.indentation);
8216  if(m_bom_len + firstns != col)
8217  _c4err("parse error");
8218  if(!skip_annotations)
8219  realindent = firstns;
8220  _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8221  return realindent;
8222 }
8223 
8224 
8225 /** skip annotations which are pending on the same line */
8226 template<class EventHandler>
8227 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8228 {
8229  csubstr first, second;
8230  uint32_t total = _get_annotations_same_line(s, &first, &second);
8231  _c4dbgpf("runk: before skip: {}", _prs(s, true));
8232  size_t pos = s.first_not_of(" \t");
8233  if(pos == npos)
8234  pos = s.len;
8235  if(!total)
8236  {
8237  *indent = *first_non_token_pos = pos;
8238  return;
8239  }
8240  _c4assert(!s.sub(pos).begins_with_any(" \t"));
8241  _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos), true));
8242  _c4dbgpf("runk: first annotation: {}", first);
8243  _c4assert(first.len);
8244  _c4assert(first.is_sub(s));
8245  _c4assert(first.is_sub(s.sub(pos)));
8246  _c4assert(s.sub(pos).begins_with(first));
8247  *indent = pos;
8248  pos += first.len;
8249  _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos), true));
8250  if(total > 1)
8251  {
8252  _c4dbgpf("runk: second annotation: {}", second);
8253  _c4assert(total == 2);
8254  _c4assert(second.len);
8255  _c4assert(second.is_sub(s));
8256  _c4assert(second.is_sub(s.sub(pos)));
8257  csubstr spos = s.sub(pos);
8258  size_t more = spos.first_not_of(" \t");
8259  _c4assert(more != npos); // because the annotations are on the same line
8260  _c4dbgpf("runk: next nonspace: {}", pos + more);
8261  pos += more;
8262  _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos), true));
8263  _c4assert(s.sub(pos).begins_with(second));
8264  pos += second.len;
8265  _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos), true));
8266  }
8267  *first_non_token_pos = pos;
8268 }
8269 
8270 
8271 template<class EventHandler>
8272 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8273 {
8274  _c4assert(!m_evt_handler->m_curr->at_first_token());
8275  (void)token_soup;
8276  using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8277  EntryPtr first = nullptr;
8278  EntryPtr second = nullptr;
8279  uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8280  if(total)
8281  {
8282  _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8283  auto valid_if_same_line = [this](EntryPtr entry){
8284  _c4dbgpf("pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8285  return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8286  };
8287  // now select annotations only on the same line
8288  total = 0;
8289  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8290  total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8291  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8292  total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8293  _c4dbgpf("{} annotations on same line", total);
8294  _c4assert(total > 0); // because this function is only called
8295  // while not at the first token. That
8296  // means we must have same-line
8297  // annotations.
8298  auto get_first_on_same_line = [this](EntryPtr not_this_one){
8299  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8300  if(&m_pending_anchors.annotations[i] != not_this_one
8301  && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8302  return &m_pending_anchors.annotations[i];
8303  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8304  if(&m_pending_tags.annotations[i] != not_this_one
8305  && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8306  return &m_pending_tags.annotations[i];
8307  C4_UNREACHABLE();
8308  return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8309  };
8310  _c4assert(total >= 1);
8311  // assign to first
8312  first = get_first_on_same_line(nullptr);
8313  _c4assert(first);
8314  _c4dbgpf("first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8315  if(total > 1)
8316  {
8317  _c4assert(total == 2);
8318  // assign to second
8319  second = get_first_on_same_line(first);
8320  _c4assert(second);
8321  _c4dbgpf("second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
8322  }
8323  auto extract_string = [&](EntryPtr e){
8324  // tags can be null when the arena ran out of space
8325  if(!e->str.str || e->str.begins_with_any("!<"))
8326  {
8327  csubstr tag = e->orig;
8328  _c4assert(tag.str);
8329  _c4assert(tag.len);
8330  _c4assert(tag.is_sub(token_soup));
8331  _c4dbgpf("tag: {} -> {}", _maybe_null_str(e->str), tag);
8332  return tag;
8333  }
8334  csubstr anchor = e->str;
8335  _c4assert(anchor.len);
8336  _c4assert(anchor.str);
8337  _c4assert(anchor.is_sub(token_soup));
8338  _c4assert(!anchor.begins_with('&'));
8339  _c4assert(anchor.str - token_soup.str > 0);
8340  // add back the anchor's &
8341  --anchor.str;
8342  ++anchor.len;
8343  _c4assert(anchor.begins_with('&'));
8344  _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8345  return anchor;
8346  };
8347  *first_ = first ? extract_string(first) : nullptr;
8348  *second_ = second ? extract_string(second) : nullptr;
8349  if(total > 1 && (first_->str > second_->str))
8350  {
8351  csubstr tmp = *first_;
8352  *first_ = *second_;
8353  *second_ = tmp;
8354  _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8355  }
8356  }
8357  return total;
8358 }
8359 
8360 
8361 //-----------------------------------------------------------------------------
8362 
/** Handle the remainder of the current line while neither RBLCK nor
 * RFLOW is set (asserted below). parse_in_place_ev() dispatches here
 * when the USTY flag is set (presumably "unknown style" -- TODO
 * confirm).
 *
 * After skipping a comment and leading spaces/tabs, the first
 * remaining character selects what to start. There are three cases,
 * depending on the current flags:
 *  - RSEQ: the destination is a sequence; only '[' or a block '-'
 *    token is accepted, anything else is an error.
 *  - RMAP: the destination is a map; accepts '{', '?', ':', anchors,
 *    tags, and references or scalars followed by a colon. A sequence,
 *    a block scalar, or a scalar/reference without a following colon
 *    is an error.
 *  - otherwise the destination is unknown: containers are begun
 *    through the begin_*() handler events, and a scalar which is not
 *    followed by a colon is set as the value, after which
 *    _end_stream() is called. */
8363 template<class EventHandler>
8364 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8365 {
 // NOTE(review): the format string has a single {} placeholder but two arguments are passed -- confirm the intended message
8366  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8367 
8368  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8369 
8370  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8371  if(has_any(RNXT))
8372  {
8373  _c4dbgp("usty[RNXT]: finishing!");
8374  _end_stream();
8375  }
8376  #endif
8377 
 // nothing to do when no content remains on the line
8378  _maybe_skip_comment();
8379  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8380  if(!rem.len)
8381  return;
8382 
 // consume leading spaces and tabs; when the whole remainder is
 // whitespace (pos == npos), all of it is consumed
8383  size_t pos = rem.first_not_of(" \t");
8384  if(pos)
8385  {
8386  pos = pos != npos ? pos : rem.len;
8387  _c4dbgpf("skipping indentation of {}", pos);
8388  _line_progressed(pos);
8389  rem = m_evt_handler->m_curr->line_contents.rem;
8390  if(!rem.len)
8391  return;
8392  _c4dbgpf("rem is now {}", _prs(rem));
8393  }
8394 
8395  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8396  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8397  char first = rem.str[0];
8398  if(has_any(RSEQ)) // destination is a sequence
8399  {
8400  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8401  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
 // both accepted forms call _push() on the handler instead of a
 // begin_seq_*() event, and consume the opening character
8402  if(first == '[')
8403  {
8404  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
8405  add_flags(RNXT);
8406  m_evt_handler->_push();
8407  addrem_flags(RFLOW|RVAL, RNXT|USTY);
8408  _set_indentation(startindent);
8409  _line_progressed(1);
8410  _maybe_skip_whitespace_tokens();
8411  }
8412  else if(first == '-' && _is_blck_token(rem))
8413  {
8414  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
8415  add_flags(RNXT);
8416  m_evt_handler->_push();
8417  addrem_flags(RBLCK|RVAL, RNXT|USTY);
8418  _set_indentation(startindent);
8419  _line_progressed(1);
8420  _maybe_skip_whitespace_tokens();
8421  }
8422  else
8423  {
8424  _c4err("can only parse a seq into an existing seq");
8425  }
8426  }
8427  else if(has_any(RMAP)) // destination is a map
8428  {
8429  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8430  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8431  if(first == '{')
8432  {
8433  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8434  add_flags(RNXT);
8435  _handle_annotations_before_blck_val_scalar();
8436  m_evt_handler->_push();
8437  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8438  _set_indentation(startindent);
8439  _line_progressed(1);
8440  _maybe_skip_whitespace_tokens();
8441  }
8442  else if(first == '?' && _is_blck_token(rem))
8443  {
8444  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8445  add_flags(RNXT);
8446  _handle_annotations_before_blck_val_scalar();
8447  m_evt_handler->_push();
8448  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8449  _save_indentation();
8450  _line_progressed(1);
8451  _maybe_skip_whitespace_tokens();
8452  }
8453  else if(first == ':' && _is_blck_token(rem))
8454  {
8455  _c4dbgp("usty[RMAP]: it's a map with an empty key");
8456  add_flags(RNXT);
8457  _handle_annotations_before_blck_val_scalar();
8458  m_evt_handler->_push();
8459  m_evt_handler->set_key_scalar_plain_empty();
8460  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8461  _save_indentation();
8462  _line_progressed(1);
8463  _maybe_skip_whitespace_tokens();
8464  }
8465  else if(rem.begins_with('&'))
8466  {
 // the anchor is buffered as a pending annotation; `rem` was saved
 // before the scan, so current_col(rem) is presumably the column
 // where the anchor starts -- TODO confirm
8467  csubstr anchor = _scan_anchor();
8468  _c4dbgpf("usty[RMAP]: anchor! {}", _prs(anchor));
8469  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8470  const size_t line = m_evt_handler->m_curr->pos.line;
8471  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8472  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8473  }
8474  else if(first == '*')
8475  {
8476  csubstr ref = _scan_ref_map();
8477  _c4dbgpf("usty[RMAP]: ref! {}", _prs(ref));
 // in a map destination, a reference is only accepted as a key
8478  if(!_maybe_scan_following_colon())
8479  {
8480  _c4err("cannot read a VAL to a map");
8481  }
8482  else
8483  {
8484  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8485  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8486  add_flags(RNXT);
8487  _handle_annotations_before_start_mapblck(startline);
8488  m_evt_handler->_push();
8489  _handle_keyref(ref);
8490  _maybe_skip_whitespace_tokens();
8491  _set_indentation(startindent);
8492  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8493  }
8494  }
8495  else if(first == '!')
8496  {
8497  csubstr tag = _scan_tag();
8498  _c4dbgpf("usty[RMAP]: val tag! {}", _prs(tag));
8499  // we need to buffer the tags, as there may be two
8500  // consecutive tags in here
8501  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8502  const size_t line = m_evt_handler->m_curr->pos.line;
8503  _add_annotation(&m_pending_tags, tag, indentation, line);
8504  }
8505  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8506  {
8507  _c4err("cannot parse a seq into an existing map");
8508  }
8509  else
8510  {
8511  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
 // NOTE(review): startindent was already initialized from this same expression above; this re-assignment looks redundant
8512  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8513  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8514  ScannedScalar sc;
8515  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
 // in a map destination every scalar must be a key, ie it must be
 // followed by a colon
8516  if(first == '\'')
8517  {
8518  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8519  sc = _scan_scalar_squot();
8520  if(!_maybe_scan_following_colon())
8521  {
8522  _c4err("cannot read a VAL to a map");
8523  }
8524  else
8525  {
8526  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8527  add_flags(RNXT);
8528  _handle_annotations_before_start_mapblck(startline);
8529  m_evt_handler->_push();
8530  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8531  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8532  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8533  _set_indentation(startindent);
8534  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8535  _maybe_skip_whitespace_tokens();
8536  }
8537  }
8538  else if(first == '"')
8539  {
8540  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8541  sc = _scan_scalar_dquot();
8542  if(!_maybe_scan_following_colon())
8543  {
8544  _c4err("cannot read a VAL to a map");
8545  }
8546  else
8547  {
8548  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8549  add_flags(RNXT);
8550  _handle_annotations_before_start_mapblck(startline);
8551  m_evt_handler->_push();
8552  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8553  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8554  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8555  _set_indentation(startindent);
8556  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8557  _maybe_skip_whitespace_tokens();
8558  }
8559  }
8560  else if(first == '|')
8561  {
8562  _c4err("block literal keys must be enclosed in '?'");
8563  }
8564  else if(first == '>')
8565  {
 // NOTE(review): same message as for '|' although '>' introduces a folded block scalar -- confirm whether "block folded" was intended
8566  _c4err("block literal keys must be enclosed in '?'");
8567  }
8568  else if(_scan_scalar_plain_unk(&sc))
8569  {
8570  _c4dbgp("usty[RMAP]: got a plain scalar");
8571  if(!_maybe_scan_following_colon())
8572  {
8573  _c4err("cannot read a VAL to a map");
8574  }
8575  else
8576  {
8577  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8578  add_flags(RNXT);
8579  _handle_annotations_before_start_mapblck(startline);
8580  m_evt_handler->_push();
8581  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8582  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8583  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8584  _set_indentation(startindent);
8585  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8586  _maybe_skip_whitespace_tokens();
8587  }
8588  }
8589  else
8590  {
8591  _c4err("parse error"); // LCOV_EXCL_LINE
8592  }
8593  }
8594  }
8595  else // destination is unknown
8596  {
8597  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8598  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
 // unlike the RSEQ/RMAP cases above, containers are started here
 // through the begin_*() events rather than through _push()
8599  if(first == '[')
8600  {
8601  _c4dbgp("usty[UNK]: it's a flow seq");
8602  add_flags(RNXT);
8603  _handle_annotations_before_blck_val_scalar();
8604  m_evt_handler->begin_seq_val_flow();
8605  addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8606  _set_indentation(startindent);
8607  _line_progressed(1);
8608  _maybe_skip_whitespace_tokens();
8609  }
8610  else if(first == '-' && _is_blck_token(rem))
8611  {
8612  _c4dbgp("usty[UNK]: it's a block seq");
8613  add_flags(RNXT);
8614  _handle_annotations_before_blck_val_scalar();
8615  m_evt_handler->begin_seq_val_block();
8616  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8617  _set_indentation(startindent);
8618  _line_progressed(1);
8619  _maybe_skip_whitespace_tokens();
8620  }
8621  else if(first == '{')
8622  {
8623  _c4dbgp("usty[UNK]: it's a flow map");
8624  add_flags(RNXT);
8625  _handle_annotations_before_blck_val_scalar();
8626  m_evt_handler->begin_map_val_flow();
8627  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8628  _set_indentation(startindent);
8629  _line_progressed(1);
8630  _maybe_skip_whitespace_tokens();
8631  }
8632  else if(first == '?' && _is_blck_token(rem))
8633  {
8634  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8635  add_flags(RNXT);
8636  _handle_annotations_before_blck_val_scalar();
8637  m_evt_handler->begin_map_val_block();
8638  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8639  _save_indentation();
8640  _line_progressed(1);
8641  _maybe_skip_whitespace_tokens();
8642  }
8643  else if(first == ':' && _is_blck_token(rem))
8644  {
8645  _c4dbgp("usty[UNK]: it's a map with an empty key");
8646  add_flags(RNXT);
8647  _handle_annotations_before_blck_val_scalar();
8648  m_evt_handler->begin_map_val_block();
8649  m_evt_handler->set_key_scalar_plain_empty();
8650  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8651  _save_indentation();
8652  _line_progressed(1);
8653  _maybe_skip_whitespace_tokens();
8654  }
8655  else if(first == '&')
8656  {
8657  csubstr anchor = _scan_anchor();
8658  _c4dbgpf("usty[UNK]: anchor! {}", _prs(anchor));
8659  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8660  const size_t line = m_evt_handler->m_curr->pos.line;
8661  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8662  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8663  }
8664  else if(first == '*')
8665  {
8666  csubstr ref = _scan_ref_map();
8667  _c4dbgpf("usty[UNK]: ref! {}", _prs(ref));
 // a reference followed by a colon becomes the first key of a new
 // block map; otherwise it is handled as a value reference
8668  if(!_maybe_scan_following_colon())
8669  {
8670  _c4dbgp("usty[UNK]: set val ref");
8671  _handle_valref(ref);
8672  }
8673  else
8674  {
8675  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8676  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8677  add_flags(RNXT);
8678  _handle_annotations_before_start_mapblck(startline);
8679  m_evt_handler->begin_map_val_block();
8680  _handle_keyref(ref);
8681  _maybe_skip_whitespace_tokens();
8682  _set_indentation(startindent);
8683  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8684  }
8685  }
8686  else if(first == '!')
8687  {
8688  csubstr tag = _scan_tag();
8689  _c4dbgpf("usty[UNK]: val tag! {}", _prs(tag));
8690  // we need to buffer the tags, as there may be two
8691  // consecutive tags in here
8692  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8693  const size_t line = m_evt_handler->m_curr->pos.line;
8694  _add_annotation(&m_pending_tags, tag, indentation, line);
8695  }
8696  else
8697  {
8698  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
 // NOTE(review): startindent and first were already initialized from these same expressions above; both re-assignments look redundant
8699  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8700  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8701  first = rem.str[0];
8702  ScannedScalar sc;
8703  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
 // a scalar followed by a colon becomes the first key of a new block
 // map; otherwise it is set as the value and _end_stream() is called
8704  if(first == '\'')
8705  {
8706  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8707  sc = _scan_scalar_squot();
8708  if(!_maybe_scan_following_colon())
8709  {
8710  _c4dbgp("usty[UNK]: set as val");
8711  _handle_annotations_before_blck_val_scalar();
8712  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8713  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8714  _end_stream();
8715  }
8716  else
8717  {
8718  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8719  add_flags(RNXT);
8720  _handle_annotations_before_start_mapblck(startline);
8721  m_evt_handler->begin_map_val_block();
8722  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8723  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8724  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8725  _set_indentation(startindent);
8726  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8727  _maybe_skip_whitespace_tokens();
8728  }
8729  }
8730  else if(first == '"')
8731  {
8732  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8733  sc = _scan_scalar_dquot();
8734  if(!_maybe_scan_following_colon())
8735  {
8736  _c4dbgp("usty[UNK]: set as val");
8737  _handle_annotations_before_blck_val_scalar();
8738  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8739  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8740  _end_stream();
8741  }
8742  else
8743  {
8744  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8745  add_flags(RNXT);
8746  _handle_annotations_before_start_mapblck(startline);
8747  m_evt_handler->begin_map_val_block();
8748  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8749  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8750  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8751  _set_indentation(startindent);
8752  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8753  _maybe_skip_whitespace_tokens();
8754  }
8755  }
8756  else if(first == '|')
8757  {
 // block scalars are always set as the value here; no following colon is looked for
8758  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8759  ScannedBlock sb;
8760  _scan_block(&sb, startindent);
8761  _c4dbgp("usty[UNK]: set as val");
8762  _handle_annotations_before_blck_val_scalar();
8763  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8764  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8765  _end_stream();
8766  }
8767  else if(first == '>')
8768  {
8769  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8770  ScannedBlock sb;
8771  _scan_block(&sb, startindent);
8772  _c4dbgp("usty[UNK]: set as val");
8773  _handle_annotations_before_blck_val_scalar();
8774  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8775  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8776  _end_stream();
8777  }
8778  else if(_scan_scalar_plain_unk(&sc))
8779  {
8780  _c4dbgp("usty[UNK]: got a plain scalar");
8781  if(!_maybe_scan_following_colon())
8782  {
8783  _c4dbgp("usty[UNK]: set as val");
8784  _handle_annotations_before_blck_val_scalar();
8785  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8786  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8787  _end_stream();
8788  }
8789  else
8790  {
8791  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8792  add_flags(RNXT);
8793  _handle_annotations_before_start_mapblck(startline);
8794  m_evt_handler->begin_map_val_block();
8795  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8796  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8797  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8798  _set_indentation(startindent);
8799  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8800  _maybe_skip_whitespace_tokens();
8801  }
8802  }
8803  else
8804  {
8805  _c4err("parse error"); // LCOV_EXCL_LINE
8806  }
8807  }
8808  }
8809 }
8810 
8811 
8812 //-----------------------------------------------------------------------------
8813 
8814 template<class EventHandler>
8815 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8816 {
8817  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8818  _RYML_SAVE_TEST_JSON(filename, src);
8819  m_evt_handler->start_parse(filename.str, src);
8820  m_evt_handler->begin_stream();
8821  _reset();
8822  while( ! _finished_file())
8823  {
8824  _scan_line();
8825  while( ! _finished_line())
8826  {
8827  _c4dbgnextline();
8828  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8829  if(has_any(RSEQ))
8830  {
8831  _handle_seq_json();
8832  }
8833  else if(has_any(RMAP))
8834  {
8835  _handle_map_json();
8836  }
8837  else if(has_any(RUNK))
8838  {
8839  _handle_unk_json();
8840  }
8841  else
8842  {
8843  _c4err("internal error"); // LCOV_EXCL_LINE
8844  }
8845  }
8846  if(_finished_file())
8847  break; // it may have finished because of multiline blocks
8848  _line_ended();
8849  }
8850  _end_stream();
8851  m_evt_handler->finish_parse();
8852 }
8853 
8854 
8855 //-----------------------------------------------------------------------------
8856 
8857 template<class EventHandler>
8858 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8859 {
8860  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8861  _RYML_SAVE_TEST_YAML(filename, src);
8862  m_evt_handler->start_parse(filename.str, src);
8863  m_evt_handler->begin_stream();
8864  _reset();
8865  while( ! _finished_file())
8866  {
8867  _scan_line();
8868  while( ! _finished_line())
8869  {
8870  _c4dbgnextline();
8871  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8872  if(has_any(RFLOW))
8873  {
8874  if(has_none(RSEQIMAP))
8875  {
8876  if(has_any(RSEQ))
8877  {
8878  _handle_seq_flow();
8879  }
8880  else
8881  {
8882  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8883  _handle_map_flow();
8884  }
8885  }
8886  else
8887  {
8888  _handle_seq_imap();
8889  }
8890  }
8891  else if(has_any(RBLCK))
8892  {
8893  if(has_any(RSEQ))
8894  {
8895  _handle_seq_block();
8896  }
8897  else
8898  {
8899  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8900  _handle_map_block();
8901  }
8902  }
8903  else if(has_any(RUNK))
8904  {
8905  _handle_unk();
8906  }
8907  else if(has_any(USTY))
8908  {
8909  _handle_usty();
8910  }
8911  else
8912  {
8913  _c4err("internal error"); // LCOV_EXCL_LINE
8914  }
8915  }
8916  if(_finished_file())
8917  break; // it may have finished because of multiline blocks
8918  _line_ended();
8919  }
8920  _end_stream();
8921  m_evt_handler->finish_parse();
8922 }
8923 /** @endcond */
8924 
8925 } // namespace yml
8926 } // namespace c4
8927 
8928 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8929 
8930 #undef _c4dbgnextline
8931 #undef _c4assert
8932 #undef _c4err
8933 
8934 C4_SUPPRESS_WARNING_MSVC_POP
8935 C4_SUPPRESS_WARNING_GCC_CLANG_POP
8936 
8937 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:28
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:192
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1544
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition: common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
bool is_valid_tag_handle(csubstr handle)
Definition: tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition: tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ npos
a null string position
Definition: common.hpp:258
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (i.e. the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK. Reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
Definition: common.hpp:266
@ UTF8
UTF8.
Definition: common.hpp:264
@ UTF16LE
UTF16, Little-Endian.
Definition: common.hpp:265
@ NOBOM
No Byte Order Mark was found.
Definition: common.hpp:263
@ UTF32BE
UTF32, Big-Endian.
Definition: common.hpp:268
@ UTF32LE
UTF32, Little-Endian.
Definition: common.hpp:267
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition: version.cpp:6
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _c4assert(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Definition: common.hpp:288
Options to give to the parser to control its behavior.
Definition: common.hpp:350
utilities for UTF and Byte Order Mark