rapidyaml  0.12.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
6 #endif
7 #ifndef _C4_CHARCONV_HPP_
8 #include "c4/charconv.hpp"
9 #endif
10 #ifndef C4_UTF_HPP_
11 #include "c4/utf.hpp"
12 #endif
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
15 #endif
16 #ifndef _C4_YML_TAG_HPP_
17 #include "c4/yml/tag.hpp"
18 #endif
19 #ifndef _C4_YML_NODE_TYPE_HPP_
20 #include "c4/yml/node_type.hpp"
21 #endif
22 
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
25 #endif
26 
// _c4err(fmt, ...): report a parse error through this->_err(), tagging it
// with the C++ location of the call site (RYML_LOC_HERE()). When RYML_DBG is
// defined, RYML_DEBUG_BREAK() is triggered before the error is raised.
27 #ifdef RYML_DBG
28 #ifndef C4_DUMP_HPP_
29 #include <c4/dump.hpp>
30 #endif
31 #define _c4err(...) \
32  do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33 #else
34 #define _c4err(...) \
35  this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36 #endif
// _c4assert(cond): shorthand for _RYML_ASSERT_PARSE_ that supplies the event
// handler's callbacks and the current position of the parser state.
37 #define _c4assert(...) \
38  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39 
40 
// Helpers to conditionally compile code depending on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(...) keeps its arguments only when it is defined
//  - _RYML_WITHOUT_TAB_TOKENS(...) keeps its arguments only when it is not
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) selects one of the two
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
45 #else
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
49 #endif
50 
51 // helper to export cases to the YAML test suite
// When RYML_SAVE_TEST_YAML is not defined, both macros expand to nothing.
// Otherwise they forward to the two functions declared below; note that the
// JSON macro is also controlled by RYML_SAVE_TEST_YAML. The functions are
// only declared here, not defined.
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
55 #else
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
58 namespace c4 {
59 namespace yml {
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
62 } // namespace yml
63 } // namespace c4
64 #endif
65 
66 
67 // scaffold:
// _c4dbgnextline(): debug trace emitted when a new line is handled. It
// prints a separator through _c4dbgq and then, through _c4dbgt, the line and
// byte offset of the current parser state position.
68 #define _c4dbgnextline() \
69  do { \
70  _c4dbgq("\n-----------"); \
71  _c4dbgt("handling line={}, offset={}B", \
72  m_evt_handler->m_curr->pos.line, \
73  m_evt_handler->m_curr->pos.offset); \
74  } while(0)
75 
76 
77 #if defined(_MSC_VER)
78 # pragma warning(push)
79 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
80 # pragma warning(disable: 4702/*unreachable code*/)
81 #elif defined(__clang__)
82 # pragma clang diagnostic push
83 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
84 # pragma clang diagnostic ignored "-Wformat-nonliteral"
85 # pragma clang diagnostic ignored "-Wold-style-cast"
86 #elif defined(__GNUC__)
87 # pragma GCC diagnostic push
88 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
89 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
90 # pragma GCC diagnostic ignored "-Wold-style-cast"
91 # if __GNUC__ >= 7
92 # pragma GCC diagnostic ignored "-Wduplicated-branches"
93 # endif
94 #endif
95 
96 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
97 
98 namespace c4 {
99 namespace yml {
100 
101 namespace { // NOLINT
102 
103 C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
104 {
105  // avoids reassigning the ptr in substr
106  subject.len = pos != npos ? pos : subject.len;
107 }
108 C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
109 {
110  // avoids reassigning the ptr in substr
111  subject.len = pos != npos ? pos : subject.len;
112 }
113 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
114 {
115  // avoids reassigning the ptr in substr
116  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
117  subject.len = pos;
118 }
119 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
120 {
121  // avoids reassigning the ptr in substr
122  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
123  subject.len = pos;
124 }
125 
126 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
127 {
128  _RYML_ASSERT_BASIC(s.len > 0);
129  _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
130  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
131 }
132 
133 C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
134 {
135  return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))));
136 }
137 
138 inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
139 {
140  _RYML_ASSERT_BASIC(s.begins_with('-'));
141  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
142  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
143  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
144  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
145 }
146 
147 inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
148 {
149  _RYML_ASSERT_BASIC(s.begins_with('.'));
150  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
151  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
152  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
153  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
154 }
155 
156 inline bool _is_doc_token(csubstr s) noexcept
157 {
158  if(s.len >= 3)
159  {
160  switch(s.str[0])
161  {
162  case '-':
163  //return _is_doc_begin_token(s); // this was failing with gcc -O2
164  return (s.str[1] == '-' && s.str[2] == '-')
165  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
166  case '.':
167  //return _is_doc_end_token(s); // this was failing with gcc -O2
168  return (s.str[1] == '.' && s.str[2] == '.')
169  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
170  }
171  }
172  return false;
173 }
174 
175 inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
176 {
177  _RYML_ASSERT_BASIC(s.len);
178  switch(s.str[0])
179  {
180  case 'f':
181  return s.begins_with("false") ? 5u : 0u;
182  case 't':
183  return s.begins_with("true") ? 4u : 0u;
184  case 'n':
185  return s.begins_with("null") ? 4u : 0u;
186  }
187  return 0u;
188 }
189 
190 
191 //-----------------------------------------------------------------------------
192 
193 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
194 {
195  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
196 }
197 
198 //! look for the next newline chars, and jump to the right of those
199 inline substr _from_next_line(substr rem)
200 {
201  size_t nlpos = rem.first_of("\r\n");
202  if(nlpos == csubstr::npos)
203  return {};
204  const char nl = rem[nlpos];
205  rem = rem.right_of(nlpos);
206  if(rem.empty())
207  return {};
208  if(_extend_from_combined_newline(nl, rem.front()))
209  rem = rem.sub(1);
210  return rem;
211 }
212 
213 
214 //-----------------------------------------------------------------------------
215 
216 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
217 {
218  _RYML_ASSERT_BASIC(r[*i] == '\n');
219  size_t numnl_following = 0;
220  ++(*i);
221  for( ; *i < r.len; ++(*i))
222  {
223  if(r.str[*i] == '\n')
224  ++numnl_following;
225  // skip leading whitespace
226  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
227  ;
228  else
229  break;
230  }
231  return numnl_following;
232 }
233 
234 /** Count the '\n' characters found after the newline at position *i of
 * @p r, walking over newlines and whitespace (' ', '\t', '\r').
 *
 * @p i is set to the first non whitespace character after the line
 * (or to r.len when the buffer ends first).
 * @param r the buffer being scanned
 * @param i [in,out] on entry the position of a '\n' in @p r (asserted)
 * @param indentation selects which of the two scanning loops is used;
 *        it also feeds the assertion in the nonzero branch
235  * @return the number of empty lines after the initial position */
236 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
237 {
238  _RYML_ASSERT_BASIC(r[*i] == '\n');
239  size_t numnl_following = 0;
240  ++(*i);
241  if(indentation == 0)
242  {
243  for( ; *i < r.len; ++(*i))
244  {
245  const char c = r.str[*i];
246  if(c == '\n')
247  ++numnl_following;
248  // skip leading whitespace
249  else if(c != ' ' && c != '\t' && c != '\r')
250  break;
251  }
252  }
253  else
254  {
255  for( ; *i < r.len; ++(*i))
256  {
257  char c = r.str[*i];
258  if(c == '\n')
259  {
260  ++numnl_following;
261  // skip the indentation after the newline
// NOTE(review): this inner scan starts with *i still on the '\n' that was
// just counted, so its first test breaks out at once and no indentation is
// consumed here (the outer loop then steps over the '\n'). As written, this
// branch walks the buffer the same way as the indentation == 0 branch.
// Confirm whether the scan was meant to start at *i + 1.
262  size_t stop = *i + indentation;
263  for( ; *i < r.len; ++(*i))
264  {
265  c = r.str[*i];
266  if(c != ' ' && c != '\r')
267  break;
268  _RYML_ASSERT_BASIC(*i < stop); // LCOV_EXCL_LINE
269  }
// stop is only read by the assertion above
270  C4_UNUSED(stop);
271  }
272  // skip leading whitespace
273  else if(c != ' ' && c != '\t' && c != '\r')
274  {
275  break;
276  }
277  }
278  }
279  return numnl_following;
280 }
281 
282 } // anon namespace
283 
284 
285 //-----------------------------------------------------------------------------
286 //-----------------------------------------------------------------------------
287 //-----------------------------------------------------------------------------
288 
289 template<class EventHandler>
291 {
292  _free();
293  _clr();
294 }
295 
296 template<class EventHandler>
298  : m_options(opts)
299  , m_evt_handler(evt_handler)
300  , m_pending_anchors()
301  , m_pending_tags()
302  , m_has_directives_yaml(false)
303  , m_has_directives(false)
304  , m_doc_empty(true)
305  , m_prev_colon(npos)
306  , m_prev_val_end(npos)
307  , m_encoding(NOBOM)
308  , m_newline_offsets()
309  , m_newline_offsets_size(0)
310  , m_newline_offsets_capacity(0)
311 {
312  _RYML_CHECK_BASIC(evt_handler);
313 }
314 
315 template<class EventHandler>
317  : m_options(that.m_options)
318  , m_evt_handler(that.m_evt_handler)
319  , m_pending_anchors(that.m_pending_anchors)
320  , m_pending_tags(that.m_pending_tags)
321  , m_has_directives_yaml(that.m_has_directives_yaml)
322  , m_has_directives(that.m_has_directives)
323  , m_doc_empty(that.m_doc_empty)
324  , m_prev_colon(npos)
325  , m_prev_val_end(npos)
326  , m_encoding(NOBOM)
327  , m_newline_offsets(that.m_newline_offsets)
328  , m_newline_offsets_size(that.m_newline_offsets_size)
329  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
330 {
331  that._clr();
332 }
333 
334 template<class EventHandler>
336  : m_options(that.m_options)
337  , m_evt_handler(that.m_evt_handler)
338  , m_pending_anchors(that.m_pending_anchors)
339  , m_pending_tags(that.m_pending_tags)
340  , m_has_directives_yaml(that.m_has_directives_yaml)
341  , m_has_directives(that.m_has_directives)
342  , m_doc_empty(that.m_doc_empty)
343  , m_prev_colon(npos)
344  , m_prev_val_end(npos)
345  , m_encoding(NOBOM)
346  , m_newline_offsets()
347  , m_newline_offsets_size()
348  , m_newline_offsets_capacity()
349 {
350  if(that.m_newline_offsets_capacity)
351  {
352  _resize_locations(that.m_newline_offsets_capacity);
353  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
354  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
355  m_newline_offsets_size = that.m_newline_offsets_size;
356  }
357 }
358 
359 template<class EventHandler>
361 {
362  _free();
363  m_options = (that.m_options);
364  m_evt_handler = that.m_evt_handler;
365  m_pending_anchors = that.m_pending_anchors;
366  m_pending_tags = that.m_pending_tags;
367  m_has_directives_yaml = that.m_has_directives_yaml;
368  m_has_directives = that.m_has_directives;
369  m_doc_empty = that.m_doc_empty;
370  m_prev_colon = that.m_prev_colon;
371  m_prev_val_end = that.m_prev_val_end;
372  m_encoding = that.m_encoding;
373  m_newline_offsets = (that.m_newline_offsets);
374  m_newline_offsets_size = (that.m_newline_offsets_size);
375  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
376  that._clr();
377  return *this;
378 }
379 
380 template<class EventHandler>
382 {
383  if(&that != this)
384  {
385  _free();
386  m_options = (that.m_options);
387  m_evt_handler = that.m_evt_handler;
388  m_pending_anchors = that.m_pending_anchors;
389  m_pending_tags = that.m_pending_tags;
390  m_has_directives_yaml = that.m_has_directives_yaml;
391  m_has_directives = that.m_has_directives;
392  m_doc_empty = that.m_doc_empty;
393  m_prev_colon = that.m_prev_colon;
394  m_prev_val_end = that.m_prev_val_end;
395  m_encoding = that.m_encoding;
396  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
397  _resize_locations(that.m_newline_offsets_capacity);
398  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
399  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
400  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
401  m_newline_offsets_size = that.m_newline_offsets_size;
402  }
403  return *this;
404 }
405 
406 template<class EventHandler>
408 {
409  m_options = {};
410  m_evt_handler = {};
411  m_pending_anchors = {};
412  m_pending_tags = {};
413  m_has_directives_yaml = false;
414  m_has_directives = false;
415  m_doc_empty = true;
416  m_prev_colon = npos;
417  m_prev_val_end = npos;
418  m_encoding = NOBOM;
419  m_newline_offsets = {};
420  m_newline_offsets_size = {};
421  m_newline_offsets_capacity = {};
422 }
423 
424 template<class EventHandler>
425 void ParseEngine<EventHandler>::_free()
426 {
427  if(m_newline_offsets)
428  {
429  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
430  m_newline_offsets = nullptr;
431  m_newline_offsets_size = 0u;
432  m_newline_offsets_capacity = 0u;
433  }
434 }
435 
436 
437 //-----------------------------------------------------------------------------
438 
439 template<class EventHandler>
440 void ParseEngine<EventHandler>::_reset()
441 {
442  m_pending_anchors = {};
443  m_pending_tags = {};
444  m_has_directives_yaml = false;
445  m_has_directives = false;
446  m_doc_empty = true;
447  m_prev_colon = npos;
448  m_prev_val_end = npos;
449  m_bom_len = 0;
450  m_encoding = NOBOM;
451  m_bom_line = 0;
452  if(m_options.locations())
453  {
454  _prepare_locations();
455  }
456 }
457 
458 
459 //-----------------------------------------------------------------------------
460 
461 template<class EventHandler>
462 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
463 {
464  _c4dbgp("relocate to new arena");
465  const char *pb = prev_arena.str;
466  const char *pe = prev_arena.str + prev_arena.len;
467  #define _ryml_relocate(s) \
468  if((s).str >= pb && (s).str <= pe) \
469  { \
470  (s).str = next_arena.str + ((s).str - pb); \
471  }
472  for(ParserState &st : m_evt_handler->m_stack)
473  {
474  _ryml_relocate(st.line_contents.rem);
475  _ryml_relocate(st.line_contents.full);
476  }
477  _ryml_relocate(m_evt_handler->m_src);
478  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
479  {
480  _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
481  _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
482  }
483  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
484  {
485  _ryml_relocate(m_pending_anchors.annotations[i].str);
486  _ryml_relocate(m_pending_anchors.annotations[i].orig);
487  }
488  {
489  TagDirectives &tds = m_evt_handler->tag_directives();
490  for(size_t i = 0, sz = tds.size(); i < sz; ++i)
491  {
492  _ryml_relocate(tds.m_directives[i].handle);
493  _ryml_relocate(tds.m_directives[i].prefix);
494  }
495  }
496  {
497  TagCache &tch = m_evt_handler->tag_cache();
498  for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
499  {
500  _ryml_relocate(tch.m_entries[i].tag);
501  _ryml_relocate(tch.m_entries[i].resolved);
502  }
503  }
504  if(other)
505  {
506  _ryml_relocate(*other);
507  }
508  #undef _ryml_relocate
509 }
510 
511 /** @cond dev */
512 template<class EventHandler>
513 substr ParseEngine<EventHandler>::_alloc_arena(size_t len, substr *other)
514 {
515  csubstr prev = m_evt_handler->arena();
516  substr out = m_evt_handler->alloc_arena(len);
517  substr curr = m_evt_handler->arena();
518  if(curr.str != prev.str)
519  _relocate_arena(prev, curr, other);
520  return out;
521 }
522 /** @endcond */
523 
524 
525 //-----------------------------------------------------------------------------
526 
527 #ifdef RYML_DBG
/** Dump a description of the current parser state through @p dumpfn: the
 * current YAML source line prefixed by its position, a marker line
 * pointing at the remaining (unparsed) portion of that line, and the flags
 * of the top state.
 *
 * NOTE(review): @p dumpfn is passed through std::forward several times in
 * this function; this relies on the callees not moving from it. */
528 template<class EventHandler>
529 template<class DumpFn>
530 C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
531 {
532  ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
533  LineContents const& C4_RESTRICT lc = st->line_contents;
534  csubstr contents = lc.full.first(lc.num_cols);
535  if(contents.len)
536  {
537  // print the yaml src line
// offs is the width of the "file:line:col: " prefix, used below to align
// the marker line. The 3u matches the three literal characters of
// "{}:{}: "; to_chars() on an empty buffer presumably returns the number
// of characters required -- TODO confirm.
538  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
539  csubstr m_file = m_evt_handler->m_curr->pos.name;
540  if(m_file.len)
541  {
542  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
// the file name and its ':'
543  offs += m_file.len + 1;
544  }
545  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
// show at most 80 characters of the line, followed by "..." when truncated
546  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
547  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
548  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
549  // highlight the remaining portion of the previous line
550  size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
551  size_t lastcol = firstcol + lc.rem.len;
// positions are adjusted because the line above was printed escaped
552  size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
553  size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
// pad up to the first remaining column, then draw '^' followed by '~'
// (the marker is capped at 80 characters)
554  for(size_t i = 0; i < offs + firstcol_adj; ++i)
555  std::forward<DumpFn>(dumpfn)(" ");
556  std::forward<DumpFn>(dumpfn)("^");
557  for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
558  std::forward<DumpFn>(dumpfn)("~");
// columns are reported 1-based
559  _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
560  }
561  else
562  {
// nothing to show for the current line
563  std::forward<DumpFn>(dumpfn)("\n");
564  }
565  // next line: print the state flags
566  {
567  char flagbuf_[128];
568  _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
569  }
570 }
571 
572 template<class EventHandler>
573 void ParseEngine<EventHandler>::_print_state_stack(substr buf) const
574 {
575  if(_dbg_enabled())
576  {
577  for(ParserState const& s : m_evt_handler->m_stack)
578  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
579  }
580 }
581 
582 template<class EventHandler>
583 void ParseEngine<EventHandler>::_print_state_stack() const
584 {
585  char buf[128];
586  _print_state_stack(buf);
587 }
588 #endif
589 
590 
591 //-----------------------------------------------------------------------------
592 
/** Raise a parse error at an explicit YAML location. The event handler is
 * first told to cancel the parse; then err_parse() is called with the
 * handler's callbacks. Declared C4_NORETURN.
 * @param cpploc the location in the C++ source raising the error
 * @param ymlloc the location in the YAML source
 * @param fmt format string for the error message
 * @param args arguments for @p fmt */
593 template<class EventHandler>
594 template<class ...Args>
595 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
596 {
597  m_evt_handler->cancel_parse();
598  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
599 }
600 
/** Raise a parse error at the position of the current parser state. The
 * event handler is first told to cancel the parse; then err_parse() is
 * called with the handler's callbacks. Declared C4_NORETURN.
 * @param cpploc the location in the C++ source raising the error
 * @param fmt format string for the error message
 * @param args arguments for @p fmt */
601 template<class EventHandler>
602 template<class ...Args>
603 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
604 {
605  m_evt_handler->cancel_parse();
606  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
607 }
608 
609 
610 //-----------------------------------------------------------------------------
611 #ifdef RYML_DBG
612 template<class EventHandler>
613 template<class ...Args>
614 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
615 {
616  if(_dbg_enabled())
617  {
618  _dbg_printf(fmt, args...);
619  _dbg_dumper("\n");
620  _fmt_msg(_dbg_dumper);
621  }
622 }
623 #endif
624 
625 
626 //-----------------------------------------------------------------------------
627 template<class EventHandler>
628 bool ParseEngine<EventHandler>::_finished_file() const
629 {
630  bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
631  if(ret)
632  {
633  _c4dbgp("finished file!!!");
634  }
635  return ret;
636 }
637 
638 template<class EventHandler>
639 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
640 {
641  return m_evt_handler->m_curr->line_contents.rem.empty();
642 }
643 
644 
645 //-----------------------------------------------------------------------------
646 
647 template<class EventHandler>
648 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
649 {
650  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
651  {
652  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
653  if(pos == npos)
654  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
655  _c4dbgpf("skip {} whitespace characters", pos);
656  _line_progressed(pos);
657  }
658 }
659 
660 template<class EventHandler>
661 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
662 {
663  if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
664  {
665  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
666  if(pos == npos)
667  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
668  _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
669  _line_progressed(pos);
670  }
671 }
672 
673 template<class EventHandler>
674 template<size_t N>
675 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
676 {
677  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
678  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
679  if(pos == npos)
680  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
681  _c4dbgpf("skip {} characters", pos);
682  _line_progressed(pos);
683 }
684 
685 template<class EventHandler>
686 void ParseEngine<EventHandler>::_skip_comment()
687 {
688  LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
689  const size_t col = m_evt_handler->m_curr->pos.col - 1u;
690  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
691  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
692  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
693  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
694  // raise an error if the comment is not preceded by whitespace
695  if(lc.rem.str != lc.full.str) // not at line beginning
696  {
697  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
698  const char prev = lc.full.str[col - 1u];
699  if(C4_UNLIKELY(prev != ' ' && prev != '\t'))
700  _c4err("comment not preceded by whitespace");
701  }
702  _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
703  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
704 }
705 
706 template<class EventHandler>
707 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
708 {
709  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
710  if(pos != npos)
711  {
712  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
713  {
714  _line_progressed(pos);
715  _skip_comment();
716  }
717  }
718 }
719 
720 template<class EventHandler>
721 void ParseEngine<EventHandler>::_maybe_skip_comment()
722 {
723  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
724  if(pos != npos)
725  {
726  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
727  {
728  _line_progressed(pos);
729  _skip_comment();
730  }
731  }
732  else
733  {
734  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
735  }
736 }
737 
738 template<class EventHandler>
739 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
740 {
741  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
742  if(pos != npos)
743  {
744  if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
745  {
746  // bump pos to skip the colon as well, and check the colon
747  // is followed by space or tab
748  if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
749  {
750  const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
751  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
752  ++pos;
753  else
754  return false;
755  }
756  _line_progressed(pos);
757  return true;
758  }
759  }
760  else
761  {
762  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
763  }
764  return false;
765 }
766 
767 
768 //-----------------------------------------------------------------------------
769 
770 template<class EventHandler>
771 csubstr ParseEngine<EventHandler>::_scan_anchor()
772 {
773  csubstr s = m_evt_handler->m_curr->line_contents.rem;
774  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
775  csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
776  _line_progressed(1u + anchor.len);
777  _maybe_skipchars(' ');
778  return anchor;
779 }
780 
781 template<class EventHandler>
782 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
783 {
784  csubstr s = m_evt_handler->m_curr->line_contents.rem;
785  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
786  _set_first(s, s.first_of(" ,]\t"));
787  _line_progressed(s.len);
788  return s;
789 }
790 
791 template<class EventHandler>
792 csubstr ParseEngine<EventHandler>::_scan_ref_map()
793 {
794  csubstr s = m_evt_handler->m_curr->line_contents.rem;
795  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
796  _set_first(s, s.first_of(" ,}\t"));
797  _line_progressed(s.len);
798  return s;
799 }
800 
801 template<class EventHandler>
802 csubstr ParseEngine<EventHandler>::_scan_tag()
803 {
804  csubstr t = m_evt_handler->m_curr->line_contents.rem;
805  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
806  if(!t.begins_with("!<"))
807  {
808  _c4dbgp("begins with '!'");
809  _set_first(t, t.first_of(" ,]}\t"));
810  if(C4_UNLIKELY(t.first_of("[{") != npos))
811  _c4err("invalid tag");
812  _line_progressed(t.len);
813  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
814  t = _resolve_tag(t);
815  }
816  else
817  {
818  _c4dbgp("begins with '!<'");
819  size_t pos = t.find('>');
820  if(C4_UNLIKELY(pos == npos))
821  _c4err("invalid tag");
822  _set_first_strict(t, pos+1);
823  _line_progressed(t.len);
824  t = t.sub(1);
825  }
826  _maybe_skip_whitespace_tokens();
827  return t;
828 }
829 
830 template<class EventHandler>
831 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
832 {
833  csubstr t = m_evt_handler->m_curr->line_contents.rem;
834  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
835  if(!t.begins_with("!<"))
836  {
837  _c4dbgp("begins with '!'");
838  _set_first(t, t.first_of(" ,\t"));
839  if(C4_UNLIKELY(t.first_of("[{") != npos))
840  _c4err("invalid tag");
841  _line_progressed(t.len);
842  *orig = t;
843  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
844  t = _resolve_tag(t);
845  }
846  else
847  {
848  _c4dbgp("begins with '!<'");
849  size_t pos = t.find('>');
850  if(C4_UNLIKELY(pos == npos))
851  _c4err("invalid tag");
852  _set_first_strict(t, pos+1);
853  _line_progressed(t.len);
854  *orig = t;
855  t = t.sub(1);
856  }
857  _maybe_skip_whitespace_tokens();
858  return t;
859 }
860 
861 
862 //-----------------------------------------------------------------------------
863 
864 template<class EventHandler>
865 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
866 {
867  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
868  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
869  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
870  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
871  if(s.len > 1)
872  {
873  switch(s.str[1])
874  {
875  case ' ':
876  case ',':
877  case '}':
878  case ']':
879  case '\t':
880  if(s.str[0] == ':')
881  {
882  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
883  return false;
884  }
885  else
886  {
887  _c4err("invalid scalar");
888  }
889  break;
890  case '{':
891  case '[':
892  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
893  break;
894  default:
895  break;
896  }
897  }
898  else
899  {
900  if(s.str[0] == '-')
901  _c4err("invalid scalar");
902  return false;
903  }
904  return true;
905 }
906 
907 template<class EventHandler>
908 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
909 {
910  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
911  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
912  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
913  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
914  if(s.len > 1)
915  {
916  switch(s.str[1])
917  {
918  case ' ':
919  case '\t':
920  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
921  return false;
922  case '{':
923  case '}':
924  case '[':
925  case ']':
926  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
927  break;
928  default:
929  break;
930  }
931  }
932  else
933  {
934  return false;
935  }
936  return true;
937 }
938 
939 
940 template<class EventHandler>
941 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
942 {
943  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
944  // it's not a scalar if it starts with any of these characters:
945  switch(s.str[0])
946  {
947  // these are all legal tokens which mean no scalar is starting:
948  case '[':
949  case ']':
950  case '{':
951  case '}':
952  case '&':
953  case '*':
954  case '!':
955  case '|':
956  case '>':
957  case '#':
958  case ',':
959  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
960  return false;
961  // '-' and ':' are illegal at the beginning if not followed by a scalar character
962  case '-':
963  case ':':
964  _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
965  return _is_valid_start_scalar_plain_flow_check_block_token(s);
966  case '?':
967  _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
968  return _is_valid_start_scalar_plain_flow_check_qmrk(s);
969  // everything else is a legal starting character
970  default:
971  return true;
972  }
973 }
974 
975 
976 template<class EventHandler>
977 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
978 {
979  _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs), true));
980  if(s.len > offs + 1)
981  {
982  _c4dbgp("newl[PLAIN]: buffer continues");
983  csubstr next_line = s.sub(offs + 1);
984  size_t next_line_indentation = next_line.first_not_of(' ');
985  if(next_line_indentation != npos)
986  {
987  _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
988  next_line = next_line.first(next_line.first_of("\n\r"));
989  _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
990  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
991  if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
992  {
993  _c4dbgp("newl[PLAIN]: larger indentation");
994  next_line = next_line.sub(next_line_indentation);
995  }
996  else if(C4_UNLIKELY(next_line.len && next_line.triml(' ').len))
997  {
998  _c4dbgp("newl[PLAIN]: err, smaller indentation");
999  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1000  _line_ended();
1001  _scan_line();
1002  if(m_evt_handler->m_curr->line_contents.indentation != npos)
1003  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1004  _c4err("parse error"); // cannot reduce indentation here
1005  }
1006  _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1007  if(next_line.len)
1008  {
1009  next_line = next_line.triml(" \t");
1010  if(next_line.begins_with_any(",]#:")) // any of the characters we're interested in
1011  {
1012  _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[0]);
1013  return false;
1014  }
1015  }
1016  }
1017  }
1018  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1019  _line_ended();
1020  _scan_line();
1021  return true;
1022 }
1023 
1024 template<class EventHandler>
1025 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1026 {
1027  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1028  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1029  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1030  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1031  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1032 
1033  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1034  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1035 
1036  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1037  return false;
1038 
1039  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1040  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1041 
1042  _c4dbgp("scanning seqflow scalar...");
1043 
1044  bool needs_filter = false;
1045  size_t col = 0; // zero-based column
1046  size_t offs = 0; // offset
1047  for( ; offs < s.len; ++offs, ++col)
1048  {
1049  const char c = s.str[offs];
1050  switch(c)
1051  {
1052  case ',':
1053  case ']':
1054  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1055  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1056  goto ended_scalar;
1057  case '\n':
1058  _c4dbgpf("found '\\n' at col={}", col);
1059  if(!_scan_scalar_plain_handle_newline(s, offs))
1060  goto ended_scalar;
1061  col = (size_t)-1; // so that col is 0 in the next loop iteration
1062  needs_filter = true;
1063  break;
1064  case '\r':
1065  --col; // don't count \r when calling _line_progressed()
1066  needs_filter = true;
1067  break;
1068  case ':':
1069  _c4dbgp("found suspicious ':'");
1070  if(s.len > offs + 1)
1071  {
1072  char next = s.str[offs + 1];
1073  _c4dbgpf("next char is '{}'", _c4prc(next));
1074  if(next == '\r')
1075  {
1076  csubstr after = s.sub(offs + 1).triml('\r');
1077  if(after.len)
1078  {
1079  next = after.str[0];
1080  _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1081  }
1082  }
1083  // no else here.
1084  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1085  {
1086  _c4dbgp("map starting!");
1087  goto ended_scalar;
1088  }
1089  else
1090  {
1091  _c4dbgp("':' nothing to see here");
1092  }
1093  }
1094  else
1095  {
1096  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1097  _line_progressed(col);
1098  _c4err("missing termination: '{}'", c); // noreturn
1099  }
1100  break;
1101  case '#':
1102  {
1103  _c4dbgp("found suspicious '#'");
1104  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1105  char prev = s.str[offs - 1];
1106  if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
1107  {
1108  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1109  goto ended_scalar;
1110  }
1111  }
1112  break;
1113  case '[':
1114  case '{':
1115  case '}':
1116  _line_progressed(col); // advance to report the proper position in the error
1117  _c4err("invalid character: '{}'", c); // noreturn
1118  case '-':
1119  case '.':
1120  _c4dbgpf("doc token character: '{}', offs={}", c, offs);
1121  if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1122  {
1123  _c4dbgp("at line beginning");
1124  if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1125  {
1126  _c4err("parse error"); // no return
1127  }
1128  }
1129  default:
1130  ;
1131  }
1132  }
1133 
1134 ended_scalar:
1135 
1136  _line_progressed(col);
1137  _set_first(s, offs);
1138  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1139  sc->needs_filter = needs_filter;
1140 
1141  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1142 
1143  return true;
1144 }
1145 
1146 template<class EventHandler>
1147 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1148 {
1149  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1150  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1151  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1152  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1153  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1154 
1155  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1156  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1157 
1158  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1159  return false;
1160 
1161  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1162  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1163 
1164  _c4dbgp("scanning mapflow scalar...");
1165 
1166  bool needs_filter = false;
1167  size_t col = 0; // zero-based column
1168  size_t offs = 0; // offset
1169  for( ; offs < s.len; ++offs, ++col)
1170  {
1171  const char c = s.str[offs];
1172  switch(c)
1173  {
1174  case ',':
1175  case '}':
1176  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1177  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1178  goto ended_scalar;
1179  case '\n':
1180  _c4dbgpf("found '\\n' at col={}", col);
1181  if(!_scan_scalar_plain_handle_newline(s, offs))
1182  goto ended_scalar;
1183  col = (size_t)-1; // so that col is 0 in the next loop iteration
1184  needs_filter = true;
1185  break;
1186  case '\r':
1187  --col; // don't count \r when calling _line_progressed()
1188  needs_filter = true;
1189  break;
1190  case ':':
1191  _c4dbgpf("found ':'", c);
1192  if(s.len == offs+1)
1193  break;
1194  {
1195  const char next = s.str[offs+1];
1196  _c4dbgpf("next='{}'", c);
1197  if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
1198  {
1199  _c4dbgpf("found terminating character: '{}'", c);
1200  goto ended_scalar;
1201  }
1202  }
1203  break;
1204  case '{':
1205  case '[':
1206  _line_progressed(col);
1207  _c4err("invalid character: '{}'", c); // noreturn
1208  break;
1209  case ']':
1210  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1211  goto ended_scalar;
1212  default:
1213  ;
1214  }
1215  }
1216 
1217 ended_scalar:
1218 
1219  _line_progressed(col);
1220  s = s.first(offs);
1221  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1222  sc->needs_filter = needs_filter;
1223 
1224  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1225 
1226  return sc->scalar.len > 0u;
1227 }
1228 
1229 template<class EventHandler>
1230 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1231 {
1232  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1233  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1234  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1235  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1236 
1237  substr s = m_evt_handler->m_curr->line_contents.rem;
1238  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1239  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1240 
1241  _c4dbgp("seq_json: scanning scalar...");
1242 
1243  switch(s.str[0])
1244  {
1245  case ']':
1246  case '{':
1247  case ',':
1248  _c4dbgp("seq_json: not a scalar.");
1249  return false;
1250  }
1251 
1252  {
1253  const size_t len = _begins_with_special_json_scalar(s);
1254  if(len)
1255  {
1256  char c = s.len > len ? s.str[len] : ',';
1257  if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1258  {
1259  sc->scalar = s.first(len);
1260  sc->needs_filter = false;
1261  _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1262  _line_progressed(len);
1263  return true;
1264  }
1265  else
1266  {
1267  return false;
1268  }
1269  }
1270  }
1271 
1272  // must be a number or special scalar
1273  size_t i = 0;
1274  for( ; i < s.len; ++i)
1275  {
1276  const char c = s.str[i];
1277  switch(c)
1278  {
1279  case ',':
1280  case ']':
1281  case ' ':
1282  case '\t':
1283  _c4dbgpf("seq_json: found terminating character: '{}'", c);
1284  goto ended_scalar;
1285  default:
1286  ;
1287  }
1288  }
1289 
1290 ended_scalar:
1291 
1292  if(C4_LIKELY(i > 0))
1293  {
1294  _line_progressed(i);
1295  sc->scalar = s.first(i);
1296  sc->needs_filter = false;
1297  _c4dbgpf("seq_json: scalar was {}", _prs(sc->scalar, /*escape*/true));
1298  }
1299 
1300  return true;
1301 }
1302 
1303 template<class EventHandler>
1304 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1305 {
1306  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1307  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1308  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1309  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1310  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1311 
1312  substr s = m_evt_handler->m_curr->line_contents.rem;
1313  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1314  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1315 
1316  _c4dbgp("scanning scalar...");
1317 
1318  {
1319  const size_t len = _begins_with_special_json_scalar(s);
1320  if(len)
1321  {
1322  char c = s.len > len ? s.str[len] : ',';
1323  _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1324  if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1325  {
1326  sc->scalar = s.first(len);
1327  sc->needs_filter = false;
1328  _c4dbgpf("special json scalar: '{}'", _prs(sc->scalar));
1329  _line_progressed(len);
1330  return true;
1331  }
1332  else
1333  {
1334  return false;
1335  }
1336  }
1337  }
1338 
1339  // must be a number
1340  size_t i = 0;
1341  for( ; i < s.len; ++i)
1342  {
1343  const char c = s.str[i];
1344  switch(c)
1345  {
1346  case ',':
1347  case '}':
1348  case ' ':
1349  case '\t':
1350  _c4dbgpf("found terminating character: '{}'", c);
1351  goto ended_scalar;
1352  default:
1353  ;
1354  }
1355  }
1356 
1357 ended_scalar:
1358 
1359  if(C4_LIKELY(i > 0))
1360  {
1361  _line_progressed(i);
1362  sc->scalar = s.first(i);
1363  sc->needs_filter = false;
1364  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1365  return true;
1366  }
1367 
1368  return false;
1369 }
1370 
1371 template<class EventHandler>
1372 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1373 {
1374  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1375  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1376 }
1377 
1378 template<class EventHandler>
1379 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1380 {
1381  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1382  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1383 }
1384 
1385 template<class EventHandler>
1386 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1387 {
1388  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1389  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1390  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1391 
1392  substr s = m_evt_handler->m_curr->line_contents.rem;
1393  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1394  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1395 
1396  switch(s.str[0])
1397  {
1398  case '-':
1399  if(_is_blck_token(s))
1400  {
1401  return false;
1402  }
1403  else if(_is_doc_begin(s))
1404  {
1405  _c4dbgp("token is doc start");
1406  return false;
1407  }
1408  break;
1409  case ':':
1410  case '?':
1411  if(_is_blck_token(s))
1412  return false;
1413  break;
1414  case '[':
1415  case '{':
1416  case '&':
1417  case '*':
1418  case '!':
1419  case '\t':
1420  case ',':
1421  case '%':
1422  return false;
1423  case '.':
1424  if(_is_doc_end(s))
1425  {
1426  _c4dbgp("token is doc end");
1427  return false;
1428  }
1429  break;
1430  }
1431 
1432  _c4dbgpf("plain scalar! indentation={}", indentation);
1433 
1434  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1435  const size_t start_line = m_evt_handler->m_curr->pos.line;
1436 
1437  bool needs_filter = false;
1438  while(true)
1439  {
1440  _c4dbgpf("plain scalar line: {}", _prs(s));
1441  for(size_t i = 0; i < s.len; ++i)
1442  {
1443  const char curr = s.str[i];
1444  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1445  switch(curr)
1446  {
1447  case ':':
1448  _c4dbgpf("[{}]: got suspicious ':'", i);
1449  // are there more characters?
1450  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1451  {
1452  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1453  _line_progressed(i);
1454  // ': ' is accepted only on the first line
1455  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1456  {
1457  _c4dbgp("start line. scalar ends here");
1458  goto ended_scalar;
1459  }
1460  else
1461  {
1462  _c4err("multiline scalars cannot be used as implicit keys");
1463  }
1464  }
1465  else
1466  {
1467  size_t j = i;
1468  while(j + 1 < s.len && s.str[j+1] == ':')
1469  {
1470  _c4dbgp("skip colon");
1471  ++j;
1472  }
1473  i = j > i ? j-1 : i;
1474  _c4dbgp("nothing to see here");
1475  }
1476  break;
1477  case '#':
1478  _c4dbgp("got suspicious '#'");
1479  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1480  {
1481  _c4dbgp("comment! scalar ends here");
1482  _line_progressed(i);
1483  goto ended_scalar;
1484  }
1485  else
1486  {
1487  _c4dbgp("nothing to see here");
1488  }
1489  break;
1490  }
1491  }
1492  _line_progressed(s.len);
1493  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1494  next_peeked = next_peeked.trimr("\n\r");
1495  const size_t next_indentation = next_peeked.first_not_of(' ');
1496  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1497  if(next_indentation < indentation)
1498  {
1499  _c4dbgp("smaller indentation! scalar ended");
1500  goto ended_scalar;
1501  }
1502  else if(next_indentation == 0 && next_peeked.len > 0)
1503  {
1504  const char first = next_peeked.str[0];
1505  switch(first)
1506  {
1507  case '-':
1508  _c4dbgpf("doc begin? peeked={}", _prs(next_peeked, size_t(3)));
1509  if(_is_doc_begin_token(next_peeked))
1510  {
1511  _c4dbgp("doc begin! scalar ended");
1512  goto ended_scalar;
1513  }
1514  break;
1515  case '.':
1516  _c4dbgpf("doc end? peeked={}", _prs(next_peeked, size_t(3)));
1517  if(_is_doc_end_token(next_peeked))
1518  {
1519  _c4dbgp("doc end! scalar ended");
1520  goto ended_scalar;
1521  }
1522  break;
1523  }
1524  }
1525  // load with next line
1526  _c4dbgp("next line!");
1527  if(!_finished_file())
1528  {
1529  _c4dbgp("next line!");
1530  _line_ended();
1531  _scan_line();
1532  }
1533  else
1534  {
1535  _c4dbgp("file finished!");
1536  goto ended_scalar;
1537  }
1538  s = m_evt_handler->m_curr->line_contents.rem;
1539  needs_filter = true;
1540  }
1541 
1542 ended_scalar:
1543 
1544  sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1545  sc->needs_filter = needs_filter;
1546 
1547  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1548 
1549  return true;
1550 }
1551 
1552 template<class EventHandler>
1553 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1554 {
1555  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1556  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1557  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1558  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1559  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1560  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1561  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1562 }
1563 
1564 template<class EventHandler>
1565 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1566 {
1567  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1568  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1569  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1570  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1571  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1572  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1573 }
1574 
1575 template<class EventHandler>
1576 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1577 {
1578  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1579  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1580 }
1581 
1582 
1583 //-----------------------------------------------------------------------------
1584 
1585 template<class EventHandler>
1586 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1587 {
1588  substr rem{}; // declare here because of the goto
1589  size_t nlpos{}; // declare here because of the goto
1590  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1591  if(pos >= _buf().len)
1592  goto next_is_empty;
1593 
1594  // look for the next newline chars, and jump to the right of those
1595  rem = _from_next_line(_buf().sub(pos));
1596  if(rem.empty())
1597  goto next_is_empty;
1598 
1599  // now get everything up to and including the following newline chars
1600  nlpos = rem.first_of("\r\n");
1601  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1602  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1603  rem = rem.left_of(nlpos, /*include_pos*/true);
1604 
1605  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1606  return rem;
1607 
1608 next_is_empty:
1609  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1610  return {};
1611 }
1612 
1613 //-----------------------------------------------------------------------------
1614 
1615 template<class EventHandler>
1616 void ParseEngine<EventHandler>::_scan_line()
1617 {
1618  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1619  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1620  else
1621  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1622 }
1623 
1624 template<class EventHandler>
1625 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1626 {
1627  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1628  m_evt_handler->m_curr->pos.line,
1629  m_evt_handler->m_curr->line_contents.full.len,
1630  ahead, m_evt_handler->m_curr->pos.col,
1631  m_evt_handler->m_curr->pos.col+ahead,
1632  m_evt_handler->m_curr->pos.offset,
1633  m_evt_handler->m_curr->pos.offset+ahead);
1634  m_evt_handler->m_curr->pos.offset += ahead;
1635  m_evt_handler->m_curr->pos.col += ahead;
1636  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1637  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1638 }
1639 
1640 template<class EventHandler>
1641 void ParseEngine<EventHandler>::_line_ended()
1642 {
1643  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1644  m_evt_handler->m_curr->pos.line,
1645  m_evt_handler->m_curr->line_contents.full.len,
1646  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1647  m_evt_handler->m_curr->pos.col, 1);
1648  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1649  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1650  ++m_evt_handler->m_curr->pos.line;
1651  m_evt_handler->m_curr->pos.col = 1;
1652 }
1653 
1654 template<class EventHandler>
1655 void ParseEngine<EventHandler>::_line_ended_undo()
1656 {
1657  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1658  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1659  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1660  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1661  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1662  m_evt_handler->m_curr->pos.offset -= delta;
1663  --m_evt_handler->m_curr->pos.line;
1664  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1665  // don't forget to undo also the changes to the remainder of the line
1666  //_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1667  m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1668 }
1669 
1670 
1671 //-----------------------------------------------------------------------------
1672 template<class EventHandler>
1673 void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1674 {
1675  m_evt_handler->m_curr->indref = indentation;
1676  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1677 }
1678 
1679 template<class EventHandler>
1680 void ParseEngine<EventHandler>::_save_indentation()
1681 {
1682  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1683  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1684  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1685 }
1686 
1687 template<class EventHandler>
1688 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1689 {
1690  _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1691  m_prev_val_end = m_evt_handler->m_curr->pos.line;
1692 }
1693 
1694 
1695 //-----------------------------------------------------------------------------
1696 
1697 template<class EventHandler>
1698 void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1699 {
1700  _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1701  m_evt_handler->actually_val_is_first_key_of_new_map_block();
1702  addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1703  _set_indentation(orig_indent);
1704  _maybe_skip_whitespace_tokens();
1705 }
1706 
1707 template<class EventHandler>
1708 void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1709 {
1710  // this is called AFTER ending the flow container,
1711  // so now we're at the parent container's scope
1712  if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1713  {
1714  _c4dbgp("flow container: end as vanilla block map key!");
1715  if(C4_UNLIKELY(multiline))
1716  _c4err("multiline key is invalid");
1717  if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1718  _c4err("could not find ':' colon after key");
1719  _maybe_skip_whitespace_tokens();
1720  addrem_flags(RVAL, RKEY|RKCL|RNXT);
1721  }
1722  else if(has_none(RFLOW))
1723  {
1724  _c4dbgp("end_flow_container: now not in flow!");
1725  if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1726  {
1727  if(C4_UNLIKELY(multiline))
1728  _c4err("multiline key is invalid");
1729  _flow_container_was_a_key(orig_indent);
1730  }
1731  else
1732  {
1733  _c4dbgp("end_flow_container: end map as key!");
1734  }
1735  }
1736  else if(has_any(RSEQ))
1737  {
1738  _c4dbgp("end_flow_container: now in a flow seq");
1739  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1740  _mark_seqflow_val_end();
1741  }
1742 }
1743 
1744 template<class EventHandler>
1745 void ParseEngine<EventHandler>::_end_map_flow()
1746 {
1747  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1748  size_t orig_indent = m_evt_handler->m_curr->indref;
1749  _c4dbgpf("mapflow: end, multiline={}", multiline);
1750  m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1751  _end_flow_container(orig_indent, multiline);
1752 }
1753 
1754 template<class EventHandler>
1755 void ParseEngine<EventHandler>::_end_seq_flow()
1756 {
1757  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758  size_t orig_indent = m_evt_handler->m_curr->indref;
1759  _c4dbgpf("seqflow: end, multiline={}", multiline);
1760  m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1761  _end_flow_container(orig_indent, multiline);
1762 }
1763 
1764 template<class EventHandler>
1765 void ParseEngine<EventHandler>::_end_map_blck()
1766 {
1767  _c4dbgp("mapblck: end");
1768  if(has_any(RKCL|RVAL))
1769  {
1770  _c4dbgp("mapblck: set missing val");
1771  _handle_annotations_before_blck_val_scalar();
1772  m_evt_handler->set_val_scalar_plain_empty();
1773  }
1774  else if(has_any(QMRK))
1775  {
1776  _c4dbgp("mapblck: set missing keyval");
1777  _handle_annotations_before_blck_key_scalar();
1778  m_evt_handler->set_key_scalar_plain_empty();
1779  _handle_annotations_before_blck_val_scalar();
1780  m_evt_handler->set_val_scalar_plain_empty();
1781  }
1782  m_evt_handler->end_map_block();
1783 }
1784 
1785 template<class EventHandler>
1786 void ParseEngine<EventHandler>::_end_seq_blck()
1787 {
1788  if(has_any(RVAL))
1789  {
1790  _c4dbgp("seqblck: set missing val");
1791  _handle_annotations_before_blck_val_scalar();
1792  m_evt_handler->set_val_scalar_plain_empty();
1793  }
1794  m_evt_handler->end_seq_block();
1795 }
1796 
1797 template<class EventHandler>
1798 void ParseEngine<EventHandler>::_end2_map()
1799 {
1800  _c4dbgp("map: end");
1801  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1802  if(has_any(RBLCK))
1803  {
1804  _end_map_blck();
1805  }
1806  else
1807  {
1808  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1809  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1810  m_evt_handler->_pop();
1811  }
1812 }
1813 
1814 template<class EventHandler>
1815 void ParseEngine<EventHandler>::_end2_seq()
1816 {
1817  _c4dbgp("seq: end");
1818  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1819  if(has_any(RBLCK))
1820  {
1821  _end_seq_blck();
1822  }
1823  else
1824  {
1825  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1826  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1827  m_evt_handler->_pop();
1828  }
1829 }
1830 
1831 template<class EventHandler>
1832 void ParseEngine<EventHandler>::_begin2_doc()
1833 {
1834  _c4dbgp("begin_doc");
1835  m_has_directives_yaml = false;
1836  m_has_directives = false;
1837  m_doc_empty = true;
1838  add_flags(RDOC);
1839  m_evt_handler->begin_doc();
1840  m_evt_handler->m_curr->indref = 0; // ?
1841 }
1842 
1843 template<class EventHandler>
1844 void ParseEngine<EventHandler>::_begin2_doc_expl()
1845 {
1846  _c4dbgp("begin_doc_expl");
1847  m_has_directives_yaml = false;
1848  m_has_directives = false;
1849  m_doc_empty = true;
1850  add_flags(RDOC);
1851  m_evt_handler->begin_doc_expl();
1852  m_evt_handler->m_curr->indref = 0; // ?
1853 }
1854 
1855 template<class EventHandler>
1856 void ParseEngine<EventHandler>::_end2_doc()
1857 {
1858  _c4dbgp("doc: end");
1859  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1860  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1861  {
1862  _c4dbgp("doc was empty; add empty val");
1863  _handle_annotations_before_blck_val_scalar();
1864  m_evt_handler->set_val_scalar_plain_empty();
1865  }
1866  m_evt_handler->end_doc();
1867  m_bom_len = 0;
1868 }
1869 
1870 template<class EventHandler>
1871 void ParseEngine<EventHandler>::_end2_doc_expl()
1872 {
1873  _c4dbgp("doc: end");
1874  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1875  {
1876  _c4dbgp("doc: no children; add empty val");
1877  _handle_annotations_before_blck_val_scalar();
1878  m_evt_handler->set_val_scalar_plain_empty();
1879  }
1880  m_evt_handler->end_doc_expl();
1881  m_bom_len = 0;
1882 }
1883 
1884 template<class EventHandler>
1885 void ParseEngine<EventHandler>::_maybe_begin_doc()
1886 {
1887  if(has_none(RDOC))
1888  {
1889  _c4dbgp("doc must be started");
1890  _begin2_doc();
1891  }
1892 }
1893 template<class EventHandler>
1894 void ParseEngine<EventHandler>::_maybe_end_doc()
1895 {
1896  if(has_any(RDOC))
1897  {
1898  _c4dbgp("doc must be finished");
1899  _end2_doc();
1900  }
1901  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1902  {
1903  _c4dbgp("no doc to finish, but pending annotations");
1904  m_evt_handler->begin_doc();
1905  _handle_annotations_before_blck_val_scalar();
1906  m_evt_handler->set_val_scalar_plain_empty();
1907  m_evt_handler->end_doc();
1908  }
1909 }
1910 
1911 template<class EventHandler>
1912 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1913 {
1914  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1915  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1916  _c4dbgp("root is RDOC");
1917  if(m_evt_handler->m_curr->level != 0)
1918  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1919  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1920 }
1921 
1922 /** Check whether the current parse tokens are trailing on the
1923  * previous doc, and raise an error if they are */
1924 template<class EventHandler>
1925 void ParseEngine<EventHandler>::_check_trailing_doc_token()
1926 {
1927  const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1928  const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1929  const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1930  _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1931  if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
1932  _c4err("parse error");
1933 }
1934 
1935 template<class EventHandler>
1936 void ParseEngine<EventHandler>::_end_doc_suddenly()
1937 {
1938  _c4dbgp("end doc suddenly");
1939  _end_doc_suddenly__pop();
1940  _end2_doc_expl();
1941  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1942 }
1943 
1944 template<class EventHandler>
1945 void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1946 {
1947  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1948  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1949  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
1950  {
1951  _c4err("parse error");
1952  }
1953 }
1954 
1955 template<class EventHandler>
1956 void ParseEngine<EventHandler>::_start_doc_suddenly()
1957 {
1958  _c4dbgp("start doc suddenly");
1959  _end_doc_suddenly__pop();
1960  _end2_doc();
1961  _begin2_doc_expl();
1962 }
1963 
1964 template<class EventHandler>
1965 void ParseEngine<EventHandler>::_end_stream()
1966 {
1967  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1968  if(C4_UNLIKELY(has_all(RSEQ|RFLOW)))
1969  _c4err("missing terminating ]");
1970  else if(C4_UNLIKELY(has_all(RMAP|RFLOW)))
1971  _c4err("missing terminating }");
1972  if(m_evt_handler->m_stack.size() > 1)
1973  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1974  if(has_all(RDOC))
1975  {
1976  _end2_doc();
1977  }
1978  else if(has_all(RTOP|RUNK))
1979  {
1980  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1981  {
1982  if(m_doc_empty)
1983  {
1984  m_evt_handler->begin_doc();
1985  _handle_annotations_before_blck_val_scalar();
1986  m_evt_handler->set_val_scalar_plain_empty();
1987  m_evt_handler->end_doc();
1988  }
1989  }
1990  }
1991  m_evt_handler->end_stream();
1992  if(C4_UNLIKELY(m_has_directives))
1993  _c4err("directives cannot be used without a document");
1994 }
1995 
1996 
1997 template<class EventHandler>
1998 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1999 {
2000  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2001  while(m_evt_handler->m_curr != popto)
2002  {
2003  if(has_any(RSEQ))
2004  {
2005  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2006  _end2_seq();
2007  }
2008  else if(has_any(RMAP))
2009  {
2010  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2011  _end2_map();
2012  }
2013  else
2014  {
2015  break;
2016  }
2017  }
2018  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2019 }
2020 
2021 template<class EventHandler>
2022 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2023 {
2024  // search the stack frame to jump to based on its indentation
2025  using state_type = typename EventHandler::state;
2026  state_type const* popto = nullptr;
2027  auto &stack = m_evt_handler->m_stack;
2028  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2029  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2030  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2031  #ifdef RYML_DBG
2032  _print_state_stack();
2033  #endif
2034  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2035  {
2036  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2037  if(s->indref == ind)
2038  {
2039  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2040  popto = s;
2041  break;
2042  }
2043  }
2044  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2045  {
2046  _c4err("parse error: incorrect indentation?");
2047  }
2048  _handle_indentation_pop(popto);
2049 }
2050 
2051 template<class EventHandler>
2052 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2053 {
2054  // search the stack frame to jump to based on its indentation
2055  using state_type = typename EventHandler::state;
2056  auto &stack = m_evt_handler->m_stack;
2057  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2058  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2059  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2060  state_type const* popto = nullptr;
2061  #ifdef RYML_DBG
2062  char flagbuf_[128];
2063  _print_state_stack(flagbuf_);
2064  #endif
2065  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2066  {
2067  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2068  if(s->indref < ind)
2069  {
2070  break;
2071  }
2072  else if(s->indref == ind)
2073  {
2074  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
2075  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2076  {
2077  break;
2078  }
2079  popto = s;
2080  if(has_all(RSEQ|RBLCK, s))
2081  {
2082  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2083  const size_t first = rem.first_not_of(' ');
2084  _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2085  rem = rem.right_of(first, true);
2086  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2087  if(rem.begins_with('-') && _is_blck_token(rem))
2088  {
2089  _c4dbgp("parent was indentless seq");
2090  break;
2091  }
2092  }
2093  }
2094  }
2095  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2096  {
2097  _c4err("parse error: incorrect indentation?");
2098  }
2099  _handle_indentation_pop(popto);
2100 }
2101 
2102 
2103 //-----------------------------------------------------------------------------
2104 template<class EventHandler>
2105 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2106 {
2107  if(C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY)))
2108  {
2109  _c4err("multiline quoted keys are invalid");
2110  }
2111  else // check contextual indentation
2112  {
2113  const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2114  _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2115  if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2116  {
2117  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2118  m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2119  m_evt_handler->m_curr->pos);
2120  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2121  _c4dbgpf("trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem, true));
2122  if(C4_UNLIKELY(!!trimmed.len))
2123  {
2124  _c4err("bad indentation");
2125  }
2126  }
2127  }
2128 }
2129 
2130 
2131 //-----------------------------------------------------------------------------
2132 template<class EventHandler>
2133 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2134 {
2135  // quoted scalars can spread over multiple lines!
2136  // nice explanation here: http://yaml-multiline.info/
2137 
2138  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2139 
2140  // a span to the end of the file, skipping the opening quote
2141  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2142  _line_progressed(1); // advance over the opening quote
2143 
2144  bool needs_filter = false;
2145  size_t pos = npos; // find the pos of the matching quote
2146  while( ! _finished_file())
2147  {
2148  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2149  _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2150  if(C4_UNLIKELY(_is_doc_token(line)))
2151  _c4err("token can not appear at line begin");
2152  for(size_t i = 0; i < line.len; ++i)
2153  {
2154  const char curr = line.str[i];
2155  if(curr == '\'') // single quotes are escaped with two single quotes
2156  {
2157  const char next = i+1 < line.len ? line.str[i+1] : '~';
2158  if(next != '\'') // so just look for the first quote
2159  { // without another after it
2160  _line_progressed(i + 1); // progress beyond the quote
2161  pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2162  goto found_close;
2163  }
2164  else
2165  {
2166  needs_filter = true; // needs filter to remove escaped quotes
2167  ++i; // skip the escaped quote
2168  }
2169  }
2170  }
2171 
2172  needs_filter = true;
2173  _line_progressed(line.len);
2174  _line_ended();
2175  _scan_line();
2176  _check_valid_newline_in_quoted_scalar();
2177  }
2178 
2179  _c4err("reached end of file while looking for closing quote");
2180 
2181 found_close:
2182 
2183  _c4dbgpf("found closing quote at: {}", pos);
2184  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2185  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2186  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2187  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2188  _set_first_strict(s, pos);
2189 
2190  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2191 
2192  return ScannedScalar { s, needs_filter };
2193 }
2194 
2195 
2196 //-----------------------------------------------------------------------------
/** Scan a double-quoted scalar. The current position must be at the
 * opening quote (this is asserted).
 *
 * The rest of the current line, and then each following line, is
 * searched for the closing quote, which is the first `"` that is not
 * skipped over as part of a `\"` or `\\` escape.
 *
 * The returned ScannedScalar holds the span starting right after the
 * opening quote, and a flag which is set when the scalar has a
 * backslash or spans more than one line.
 *
 * An error is raised when a document token begins one of the lines,
 * when _check_valid_newline_in_quoted_scalar() rejects a
 * continuation line, or when the file ends before the closing quote
 * is found. */
2197 template<class EventHandler>
2198 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2199 {
2200  // quoted scalars can spread over multiple lines!
2201  // nice explanation here: http://yaml-multiline.info/
2202 
2203  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2204 
2205  // a span to the end of the file, skipping the opening quote
2206  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2207  _line_progressed(1); // advance over the opening quote
2208 
2209  bool needs_filter = false;
2210  size_t pos = npos; // find the pos of the matching quote
2211  while( ! _finished_file())
2212  {
 // compiler-specific guard (GCC 13 only): the remainder of the line
 // must be read anew on each iteration, because _scan_line() below
 // moves on to the next line
2213  #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2214  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2215  #endif
2216  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2217  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2218  if(C4_UNLIKELY(_is_doc_token(rem)))
2219  _c4err("token can not appear at line begin");
2220  for(size_t i = 0; i < rem.len; ++i)
2221  {
2222  const char curr = rem.str[i];
2223  // every \ is an escape
2224  if(curr == '\\')
2225  {
 // '~' is a placeholder for "no next char on this line"
2226  const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2227  needs_filter = true;
 // only \" and \\ need to be stepped over here: their second
 // char would otherwise be taken for a closing quote or for
 // the start of another escape
2228  if(next == '"' || next == '\\')
2229  ++i;
2230  }
2231  else if(curr == '"')
2232  {
2233  _line_progressed(i + 1); // progress beyond the quote
 // i is relative to the line; make it relative to the scalar
2234  pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2235  goto found_close;
2236  }
2237  }
2238 
 // no closing quote on this line: go on to the next line
2239  // leading whitespace also needs filtering
2240  needs_filter = true;
2241  _line_progressed(rem.len);
2242  _line_ended();
2243  _scan_line();
2244  _check_valid_newline_in_quoted_scalar();
2245  }
2246 
2247  _c4err("reached end of file while looking for closing quote");
2248 
2249 found_close:
2250 
2251  _c4dbgpf("found closing quote at: {}", pos);
2252  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
 // NOTE(review): pos is a size_t, so the next assertion always holds
2253  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
 // NOTE(review): s has not been modified since it was set above, where
 // it is described as a span to the end of the file - confirm whether
 // the check on *s.end() was meant to run after _set_first_strict()
2254  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2255  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
 // presumably trims s to its first pos characters, ie to the contents
 // between the quotes - TODO confirm against _set_first_strict()
2256  _set_first_strict(s, pos);
2257 
2258  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2259 
2260  return ScannedScalar{s, needs_filter};
2261 }
2262 
2263 
2264 //-----------------------------------------------------------------------------
/** Scan a block scalar. The remainder of the current line must begin
 * with the block indicator: `|` or `>` (this is asserted).
 *
 * First the block header is parsed: an optional chomping indicator
 * (`-` for strip, `+` for keep, clip when absent) and an optional
 * indentation indicator, which must be a single nonzero digit, and
 * to which the indentation reference of the current state is added.
 * Anything else in the header must be a comment preceded by
 * whitespace.
 *
 * Then whole lines are appended to the raw block, one at a time,
 * until a terminating line is peeked or the file ends. When the
 * header has no indentation indicator, the indentation is picked
 * from the first non-empty line; while only empty lines have been
 * seen, a provisional indentation is tracked instead.
 *
 * @param sb receives the raw block, its indentation and its chomp
 * @param indref the reference indentation; must not be npos */
2265 template<class EventHandler>
2266 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2267 {
2268  _c4dbgpf("blck: indref={}", indref);
2269  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2270 
2271  // nice explanation here: http://yaml-multiline.info/
2272  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2273  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2274 
2275  _c4dbgpf("blck: specs={}", _prs(s));
2276 
2277  // parse the spec
2278  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2279  size_t indentation = npos; // have to find out if no spec is given
2280  if(s.len > 1)
2281  {
2282  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
 // t is the header without the leading block indicator
2283  csubstr t = s.sub(1);
2284  _c4dbgpf("blck: spec is multichar: '{}'", t);
2285  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2286  size_t pos = t.first_of("-+");
2287  _c4dbgpf("blck: spec chomp char at {}", pos);
2288  if(pos != npos)
2289  {
2290  if(t[pos] == '-')
2291  chomp = CHOMP_STRIP;
2292  else if(t[pos] == '+')
2293  chomp = CHOMP_KEEP;
 // the chomp char may come before or after the digits: keep
 // in t whatever is on the other side of it
2294  if(pos == 0)
2295  t = t.sub(1);
2296  else
2297  t = t.first(pos);
2298  }
2299  // from here to the end, only digits are considered
2300  pos = t.first_not_of("0123456789");
2301  csubstr digits = t.first(pos);
2302  if( ! digits.empty())
2303  {
2304  if(C4_UNLIKELY(digits.len > 1))
2305  _c4err("parse error: invalid indentation");
2306  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2307  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2308  _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2309  if(C4_UNLIKELY( ! indentation))
2310  _c4err("parse error: null indentation");
 // NOTE(review): the message below reports indentation+indref (the
 // parameter), but what is added is the indref of the current
 // state - confirm that these always agree
2311  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2312  indentation += m_evt_handler->m_curr->indref;
2313  }
2314  else
2315  {
 // no digits: whatever is left must be whitespace followed by a comment
2316  if(C4_UNLIKELY(t.len && (!t.begins_with_any(" \t") || !t.sub(pos).triml(" \t").begins_with('#'))))
2317  _c4err("parse error: invalid token");
2318  }
2319  }
2320 
2321  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2322 
2323  // finish the current line
2324  _line_progressed(s.len);
2325  _line_ended();
2326  _scan_line();
2327 
2328  // start with a zero-length block, already pointing at the right place
2329  substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2330  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2331 
2332  // read every full line into a raw block,
2333  // from which newlines are to be stripped as needed.
2334  //
2335  // If no explicit indentation was given, pick it from the first
2336  // non-empty line. See
2337  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2338  size_t num_lines = 0;
2339  size_t first = m_evt_handler->m_curr->pos.line;
2340  size_t provisional_indentation = npos;
2341  LineContents lc;
2342  while(( ! _finished_file()))
2343  {
2344  // peek next line, but do not advance immediately
2345  lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2346  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2347  C4_DONT_OPTIMIZE(lc.rem);
2348  #endif
2349  _c4dbgpf("blck: peeking at {}", _prs(lc.rem.trimr("\r\n"), true));
2350  // evaluate termination conditions
2351  if(indentation != npos)
2352  {
2353  _c4dbgpf("blck: indentation={}", indentation);
2354  // stop when the line is deindented and not empty
2355  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2356  {
2357  if(raw_block.len)
2358  {
2359  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2360  }
2361  else
2362  {
2363  _c4err("indentation decreased without any scalar");
2364  }
2365  break;
2366  }
2367  else if(indentation == 0)
2368  {
 // with no indentation, only a doc token ends the block
2369  _c4dbgpf("blck: noindent. lc.rem={}", _prs(lc.rem));
2370  if(_is_doc_token(lc.rem))
2371  {
2372  _c4dbgp("blck: stop. indentation=0 and doc ended");
2373  break;
2374  }
2375  }
2376  }
2377  else
2378  {
 // the indentation is not known yet
2379  const size_t fns = lc.rem.first_not_of(' ');
2380  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2381  if(fns != npos) // non-empty line
2382  {
2383  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2384  if(C4_UNLIKELY(lc.full.begins_with('\t')))
2385  _c4err("parse error");
2386  if(provisional_indentation == npos)
2387  {
 // this is the very first line after the header
2388  if(lc.indentation < indref)
2389  {
2390  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2391  if(raw_block.len == 0)
2392  {
2393  _c4dbgp("blck: was empty, undo next line");
2394  _line_ended_undo();
2395  }
2396  break;
2397  }
2398  else if(lc.indentation == m_evt_handler->m_curr->indref)
2399  {
2400  if(has_any(RSEQ|RMAP))
2401  {
2402  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2403  break;
2404  }
2405  }
2406  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2407  indentation = lc.indentation;
2408  }
2409  else
2410  {
 // empty lines came first: compare with what they suggested
2411  if(lc.indentation >= provisional_indentation)
2412  {
2413  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2414  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2415  indentation = lc.indentation;
2416  }
2417  else
2418  {
2419  if(lc.indentation >= indref)
2420  _c4err("parse error: first non-empty block line should have at least the original indentation");
2421  _c4dbgp("blck: finished");
2422  break;
2423  }
2424  }
2425  }
2426  else // empty line
2427  {
2428  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2429  if(provisional_indentation != npos)
2430  {
 // the provisional indentation can only grow
2431  if(lc.rem.len >= provisional_indentation)
2432  {
2433  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2434  provisional_indentation = lc.rem.len;
2435  }
2436  }
2437  else
2438  {
 // when the fallback is taken, the result of has_any()
 // is what gets stored as the provisional indentation
2439  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2440  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2441  if(provisional_indentation == npos)
2442  {
2443  provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2444  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2445  }
2446  if(provisional_indentation < indref)
2447  {
2448  provisional_indentation = indref;
2449  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2450  }
2451  }
2452  }
2453  }
2454  // advance now that we know the folded scalar continues
2455  m_evt_handler->m_curr->line_contents = lc;
2456  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2457  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2458  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2459  _line_ended();
2460  ++num_lines;
2461  }
2462  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2463  C4_UNUSED(num_lines);
2464  C4_UNUSED(first);
2465 
 // only empty lines were seen (or no lines at all): settle for the
 // provisional indentation
2466  if(indentation == npos)
2467  {
2468  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2469  indentation = provisional_indentation;
2470  }
2471 
 // undo the _line_ended() done for the last appended line
2472  if(num_lines)
2473  _line_ended_undo();
2474 
2475  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2476 
2477  sb->scalar = raw_block;
2478  sb->indentation = indentation;
2479  sb->chomp = chomp;
2480 }
2481 
2482 
2483 //-----------------------------------------------------------------------------
2484 //-----------------------------------------------------------------------------
2485 //-----------------------------------------------------------------------------
2486 /** @cond dev */
2487 
2488 // a debugging scaffold: switch the `#if 0` below to `#if 1` to have
// _c4dbgfws() forward to _c4dbgpf(), with the message prefixed by
// the read and write positions of the filter processor (a variable
// named `proc` must then be in scope wherever the macro is used).
// As it stands, _c4dbgfws() expands to nothing.
2489 #if 0
2490 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2491 #else
2492 #define _c4dbgfws(...)
2493 #endif
2494 
2495 template<class EventHandler>
2496 template<class FilterProcessor>
2497 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2498 {
2499  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2500  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2501 
2502  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2503  if(first_pos != npos)
2504  {
2505  const char first_char = proc.src[first_pos];
2506  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2507  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2508  {
2509  _c4dbgfws("whitespace is trailing on line", "");
2510  proc.skip(first_pos - proc.rpos);
2511  }
2512  else // a legit whitespace
2513  {
2514  proc.copy();
2515  _c4dbgfws("legit whitespace. sofar={}", _prs(proc.sofar()));
2516  }
2517  return true;
2518  }
2519  _c4dbgfws("whitespace is trailing on line", "");
2520  return false;
2521 }
2522 
2523 template<class EventHandler>
2524 template<class FilterProcessor>
2525 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2526 {
2527  if(!_filter_ws_handle_to_first_non_space(proc))
2528  {
2529  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2530  proc.copy(proc.src.len - proc.rpos);
2531  }
2532 }
2533 
2534 template<class EventHandler>
2535 template<class FilterProcessor>
2536 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2537 {
2538  if(!_filter_ws_handle_to_first_non_space(proc))
2539  {
2540  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2541  proc.skip(proc.src.len - proc.rpos);
2542  }
2543 }
2544 
// the whitespace-filter debugging scaffold is not used below this point
2545 #undef _c4dbgfws
2546 
2547 
2548 //-----------------------------------------------------------------------------
2549 //-----------------------------------------------------------------------------
2550 //-----------------------------------------------------------------------------
2551 /* plain scalars */
2552 
2553 // a debugging scaffold: switch the `#if 0` below to `#if 1` to have
// _c4dbgfps() forward to _c4dbgpf(), with the message prefixed by
// the read and write positions of the filter processor (a variable
// named `proc` must then be in scope wherever the macro is used).
// As it stands, _c4dbgfps() expands to nothing.
2554 #if 0
2555 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2556 #else
2557 #define _c4dbgfps(fmt, ...)
2558 #endif
2559 
2560 template<class EventHandler>
2561 template<class FilterProcessor>
2562 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2563 {
2564  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2565 
2566  _c4dbgfps("found newline. sofar={}", _prs(proc.sofar()));
2567  size_t ii = proc.rpos;
2568  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2569  if(numnl_following)
2570  {
2571  proc.set('\n', numnl_following);
2572  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2573  }
2574  else
2575  {
2576  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2577  if(ret != npos)
2578  {
2579  proc.set(' ');
2580  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2581  }
2582  else
2583  {
2584  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2585  ii = proc.src.len;
2586  }
2587  }
2588  proc.rpos = ii;
2589 }
2590 
2591 template<class EventHandler>
2592 template<class FilterProcessor>
2593 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2594 {
2595  _RYML_ASSERT_PARSE_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2596  _c4dbgfps("before={}", _prs(proc.src));
2597 
2598  while(proc.has_more_chars())
2599  {
2600  const char curr = proc.curr();
2601  _c4dbgfps("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2602  switch(curr)
2603  {
2604  case ' ':
2605  _RYML_WITH_TAB_TOKENS(case '\t':)
2606  _c4dbgfps("whitespace", curr);
2607  _filter_ws_skip_trailing(proc);
2608  break;
2609  case '\n':
2610  _c4dbgfps("newline", curr);
2611  _filter_nl_plain(proc, /*indentation*/indentation);
2612  break;
2613  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2614  _c4dbgfps("carriage return, ignore", curr);
2615  proc.skip();
2616  break;
2617  default:
2618  proc.copy();
2619  break;
2620  }
2621  }
2622 
2623  _c4dbgfps("after={}", _prs(proc.sofar()));
2624 
2625  return proc.result();
2626 }
2627 
// the plain-scalar debugging scaffold is not used below this point
2628 #undef _c4dbgfps
2629 
2630 
2631 template<class EventHandler>
2632 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2633 {
2634  FilterProcessorSrcDst proc(scalar, dst);
2635  return _filter_plain(proc, indentation);
2636 }
2637 
2638 template<class EventHandler>
2639 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2640 {
2641  FilterProcessorInplaceEndExtending proc(dst, cap);
2642  return _filter_plain(proc, indentation);
2643 }
2644 
2645 
2646 //-----------------------------------------------------------------------------
2647 //-----------------------------------------------------------------------------
2648 //-----------------------------------------------------------------------------
2649 /* single quoted */
2650 
2651 // a debugging scaffold:
2652 #if 0
2653 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2654 #else
2655 #define _c4dbgfsq(fmt, ...)
2656 #endif
2657 
2658 template<class EventHandler>
2659 template<class FilterProcessor>
2660 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2661 {
2662  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2663 
2664  _c4dbgfsq("found newline. sofar={}", _prs(proc.sofar()));
2665  size_t ii = proc.rpos;
2666  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2667  if(numnl_following)
2668  {
2669  proc.set('\n', numnl_following);
2670  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2671  }
2672  else
2673  {
2674  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2675  if(ret != npos)
2676  {
2677  proc.set(' ');
2678  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2679  }
2680  else
2681  {
2682  proc.set(' ');
2683  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2684  }
2685  }
2686  proc.rpos = ii;
2687 }
2688 
2689 template<class EventHandler>
2690 template<class FilterProcessor>
2691 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2692 {
2693  _c4dbgfsq("before={}", _prs(proc.src));
2694 
2695  // from the YAML spec for double-quoted scalars:
2696  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2697  while(proc.has_more_chars())
2698  {
2699  const char curr = proc.curr();
2700  _c4dbgfsq("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2701  switch(curr)
2702  {
2703  case ' ':
2704  case '\t':
2705  _c4dbgfsq("whitespace", curr);
2706  _filter_ws_copy_trailing(proc);
2707  break;
2708  case '\n':
2709  _c4dbgfsq("newline", curr);
2710  _filter_nl_squoted(proc);
2711  break;
2712  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2713  _c4dbgfsq("skip cr", curr);
2714  proc.skip();
2715  break;
2716  case '\'':
2717  _c4dbgfsq("squote", curr);
2718  if(proc.next() == '\'')
2719  {
2720  _c4dbgfsq("two consecutive squotes", curr);
2721  proc.skip();
2722  proc.copy();
2723  }
2724  else
2725  {
2726  _c4err("filter error");
2727  }
2728  break;
2729  default:
2730  proc.copy();
2731  break;
2732  }
2733  }
2734 
2735  _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2736 
2737  return proc.result();
2738 }
2739 
2740 #undef _c4dbgfsq
2741 
2742 template<class EventHandler>
2743 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2744 {
2745  FilterProcessorSrcDst proc(scalar, dst);
2746  return _filter_squoted(proc);
2747 }
2748 
2749 template<class EventHandler>
2750 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2751 {
2752  FilterProcessorInplaceEndExtending proc(dst, cap);
2753  return _filter_squoted(proc);
2754 }
2755 
2756 
2757 //-----------------------------------------------------------------------------
2758 //-----------------------------------------------------------------------------
2759 //-----------------------------------------------------------------------------
2760 /* double quoted */
2761 
2762 // a debugging scaffold:
2763 #if 0
2764 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2765 #else
2766 #define _c4dbgfdq(...)
2767 #endif
2768 
2769 template<class EventHandler>
2770 template<class FilterProcessor>
2771 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2772 {
2773  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2774 
2775  _c4dbgfdq("found newline. sofar={}", _prs(proc.sofar()));
2776  size_t ii = proc.rpos;
2777  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2778  if(numnl_following)
2779  {
2780  proc.set('\n', numnl_following);
2781  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2782  }
2783  else
2784  {
2785  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2786  if(ret != npos)
2787  {
2788  proc.set(' ');
2789  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2790  }
2791  else
2792  {
2793  proc.set(' ');
2794  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2795  }
2796  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2797  {
2798  _c4dbgfdq("backslash at [{}]", ii);
2799  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2800  if(next == ' ' || next == '\t')
2801  {
2802  _c4dbgfdq("extend skip to backslash", "");
2803  ++ii;
2804  }
2805  }
2806  }
2807  proc.rpos = ii;
2808 }
2809 
2810 template<class EventHandler>
2811 template<class FilterProcessor>
2812 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2813 {
2814  const size_t szp1 = sz + 1u;
2815  if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2816  _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2817  char readbuf[8];
2818  csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2819  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2820  uint32_t codepoint_val = {};
2821  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2822  _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2823  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2824  if(C4_UNLIKELY(numbytes == 0))
2825  _c4err("failed to decode code point={}", proc.rpos);
2826  _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2827  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2828  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2829 }
2830 
2831 template<class EventHandler>
2832 template<class FilterProcessor>
2833 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2834 {
2835  char next = proc.next();
2836  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2837  if(next == '\r')
2838  {
2839  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2840  {
2841  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2842  next = '\n';
2843  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2844  }
2845  }
2846 
2847  if(next == '\n')
2848  {
2849  size_t ii = proc.rpos + 2;
2850  for( ; ii < proc.src.len; ++ii)
2851  {
2852  // skip leading whitespace
2853  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2854  ;
2855  else
2856  break;
2857  }
2858  proc.skip(ii - proc.rpos);
2859  }
2860  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2861  {
2862  // escapes for json compatibility
2863  proc.translate_esc(next);
2864  _c4dbgfdq("here, used '{}'", _c4prc(next));
2865  }
2866  else if(next == '\r')
2867  {
2868  proc.skip();
2869  }
2870  else if(next == 'n')
2871  {
2872  proc.translate_esc('\n');
2873  }
2874  else if(next == 'r')
2875  {
2876  proc.translate_esc('\r');
2877  }
2878  else if(next == 't')
2879  {
2880  proc.translate_esc('\t');
2881  }
2882  else if(next == '\\')
2883  {
2884  proc.translate_esc('\\');
2885  }
2886  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2887  {
2888  _filter_dquoted_backslash_decode(proc, 2u);
2889  }
2890  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2891  {
2892  _filter_dquoted_backslash_decode(proc, 4u);
2893  }
2894  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2895  {
2896  _filter_dquoted_backslash_decode(proc, 8u);
2897  }
2898  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2899  else if(next == '0')
2900  {
2901  proc.translate_esc('\0');
2902  }
2903  else if(next == 'b') // backspace
2904  {
2905  proc.translate_esc('\b');
2906  }
2907  else if(next == 'f') // form feed
2908  {
2909  proc.translate_esc('\f');
2910  }
2911  else if(next == 'a') // bell character
2912  {
2913  proc.translate_esc('\a');
2914  }
2915  else if(next == 'v') // vertical tab
2916  {
2917  proc.translate_esc('\v');
2918  }
2919  else if(next == 'e') // escape character
2920  {
2921  proc.translate_esc('\x1b');
2922  }
2923  else if(next == '_') // unicode non breaking space \u00a0
2924  {
2925  // https://www.compart.com/en/unicode/U+00a0
2926  const char payload[] = {
2927  _RYML_CHCONST(-0x3e, 0xc2),
2928  _RYML_CHCONST(-0x60, 0xa0),
2929  };
2930  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2931  }
2932  else if(next == 'N') // unicode next line \u0085
2933  {
2934  // https://www.compart.com/en/unicode/U+0085
2935  const char payload[] = {
2936  _RYML_CHCONST(-0x3e, 0xc2),
2937  _RYML_CHCONST(-0x7b, 0x85),
2938  };
2939  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2940  }
2941  else if(next == 'L') // unicode line separator \u2028
2942  {
2943  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2944  const char payload[] = {
2945  _RYML_CHCONST(-0x1e, 0xe2),
2946  _RYML_CHCONST(-0x80, 0x80),
2947  _RYML_CHCONST(-0x58, 0xa8),
2948  };
2949  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2950  }
2951  else if(next == 'P') // unicode paragraph separator \u2029
2952  {
2953  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2954  const char payload[] = {
2955  _RYML_CHCONST(-0x1e, 0xe2),
2956  _RYML_CHCONST(-0x80, 0x80),
2957  _RYML_CHCONST(-0x57, 0xa9),
2958  };
2959  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2960  }
2961  else if(next == '\0')
2962  {
2963  proc.skip();
2964  }
2965  else
2966  {
2967  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2968  }
2969  _c4dbgfdq("backslash...sofar={}", _prs(proc.sofar()));
2970 }
2971 
2972 
2973 template<class EventHandler>
2974 template<class FilterProcessor>
2975 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2976 {
2977  _c4dbgfdq("before={}", _prs(proc.src));
2978  // from the YAML spec for double-quoted scalars:
2979  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2980  while(proc.has_more_chars())
2981  {
2982  const char curr = proc.curr();
2983  _c4dbgfdq("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2984  switch(curr)
2985  {
2986  case ' ':
2987  case '\t':
2988  {
2989  _c4dbgfdq("whitespace", curr);
2990  _filter_ws_copy_trailing(proc);
2991  break;
2992  }
2993  case '\n':
2994  {
2995  _c4dbgfdq("newline", curr);
2996  _filter_nl_dquoted(proc);
2997  break;
2998  }
2999  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3000  {
3001  _c4dbgfdq("carriage return, ignore", curr);
3002  proc.skip();
3003  break;
3004  }
3005  case '\\':
3006  {
3007  _filter_dquoted_backslash(proc);
3008  break;
3009  }
3010  default:
3011  {
3012  proc.copy();
3013  break;
3014  }
3015  }
3016  }
3017  _c4dbgfdq("after={}", _prs(proc.sofar()));
3018  return proc.result();
3019 }
3020 
3021 #undef _c4dbgfdq
3022 
3023 
3024 template<class EventHandler>
3025 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
3026 {
3027  FilterProcessorSrcDst proc(scalar, dst);
3028  return _filter_dquoted(proc);
3029 }
3030 
3031 template<class EventHandler>
3032 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
3033 {
3034  FilterProcessorInplaceMidExtending proc(dst, cap);
3035  return _filter_dquoted(proc);
3036 }
3037 
3038 
3039 //-----------------------------------------------------------------------------
3040 //-----------------------------------------------------------------------------
3041 //-----------------------------------------------------------------------------
3042 // block filtering helpers
3043 
3044 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
3045 {
3046  if(indentation + 1 > s.len)
3047  return npos;
3048  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
3049  {
3050  if(s.str[i] == '\n')
3051  {
3052  csubstr rem = s.sub(i + 1);
3053  size_t first = rem.first_not_of(' ');
3054  first = (first != npos) ? first : rem.len;
3055  if(first > indentation)
3056  return i;
3057  }
3058  }
3059  return npos;
3060 }
3061 
3062 template<class EventHandler>
3063 template<class FilterProcessor>
3064 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3065 {
3066  _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3067  _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3068 
3069  // a debugging scaffold:
3070  #if 0
3071  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3072  #else
3073  #define _c4dbgchomp(...)
3074  #endif
3075 
3076  // advance to the last line having spaces beyond the indentation
3077  {
3078  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3079  if(last != npos)
3080  {
3081  _c4dbgchomp("found newline and larger indentation. last={}", last);
3082  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3083  _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3084  // remove indentation spaces, copy the rest
3085  while((proc.rpos < last) && proc.has_more_chars())
3086  {
3087  const char curr = proc.curr();
3088  _c4dbgchomp("curr='{}'", _c4prc(curr));
3089  switch(curr)
3090  {
3091  case '\n':
3092  {
3093  _c4dbgchomp("newline! remlen={}", proc.rem().len);
3094  proc.copy();
3095  // are there spaces after the newline?
3096  csubstr at_next_line = proc.rem();
3097  if(at_next_line.begins_with(' '))
3098  {
3099  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3100  // there are spaces.
3101  size_t first_non_space = at_next_line.first_not_of(' ');
3102  _c4dbgchomp("first_non_space={}", first_non_space);
3103  if(first_non_space == npos)
3104  {
3105  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3106  first_non_space = at_next_line.len;
3107  }
3108  if(first_non_space <= indentation)
3109  {
3110  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3111  proc.skip(first_non_space);
3112  }
3113  else
3114  {
3115  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3116  proc.skip(indentation);
3117  // copy the spaces after the indentation
3118  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3119  proc.copy(first_non_space - indentation);
3120  }
3121  }
3122  break;
3123  }
3124  case '\r':
3125  proc.skip();
3126  break;
3127  }
3128  }
3129  }
3130  }
3131 
3132  // from now on, we only have line ends (or indentation spaces)
3133  switch(chomp)
3134  {
3135  case CHOMP_CLIP:
3136  {
3137  bool had_one = false;
3138  while(proc.has_more_chars())
3139  {
3140  const char curr = proc.curr();
3141  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3142  switch(curr)
3143  {
3144  case '\n':
3145  {
3146  _c4dbgchomp("copy newline!", curr);
3147  proc.copy();
3148  proc.set_at_end();
3149  had_one = true;
3150  break;
3151  }
3152  case ' ':
3153  case '\r':
3154  _c4dbgchomp("skip!", curr);
3155  proc.skip();
3156  break;
3157  }
3158  }
3159  if(!had_one) // there were no newline characters. add one.
3160  {
3161  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3162  proc.set('\n');
3163  }
3164  break;
3165  }
3166  case CHOMP_KEEP:
3167  {
3168  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3169  while(proc.has_more_chars())
3170  {
3171  const char curr = proc.curr();
3172  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3173  switch(curr)
3174  {
3175  case '\n':
3176  _c4dbgchomp("copy newline!", curr);
3177  proc.copy();
3178  break;
3179  case ' ':
3180  case '\r':
3181  _c4dbgchomp("skip!", curr);
3182  proc.skip();
3183  break;
3184  }
3185  }
3186  break;
3187  }
3188  case CHOMP_STRIP:
3189  {
3190  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3191  // nothing to do!
3192  break;
3193  }
3194  }
3195 
3196  #undef _c4dbgchomp
3197 }
3198 
3199 
3200 // a debugging scaffold:
3201 #if 0
3202 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3203 #else
3204 #define _c4dbgfb(...)
3205 #endif
3206 
3207 template<class EventHandler>
3208 template<class FilterProcessor>
3209 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3210 {
3211  csubstr rem = proc.rem(); // remaining
3212  if(rem.len)
3213  {
3214  size_t first = rem.first_not_of(' ');
3215  if(first != npos)
3216  {
3217  _c4dbgfb("{} spaces follow before next nonws character", first);
3218  if(first < indentation)
3219  {
3220  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3221  proc.skip(first);
3222  }
3223  else
3224  {
3225  _c4dbgfb("skip {} spaces from indentation", indentation);
3226  proc.skip(indentation);
3227  }
3228  }
3229  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3230  else
3231  {
3232  _c4dbgfb("all spaces to the end: {} spaces", first);
3233  first = rem.len;
3234  if(first)
3235  {
3236  if(first < indentation)
3237  {
3238  _c4dbgfb("skip everything", first);
3239  proc.skip(proc.src.len - proc.rpos);
3240  }
3241  else
3242  {
3243  _c4dbgfb("skip {} spaces from indentation", indentation);
3244  proc.skip(indentation);
3245  }
3246  }
3247  }
3248  #endif
3249  }
3250 }
3251 
3252 template<class EventHandler>
3253 template<class FilterProcessor>
3254 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3255 {
3256  csubstr contents = proc.src.trimr(" \n\r");
3257  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3258  if(!contents.len)
3259  {
3260  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3261  if(chomp == CHOMP_KEEP && proc.src.len)
3262  {
3263  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3264  while(proc.has_more_chars())
3265  {
3266  const char curr = proc.curr();
3267  if(curr == '\n')
3268  proc.copy();
3269  else
3270  proc.skip();
3271  }
3272  if(!proc.wpos)
3273  {
3274  proc.set('\n');
3275  }
3276  }
3277  }
3278  return contents.len;
3279 }
3280 
3281 template<class EventHandler>
3282 template<class FilterProcessor>
3283 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3284 {
3285  _c4dbgfb("contents_len={}", contents_len);
3286 
3287  _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3288 
3289  // extend contents to just before the first newline at the end,
3290  // in case it is preceded by spaces
3291  size_t firstnewl = proc.src.first_of('\n', contents_len);
3292  if(firstnewl != npos)
3293  {
3294  contents_len = firstnewl;
3295  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3296  }
3297  else
3298  {
3299  contents_len = proc.src.len;
3300  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3301  }
3302 
3303  return contents_len;
3304 }
3305 
3306 #undef _c4dbgfb
3307 
3308 
3309 //-----------------------------------------------------------------------------
3310 //-----------------------------------------------------------------------------
3311 //-----------------------------------------------------------------------------
3312 
3313 // a debugging scaffold:
3314 #if 0
3315 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3316 #else
3317 #define _c4dbgfbl(...)
3318 #endif
3319 
3320 template<class EventHandler>
3321 template<class FilterProcessor>
3322 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3323 {
3324  _c4dbgfbl("indentation={} before={}", indentation, _prs(proc.src));
3325 
3326  size_t contents_len = _handle_all_whitespace(proc, chomp);
3327  if(!contents_len)
3328  return proc.result();
3329 
3330  contents_len = _extend_to_chomp(proc, contents_len);
3331 
3332  _c4dbgfbl("to filter={}", _prs(proc.src.first(contents_len)));
3333 
3334  _filter_block_indentation(proc, indentation);
3335 
3336  // now filter the bulk
3337  while(proc.has_more_chars(/*maxpos*/contents_len))
3338  {
3339  const char curr = proc.curr();
3340  _c4dbgfbl("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3341  switch(curr)
3342  {
3343  case '\n':
3344  {
3345  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3346  proc.copy(); // copy the newline
3347  _filter_block_indentation(proc, indentation);
3348  break;
3349  }
3350  case '\r':
3351  proc.skip();
3352  break;
3353  default:
3354  proc.copy();
3355  break;
3356  }
3357  }
3358 
3359  _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3360 
3361  _filter_chomp(proc, chomp, indentation);
3362 
3363  _c4dbgfbl("final={}", _prs(proc.sofar()));
3364 
3365  return proc.result();
3366 }
3367 
3368 #undef _c4dbgfbl
3369 
3370 template<class EventHandler>
3371 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3372 {
3373  FilterProcessorSrcDst proc(scalar, dst);
3374  return _filter_block_literal(proc, indentation, chomp);
3375 }
3376 
3377 template<class EventHandler>
3378 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3379 {
3380  FilterProcessorInplaceEndExtending proc(scalar, cap);
3381  return _filter_block_literal(proc, indentation, chomp);
3382 }
3383 
3384 
3385 //-----------------------------------------------------------------------------
3386 //-----------------------------------------------------------------------------
3387 //-----------------------------------------------------------------------------
3388 
3389 // a debugging scaffold:
3390 #if 0
3391 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3392 #else
3393 #define _c4dbgfbf(...)
3394 #endif
3395 
3396 
3397 template<class EventHandler>
3398 template<class FilterProcessor>
3399 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3400 {
3401  _filter_block_indentation(proc, indentation);
3402  while(proc.has_more_chars(len))
3403  {
3404  const char curr = proc.curr();
3405  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3406  switch(curr)
3407  {
3408  case '\n':
3409  _c4dbgfbf("newline.", curr);
3410  proc.copy();
3411  _filter_block_indentation(proc, indentation);
3412  break;
3413  case '\r':
3414  proc.skip();
3415  break;
3416  case ' ':
3417  case '\t':
3418  {
3419  size_t first = proc.rem().first_not_of(" \t");
3420  _c4dbgfbf("space. first={}", first);
3421  if(first == npos)
3422  first = proc.rem().len;
3423  _c4dbgfbf("... indentation increased to {}", first);
3424  _filter_block_folded_indented_block(proc, indentation, len, first);
3425  break;
3426  }
3427  default:
3428  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3429  return;
3430  }
3431  }
3432 }
3433 
/** Emit one newline of a run of consecutive newlines in a folded block
 * scalar, applying the folding rule.
 *
 * NOTE: this code lives in its own function on purpose; see the long note
 * in _filter_block_folded_newlines(). Do not inline or restructure it.
 *
 * @param proc the filter processor, positioned on a newline
 * @param num_newl 1-based ordinal of this newline within the current run
 * @param wpos_at_first_newl write position recorded for the first newline
 *        of the run (npos if none was recorded yet)
 * @return the write position of the first newline of the run: updated
 *         when @p num_newl is 1, otherwise @p wpos_at_first_newl unchanged */
template<class EventHandler>
template<class FilterProcessor>
size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
{
    switch(num_newl)
    {
    case 1u:
        // first newline of the run: fold it into a space, and remember
        // where that space was written in case more newlines follow
        _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
        wpos_at_first_newl = proc.wpos;
        proc.skip();
        proc.set(' ');
        break;
    case 2u:
        // second newline: the space written for the first one is turned
        // back into a newline; this newline itself writes nothing
        _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
        _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
        _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
        _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
        proc.skip();
        proc.set_at(wpos_at_first_newl, '\n');
        _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
        break;
    default:
        // third and later newlines are kept as they are
        _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
        proc.copy();
        break;
    }
    return wpos_at_first_newl;
}
3462 
/** Process a run of newlines (and what immediately follows it) inside the
 * contents of a folded block scalar, up to position @p len.
 *
 * The processor must be positioned on a newline. The loop runs until the
 * first character that is not a newline, space, tab or '\r':
 *  - each newline is handed to _filter_block_folded_newlines_compress(),
 *    then the indentation of the following line is dropped
 *  - a space or tab starts a more-indented block: the folding done so far
 *    is undone (the space written for the first newline becomes a newline
 *    again, and a newline is added if the run had more than one), the
 *    block is delegated to _filter_block_folded_indented_block(), and the
 *    run counters are reset
 *  - '\r' is dropped */
template<class EventHandler>
template<class FilterProcessor>
void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
{
    _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
    size_t num_newl = 0; // newlines seen so far in the current run
    size_t wpos_at_first_newl = npos; // where the first newline of the run was written
    while(proc.has_more_chars(len))
    {
        const char curr = proc.curr();
        _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
        switch(curr)
        {
        case '\n':
        {
            _c4dbgfbf("newline. sofar={}", num_newl);
            // NOTE: vs2022-32bit-release builds were giving wrong
            // results in this block, if it was written either as a
            // switch(num_newl) or as its equivalent if-form.
            //
            // For this reason, we're using a dedicated function
            // (**_compress), which seems to work around the issue.
            //
            // The manifested problem was that somewhere between the
            // assignment to curr and this point, proc.wpos (the
            // write-position of the processor) jumped to npos, which
            // made the write wrap-around! To make things worse,
            // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
            // problem go away!
            //
            // The only way to make the problem appear with prints
            // enabled was by disabling all prints in this function
            // (including in the block which was moved to the compress
            // function) and then selectively enabling only some of
            // those prints.
            //
            // This may be due to some bug in the cl-x86 optimizer; or
            // it may be triggered by some UB which may be
            // inadvertently present in this function or in the filter
            // processor. This is despite our best efforts to weed out
            // any such UB problem: neither clang-tidy nor any of the
            // sanitizers, nor gcc's -fanalyzer pointed to any problems
            // in this code.
            //
            // In the end, moving this block to a separate function
            // was the only way to bury the problem. But it may
            // resurface again, as The Undead, rising from the
            // grave to haunt us with his terrible presence.
            //
            // We may have to revisit this. With a stake, and lots of
            // garlic.
            wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
            _filter_block_indentation(proc, indentation);
            break;
        }
        case ' ':
        case '\t':
        {
            // whitespace remaining after the indentation was dropped: this
            // line is more indented than the block
            size_t first = proc.rem().first_not_of(" \t");
            _c4dbgfbf("space. first={}", first);
            if(first == npos)
                first = proc.rem().len;
            _c4dbgfbf("... indentation increased to {}", first);
            if(num_newl)
            {
                // undo the folding: the newline before a more-indented
                // line is kept as a newline
                _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
                proc.set_at(wpos_at_first_newl, '\n');
            }
            if(num_newl > 1u)
            {
                // the second newline of the run wrote nothing when it was
                // compressed: write it now
                _c4dbgfbf("... add missing newline", wpos_at_first_newl);
                proc.set('\n');
            }
            _filter_block_folded_indented_block(proc, indentation, len, first);
            // start a fresh run
            num_newl = 0;
            wpos_at_first_newl = npos;
            break;
        }
        case '\r':
            proc.skip();
            break;
        default:
            _c4dbgfbf("not space, not newline. stop.", 0);
            return;
        }
    }
}
3550 
3551 
3552 template<class EventHandler>
3553 template<class FilterProcessor>
3554 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3555 {
3556  _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3557  if(curr_indentation)
3558  proc.copy(curr_indentation);
3559  while(proc.has_more_chars(len))
3560  {
3561  const char curr = proc.curr();
3562  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3563  switch(curr)
3564  {
3565  case '\n':
3566  {
3567  proc.copy();
3568  _filter_block_indentation(proc, indentation);
3569  csubstr rem = proc.rem();
3570  const size_t first = rem.first_not_of(' ');
3571  _c4dbgfbf("newline. firstns={}", first);
3572  if(first == 0)
3573  {
3574  const char c = rem[first];
3575  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3576  if(c == '\n' || c == '\r')
3577  {
3578  ;
3579  }
3580  else
3581  {
3582  _c4dbgfbf("done with indented block", first);
3583  goto endloop;
3584  }
3585  }
3586  else if(first != npos)
3587  {
3588  proc.copy(first);
3589  _c4dbgfbf("copy all {} spaces", first);
3590  }
3591  break;
3592  }
3593  break;
3594  case '\r':
3595  proc.skip();
3596  break;
3597  default:
3598  proc.copy();
3599  break;
3600  }
3601  }
3602  endloop:
3603  return;
3604 }
3605 
3606 
3607 template<class EventHandler>
3608 template<class FilterProcessor>
3609 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3610 {
3611  _c4dbgfbf("indentation={} before={}", indentation, _prs(proc.src));
3612 
3613  size_t contents_len = _handle_all_whitespace(proc, chomp);
3614  if(!contents_len)
3615  return proc.result();
3616 
3617  contents_len = _extend_to_chomp(proc, contents_len);
3618 
3619  _c4dbgfbf("to filter={}", _prs(proc.src.first(contents_len)));
3620 
3621  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3622 
3623  // now filter the bulk
3624  while(proc.has_more_chars(/*maxpos*/contents_len))
3625  {
3626  const char curr = proc.curr();
3627  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3628  switch(curr)
3629  {
3630  case '\n':
3631  {
3632  _c4dbgfbf("found newline", curr);
3633  _filter_block_folded_newlines(proc, indentation, contents_len);
3634  break;
3635  }
3636  case '\r':
3637  proc.skip();
3638  break;
3639  default:
3640  proc.copy();
3641  break;
3642  }
3643  }
3644 
3645  _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3646 
3647  _filter_chomp(proc, chomp, indentation);
3648 
3649  _c4dbgfbf("final={}", proc.sofar().len, _prs(proc.sofar()));
3650 
3651  return proc.result();
3652 }
3653 
3654 #undef _c4dbgfbf
3655 
3656 template<class EventHandler>
3657 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3658 {
3659  FilterProcessorSrcDst proc(scalar, dst);
3660  return _filter_block_folded(proc, indentation, chomp);
3661 }
3662 
3663 template<class EventHandler>
3664 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3665 {
3666  FilterProcessorInplaceEndExtending proc(scalar, cap);
3667  return _filter_block_folded(proc, indentation, chomp);
3668 }
3669 
3670 
3671 //-----------------------------------------------------------------------------
3672 //-----------------------------------------------------------------------------
3673 //-----------------------------------------------------------------------------
3674 
3675 template<class EventHandler>
3676 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3677 {
3678  _c4dbgpf("filtering plain scalar: s={}", _prs(s));
3679  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3680  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3681  _c4dbgpf("filtering plain scalar: success! s={}", _prs(r.get()));
3682  return r.get();
3683 }
3684 
3685 //-----------------------------------------------------------------------------
3686 
3687 template<class EventHandler>
3688 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3689 {
3690  _c4dbgpf("filtering squo scalar: s={}", _prs(s));
3691  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3692  _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3693  _c4dbgpf("filtering squo scalar: success! s={}", _prs(r.get()));
3694  return r.get();
3695 }
3696 
3697 
3698 //-----------------------------------------------------------------------------
3699 
3700 template<class EventHandler>
3701 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3702 {
3703  _c4dbgpf("filtering dquo scalar: s={}", _prs(s));
3704  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3705  if(C4_LIKELY(r.valid()))
3706  {
3707  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(r.get()));
3708  return r.get();
3709  }
3710  else
3711  {
3712  const size_t len = r.required_len();
3713  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3714  substr dst = _alloc_arena(len, &s);
3715  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3716  if(dst.str)
3717  {
3718  _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3719  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3720  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3721  _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3722  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3723  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(rsd.get()));
3724  return rsd.get();
3725  }
3726  return dst;
3727  }
3728 }
3729 
3730 
3731 //-----------------------------------------------------------------------------
3732 
3733 template<class EventHandler>
3734 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3735 {
3736  if(s.is_sub(_buf()))
3737  {
3738  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3739  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3740  if(s.len)
3741  memmove(s.str - 1, s.str, s.len);
3742  --s.str;
3743  s.str[s.len] = '\n';
3744  ++s.len;
3745  return s;
3746  }
3747  else
3748  {
3749  substr dst = _alloc_arena(s.len + 1, &s);
3750  if(s.len)
3751  memcpy(dst.str, s.str, s.len);
3752  dst[s.len] = '\n';
3753  return dst;
3754  }
3755 }
3756 
3757 template<class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3759 {
3760  _c4dbgpf("filtering block literal scalar: s={}", _prs(s));
3761  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3762  csubstr result;
3763  if(C4_LIKELY(r.valid()))
3764  {
3765  result = r.get();
3766  }
3767  else
3768  {
3769  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3770  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3771  // this can only happen when adding a single newline in clip mode.
3772  // so we shift left the scalar by one place
3773  result = _move_scalar_left_and_add_newline(s);
3774  }
3775  _c4dbgpf("filtering block literal scalar: success! s={}", _prs(result));
3776  return result;
3777 }
3778 
3779 
3780 //-----------------------------------------------------------------------------
3781 template<class EventHandler>
3782 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3783 {
3784  _c4dbgpf("filtering block folded scalar: s={}", _prs(s));
3785  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3786  csubstr result;
3787  if(C4_LIKELY(r.valid()))
3788  {
3789  result = r.get();
3790  }
3791  else
3792  {
3793  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3794  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3795  // this can only happen when adding a single newline in clip mode.
3796  // so we shift left the scalar by one place
3797  result = _move_scalar_left_and_add_newline(s);
3798  }
3799  _c4dbgpf("filtering block folded scalar: success! s={}", _prs(result));
3800  return result;
3801 }
3802 
3803 
3804 //-----------------------------------------------------------------------------
3805 
3806 template<class EventHandler>
3807 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3808 {
3809  if(sc.needs_filter)
3810  {
3811  if(m_options.scalar_filtering())
3812  {
3813  return _filter_scalar_plain(sc.scalar, indentation);
3814  }
3815  else
3816  {
3817  _c4dbgp("plain scalar left unfiltered");
3818  m_evt_handler->mark_key_scalar_unfiltered();
3819  }
3820  }
3821  else
3822  {
3823  _c4dbgp("plain scalar doesn't need filtering");
3824  }
3825  return sc.scalar;
3826 }
3827 
3828 template<class EventHandler>
3829 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3830 {
3831  if(sc.needs_filter)
3832  {
3833  if(m_options.scalar_filtering())
3834  {
3835  return _filter_scalar_plain(sc.scalar, indentation);
3836  }
3837  else
3838  {
3839  _c4dbgp("plain scalar left unfiltered");
3840  m_evt_handler->mark_val_scalar_unfiltered();
3841  }
3842  }
3843  else
3844  {
3845  _c4dbgp("plain scalar doesn't need filtering");
3846  }
3847  return sc.scalar;
3848 }
3849 
3850 
3851 //-----------------------------------------------------------------------------
3852 
3853 template<class EventHandler>
3854 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3855 {
3856  if(sc.needs_filter)
3857  {
3858  if(m_options.scalar_filtering())
3859  {
3860  return _filter_scalar_squot(sc.scalar);
3861  }
3862  else
3863  {
3864  _c4dbgp("squo key scalar left unfiltered");
3865  m_evt_handler->mark_key_scalar_unfiltered();
3866  }
3867  }
3868  else
3869  {
3870  _c4dbgp("squo key scalar doesn't need filtering");
3871  }
3872  return sc.scalar;
3873 }
3874 
3875 template<class EventHandler>
3876 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3877 {
3878  if(sc.needs_filter)
3879  {
3880  if(m_options.scalar_filtering())
3881  {
3882  return _filter_scalar_squot(sc.scalar);
3883  }
3884  else
3885  {
3886  _c4dbgp("squo val scalar left unfiltered");
3887  m_evt_handler->mark_val_scalar_unfiltered();
3888  }
3889  }
3890  else
3891  {
3892  _c4dbgp("squo val scalar doesn't need filtering");
3893  }
3894  return sc.scalar;
3895 }
3896 
3897 
3898 //-----------------------------------------------------------------------------
3899 
3900 template<class EventHandler>
3901 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3902 {
3903  if(sc.needs_filter)
3904  {
3905  if(m_options.scalar_filtering())
3906  {
3907  return _filter_scalar_dquot(sc.scalar);
3908  }
3909  else
3910  {
3911  _c4dbgp("dquo scalar left unfiltered");
3912  m_evt_handler->mark_key_scalar_unfiltered();
3913  }
3914  }
3915  else
3916  {
3917  _c4dbgp("dquo scalar doesn't need filtering");
3918  }
3919  return sc.scalar;
3920 }
3921 
3922 template<class EventHandler>
3923 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3924 {
3925  if(sc.needs_filter)
3926  {
3927  if(m_options.scalar_filtering())
3928  {
3929  return _filter_scalar_dquot(sc.scalar);
3930  }
3931  else
3932  {
3933  _c4dbgp("dquo scalar left unfiltered");
3934  m_evt_handler->mark_val_scalar_unfiltered();
3935  }
3936  }
3937  else
3938  {
3939  _c4dbgp("dquo scalar doesn't need filtering");
3940  }
3941  return sc.scalar;
3942 }
3943 
3944 
3945 //-----------------------------------------------------------------------------
3946 
3947 template<class EventHandler>
3948 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3949 {
3950  if(m_options.scalar_filtering())
3951  {
3952  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3953  }
3954  else
3955  {
3956  _c4dbgp("literal scalar left unfiltered");
3957  m_evt_handler->mark_key_scalar_unfiltered();
3958  }
3959  return sb.scalar;
3960 }
3961 
3962 template<class EventHandler>
3963 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3964 {
3965  if(m_options.scalar_filtering())
3966  {
3967  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3968  }
3969  else
3970  {
3971  _c4dbgp("literal scalar left unfiltered");
3972  m_evt_handler->mark_val_scalar_unfiltered();
3973  }
3974  return sb.scalar;
3975 }
3976 
3977 
3978 //-----------------------------------------------------------------------------
3979 
3980 template<class EventHandler>
3981 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3982 {
3983  if(m_options.scalar_filtering())
3984  {
3985  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3986  }
3987  else
3988  {
3989  _c4dbgp("folded scalar left unfiltered");
3990  m_evt_handler->mark_key_scalar_unfiltered();
3991  }
3992  return sb.scalar;
3993 }
3994 
3995 template<class EventHandler>
3996 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3997 {
3998  if(m_options.scalar_filtering())
3999  {
4000  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4001  }
4002  else
4003  {
4004  _c4dbgp("folded scalar left unfiltered");
4005  m_evt_handler->mark_val_scalar_unfiltered();
4006  }
4007  return sb.scalar;
4008 }
4009 
4010 
4011 //-----------------------------------------------------------------------------
4012 //-----------------------------------------------------------------------------
4013 //-----------------------------------------------------------------------------
4014 
4015 #ifdef RYML_DBG // !!! <----------------------------------
4016 
4017 template<class EventHandler>
4018 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4019 {
4020  ParserState *s = m_evt_handler->m_curr;
4021  char buf1_[64], buf2_[64], buf3_[64];
4022  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4023  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4024  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4025  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4026  s->flags |= on;
4027 }
4028 
4029 template<class EventHandler>
4030 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4031 {
4032  ParserState *s = m_evt_handler->m_curr;
4033  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4034  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4035  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4036  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4037  csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4038  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4039  _RYML_ASSERT_BASIC((on & off) == ParserFlag_t(0));
4040  s->flags &= ~off;
4041  s->flags |= on;
4042 }
4043 
4044 template<class EventHandler>
4045 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4046 {
4047  ParserState *s = m_evt_handler->m_curr;
4048  char buf1_[64], buf2_[64], buf3_[64];
4049  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4050  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4051  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4052  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4053  s->flags &= ~off;
4054 }
4055 
4056 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4057 {
4058  size_t pos = 0;
4059  bool gotone = false;
4060 
4061  #define _prflag(fl) \
4062  if((flags & fl) == (fl)) \
4063  { \
4064  if(gotone) \
4065  { \
4066  if(pos + 1 < buf.len) \
4067  buf[pos] = '|'; \
4068  ++pos; \
4069  } \
4070  csubstr fltxt = #fl; \
4071  if(pos + fltxt.len <= buf.len) \
4072  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4073  pos += fltxt.len; \
4074  gotone = true; \
4075  }
4076 
4077  _prflag(RTOP);
4078  _prflag(RUNK);
4079  _prflag(RMAP);
4080  _prflag(RSEQ);
4081  _prflag(RFLOW);
4082  _prflag(RBLCK);
4083  _prflag(QMRK);
4084  _prflag(RKEY);
4085  _prflag(RVAL);
4086  _prflag(RKCL);
4087  _prflag(RNXT);
4088  _prflag(SSCL);
4089  _prflag(QSCL);
4090  _prflag(RSET);
4091  _prflag(RDOC);
4092  _prflag(NDOC);
4093  _prflag(USTY);
4094  _prflag(RSEQIMAP);
4095 
4096  #undef _prflag
4097 
4098  if(pos == 0)
4099  if(buf.len > 0)
4100  buf[pos++] = '0';
4101 
4102  _RYML_CHECK_BASIC(pos <= buf.len);
4103 
4104  return buf.first(pos);
4105 }
4106 
4107 #endif // RYML_DBG !!! <----------------------------------
4108 
4109 
4110 //-----------------------------------------------------------------------------
4111 //-----------------------------------------------------------------------------
4112 //-----------------------------------------------------------------------------
4113 
4114 template<class EventHandler>
4115 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
4116 {
4117  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4118  return _buf().sub(loc.offset);
4119 }
4120 
4121 template<class EventHandler>
4122 Location ParseEngine<EventHandler>::val_location(const char *val) const
4123 {
4124  if(C4_UNLIKELY(val == nullptr))
4125  return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4126  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4127  // NOTE: if any of these checks fails, the parser needs to be
4128  // instantiated with locations enabled.
4129  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4130  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4131  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4132  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4133  // NOTE: the pointer needs to belong to the buffer that was used to parse.
4134  csubstr src = _buf();
4135  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4136  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4137  // ok. search the first stored newline after the given ptr
4138  using lineptr_type = size_t const* C4_RESTRICT;
4139  lineptr_type lineptr = nullptr;
4140  size_t offset = (size_t)(val - src.begin());
4141  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4142  {
4143  // just do a linear search if the size is small.
4144  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4145  {
4146  if(*curr > offset)
4147  {
4148  lineptr = curr;
4149  break;
4150  }
4151  }
4152  }
4153  else
4154  {
4155  // do a bisection search if the size is not small.
4156  //
4157  // We could use std::lower_bound but this is simple enough and
4158  // spares the costly include of <algorithm>.
4159  size_t count = m_newline_offsets_size;
4160  lineptr = m_newline_offsets;
4161  while(count)
4162  {
4163  size_t step = count >> 1;
4164  lineptr_type it = lineptr + step;
4165  if(*it < offset)
4166  {
4167  lineptr = ++it;
4168  count -= step + 1;
4169  }
4170  else
4171  {
4172  count = step;
4173  }
4174  }
4175  }
4176  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4177  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4178  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4179  Location loc;
4180  loc.name = m_evt_handler->m_curr->pos.name;
4181  loc.offset = offset;
4182  loc.line = (size_t)(lineptr - m_newline_offsets);
4183  if(lineptr > m_newline_offsets)
4184  loc.col = (offset - *(lineptr-1) - 1u);
4185  else
4186  loc.col = offset;
4187  return loc;
4188 }
4189 
4190 template<class EventHandler>
4191 void ParseEngine<EventHandler>::_prepare_locations()
4192 {
4193  csubstr src = _buf();
4194  size_t numnewlines = 1u + src.count('\n');
4195  _resize_locations(numnewlines);
4196  m_newline_offsets_size = 0;
4197  for(size_t i = 0; i < src.len; i++)
4198  if(src.str[i] == '\n')
4199  m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4200  m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4201  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4202 }
4203 
4204 template<class EventHandler>
4205 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4206 {
4207  numnewlines = numnewlines >= 16 ? numnewlines : 16;
4208  if(numnewlines > m_newline_offsets_capacity)
4209  {
4210  if(m_newline_offsets)
4211  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4212  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4213  m_newline_offsets_capacity = numnewlines;
4214  }
4215 }
4216 
4217 template<class EventHandler>
4218 bool ParseEngine<EventHandler>::_locations_dirty() const
4219 {
4220  return !m_newline_offsets_size;
4221 }
4222 
4223 
4224 //-----------------------------------------------------------------------------
4225 //-----------------------------------------------------------------------------
4226 //-----------------------------------------------------------------------------
4227 
4228 template<class EventHandler>
4229 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4230 {
4231  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4232  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4233  {
4234  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4235  {
4236  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4237  _skipchars(" \t");
4238  }
4239  // comments
4240  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4241  {
4242  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4243  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4244  }
4245  }
4246 }
4247 
4248 
4249 template<class EventHandler>
4250 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4251 {
4252  _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4253  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4254  if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4255  {
4256  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4257  _c4dbgpf("flow: after indentation={}", _prs(trimmed));
4258  if(trimmed.len && trimmed.triml(" \t").len)
4259  {
4260  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4261  _c4err("bad indentation");
4262  }
4263  }
4264 }
4265 
4266 template<class EventHandler>
4267 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4268 {
4269  const size_t mark = m_evt_handler->m_curr->pos.offset;
4270  const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4271  _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4272  if(firstpos != npos)
4273  {
4274  _c4dbgp("block: non empty line");
4275  _line_progressed(firstpos);
4276  return mark;
4277  }
4278  else
4279  {
4280  _c4dbgp("block: rest of line is whitespace");
4281  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4282  return npos;
4283  }
4284 }
4285 
4286 template<class EventHandler>
4287 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4288 {
4289  _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4290  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4291  if(end_mark != start_mark)
4292  {
4293  csubstr leading = _buf().range(start_mark, end_mark);
4294  _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading, true));
4295  if(leading.find('\t') != npos)
4296  _c4err("invalid tab character to the left");
4297  }
4298 }
4299 
4300 
4301 //-----------------------------------------------------------------------------
4302 
4303 
4304 template<class EventHandler>
4305 void ParseEngine<EventHandler>::_handle_colon()
4306 {
4307  size_t curr = m_evt_handler->m_curr->pos.line;
4308  if(C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon))
4309  {
4310  _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4311  _c4err("two colons on same line");
4312  }
4313  _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4314  m_prev_colon = curr;
4315 }
4316 
4317 template<class EventHandler>
4318 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4319 {
4320  _c4dbgpf("store annotation[{}]: {}", dst->num_entries, _prs(str));
4321  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4322  dst->annotations[dst->num_entries].str = str;
4323  dst->annotations[dst->num_entries].indentation = {};
4324  dst->annotations[dst->num_entries].line = {};
4325  dst->annotations[dst->num_entries].orig = {};
4326  ++dst->num_entries;
4327 }
4328 
4329 template<class EventHandler>
4330 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4331 {
4332  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4333  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4334  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4335  {
4336  _c4err("parse error");
4337  }
4338  dst->annotations[dst->num_entries].str = str;
4339  dst->annotations[dst->num_entries].indentation = indentation;
4340  dst->annotations[dst->num_entries].line = line;
4341  dst->annotations[dst->num_entries].orig = {};
4342  ++dst->num_entries;
4343 }
4344 
4345 template<class EventHandler>
4346 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4347 {
4348  _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4349  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4350  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4351  {
4352  _c4err("parse error");
4353  }
4354  dst->annotations[dst->num_entries].str = str;
4355  dst->annotations[dst->num_entries].indentation = indentation;
4356  dst->annotations[dst->num_entries].line = line;
4357  dst->annotations[dst->num_entries].orig = orig;
4358  ++dst->num_entries;
4359 }
4360 
4361 template<class EventHandler>
4362 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4363 {
4364  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4365 }
4366 
4367 template<class EventHandler>
4368 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4369 {
4370  if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4371  return false;
4372  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4373  if(m_pending_tags.num_entries)
4374  {
4375  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4376  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4377  {
4378  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4379  _clear_annotations(&m_pending_tags);
4380  }
4381  else
4382  {
4383  _c4err("too many tags");
4384  }
4385  }
4386  if(m_pending_anchors.num_entries)
4387  {
4388  _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4389  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4390  {
4391  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4392  _clear_annotations(&m_pending_anchors);
4393  }
4394  else
4395  {
4396  _c4err("too many anchors");
4397  }
4398  }
4399  m_evt_handler->set_key_scalar_plain_empty();
4400  m_evt_handler->set_val_scalar_plain_empty();
4401  return true;
4402 }
4403 
4404 template<class EventHandler>
4405 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4406 {
4407  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4408  if(m_pending_tags.num_entries)
4409  {
4410  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4411  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4412  {
4413  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4414  _clear_annotations(&m_pending_tags);
4415  }
4416  else
4417  {
4418  _c4err("too many tags"); // LCOV_EXCL_LINE
4419  }
4420  }
4421  if(m_pending_anchors.num_entries)
4422  {
4423  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4424  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4425  {
4426  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4427  _clear_annotations(&m_pending_anchors);
4428  }
4429  else
4430  {
4431  _c4err("too many anchors"); // LCOV_EXCL_LINE
4432  }
4433  }
4434 }
4435 
4436 template<class EventHandler>
4437 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4438 {
4439  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4440  if(m_pending_tags.num_entries)
4441  {
4442  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4443  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4444  {
4445  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4446  _clear_annotations(&m_pending_tags);
4447  }
4448  else
4449  {
4450  _c4err("too many tags");
4451  }
4452  }
4453  if(m_pending_anchors.num_entries)
4454  {
4455  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4456  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4457  {
4458  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4459  _clear_annotations(&m_pending_anchors);
4460  }
4461  else
4462  {
4463  _c4err("too many anchors");
4464  }
4465  }
4466 }
4467 
4468 template<class EventHandler>
4469 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4470 {
4471  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4472  if(m_pending_tags.num_entries == 2)
4473  {
4474  _c4dbgp("2 tags, setting entry 0");
4475  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4476  }
4477  else if(m_pending_tags.num_entries == 1)
4478  {
4479  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4480  if(m_pending_tags.annotations[0].line < current_line)
4481  {
4482  _c4dbgp("...tag is for the map. setting it.");
4483  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4484  _clear_annotations(&m_pending_tags);
4485  }
4486  }
4487  //
4488  if(m_pending_anchors.num_entries == 2)
4489  {
4490  _c4dbgp("2 anchors, setting entry 0");
4491  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4492  }
4493  else if(m_pending_anchors.num_entries == 1)
4494  {
4495  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4496  if(m_pending_anchors.annotations[0].line < current_line)
4497  {
4498  _c4dbgp("...anchor is for the map. setting it.");
4499  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4500  _clear_annotations(&m_pending_anchors);
4501  }
4502  }
4503 }
4504 
4505 template<class EventHandler>
4506 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4507 {
4508  _c4dbgp("annotations_before_start_mapblck_as_key");
4509  switch(m_pending_tags.num_entries)
4510  {
4511  case 1u:
4512  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4513  if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4514  {
4515  _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4516  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4517  _clear_annotations(&m_pending_tags);
4518  }
4519  break;
4520  case 2u:
4521  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4522  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4523  break;
4524  }
4525  switch(m_pending_anchors.num_entries)
4526  {
4527  case 1u:
4528  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4529  if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4530  {
4531  _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4532  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4533  _clear_annotations(&m_pending_anchors);
4534  }
4535  break;
4536  case 2u:
4537  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4538  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4539  break;
4540  }
4541 }
4542 
4543 template<class EventHandler>
4544 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4545 {
4546  _c4dbgp("annotations_after_start_mapblck");
4547  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4548  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4549  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4550  {
4551  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4552  switch(m_pending_tags.num_entries)
4553  {
4554  case 1u:
4555  _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4556  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4557  _clear_annotations(&m_pending_tags);
4558  break;
4559  case 2u:
4560  _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4561  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4562  _clear_annotations(&m_pending_tags);
4563  break;
4564  }
4565  switch(m_pending_anchors.num_entries)
4566  {
4567  case 1u:
4568  _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4569  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4570  _clear_annotations(&m_pending_anchors);
4571  break;
4572  case 2u:
4573  _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4574  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4575  _clear_annotations(&m_pending_anchors);
4576  break;
4577  }
4578  }
4579  _set_indentation(key_indentation);
4580 }
4581 
4582 template<class EventHandler>
4583 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4584 {
4585  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4586  // select the left-most annotation on the max line
4587  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4588  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4589  {
4590  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4591  if(ann.line > curr->line)
4592  curr = &ann;
4593  else if(ann.indentation < curr->indentation)
4594  curr = &ann;
4595  }
4596  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4597  {
4598  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4599  if(ann.line > curr->line)
4600  curr = &ann;
4601  else if(ann.indentation < curr->indentation)
4602  curr = &ann;
4603  }
4604  return curr->line < val_line ? val_indentation : curr->indentation;
4605 }
4606 
4607 template<class EventHandler>
4608 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4609 {
4610  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4611  m_evt_handler->set_key_ref(alias);
4612  else
4613  _c4err("aliases cannot have anchors or tags");
4614 }
4615 
4616 template<class EventHandler>
4617 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4618 {
4619  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4620  m_evt_handler->set_val_ref(alias);
4621  else
4622  _c4err("aliases cannot have anchors or tags");
4623 }
4624 
4625 template<class EventHandler>
4626 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4627 {
4628  _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4629  _c4assert(tag.is_sub(_buf()));
4630  TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4631  if(ret)
4632  {
4633  _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4634  return ret.resolved;
4635  }
4636  _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4637  size_t bufsz = 0;
4638  substr buf = m_evt_handler->arena_rem();
4639  TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4640  csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4641  m_evt_handler->m_curr->pos,
4642  m_evt_handler->m_stack.m_callbacks);
4643  _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4644  _c4assert((bufsz > buf.len) == (!ttag.str));
4645  _c4assert(!!bufsz == (ttag.len == bufsz));
4646  // try again if the arena size was not enough
4647  if(!ttag.str)
4648  {
4649  _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4650  _c4assert(ttag.len == bufsz);
4651  buf = _alloc_arena(bufsz, &tag);
4652  if(buf.str) // the alloc may fail eg with the ints handler
4653  {
4654  ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4655  m_evt_handler->m_curr->pos,
4656  m_evt_handler->m_stack.m_callbacks);
4657  }
4658  _c4assert(ttag.len == bufsz);
4659  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4660  }
4661  else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4662  {
4663  _c4dbgp("tag required arena. update size");
4664  _c4assert(ttag.len == bufsz);
4665  _c4assert(ttag.is_sub(buf));
4666  (void)_alloc_arena(bufsz);
4667  }
4668  C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4669  if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4670  {
4671  _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4672  // is the resolved tag not in any of those buffers?
4673  if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4674  {
4675  _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4676  buf = _alloc_arena(ttag.len, &tag);
4677  if(buf.str) // the alloc may fail eg with the ints handler
4678  memcpy(buf.str, ttag.str, ttag.len);
4679  ttag.str = buf.str; // keep the current len!
4680  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4681  }
4682  }
4683  C4_SUPPRESS_WARNING_MSVC_POP
4684  _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4685  _c4assert(ttag.len > 0);
4686  // cache the hard-earned result!
4687  m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4688  return ttag;
4689 }
4690 
4691 template<class EventHandler>
4692 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4693 {
4694  _c4assert(directive->begins_with("%YAML"));
4695  size_t version_start = directive->first_not_of(" \t", 5);
4696  if(version_start != npos)
4697  {
4698  csubstr digits = "0123456789";
4699  size_t major_end = directive->first_not_of(digits, version_start);
4700  if(major_end != npos && directive->str[major_end] == '.') // single dot
4701  {
4702  size_t minor_end = directive->first_not_of(digits, major_end + 1);
4703  if(minor_end == npos)
4704  minor_end = directive->len;
4705  _set_first_strict(*directive, minor_end);
4706  *version = directive->range(version_start, minor_end);
4707  _c4dbgpf("%YAML: version={} full={}", *version, _prs(*directive, true));
4708  return true;
4709  }
4710  }
4711  return false;
4712 }
4713 
4714 template<class EventHandler>
4715 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4716 {
4717  _c4assert(directive->begins_with("%TAG"));
4718  csubstr whitespace = " \t";
4719  size_t handle_start = directive->first_not_of(whitespace, 4);
4720  if(handle_start != npos && directive->str[handle_start] == '!')
4721  {
4722  size_t handle_end = directive->first_of(whitespace, handle_start);
4723  if(handle_end != npos)
4724  {
4725  size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4726  if(prefix_start != npos)
4727  {
4728  size_t prefix_end = directive->first_of(whitespace, prefix_start);
4729  if(prefix_end == npos)
4730  prefix_end = directive->len;
4731  _set_first_strict(*directive, prefix_end);
4732  *handle = directive->range(handle_start, handle_end);
4733  *prefix = directive->range(prefix_start, prefix_end);
4734  _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive, true));
4735  if(is_valid_tag_handle(*handle))
4736  return true;
4737  }
4738  }
4739  }
4740  return false;
4741 }
4742 
4743 template<class EventHandler>
4744 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4745 {
4746  _c4dbgpf("handle_directive: rem={}", _prs(directive, true));
4747  _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4748  _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4749  const char *err = nullptr;
4750  csubstr rem;
4751  size_t pos;
4752  auto isdirective = [](csubstr str, csubstr dir) {
4753  if(str.begins_with(dir))
4754  {
4755  csubstr rest = str.sub(dir.len);
4756  return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4757  }
4758  return false;
4759  };
4760  if(isdirective(directive, "%TAG"))
4761  {
4762  csubstr handle;
4763  csubstr prefix;
4764  if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4765  {
4766  err = "invalid %TAG directive";
4767  goto directive_error; // NOLINT
4768  }
4769  m_evt_handler->add_directive_tag(handle, prefix);
4770  }
4771  else if(isdirective(directive, "%YAML"))
4772  {
4773  csubstr version;
4774  if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &version)))
4775  {
4776  err = "invalid %YAML directive";
4777  goto directive_error; // NOLINT
4778  }
4779  if(C4_UNLIKELY(m_has_directives_yaml))
4780  {
4781  err = "multiple %YAML directives";
4782  goto directive_error; // NOLINT
4783  }
4784  m_has_directives_yaml = true;
4785  m_evt_handler->add_directive_yaml(version);
4786  }
4787  m_has_directives = true;
4788  rem = m_evt_handler->m_curr->line_contents.rem;
4789  pos = rem.first_not_of(" \t", directive.len);
4790  pos = pos != npos ? pos : rem.len;
4791  _line_progressed(pos);
4792  rem = rem.sub(pos);
4793  _c4dbgpf("handle_directive: rest={}", _prs(rem));
4794  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
4795  {
4796  err = "invalid tokens after directive";
4797  goto directive_error; // NOLINT
4798  }
4799 directive_error:
4800  if(C4_UNLIKELY(err != nullptr))
4801  _c4err(err);
4802 }
4803 
4804 template<class EventHandler>
4805 bool ParseEngine<EventHandler>::_handle_bom()
4806 {
4807  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4808  if(rem.len)
4809  {
4810  const csubstr rest = rem.sub(1);
4811  // https://yaml.org/spec/1.2.2/#52-character-encodings
4812  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4813  if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4814  {
4815  _c4dbgp("byte order mark: UTF32BE");
4816  _handle_bom(UTF32BE);
4817  _line_progressed(4);
4818  m_bom_len = 4;
4819  return true;
4820  }
4821  else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4822  {
4823  _c4dbgp("byte order mark: UTF32LE");
4824  _handle_bom(UTF32LE);
4825  _line_progressed(4);
4826  m_bom_len = 4;
4827  return true;
4828  }
4829  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4830  {
4831  _c4dbgp("byte order mark: UTF16BE");
4832  _handle_bom(UTF16BE);
4833  _line_progressed(2);
4834  m_bom_len = 2;
4835  return true;
4836  }
4837  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4838  {
4839  _c4dbgp("byte order mark: UTF16LE");
4840  _handle_bom(UTF16LE);
4841  _line_progressed(2);
4842  m_bom_len = 2;
4843  return true;
4844  }
4845  else if(rem.begins_with("\xef\xbb\xbf"))
4846  {
4847  _c4dbgp("byte order mark: UTF8");
4848  _handle_bom(UTF8);
4849  _line_progressed(3);
4850  m_bom_len = 3;
4851  return true;
4852  }
4853  #undef _rymlisascii
4854  }
4855  return false;
4856 }
4857 
4858 template<class EventHandler>
4859 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4860 {
4861  if(m_encoding == NOBOM)
4862  {
4863  if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4864  m_encoding = enc;
4865  else
4866  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4867  }
4868  else if(enc != m_encoding)
4869  {
4870  _c4err("byte order mark can only be set once");
4871  }
4872 }
4873 
4874 
4875 //-----------------------------------------------------------------------------
4876 
/** Handle the contents of a flow sequence in the JSON-specific path.
 *
 * This is a goto-driven loop over the remaining input. On each pass
 * exactly one of two states is active (asserted below):
 *  - RVAL: a value is expected: a double-quoted scalar, a nested '['
 *    or '{', a closing ']' (after a trailing comma), or a scalar
 *    accepted by _scan_scalar_seq_json();
 *  - RNXT: a value was read, and only ',' or ']' may follow.
 *
 * The function returns (label seqjson_finish) when a child map is
 * opened, or when a ']' closes the sequence - except for the ']' read
 * in RVAL state, where the loop goes on if the state left by
 * _end_seq_flow() is still a flow sequence. Any other input is a
 * parse error, as is reaching the end of the file.
 */
4877 template<class EventHandler>
4878 void ParseEngine<EventHandler>::_handle_seq_json()
4879 {
4880 seqjson_start:
4881  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4882 
      // state invariants: in a flow sequence, not reading a key, and
      // exactly one of RVAL / RNXT is set
4883  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4884  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4885  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4886  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4887  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4888 
4889  _handle_flow_skip_whitespace();
4890  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: go straight to the line-advance logic
4891  if(!rem.len)
4892  goto seqjson_again;
4893 
4894  if(has_any(RVAL))
4895  {
4896  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4897  const char first = rem.str[0];
      // NOTE(review): the debug tag below reads "mapjson" although this
      // is the seq handler; looks like a copy-paste leftover
4898  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4899  switch(first)
4900  {
4901  case '"':
4902  {
4903  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4904  ScannedScalar sc = _scan_scalar_dquot();
4905  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4906  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4907  addrem_flags(RNXT, RVAL);
4908  break;
4909  }
4910  case '[':
4911  {
4912  _c4dbgp("seqjson[RVAL]: start child seqjson");
      // the flags are switched to RNXT before begin_seq_val_flow() and
      // back to RVAL after it; presumably the handler keeps the earlier
      // flags for this level, so it resumes in RNXT once the child
      // closes - TODO confirm in the handler
4913  addrem_flags(RNXT, RVAL);
4914  m_evt_handler->begin_seq_val_flow();
4915  addrem_flags(RVAL, RNXT);
4916  _line_progressed(1);
4917  break;
4918  }
4919  case '{':
4920  {
4921  _c4dbgp("seqjson[RVAL]: start child mapjson");
      // same RNXT-before-begin pattern as for '['. Afterwards the
      // current state is a map expecting a key, so leave this function
4922  addrem_flags(RNXT, RVAL);
4923  m_evt_handler->begin_map_val_flow();
4924  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4925  _line_progressed(1);
4926  goto seqjson_finish;
4927  }
4928  case ']': // this happens on a trailing comma like ", ]"
4929  {
4930  _c4dbgp("seqjson[RVAL]: end!");
4931  rem_flags(RSEQ);
4932  _end_seq_flow();
4933  _line_progressed(1);
      // keep looping only if the state left by _end_seq_flow() is
      // again a flow sequence
4934  if(!has_all(RSEQ|RFLOW))
4935  goto seqjson_finish;
4936  break;
4937  }
4938  default:
4939  {
      // anything else must be a scalar accepted by
      // _scan_scalar_seq_json(); otherwise it is an error
4940  ScannedScalar sc;
4941  if(_scan_scalar_seq_json(&sc))
4942  {
4943  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4944  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4945  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4946  addrem_flags(RNXT, RVAL);
4947  }
4948  else
4949  {
4950  _c4err("parse error");
4951  }
4952  }
4953  }
4954  }
4955  else // RNXT
4956  {
4957  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4958  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4959  const char first = rem.str[0];
      // NOTE(review): same "mapjson" debug tag as in the RVAL branch
4960  _c4dbgpf("mapjson[RNXT]: '{}'", first);
      // after a value, only ',' or ']' are accepted
4961  switch(first)
4962  {
4963  case ',':
4964  {
4965  _c4dbgp("seqjson[RNXT]: expect next val");
4966  addrem_flags(RVAL, RNXT);
4967  m_evt_handler->add_sibling();
4968  _line_progressed(1);
4969  break;
4970  }
4971  case ']':
4972  {
4973  _c4dbgp("seqjson[RNXT]: end!");
4974  _end_seq_flow();
4975  _line_progressed(1);
4976  goto seqjson_finish;
4977  }
4978  default:
4979  _c4err("parse error");
4980  }
4981  }
4982 
4983  seqjson_again:
4984  _c4dbgt("seqjson: go again", 0);
      // when the current line is consumed, load the next one; running out
      // of input here means the closing ']' never came
4985  if(_finished_line())
4986  {
4987  if(C4_LIKELY(!_finished_file()))
4988  {
4989  _line_ended();
4990  _scan_line();
4991  _c4dbgnextline();
4992  }
4993  else
4994  {
4995  _c4err("missing terminating ]");
4996  }
4997  }
4998  goto seqjson_start;
4999 
5000  seqjson_finish:
5001  _c4dbgp("seqjson: finish");
5002 }
5003 
5004 
5005 //-----------------------------------------------------------------------------
5006 
/** Handle the contents of a flow map in the JSON-specific path.
 *
 * This is a goto-driven loop over the remaining input. On each pass
 * exactly one of four states is active (asserted below):
 *  - RKEY: a key is expected: a double-quoted scalar, or a closing '}'
 *    (after a trailing comma);
 *  - RKCL: the key was read, and only ':' may follow;
 *  - RVAL: a value is expected: a double-quoted scalar, a nested '['
 *    or '{', or a scalar accepted by _scan_scalar_map_json();
 *  - RNXT: the value was read, and only ',' or '}' may follow.
 *
 * The function returns (label mapjson_finish) when a '}' closes the map
 * or when a child sequence is opened; a child map is handled by this
 * same loop. Any other input is a parse error, as is reaching the end
 * of the file.
 */
5007 template<class EventHandler>
5008 void ParseEngine<EventHandler>::_handle_map_json()
5009 {
5010 mapjson_start:
5011  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5012 
      // state invariants: in a flow map, no explicit-key marker, and
      // exactly one of RKEY / RKCL / RVAL / RNXT is set
5013  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5014  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5015  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5016  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
5017  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5018 
5019  _handle_flow_skip_whitespace();
5020  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: go straight to the line-advance logic
5021  if(!rem.len)
5022  goto mapjson_again;
5023 
5024  if(has_any(RKEY))
5025  {
5026  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5027  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5028  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5029  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5030  const char first = rem.str[0];
5031  _c4dbgpf("mapjson[RKEY]: '{}'", first);
      // only double-quoted keys are accepted here
5032  switch(first)
5033  {
5034  case '"':
5035  {
5036  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5037  ScannedScalar sc = _scan_scalar_dquot();
5038  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5039  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5040  addrem_flags(RKCL, RKEY);
5041  break;
5042  }
5043  case '}': // this happens on a trailing comma like ", }"
5044  {
5045  _c4dbgp("mapjson[RKEY]: end!");
5046  _end_map_flow();
5047  _line_progressed(1);
5048  goto mapjson_finish;
5049  }
5050  default:
5051  _c4err("parse error");
5052  }
5053  }
5054  else if(has_any(RVAL))
5055  {
5056  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5057  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5058  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5059  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5060  const char first = rem.str[0];
5061  _c4dbgpf("mapjson[RVAL]: '{}'", first);
5062  switch(first)
5063  {
5064  case '"':
5065  {
5066  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5067  ScannedScalar sc = _scan_scalar_dquot();
5068  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5069  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5070  addrem_flags(RNXT, RVAL);
5071  break;
5072  }
5073  case '[':
5074  {
5075  _c4dbgp("mapjson[RVAL]: start val seqjson");
      // the flags are switched to RNXT before begin_seq_val_flow();
      // presumably the handler keeps them for this level, so it resumes
      // in RNXT once the child closes - TODO confirm in the handler.
      // The child takes the indentation of its parent state, and becomes
      // a sequence expecting a value, so leave this function
5076  addrem_flags(RNXT, RVAL);
5077  m_evt_handler->begin_seq_val_flow();
5078  _set_indentation(m_evt_handler->m_parent->indref);
5079  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5080  _line_progressed(1);
5081  goto mapjson_finish;
5082  }
5083  case '{':
5084  {
5085  _c4dbgp("mapjson[RVAL]: start val mapjson");
      // same RNXT-before-begin pattern as for '['; the child map starts
      // out expecting a key
5086  addrem_flags(RNXT, RVAL);
5087  m_evt_handler->begin_map_val_flow();
5088  _set_indentation(m_evt_handler->m_parent->indref);
5089  addrem_flags(RKEY, RNXT);
5090  _line_progressed(1);
5091  // keep going in this function
5092  break;
5093  }
5094  default:
5095  {
      // anything else must be a scalar accepted by
      // _scan_scalar_map_json(); otherwise it is an error
5096  ScannedScalar sc;
5097  if(_scan_scalar_map_json(&sc))
5098  {
5099  _c4dbgp("mapjson[RVAL]: plain scalar.");
5100  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5101  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5102  addrem_flags(RNXT, RVAL);
5103  }
5104  else
5105  {
5106  _c4err("parse error");
5107  }
5108  break;
5109  }
5110  }
5111  }
5112  else if(has_any(RKCL)) // read the key colon
5113  {
5114  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5115  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5116  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5117  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5118  const char first = rem.str[0];
5119  _c4dbgpf("mapjson[RKCL]: '{}'", first);
5120  if(first == ':')
5121  {
5122  _c4dbgp("mapjson[RKCL]: found the colon");
5123  addrem_flags(RVAL, RKCL);
5124  _line_progressed(1);
5125  }
5126  else
5127  {
5128  _c4err("parse error");
5129  }
5130  }
5131  else if(has_any(RNXT))
5132  {
5133  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5134  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5135  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5136  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5137  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
      // after a value, only ',' or '}' are accepted
5138  if(rem.begins_with(','))
5139  {
5140  _c4dbgp("mapjson[RNXT]: expect next keyval");
5141  m_evt_handler->add_sibling();
5142  addrem_flags(RKEY, RNXT);
5143  _line_progressed(1);
5144  }
5145  else if(rem.begins_with('}'))
5146  {
5147  _c4dbgp("mapjson[RNXT]: end!");
5148  _end_map_flow();
5149  _line_progressed(1);
5150  goto mapjson_finish;
5151  }
5152  else
5153  {
5154  _c4err("parse error"); // LCOV_EXCL_LINE
5155  }
5156  }
5157 
5158  mapjson_again:
5159  _c4dbgt("mapjson: go again", 0);
      // when the current line is consumed, load the next one; running out
      // of input here means the closing '}' never came
5160  if(_finished_line())
5161  {
5162  if(C4_LIKELY(!_finished_file()))
5163  {
5164  _line_ended();
5165  _scan_line();
5166  _c4dbgnextline();
5167  }
5168  else
5169  {
5170  _c4err("missing terminating }");
5171  }
5172  }
5173  goto mapjson_start;
5174 
5175  mapjson_finish:
5176  _c4dbgp("mapjson: finish");
5177 }
5178 
5179 
5180 //-----------------------------------------------------------------------------
5181 
/** Handle the RSEQIMAP state, which (going by the flag name) is an
 * implicit map nested in a flow sequence, holding a single key/value
 * pair.
 *
 * This is a goto-driven loop over the remaining input. On each pass
 * exactly one of four states is active (asserted below):
 *  - QMRK: the key is being read;
 *  - RKCL: the key was read, and a ':' is expected;
 *  - RVAL: the value is being read;
 *  - RNXT: the value was read, and a ',' or ']' is expected.
 *
 * Every path that completes the pair calls _end_map_flow() and returns
 * (label seqimap_finish). Opening a child container as key or value
 * also returns, after removing RSEQIMAP from the current flags. The
 * ',' or ']' that terminates the pair is never consumed here. Any
 * unexpected input is a parse error, as is reaching the end of the file.
 */
5182 template<class EventHandler>
5183 void ParseEngine<EventHandler>::_handle_seq_imap()
5184 {
5185 seqimap_start:
5186  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5187 
      // state invariants: RSEQIMAP is set, RKEY is not, exactly one of
      // RVAL / RNXT / QMRK / RKCL is set, and the state stack has at least
      // three levels
5188  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5189  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5190  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
5191  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5192  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5193 
5194  _handle_flow_skip_whitespace();
5195  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: go straight to the line-advance logic
5196  if(!rem.len)
5197  goto seqimap_again;
5198 
5199  if(has_any(RVAL))
5200  {
5201  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5202  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5203  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5204  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5205  const char first = rem.str[0];
5206  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5207  ScannedScalar sc;
      // a scalar value completes the pair: set it, close the map and return
5208  if(first == '\'')
5209  {
5210  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5211  sc = _scan_scalar_squot();
5212  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5213  _handle_annotations_before_blck_val_scalar();
5214  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5215  _end_map_flow();
5216  goto seqimap_finish;
5217  }
5218  else if(first == '"')
5219  {
5220  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5221  sc = _scan_scalar_dquot();
5222  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5223  _handle_annotations_before_blck_val_scalar();
5224  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5225  _end_map_flow();
5226  goto seqimap_finish;
5227  }
5228  // block scalars (ie | and >) cannot appear in flow containers
5229  else if(_scan_scalar_plain_map_flow(&sc))
5230  {
5231  _c4dbgp("seqimap[RVAL]: it's a scalar.");
5232  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5233  _handle_annotations_before_blck_val_scalar();
5234  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5235  _end_map_flow();
5236  goto seqimap_finish;
5237  }
5238  else if(first == '[')
5239  {
5240  _c4dbgp("seqimap[RVAL]: start child seqflow");
      // the flags are switched to RNXT before begin_seq_val_flow();
      // presumably the handler keeps them for this level, so it resumes
      // in RNXT once the child closes - TODO confirm in the handler.
      // The child is a plain flow sequence: RSEQIMAP is removed
5241  addrem_flags(RNXT, RVAL);
5242  _handle_annotations_before_blck_val_scalar();
5243  m_evt_handler->begin_seq_val_flow();
5244  addrem_flags(RVAL, RNXT|RSEQIMAP);
5245  _set_indentation(m_evt_handler->m_parent->indref);
5246  _line_progressed(1);
5247  goto seqimap_finish;
5248  }
5249  else if(first == '{')
5250  {
5251  _c4dbgp("seqimap[RVAL]: start child mapflow");
      // same pattern as for '[': the child is a flow map expecting a key
5252  addrem_flags(RNXT, RVAL);
5253  _handle_annotations_before_blck_val_scalar();
5254  m_evt_handler->begin_map_val_flow();
5255  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5256  _set_indentation(m_evt_handler->m_parent->indref);
5257  _line_progressed(1);
5258  goto seqimap_finish;
5259  }
5260  else if(first == ',' || first == ']')
5261  {
      // the pair ends without a value: use an empty one. The ',' / ']'
      // is not consumed here
5262  _c4dbgp("seqimap[RVAL]: finish without val.");
5263  _handle_annotations_before_blck_val_scalar();
5264  m_evt_handler->set_val_scalar_plain_empty();
5265  _end_map_flow();
5266  goto seqimap_finish;
5267  }
5268  else if(first == '*')
5269  {
      // an alias as value: the pair is closed on a later pass, in RNXT
5270  csubstr ref = _scan_ref_seq();
5271  _c4dbgpf("seqimap[RVAL]: ref! {}", _prs(ref));
5272  _handle_valref(ref);
5273  addrem_flags(RNXT, RVAL);
5274  }
5275  else if(first == '&')
5276  {
      // queue the anchor; the state stays RVAL for the next pass
5277  csubstr anchor = _scan_anchor();
5278  _c4dbgpf("seqimap[RVAL]: anchor! {}", _prs(anchor));
5279  _add_annotation(&m_pending_anchors, anchor);
5280  }
5281  else if(first == '!')
5282  {
      // queue the tag; the state stays RVAL for the next pass
5283  csubstr tag = _scan_tag();
5284  _c4dbgpf("seqimap[RVAL]: tag! {}", _prs(tag));
5285  _add_annotation(&m_pending_tags, tag);
5286  }
5287  else
5288  {
5289  _c4err("parse error"); // LCOV_EXCL_LINE
5290  }
5291  }
5292  else if(has_any(RNXT))
5293  {
5294  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5295  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5296  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5297  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5298  const char first = rem.str[0];
5299  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
      // the value is complete: only ',' or ']' may follow, and it is not
      // consumed here
5300  if(first == ',' || first == ']')
5301  {
5302  // we may get here because a map or a seq started and we
5303  // return later
5304  _c4dbgp("seqimap: done");
5305  _end_map_flow();
5306  goto seqimap_finish;
5307  }
5308  else
5309  {
5310  _c4err("parse error"); // LCOV_EXCL_LINE
5311  }
5312  }
5313  else if(has_any(QMRK))
5314  {
5315  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5316  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5317  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5318  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5319  const char first = rem.str[0];
5320  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5321  ScannedScalar sc;
      // a scalar key: set it, then expect the ':' (RKCL) on the next pass
5322  if(first == '\'')
5323  {
5324  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5325  sc = _scan_scalar_squot();
5326  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5327  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5328  addrem_flags(RKCL, QMRK);
5329  goto seqimap_again;
5330  }
5331  else if(first == '"')
5332  {
5333  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5334  sc = _scan_scalar_dquot();
5335  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5336  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5337  addrem_flags(RKCL, QMRK);
5338  goto seqimap_again;
5339  }
5340  // block scalars (ie | and >) cannot appear in flow containers
5341  else if(_scan_scalar_plain_map_flow(&sc))
5342  {
5343  _c4dbgp("seqimap[QMRK]: it's a scalar.");
5344  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5345  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5346  addrem_flags(RKCL, QMRK);
5347  goto seqimap_again;
5348  }
5349  else if(first == '[')
5350  {
5351  _c4dbgp("seqimap[QMRK]: start child seqflow");
      // a flow sequence as key: the flags are switched to RKCL before
      // begin_seq_key_flow(); presumably the handler keeps them for this
      // level, so it resumes in RKCL once the child closes - TODO confirm
5352  addrem_flags(RKCL, QMRK);
5353  m_evt_handler->begin_seq_key_flow();
5354  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5355  _set_indentation(m_evt_handler->m_parent->indref);
5356  _line_progressed(1);
5357  goto seqimap_finish;
5358  }
5359  else if(first == '{')
5360  {
5361  _c4dbgp("seqimap[QMRK]: start child mapflow");
      // a flow map as key: same pattern as for '['
5362  addrem_flags(RKCL, QMRK);
5363  m_evt_handler->begin_map_key_flow();
5364  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5365  _set_indentation(m_evt_handler->m_parent->indref);
5366  _line_progressed(1);
5367  goto seqimap_finish;
5368  }
5369  else if(first == ',' || first == ']')
5370  {
      // the pair ends before any key was read: both key and value are
      // set empty. The ',' / ']' is not consumed here
5371  _c4dbgp("seqimap[QMRK]: finish without key.");
5372  m_evt_handler->set_key_scalar_plain_empty();
5373  m_evt_handler->set_val_scalar_plain_empty();
5374  _end_map_flow();
5375  goto seqimap_finish;
5376  }
5377  else if(first == '&')
5378  {
      // unlike in RVAL, the anchor is set on the key right away instead
      // of being queued in m_pending_anchors
5379  csubstr anchor = _scan_anchor();
5380  _c4dbgp("seqimap[QMRK]: anchor!");
5381  m_evt_handler->set_key_anchor(anchor);
5382  }
5383  else if(first == '*')
5384  {
5385  csubstr ref = _scan_ref_seq();
5386  _c4dbgp("seqimap[QMRK]: ref!");
5387  _handle_keyref(ref);
5388  addrem_flags(RKCL, QMRK);
5389  }
5390  else
5391  {
5392  _c4err("parse error"); // LCOV_EXCL_LINE
5393  }
5394  }
5395  else if(has_any(RKCL))
5396  {
5397  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5398  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5399  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5400  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5401  const char first = rem.str[0];
5402  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5403  if(first == ':')
5404  {
5405  _c4dbgp("seqimap[RKCL]: found ':'");
5406  addrem_flags(RVAL, RKCL);
5407  _line_progressed(1);
5408  goto seqimap_again;
5409  }
5410  else if(first == ',' || first == ']')
5411  {
      // a key with no ':' after it: the value is set empty. The
      // ',' / ']' is not consumed here
5412  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5413  m_evt_handler->set_val_scalar_plain_empty();
5414  _end_map_flow();
5415  goto seqimap_finish;
5416  }
5417  else
5418  {
5419  _c4err("parse error"); // LCOV_EXCL_LINE
5420  }
5421  }
5422 
5423  seqimap_again:
5424  _c4dbgt("seqimap: go again", 0);
      // when the current line is consumed, load the next one; running out
      // of input while the pair is still open is an error
5425  if(_finished_line())
5426  {
5427  if(C4_LIKELY(!_finished_file()))
5428  {
5429  _line_ended();
5430  _scan_line();
5431  _c4dbgnextline();
5432  }
5433  else
5434  {
5435  _c4err("parse error");
5436  }
5437  }
5438  goto seqimap_start;
5439 
5440  seqimap_finish:
5441  _c4dbgp("seqimap: finish");
5442 }
5443 
5444 
5445 //-----------------------------------------------------------------------------
5446 
/** Process tokens while the parser is inside a flow sequence (`[...]`).
 *
 * Asserted on every iteration: RSEQ and RFLOW are set, RKEY is not set,
 * exactly one of RVAL/RNXT is set, and the current indref is not npos.
 *
 * The function is a loop built from three labels:
 *  - `seqflow_start`: process the next token of the current line;
 *  - `seqflow_again`: if the line is exhausted, scan the next line (or
 *    report "missing terminating ]" when the file is finished), then
 *    jump back to `seqflow_start`;
 *  - `seqflow_finish`: leave the function.
 *
 * States, as implemented by the branches below:
 *  - RVAL: the next token is read as a value (quoted/plain scalar,
 *    nested container, alias, anchor, tag), or as the start of a
 *    single-pair map inside the sequence (`:` or `?`);
 *  - RNXT: the next token must be `,`, `]` or `:`.
 *
 * The function leaves through `seqflow_finish` when the sequence is
 * closed by `]`, when a child flow map is started by `{`, or when the
 * flags are switched to RSEQIMAP. A nested `[` does NOT leave the
 * function: the loop simply continues with the flags set for the child
 * sequence.
 *
 * Errors are reported through `_c4err()`. */
5447 template<class EventHandler>
5448 void ParseEngine<EventHandler>::_handle_seq_flow()
5449 {
5450 seqflow_start:
5451  _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5452 
      // state invariants: flow seq, not reading a key, exactly one of RVAL/RNXT
5453  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5454  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5455  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5456  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5457  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5458  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5459 
5460  if(m_evt_handler->m_curr->at_line_beginning())
5461  {
5462  _handle_flow_line_beginning();
5463  }
5464 
5465  _handle_flow_skip_whitespace();
      // nothing left on this line: go fetch the next one
5466  if(!m_evt_handler->m_curr->line_contents.rem.len)
5467  goto seqflow_again;
5468 
5469  if(has_any(RVAL))
5470  {
5471  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5472  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5473  ScannedScalar sc;
      // NOTE: the order of this else-if chain matters. Quoted scalars
      // are checked first, then a plain scalar scan is attempted, and
      // only when that scan returns false are the single-character
      // tokens ([ { ] * & ! : ? ,) considered.
5474  if(first == '\'')
5475  {
5476  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5477  sc = _scan_scalar_squot();
5478  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5479  _handle_annotations_before_blck_val_scalar();
5480  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5481  addrem_flags(RNXT, RVAL);
5482  _mark_seqflow_val_end();
5483  }
5484  else if(first == '"')
5485  {
5486  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5487  sc = _scan_scalar_dquot();
5488  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5489  _handle_annotations_before_blck_val_scalar();
5490  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5491  addrem_flags(RNXT, RVAL);
5492  _mark_seqflow_val_end();
5493  }
5494  // block scalars (ie | and >) cannot appear in flow containers
5495  else if(_scan_scalar_plain_seq_flow(&sc))
5496  {
5497  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5498  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5499  _handle_annotations_before_blck_val_scalar();
5500  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5501  addrem_flags(RNXT, RVAL);
5502  _mark_seqflow_val_end();
5503  }
5504  else if(first == '[')
5505  {
5506  _c4dbgp("seqflow[RVAL]: start child seqflow");
      // flags are RNXT while begin_seq_val_flow() runs and are put
      // back to RVAL afterwards. NOTE(review): presumably so that the
      // state kept for this (parent) seq is RNXT while the child
      // starts in RVAL -- confirm against the handler's push logic.
5507  addrem_flags(RNXT, RVAL);
5508  _handle_annotations_before_blck_val_scalar();
5509  m_evt_handler->begin_seq_val_flow();
5510  _set_indentation(m_evt_handler->m_parent->indref);
5511  addrem_flags(RVAL, RNXT);
5512  _line_progressed(1);
      // no goto here: fall through to seqflow_again and keep looping
      // in this function for the child sequence
5513  }
5514  else if(first == '{')
5515  {
5516  _c4dbgp("seqflow[RVAL]: start child mapflow");
5517  addrem_flags(RNXT, RVAL);
5518  _handle_annotations_before_blck_val_scalar();
5519  m_evt_handler->begin_map_val_flow();
5520  _set_indentation(m_evt_handler->m_parent->indref);
      // switch from seq flags to map flags (RMAP|RKEY) and leave:
      // the state no longer satisfies this function's assertions
5521  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5522  _line_progressed(1);
5523  goto seqflow_finish;
5524  }
5525  else if(first == ']') // this happens on cases such as [] or [.., ]
5526  {
5527  _c4dbgp("seqflow[RVAL]: end!");
      // pending anchors/tags with no value following them: emit an
      // empty plain scalar after handling the annotations
5528  if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5529  {
5530  _c4dbgp("seqflow[RVAL]: add pending annotations");
5531  _handle_annotations_before_blck_val_scalar();
5532  m_evt_handler->set_val_scalar_plain_empty();
5533  }
5534  _line_progressed(1);
5535  _end_seq_flow();
5536  goto seqflow_finish;
5537  }
5538  else if(first == '*')
5539  {
5540  csubstr ref = _scan_ref_seq();
5541  _c4dbgpf("seqflow[RVAL]: ref! {}", _prs(ref));
5542  _handle_valref(ref);
5543  addrem_flags(RNXT, RVAL);
5544  }
5545  else if(first == '&')
5546  {
      // anchors and tags are only queued here; the state stays RVAL so
      // the next token is still read as the value
5547  csubstr anchor = _scan_anchor();
5548  _c4dbgpf("seqflow[RVAL]: anchor! {}", _prs(anchor));
5549  _add_annotation(&m_pending_anchors, anchor);
5550  }
5551  else if(first == '!')
5552  {
5553  csubstr tag = _scan_tag();
5554  _c4dbgpf("seqflow[RVAL]: tag! {}", _prs(tag));
5555  _add_annotation(&m_pending_tags, tag);
5556  }
5557  else if(first == ':')
5558  {
      // a ':' where a value was expected: start a map as the seq
      // entry, give it an empty key, and continue in RSEQIMAP|RVAL
5559  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5560  addrem_flags(RNXT, RVAL);
5561  m_evt_handler->begin_map_val_flow();
5562  _set_indentation(m_evt_handler->m_parent->indref);
5563  _handle_annotations_before_blck_key_scalar();
5564  m_evt_handler->set_key_scalar_plain_empty();
5565  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5566  _line_progressed(1);
5567  goto seqflow_finish;
5568  }
5569  else if(first == '?')
5570  {
      // explicit key marker: start a map as the seq entry and
      // continue in RSEQIMAP|QMRK
5571  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5572  addrem_flags(RNXT, RVAL);
5573  m_evt_handler->begin_map_val_flow();
5574  _set_indentation(m_evt_handler->m_parent->indref);
5575  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5576  _line_progressed(1);
5577  _maybe_skip_whitespace_tokens();
5578  goto seqflow_finish;
5579  }
5580  else if(first == ',')
5581  {
      // a comma where a value was expected is accepted only when
      // anchors/tags are pending (they annotate an empty value).
      // The comma itself is not consumed here: the flags change to
      // RNXT and the RNXT branch consumes it on the next iteration.
5582  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5583  {
5584  _c4dbgp("seqflow[RVAL]: add pending annotations");
5585  _handle_annotations_before_blck_val_scalar();
5586  m_evt_handler->set_val_scalar_plain_empty();
5587  addrem_flags(RNXT, RVAL);
5588  _mark_seqflow_val_end();
5589  }
5590  else
5591  {
5592  _c4err("parse error");
5593  }
5594  }
5595  else
5596  {
5597  _c4err("parse error");
5598  }
5599  }
5600  else // RNXT
5601  {
5602  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5603  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5604  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5605  if(first == ',')
5606  {
5607  _c4dbgp("seqflow[RNXT]: expect next val");
5608  addrem_flags(RVAL, RNXT);
5609  m_evt_handler->add_sibling();
5610  _line_progressed(1);
      // a '#' immediately after the comma is rejected
5611  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5612  {
5613  _c4err("parse error: invalid comment after comma");
5614  }
5615  _mark_seqflow_val_end();
5616  }
5617  else if(first == ']')
5618  {
5619  _c4dbgp("seqflow[RNXT]: end!");
5620  _line_progressed(1);
5621  _end_seq_flow();
5622  goto seqflow_finish;
5623  }
5624  else if(first == ':')
5625  {
      // a ':' after a value is accepted only when the current line
      // equals m_prev_val_end; the previous value then becomes the
      // first key of a new map. NOTE(review): m_prev_val_end is
      // presumably the line recorded by _mark_seqflow_val_end() --
      // confirm in that helper.
5626  _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5627  if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5628  {
5629  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5630  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5631  _set_indentation(m_evt_handler->m_parent->indref);
5632  _line_progressed(1);
5633  addrem_flags(RSEQIMAP|RVAL, RNXT);
5634  goto seqflow_finish;
5635  }
5636  else
5637  {
5638  _c4err("parse error");
5639  }
5640  }
5641  else
5642  {
5643  _c4err("parse error");
5644  }
5645  }
5646 
5647  seqflow_again:
5648  _c4dbgt("seqflow: go again", 0);
      // current line exhausted: advance to the next line, or fail if
      // the file ended before the closing ']'
5649  if(_finished_line())
5650  {
5651  if(C4_LIKELY(!_finished_file()))
5652  {
5653  _line_ended();
5654  _scan_line();
5655  _c4dbgnextline();
5656  }
5657  else
5658  {
5659  _c4err("missing terminating ]");
5660  }
5661  }
5662  goto seqflow_start;
5663 
5664  seqflow_finish:
5665  _c4dbgp("seqflow: finish");
5666 }
5667 
5668 
5669 //-----------------------------------------------------------------------------
5670 
5671 template<class EventHandler>
5672 void ParseEngine<EventHandler>::_handle_map_flow()
5673 {
5674 mapflow_start:
5675  _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5676 
5677  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5678  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5679  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5680  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5681 
5682  if(m_evt_handler->m_curr->at_line_beginning())
5683  {
5684  _handle_flow_line_beginning();
5685  }
5686 
5687  _handle_flow_skip_whitespace();
5688  if(!m_evt_handler->m_curr->line_contents.rem.len)
5689  goto mapflow_again;
5690 
5691  if(has_any(RKEY))
5692  {
5693  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5694  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5695  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5696  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5697  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5698  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5699  ScannedScalar sc;
5700  if(first == '\'')
5701  {
5702  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5703  sc = _scan_scalar_squot();
5704  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5705  _handle_annotations_before_blck_key_scalar();
5706  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5707  addrem_flags(RKCL, RKEY|QMRK);
5708  }
5709  else if(first == '"')
5710  {
5711  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5712  sc = _scan_scalar_dquot();
5713  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5714  _handle_annotations_before_blck_key_scalar();
5715  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5716  addrem_flags(RKCL, RKEY|QMRK);
5717  }
5718  // block scalars (ie | and >) cannot appear in flow containers
5719  else if(_scan_scalar_plain_map_flow(&sc))
5720  {
5721  _c4dbgp("mapflow[RKEY]: plain scalar");
5722  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5723  _handle_annotations_before_blck_key_scalar();
5724  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5725  addrem_flags(RKCL, RKEY|QMRK);
5726  }
5727  else if(first == '?')
5728  {
5729  _c4dbgp("mapflow[RKEY]: explicit key");
5730  _handle_annotations_before_blck_key_scalar();
5731  addrem_flags(QMRK, RKEY);
5732  _line_progressed(1);
5733  _maybe_skip_whitespace_tokens();
5734  }
5735  else if(first == ':')
5736  {
5737  _c4dbgp("mapflow[RKEY]: setting empty key");
5738  _handle_annotations_before_blck_key_scalar();
5739  m_evt_handler->set_key_scalar_plain_empty();
5740  addrem_flags(RVAL, RKEY|QMRK);
5741  _line_progressed(1);
5742  _maybe_skip_whitespace_tokens();
5743  }
5744  else if(first == ',')
5745  {
5746  _c4dbgp("mapflow[RKEY]: comma!");
5747  if(!_handle_annotations_before_unexpected_flow_token_rkey())
5748  _c4err("unexpected comma");
5749  addrem_flags(RNXT, RKEY|QMRK);
5750  // keep going in this function
5751  }
5752  else if(first == '}') // this happens on a trailing comma like ", }"
5753  {
5754  _c4dbgp("mapflow[RKEY]: end!");
5755  (void)_handle_annotations_before_unexpected_flow_token_rkey();
5756  _line_progressed(1);
5757  _end_map_flow();
5758  goto mapflow_finish;
5759  }
5760  else if(first == '&')
5761  {
5762  csubstr anchor = _scan_anchor();
5763  _c4dbgpf("mapflow[RKEY]: key anchor! {}", _prs(anchor));
5764  _add_annotation(&m_pending_anchors, anchor);
5765  }
5766  else if(first == '!')
5767  {
5768  csubstr tag = _scan_tag();
5769  _c4dbgpf("mapflow[RKEY]: tag! {}", _prs(tag));
5770  _add_annotation(&m_pending_tags, tag);
5771  }
5772  else if(first == '*')
5773  {
5774  csubstr ref = _scan_ref_map();
5775  _c4dbgpf("mapflow[RKEY]: key ref! {}", _prs(ref));
5776  _handle_keyref(ref);
5777  addrem_flags(RKCL, RKEY);
5778  }
5779  else if(first == '[')
5780  {
5781  // RYML's tree cannot store container keys, but that's
5782  // handled inside the tree event handler. Other handler
5783  // types may be able to handle it.
5784  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5785  _handle_annotations_before_blck_key_scalar();
5786  addrem_flags(RKCL, RKEY);
5787  m_evt_handler->begin_seq_key_flow();
5788  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5789  _set_indentation(m_evt_handler->m_parent->indref);
5790  _line_progressed(1);
5791  goto mapflow_finish;
5792  }
5793  else if(first == '{')
5794  {
5795  // RYML's tree cannot store container keys, but that's
5796  // handled inside the tree event handler. Other handler
5797  // types may be able to handle it.
5798  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5799  _handle_annotations_before_blck_key_scalar();
5800  addrem_flags(RKCL, RKEY);
5801  m_evt_handler->begin_map_key_flow();
5802  addrem_flags(RKEY, RVAL|RKCL);
5803  _set_indentation(m_evt_handler->m_parent->indref);
5804  _line_progressed(1);
5805  // keep going in this function
5806  }
5807  else
5808  {
5809  _c4err("parse error"); // LCOV_EXCL_LINE
5810  }
5811  }
5812  else if(has_any(RKCL)) // read the key colon
5813  {
5814  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5815  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5816  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5817  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5818  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5819  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5820  if(first == ':')
5821  {
5822  _c4dbgp("mapflow[RKCL]: found the colon");
5823  addrem_flags(RVAL, RKCL);
5824  _line_progressed(1);
5825  }
5826  else if(first == '}')
5827  {
5828  _c4dbgp("mapflow[RKCL]: end with missing val!");
5829  addrem_flags(RVAL, RKCL);
5830  m_evt_handler->set_val_scalar_plain_empty();
5831  _line_progressed(1);
5832  _end_map_flow();
5833  goto mapflow_finish;
5834  }
5835  else if(first == ',')
5836  {
5837  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5838  m_evt_handler->set_val_scalar_plain_empty();
5839  m_evt_handler->add_sibling();
5840  addrem_flags(RKEY, RKCL);
5841  _line_progressed(1);
5842  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5843  {
5844  _c4err("parse error: invalid comment after comma");
5845  }
5846  }
5847  else
5848  {
5849  _c4err("parse error");
5850  }
5851  }
5852  else if(has_any(RVAL))
5853  {
5854  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5855  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5856  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5857  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5858  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5859  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5860  ScannedScalar sc;
5861  if(first == '\'')
5862  {
5863  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5864  sc = _scan_scalar_squot();
5865  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5866  _handle_annotations_before_blck_val_scalar();
5867  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5868  addrem_flags(RNXT, RVAL);
5869  }
5870  else if(first == '"')
5871  {
5872  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5873  sc = _scan_scalar_dquot();
5874  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5875  _handle_annotations_before_blck_val_scalar();
5876  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5877  addrem_flags(RNXT, RVAL);
5878  }
5879  // block scalars (ie | and >) cannot appear in flow containers
5880  else if(_scan_scalar_plain_map_flow(&sc))
5881  {
5882  _c4dbgp("mapflow[RVAL]: plain scalar.");
5883  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5884  _handle_annotations_before_blck_val_scalar();
5885  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5886  addrem_flags(RNXT, RVAL);
5887  }
5888  else if(first == '[')
5889  {
5890  _c4dbgp("mapflow[RVAL]: start val seqflow");
5891  addrem_flags(RNXT, RVAL);
5892  _handle_annotations_before_blck_val_scalar();
5893  m_evt_handler->begin_seq_val_flow();
5894  _set_indentation(m_evt_handler->m_parent->indref);
5895  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5896  _line_progressed(1);
5897  goto mapflow_finish;
5898  }
5899  else if(first == '{')
5900  {
5901  _c4dbgp("mapflow[RVAL]: start val mapflow");
5902  addrem_flags(RNXT, RVAL);
5903  _handle_annotations_before_blck_val_scalar();
5904  m_evt_handler->begin_map_val_flow();
5905  _set_indentation(m_evt_handler->m_parent->indref);
5906  addrem_flags(RKEY, RNXT);
5907  _line_progressed(1);
5908  // keep going in this function
5909  }
5910  else if(first == '}')
5911  {
5912  _c4dbgp("mapflow[RVAL]: end!");
5913  _handle_annotations_before_blck_val_scalar();
5914  m_evt_handler->set_val_scalar_plain_empty();
5915  _line_progressed(1);
5916  _end_map_flow();
5917  goto mapflow_finish;
5918  }
5919  else if(first == ',')
5920  {
5921  _c4dbgp("mapflow[RVAL]: empty val!");
5922  _handle_annotations_before_blck_val_scalar();
5923  m_evt_handler->set_val_scalar_plain_empty();
5924  addrem_flags(RNXT, RVAL);
5925  // keep going in this function
5926  }
5927  else if(first == '*')
5928  {
5929  csubstr ref = _scan_ref_map();
5930  _c4dbgpf("mapflow[RVAL]: key ref! {}", _prs(ref));
5931  _handle_valref(ref);
5932  addrem_flags(RNXT, RVAL);
5933  }
5934  else if(first == '&')
5935  {
5936  csubstr anchor = _scan_anchor();
5937  _c4dbgpf("mapflow[RVAL]: key anchor! {}", _prs(anchor));
5938  _add_annotation(&m_pending_anchors, anchor);
5939  }
5940  else if(first == '!')
5941  {
5942  csubstr tag = _scan_tag();
5943  _c4dbgpf("mapflow[RVAL]: tag! {}", _prs(tag));
5944  _add_annotation(&m_pending_tags, tag);
5945  }
5946  else
5947  {
5948  _c4err("parse error");
5949  }
5950  }
5951  else if(has_any(RNXT))
5952  {
5953  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5954  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5955  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5956  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5957  _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5958  if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5959  {
5960  _c4dbgp("mapflow[RNXT]: expect next keyval");
5961  m_evt_handler->add_sibling();
5962  addrem_flags(RKEY, RNXT);
5963  _line_progressed(1);
5964  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5965  {
5966  _c4err("parse error: invalid comment after comma");
5967  }
5968  }
5969  else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5970  {
5971  _c4dbgp("mapflow[RNXT]: end!");
5972  _line_progressed(1);
5973  _end_map_flow();
5974  goto mapflow_finish;
5975  }
5976  else
5977  {
5978  _c4err("parse error");
5979  }
5980  }
5981  else if(has_any(QMRK))
5982  {
5983  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5984  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5985  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5986  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5987  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5988  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5989  ScannedScalar sc;
5990  if(first == '\'')
5991  {
5992  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5993  sc = _scan_scalar_squot();
5994  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5995  _handle_annotations_before_blck_key_scalar();
5996  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5997  addrem_flags(RKCL, QMRK);
5998  }
5999  else if(first == '"')
6000  {
6001  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6002  sc = _scan_scalar_dquot();
6003  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6004  _handle_annotations_before_blck_key_scalar();
6005  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6006  addrem_flags(RKCL, QMRK);
6007  }
6008  // block scalars (ie | and >) cannot appear in flow containers
6009  else if(_scan_scalar_plain_map_flow(&sc))
6010  {
6011  _c4dbgp("mapflow[QMRK]: plain scalar");
6012  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6013  _handle_annotations_before_blck_key_scalar();
6014  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6015  addrem_flags(RKCL, QMRK);
6016  }
6017  else if(first == ':')
6018  {
6019  _c4dbgp("mapflow[QMRK]: setting empty key");
6020  _handle_annotations_before_blck_key_scalar();
6021  m_evt_handler->set_key_scalar_plain_empty();
6022  addrem_flags(RVAL, QMRK);
6023  _line_progressed(1);
6024  _maybe_skip_whitespace_tokens();
6025  }
6026  else if(first == '}') // this happens on a trailing comma like ", }"
6027  {
6028  _c4dbgp("mapflow[QMRK]: end!");
6029  _handle_annotations_before_blck_key_scalar();
6030  m_evt_handler->set_key_scalar_plain_empty();
6031  m_evt_handler->set_val_scalar_plain_empty();
6032  _end_map_flow();
6033  _line_progressed(1);
6034  goto mapflow_finish;
6035  }
6036  else if(first == ',')
6037  {
6038  _c4dbgp("mapflow[QMRK]: empty key+val!");
6039  _handle_annotations_before_blck_key_scalar();
6040  m_evt_handler->set_key_scalar_plain_empty();
6041  m_evt_handler->set_val_scalar_plain_empty();
6042  addrem_flags(RNXT, QMRK);
6043  }
6044  else if(first == '&')
6045  {
6046  csubstr anchor = _scan_anchor();
6047  _c4dbgpf("mapflow[QMRK]: key anchor! {}", _prs(anchor));
6048  _add_annotation(&m_pending_anchors, anchor);
6049  }
6050  else if(first == '*')
6051  {
6052  csubstr ref = _scan_ref_map();
6053  _c4dbgpf("mapflow[QMRK]: key ref! {}", _prs(ref));
6054  _handle_keyref(ref);
6055  addrem_flags(RKCL, QMRK);
6056  }
6057  else if(first == '[')
6058  {
6059  // RYML's tree cannot store container keys, but that's
6060  // handled inside the tree sink. Other sink types may be
6061  // able to handle it.
6062  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
6063  addrem_flags(RKCL, QMRK);
6064  _handle_annotations_before_blck_key_scalar();
6065  m_evt_handler->begin_seq_key_flow();
6066  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6067  _set_indentation(m_evt_handler->m_parent->indref);
6068  _line_progressed(1);
6069  goto mapflow_finish;
6070  }
6071  else if(first == '{')
6072  {
6073  // RYML's tree cannot store container keys, but that's
6074  // handled inside the tree sink. Other sink types may be
6075  // able to handle it.
6076  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6077  addrem_flags(RKCL, QMRK);
6078  _handle_annotations_before_blck_key_scalar();
6079  m_evt_handler->begin_map_key_flow();
6080  _set_indentation(m_evt_handler->m_parent->indref);
6081  addrem_flags(RKEY, RKCL);
6082  _line_progressed(1);
6083  // keep going in this function
6084  }
6085  else if(first == '!')
6086  {
6087  csubstr tag = _scan_tag();
6088  _c4dbgpf("mapflow[QMRK]: tag! {}", _prs(tag));
6089  _add_annotation(&m_pending_tags, tag);
6090  }
6091  else
6092  {
6093  _c4err("parse error"); // LCOV_EXCL_LINE
6094  }
6095  }
6096 
6097  mapflow_again:
6098  _c4dbgt("mapflow: go again", 0);
6099  if(_finished_line())
6100  {
6101  if(C4_LIKELY(!_finished_file()))
6102  {
6103  _line_ended();
6104  _scan_line();
6105  _c4dbgnextline();
6106  }
6107  else
6108  {
6109  _c4err("missing terminating }");
6110  }
6111  }
6112  goto mapflow_start;
6113 
6114  mapflow_finish:
6115  _c4dbgp("mapflow: finish");
6116 }
6117 
6118 
6119 //-----------------------------------------------------------------------------
6120 
6121 template<class EventHandler>
6122 void ParseEngine<EventHandler>::_handle_seq_block()
6123 {
6124 seqblck_start:
6125  _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6126 
6127  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6128  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6129  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6130  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6131 
6132  _maybe_skip_comment_strict();
6133  if(!m_evt_handler->m_curr->line_contents.rem.len)
6134  goto seqblck_again;
6135 
6136  if(has_any(RVAL))
6137  {
6138  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6139  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6140  if(m_evt_handler->m_curr->at_line_beginning())
6141  {
6142  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6143  if(m_evt_handler->m_curr->indentation_ge_extra())
6144  {
6145  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6146  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6147  if(!m_evt_handler->m_curr->line_contents.rem.len)
6148  goto seqblck_again;
6149  }
6150  else if(m_evt_handler->m_curr->indentation_lt_extra())
6151  {
6152  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6153  if(m_evt_handler->m_curr->indentation_eq())
6154  {
6155  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6156  _handle_annotations_before_blck_val_scalar();
6157  m_evt_handler->set_val_scalar_plain_empty();
6158  addrem_flags(RNXT, RVAL);
6159  goto seqblck_again;
6160  }
6161  else
6162  {
6163  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6164  _c4dbgp("seqblck[RVAL]: smaller indentation!");
6165  _handle_indentation_pop_from_block_seq();
6166  goto seqblck_finish;
6167  }
6168  }
6169  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6170  {
6171  _c4dbgp("seqblck[RVAL]: empty line!");
6172  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6173  goto seqblck_again;
6174  }
6175  }
6176  _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6177  const size_t startmark = _handle_block_skip_leading_whitespace();
6178  _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6179  if(startmark == npos)
6180  {
6181  _c4dbgp("seqblck[RVAL]: whitespace only");
6182  goto seqblck_again;
6183  }
6184  const size_t tabmark = _handle_block_get_whitespace_mark();
6185  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6186  _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6187  const size_t startline = m_evt_handler->m_curr->pos.line;
6188  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6189  ScannedScalar sc;
6190  if(first == '\'')
6191  {
6192  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6193  sc = _scan_scalar_squot();
6194  if(!_maybe_scan_following_colon())
6195  {
6196  _c4dbgp("seqblck[RVAL]: set as val");
6197  _handle_annotations_before_blck_val_scalar();
6198  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6199  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6200  addrem_flags(RNXT, RVAL);
6201  }
6202  else
6203  {
6204  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6205  _handle_block_check_leading_tabs(startmark);
6206  addrem_flags(RNXT, RVAL);
6207  _handle_annotations_before_start_mapblck(startline);
6208  _handle_colon();
6209  m_evt_handler->begin_map_val_block();
6210  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6211  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6212  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6213  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6214  _maybe_skip_whitespace_tokens();
6215  goto seqblck_finish;
6216  }
6217  }
6218  else if(first == '"')
6219  {
6220  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6221  sc = _scan_scalar_dquot();
6222  if(!_maybe_scan_following_colon())
6223  {
6224  _c4dbgp("seqblck[RVAL]: set as val");
6225  _handle_annotations_before_blck_val_scalar();
6226  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6227  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6228  addrem_flags(RNXT, RVAL);
6229  }
6230  else
6231  {
6232  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6233  addrem_flags(RNXT, RVAL);
6234  _handle_block_check_leading_tabs(startmark);
6235  _handle_annotations_before_start_mapblck(startline);
6236  _handle_colon();
6237  m_evt_handler->begin_map_val_block();
6238  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6239  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6240  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6241  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6242  _maybe_skip_whitespace_tokens();
6243  goto seqblck_finish;
6244  }
6245  }
6246  // block scalars can only appear as keys when in QMRK scope
6247  // (ie, after ? tokens), so no need to scan following colon in
6248  // here.
6249  else if(first == '|')
6250  {
6251  _c4dbgp("seqblck[RVAL]: block-literal scalar");
6252  ScannedBlock sb;
6253  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6254  _handle_annotations_before_blck_val_scalar();
6255  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6256  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6257  addrem_flags(RNXT, RVAL);
6258  }
6259  else if(first == '>')
6260  {
6261  _c4dbgp("seqblck[RVAL]: block-folded scalar");
6262  ScannedBlock sb;
6263  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6264  _handle_annotations_before_blck_val_scalar();
6265  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6266  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6267  addrem_flags(RNXT, RVAL);
6268  }
6269  else if(_scan_scalar_plain_seq_blck(&sc))
6270  {
6271  _c4dbgp("seqblck[RVAL]: plain scalar.");
6272  if(!_maybe_scan_following_colon())
6273  {
6274  _c4dbgp("seqblck[RVAL]: set as val");
6275  _handle_annotations_before_blck_val_scalar();
6276  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6277  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6278  addrem_flags(RNXT, RVAL);
6279  }
6280  else
6281  {
6282  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6283  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6284  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6285  _handle_block_check_leading_tabs(startmark, tabmark);
6286  addrem_flags(RNXT, RVAL);
6287  _handle_annotations_before_start_mapblck(startline);
6288  _handle_colon();
6289  m_evt_handler->begin_map_val_block();
6290  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6291  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6292  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6293  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6294  _maybe_skip_whitespace_tokens();
6295  goto seqblck_finish;
6296  }
6297  }
6298  else if(first == '[')
6299  {
6300  _c4dbgp("seqblck[RVAL]: start child seqflow");
6301  addrem_flags(RNXT, RVAL);
6302  _handle_annotations_before_blck_val_scalar();
6303  m_evt_handler->begin_seq_val_flow();
6304  addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6305  _line_progressed(1);
6306  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6307  goto seqblck_finish;
6308  }
6309  else if(first == '{')
6310  {
6311  _c4dbgp("seqblck[RVAL]: start child mapflow");
6312  addrem_flags(RNXT, RVAL);
6313  _handle_annotations_before_blck_val_scalar();
6314  m_evt_handler->begin_map_val_flow();
6315  addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6316  _line_progressed(1);
6317  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6318  goto seqblck_finish;
6319  }
6320  else if(first == '-')
6321  {
6322  _c4dbgp("seqblck[RVAL]: dash");
6323  _handle_block_check_leading_tabs(startmark);
6324  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6325  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6326  _c4dbgp("seqblck[RVAL]: start child seqblck");
6327  _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6328  addrem_flags(RNXT, RVAL);
6329  _handle_annotations_before_blck_val_scalar();
6330  m_evt_handler->begin_seq_val_block();
6331  addrem_flags(RVAL, RNXT);
6332  _set_indentation(startindent);
6333  // keep going on inside this function
6334  _line_progressed(1);
6335  }
6336  else if(first == ':')
6337  {
6338  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6339  addrem_flags(RNXT, RVAL);
6340  _handle_annotations_before_start_mapblck(startline);
6341  _handle_colon();
6342  m_evt_handler->begin_map_val_block();
6343  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6344  m_evt_handler->set_key_scalar_plain_empty();
6345  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6346  _line_progressed(1);
6347  _maybe_skip_whitespace_tokens();
6348  goto seqblck_finish;
6349  }
6350  else if(first == '&')
6351  {
6352  const csubstr anchor = _scan_anchor();
6353  _c4dbgpf("seqblck[RVAL]: anchor! {}", _prs(anchor));
6354  // we need to buffer the anchors, as there may be two
6355  // consecutive anchors in here
6356  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6357  }
6358  else if(first == '*')
6359  {
6360  csubstr ref = _scan_ref_seq();
6361  _c4dbgpf("seqblck[RVAL]: ref! {}", _prs(ref));
6362  if(!_maybe_scan_following_colon())
6363  {
6364  _c4dbgp("seqblck[RVAL]: set ref as val!");
6365  _handle_valref(ref);
6366  addrem_flags(RNXT, RVAL);
6367  }
6368  else
6369  {
6370  _c4dbgp("seqblck[RVAL]: ref is key of map");
6371  addrem_flags(RNXT, RVAL);
6372  _handle_annotations_before_start_mapblck(startline);
6373  m_evt_handler->begin_map_val_block();
6374  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6375  _handle_keyref(ref);
6376  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6377  _set_indentation(startindent);
6378  _maybe_skip_whitespace_tokens();
6379  goto seqblck_finish;
6380  }
6381  }
6382  else if(first == '!')
6383  {
6384  csubstr tag = _scan_tag();
6385  _c4dbgpf("seqblck[RVAL]: val tag! {}", _prs(tag));
6386  // we need to buffer the tags, as there may be two
6387  // consecutive tags in here
6388  _add_annotation(&m_pending_tags, tag, startindent, startline);
6389  }
6390  else if(first == '?')
6391  {
6392  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6393  addrem_flags(RNXT, RVAL);
6394  m_evt_handler->begin_map_val_block();
6395  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6396  _set_indentation(startindent);
6397  _line_progressed(1);
6398  _maybe_skipchars(' ');
6399  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6400  {
6401  _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6402  addrem_flags(RKCL, QMRK);
6403  m_evt_handler->begin_seq_key_block();
6404  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6405  _save_indentation();
6406  _line_progressed(1);
6407  _maybe_skipchars(' ');
6408  }
6409  goto seqblck_finish;
6410  }
6411  else
6412  {
6413  _c4err("parse error");
6414  }
6415  }
6416  else // RNXT
6417  {
6418  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6419  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6420  //
6421  // handle indentation
6422  //
6423  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6424  if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6425  {
6426  _c4dbgp("seqblck[RNXT]: at line begin");
6427  if(m_evt_handler->m_curr->indentation_ge())
6428  {
6429  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6430  _line_progressed(m_evt_handler->m_curr->indref);
6431  if(!m_evt_handler->m_curr->line_contents.rem.len)
6432  goto seqblck_again;
6433  }
6434  else if(m_evt_handler->m_curr->indentation_lt())
6435  {
6436  _c4dbgp("seqblck[RNXT]: smaller indentation!");
6437  _handle_indentation_pop_from_block_seq();
6438  if(has_all(RSEQ|RBLCK))
6439  {
6440  _c4dbgp("seqblck[RNXT]: still seqblck!");
6441  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6442  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6443  if(!m_evt_handler->m_curr->line_contents.rem.len)
6444  goto seqblck_again; // LCOV_EXCL_LINE
6445  }
6446  else
6447  {
6448  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6449  goto seqblck_finish;
6450  }
6451  }
6452  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6453  {
6454  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6455  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6456  if(!m_evt_handler->m_curr->line_contents.rem.len)
6457  goto seqblck_again; // LCOV_EXCL_LINE
6458  }
6459  }
6460  else
6461  {
6462  _c4dbgp("seqblck[RNXT]: NOT at line begin");
6463  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6464  {
6465  _c4err("parse error");
6466  }
6467  else
6468  {
6469  _skipchars(" \t");
6470  if(!m_evt_handler->m_curr->line_contents.rem.len)
6471  {
6472  _c4dbgp("seqblck[RNXT]: again");
6473  goto seqblck_again; // LCOV_EXCL_LINE
6474  }
6475  }
6476  }
6477  //
6478  // now handle the tokens
6479  //
6480  _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6481  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6482  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6483  if(first == '-')
6484  {
6485  if(m_evt_handler->m_curr->indref > 0
6486  || m_evt_handler->m_curr->line_contents.indentation > 0
6487  || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6488  {
6489  if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6490  {
6491  _c4dbgp("seqblck[RNXT]: expect next val");
6492  addrem_flags(RVAL, RNXT);
6493  m_evt_handler->add_sibling();
6494  _line_progressed(1);
6495  }
6496  else
6497  {
6498  _c4err("parse error");
6499  }
6500  }
6501  else
6502  {
6503  _c4dbgp("seqblck[RNXT]: start doc");
6504  _start_doc_suddenly();
6505  _line_progressed(3);
6506  _maybe_skip_whitespace_tokens();
6507  goto seqblck_finish;
6508  }
6509  }
6510  else if(first == ':')
6511  {
6512  // This happens for example in `- [a: b]: c` (after
6513  // terminating the seq, ie, after `]`). All other cases
6514  // (ie colon after scalars) are caught elsewhere (ie, in
6515  // RVAL state).
6516  if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP)))
6517  {
6518  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6519  m_evt_handler->end_seq_block();
6520  goto seqblck_finish;
6521  }
6522  else
6523  {
6524  _c4err("parse error");
6525  }
6526  }
6527  else if(first == '.')
6528  {
6529  _c4dbgp("seqblck[RNXT]: maybe doc?");
6530  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6531  {
6532  _c4dbgp("seqblck[RNXT]: end doc");
6533  _end_doc_suddenly();
6534  _line_progressed(3);
6535  _maybe_skip_whitespace_tokens();
6536  _check_doc_end_tokens();
6537  goto seqblck_finish;
6538  }
6539  else
6540  {
6541  _c4err("parse error");
6542  }
6543  }
6544  else
6545  {
6546  // may be an indentless sequence nested in a map...
6547  #ifdef RYML_DBG
6548  _print_state_stack();
6549  #endif
6550  if(m_evt_handler->m_parent
6551  && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6552  && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6553  {
6554  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6555  _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6556  _handle_indentation_pop(m_evt_handler->m_parent);
6557  _RYML_ASSERT_PARSE_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6558  m_evt_handler->add_sibling();
6559  addrem_flags(RKEY, RNXT);
6560  goto seqblck_finish;
6561  }
6562  else if(first == '\t')
6563  {
6564  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6565  if(pos == npos)
6566  {
6567  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6568  goto seqblck_again;
6569  }
6570  }
6571  _c4err("parse error");
6572  }
6573  }
6574 
6575  seqblck_again:
6576  _c4dbgt("seqblck: go again", 0);
6577  if(_finished_line())
6578  {
6579  m_bom_len = 0;
6580  _line_ended();
6581  _scan_line();
6582  if(_finished_file())
6583  {
6584  _c4dbgp("seqblck: finish!");
6585  _end_seq_blck();
6586  goto seqblck_finish;
6587  }
6588  _c4dbgnextline();
6589  }
6590  goto seqblck_start;
6591 
6592  seqblck_finish:
6593  _c4dbgp("seqblck: finish");
6594 }
6595 
6596 
6597 //-----------------------------------------------------------------------------
6598 
6599 template<class EventHandler>
6600 void ParseEngine<EventHandler>::_handle_map_block()
6601 {
6602 mapblck_start:
6603  _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6604 
6605  // states: RKEY -> RVAL -> RNXT
6606  // states: QMRK -> RKCL -> RVAL -> RNXT
6607  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6608  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6609  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6610  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6611 
6612  _maybe_skip_comment();
6613  if(!m_evt_handler->m_curr->line_contents.rem.len)
6614  goto mapblck_again;
6615 
6616  if(has_any(RKEY))
6617  {
6618  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6619  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6620  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6621  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6622  //
6623  // handle indentation
6624  //
6625  if(m_evt_handler->m_curr->at_line_beginning())
6626  {
6627  if(m_evt_handler->m_curr->indentation_eq())
6628  {
6629  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6630  _line_progressed(m_evt_handler->m_curr->indref);
6631  if(!m_evt_handler->m_curr->line_contents.rem.len)
6632  goto mapblck_again;
6633  }
6634  else if(m_evt_handler->m_curr->indentation_lt())
6635  {
6636  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6637  _handle_indentation_pop_from_block_map();
6638  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6639  if(has_all(RMAP|RBLCK))
6640  {
6641  _c4dbgp("mapblck[RKEY]: still mapblck!");
6642  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6643  if(!m_evt_handler->m_curr->line_contents.rem.len)
6644  goto mapblck_again;
6645  }
6646  else
6647  {
6648  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6649  goto mapblck_finish;
6650  }
6651  }
6652  else
6653  {
6654  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6655  _c4err("invalid indentation");
6656  }
6657  }
6658  //
6659  // now handle the tokens
6660  //
6661  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6662  const size_t startline = m_evt_handler->m_curr->pos.line;
6663  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6664  _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6665  ScannedScalar sc;
6666  if(first == '\'')
6667  {
6668  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6669  sc = _scan_scalar_squot();
6670  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6671  _handle_annotations_before_blck_key_scalar();
6672  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6673  addrem_flags(RVAL, RKEY);
6674  if(!_maybe_scan_following_colon())
6675  _c4err("could not find ':' colon after key");
6676  _handle_colon();
6677  _maybe_skip_whitespace_tokens();
6678  }
6679  else if(first == '"')
6680  {
6681  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6682  sc = _scan_scalar_dquot();
6683  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6684  _handle_annotations_before_blck_key_scalar();
6685  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6686  addrem_flags(RVAL, RKEY);
6687  if(!_maybe_scan_following_colon())
6688  _c4err("could not find ':' colon after key");
6689  _handle_colon();
6690  _maybe_skip_whitespace_tokens();
6691  }
6692  // block scalars (| and >) cannot be used as keys unless they
6693  // appear in an explicit QMRK scope (ie, after the ? token).
6694  else if(C4_UNLIKELY(first == '|'))
6695  {
6696  _c4err("block map: literal keys must be enclosed in '?'");
6697  }
6698  else if(C4_UNLIKELY(first == '>'))
6699  {
6700  _c4err("block map: folded keys must be enclosed in '?'");
6701  }
6702  else if(_scan_scalar_plain_map_blck(&sc))
6703  {
6704  _c4dbgp("mapblck[RKEY]: plain scalar");
6705  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6706  _handle_annotations_before_blck_key_scalar();
6707  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6708  addrem_flags(RVAL, RKEY);
6709  if(!_maybe_scan_following_colon())
6710  _c4err("could not find ':' colon after key");
6711  _handle_colon();
6712  _maybe_skip_whitespace_tokens();
6713  }
6714  else if(first == '?')
6715  {
6716  _c4dbgp("mapblck[RKEY]: key token!");
6717  addrem_flags(QMRK, RKEY);
6718  _line_progressed(1);
6719  _maybe_skipchars(' ');
6720  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6721  {
6722  _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6723  addrem_flags(RKCL, QMRK);
6724  m_evt_handler->begin_seq_key_block();
6725  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6726  _save_indentation();
6727  _line_progressed(1);
6728  _maybe_skipchars(' ');
6729  goto mapblck_finish;
6730  }
6731  goto mapblck_again;
6732  }
6733  else if(first == ':')
6734  {
6735  _c4dbgp("mapblck[RKEY]: setting empty key");
6736  _handle_annotations_before_blck_key_scalar();
6737  m_evt_handler->set_key_scalar_plain_empty();
6738  addrem_flags(RVAL, RKEY);
6739  _line_progressed(1);
6740  _handle_colon();
6741  _maybe_skip_whitespace_tokens();
6742  }
6743  else if(first == '*')
6744  {
6745  csubstr ref = _scan_ref_map();
6746  _c4dbgpf("mapblck[RKEY]: key ref! {}", _prs(ref));
6747  _handle_keyref(ref);
6748  addrem_flags(RVAL, RKEY);
6749  if(!_maybe_scan_following_colon())
6750  _c4err("could not find ':' colon after key");
6751  _handle_colon();
6752  _maybe_skip_whitespace_tokens();
6753  }
6754  else if(first == '&')
6755  {
6756  csubstr anchor = _scan_anchor();
6757  _c4dbgpf("mapblck[RKEY]: key anchor! {}", _prs(anchor));
6758  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6759  }
6760  else if(first == '!')
6761  {
6762  csubstr tag = _scan_tag();
6763  _c4dbgpf("mapblck[RKEY]: key tag! {}", _prs(tag));
6764  _add_annotation(&m_pending_tags, tag, startindent, startline);
6765  }
6766  else if(first == '[')
6767  {
6768  // RYML's tree cannot store container keys, but that's
6769  // handled inside the tree handler. Other handlers may be
6770  // able to handle it.
6771  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6772  _handle_annotations_before_blck_key_scalar();
6773  m_evt_handler->begin_seq_key_flow();
6774  addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6775  _line_progressed(1);
6776  _set_indentation(startindent);
6777  goto mapblck_finish;
6778  }
6779  else if(first == '{')
6780  {
6781  // RYML's tree cannot store container keys, but that's
6782  // handled inside the tree handler. Other handlers may be
6783  // able to handle it.
6784  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6785  _handle_annotations_before_blck_key_scalar();
6786  m_evt_handler->begin_map_key_flow();
6787  addrem_flags(RFLOW|RKEY, RBLCK);
6788  _line_progressed(1);
6789  _set_indentation(startindent);
6790  goto mapblck_finish;
6791  }
6792  else if(first == '-')
6793  {
6794  _c4dbgp("mapblck[RKEY]: maybe doc?");
6795  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6796  {
6797  _c4dbgp("mapblck[RKEY]: end+start doc");
6798  _start_doc_suddenly();
6799  _line_progressed(3);
6800  _maybe_skip_whitespace_tokens();
6801  goto mapblck_finish;
6802  }
6803  else
6804  {
6805  _c4err("parse error");
6806  }
6807  }
6808  else if(first == '.')
6809  {
6810  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6811  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6812  {
6813  _c4dbgp("mapblck[RKEY]: end doc");
6814  _end_doc_suddenly();
6815  _line_progressed(3);
6816  _maybe_skip_whitespace_tokens();
6817  _check_doc_end_tokens();
6818  goto mapblck_finish;
6819  }
6820  else
6821  {
6822  _c4err("parse error"); // LCOV_EXCL_LINE
6823  }
6824  }
6825  else
6826  {
6827  _c4err("parse error");
6828  }
6829  }
6830  else if(has_any(RVAL))
6831  {
6832  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6833  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6834  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6835  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6836  //
6837  // handle indentation
6838  //
6839  if(m_evt_handler->m_curr->at_line_beginning())
6840  {
6841  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6842  m_evt_handler->m_curr->more_indented = false;
6843  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6844  if(m_evt_handler->m_curr->indentation_eq_extra())
6845  {
6846  _c4dbgp("mapblck[RVAL]: skip indentation!");
6847  _line_progressed(m_evt_handler->m_curr->indref + 1);
6848  if(!m_evt_handler->m_curr->line_contents.rem.len)
6849  goto mapblck_again;
6850  }
6851  else if(m_evt_handler->m_curr->indentation_gt_extra())
6852  {
6853  _c4dbgp("mapblck[RVAL]: more indented!");
6854  m_evt_handler->m_curr->more_indented = true;
6855  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6856  if(!m_evt_handler->m_curr->line_contents.rem.len)
6857  goto mapblck_again; // LCOV_EXCL_LINE
6858  }
6859  else if(m_evt_handler->m_curr->indentation_lt_extra())
6860  {
6861  if(m_evt_handler->m_curr->indentation_eq())
6862  {
6863  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6864  // watchout for indentless seqs
6865  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6866  {
6867  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6868  _handle_annotations_before_blck_val_scalar();
6869  m_evt_handler->set_val_scalar_plain_empty();
6870  addrem_flags(RNXT, RVAL);
6871  goto mapblck_again;
6872  }
6873  }
6874  else
6875  {
6876  _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6877  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6878  _handle_indentation_pop_from_block_map();
6879  if(has_all(RMAP|RBLCK))
6880  {
6881  _c4dbgp("mapblck[RVAL]: still mapblck!");
6882  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6883  if(has_any(RNXT))
6884  {
6885  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6886  m_evt_handler->add_sibling();
6887  addrem_flags(RKEY, RNXT);
6888  }
6889  goto mapblck_again;
6890  }
6891  else
6892  {
6893  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6894  goto mapblck_finish;
6895  }
6896  }
6897  }
6898  }
6899  const size_t startcol = _handle_block_skip_leading_whitespace();
6900  if(startcol == npos)
6901  {
6902  _c4dbgp("mapblck[RVAL]: whitespace only");
6903  goto mapblck_again; // LCOV_EXCL_LINE
6904  }
6905  const size_t tabmark = _handle_block_get_whitespace_mark();
6906  //
6907  // now handle the tokens
6908  //
6909  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6910  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6911  const size_t startline = m_evt_handler->m_curr->pos.line;
6912  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6913  _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6914  ScannedScalar sc;
6915  if(first == '\'')
6916  {
6917  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6918  sc = _scan_scalar_squot();
6919  if(!_maybe_scan_following_colon())
6920  {
6921  _c4dbgp("mapblck[RVAL]: set as val");
6922  _handle_annotations_before_blck_val_scalar();
6923  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6924  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6925  addrem_flags(RNXT, RVAL);
6926  }
6927  else
6928  {
6929  _c4assert(m_evt_handler->m_curr->indref != npos);
6930  _c4assert(startindent > m_evt_handler->m_curr->indref);
6931  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6932  _handle_block_check_leading_tabs(startcol);
6933  _handle_annotations_before_start_mapblck(startline);
6934  addrem_flags(RNXT, RVAL);
6935  _handle_colon();
6936  m_evt_handler->begin_map_val_block();
6937  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6938  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6939  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6940  _maybe_skip_whitespace_tokens();
6941  // keep the child state on RVAL
6942  addrem_flags(RVAL, RNXT);
6943  }
6944  }
6945  else if(first == '"')
6946  {
6947  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6948  sc = _scan_scalar_dquot();
6949  if(!_maybe_scan_following_colon())
6950  {
6951  _c4dbgp("mapblck[RVAL]: set as val");
6952  _handle_annotations_before_blck_val_scalar();
6953  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6954  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6955  addrem_flags(RNXT, RVAL);
6956  }
6957  else
6958  {
6959  _c4assert(m_evt_handler->m_curr->indref != npos);
6960  _c4assert(startindent > m_evt_handler->m_curr->indref);
6961  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6962  _handle_block_check_leading_tabs(startcol);
6963  _handle_annotations_before_start_mapblck(startline);
6964  addrem_flags(RNXT, RVAL);
6965  _handle_colon();
6966  m_evt_handler->begin_map_val_block();
6967  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6968  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6969  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6970  _maybe_skip_whitespace_tokens();
6971  // keep the child state on RVAL
6972  addrem_flags(RVAL, RNXT);
6973  }
6974  }
6975  // block scalars can only appear as keys when in QMRK scope
6976  // (ie, after ? tokens), so no need to scan following colon
6977  else if(first == '|')
6978  {
6979  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6980  ScannedBlock sb;
6981  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6982  _handle_annotations_before_blck_val_scalar();
6983  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6984  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6985  addrem_flags(RNXT, RVAL);
6986  }
6987  else if(first == '>')
6988  {
6989  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6990  ScannedBlock sb;
6991  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6992  _handle_annotations_before_blck_val_scalar();
6993  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6994  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6995  addrem_flags(RNXT, RVAL);
6996  }
6997  else if(_scan_scalar_plain_map_blck(&sc))
6998  {
6999  _c4dbgp("mapblck[RVAL]: plain scalar.");
7000  if(!_maybe_scan_following_colon())
7001  {
7002  _c4dbgp("mapblck[RVAL]: set as val");
7003  _handle_annotations_before_blck_val_scalar();
7004  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7005  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7006  addrem_flags(RNXT, RVAL);
7007  }
7008  else
7009  {
7010  _c4assert(m_evt_handler->m_curr->indref != npos);
7011  _c4assert(startindent > m_evt_handler->m_curr->indref);
7012  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7013  _handle_block_check_leading_tabs(startcol, tabmark);
7014  addrem_flags(RNXT, RVAL);
7015  _handle_annotations_before_start_mapblck(startline);
7016  _handle_colon();
7017  m_evt_handler->begin_map_val_block();
7018  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7019  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7020  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7021  _maybe_skip_whitespace_tokens();
7022  // keep the child state on RVAL
7023  addrem_flags(RVAL, RNXT);
7024  }
7025  }
7026  else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7027  {
7028  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7029  _c4err("parse error");
7030  _c4dbgp("mapblck[RVAL]: start val seqblck");
7031  _handle_block_check_leading_tabs(startcol);
7032  addrem_flags(RNXT, RVAL);
7033  _handle_annotations_before_blck_val_scalar();
7034  m_evt_handler->begin_seq_val_block();
7035  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7036  _set_indentation(startindent);
7037  _line_progressed(1);
7038  _maybe_skip_whitespace_tokens();
7039  goto mapblck_finish;
7040  }
7041  else if(first == '[')
7042  {
7043  _c4dbgp("mapblck[RVAL]: start val seqflow");
7044  addrem_flags(RNXT, RVAL);
7045  _handle_annotations_before_blck_val_scalar();
7046  m_evt_handler->begin_seq_val_flow();
7047  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7048  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7049  _line_progressed(1);
7050  goto mapblck_finish;
7051  }
7052  else if(first == '{')
7053  {
7054  _c4dbgp("mapblck[RVAL]: start val mapflow");
7055  addrem_flags(RNXT, RVAL);
7056  _handle_annotations_before_blck_val_scalar();
7057  m_evt_handler->begin_map_val_flow();
7058  addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7059  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7060  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7061  _line_progressed(1);
7062  goto mapblck_finish;
7063  }
7064  else if(first == '*')
7065  {
7066  csubstr ref = _scan_ref_map();
7067  _c4dbgpf("mapblck[RVAL]: ref! {}", _prs(ref));
7068  if(_maybe_scan_following_colon())
7069  {
7070  _c4dbgp("mapblck[RVAL]: start child map, block");
7071  addrem_flags(RNXT, RVAL);
7072  _handle_annotations_before_blck_val_scalar();
7073  m_evt_handler->begin_map_val_block();
7074  _handle_keyref(ref);
7075  _set_indentation(startindent);
7076  // keep going in RVAL
7077  addrem_flags(RVAL, RNXT);
7078  }
7079  else
7080  {
7081  _c4dbgp("mapblck[RVAL]: was val ref");
7082  _handle_valref(ref);
7083  addrem_flags(RNXT, RVAL);
7084  }
7085  _maybe_skip_whitespace_tokens();
7086  }
7087  else if(first == '&')
7088  {
7089  csubstr anchor = _scan_anchor();
7090  _c4dbgpf("mapblck[RVAL]: anchor! {}", _prs(anchor));
7091  // we need to buffer the anchors, as there may be two
7092  // consecutive anchors in here
7093  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7094  }
7095  else if(first == '!')
7096  {
7097  csubstr tag = _scan_tag();
7098  _c4dbgpf("mapblck[RVAL]: tag! {}", _prs(tag));
7099  // we need to buffer the tags, as there may be two
7100  // consecutive tags in here
7101  _add_annotation(&m_pending_tags, tag, startindent, startline);
7102  }
7103  else if(first == '?')
7104  {
7105  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7106  _c4err("parse error");
7107  _c4dbgp("mapblck[RVAL]: start val mapblck");
7108  addrem_flags(RNXT, RVAL);
7109  _handle_annotations_before_blck_val_scalar();
7110  m_evt_handler->begin_map_val_block();
7111  addrem_flags(QMRK, RNXT);
7112  _set_indentation(startindent);
7113  _line_progressed(1);
7114  _maybe_skipchars(' ');
7115  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7116  {
7117  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7118  addrem_flags(RKCL, QMRK);
7119  m_evt_handler->begin_seq_key_block();
7120  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7121  _save_indentation();
7122  _line_progressed(1);
7123  _maybe_skipchars(' ');
7124  goto mapblck_finish;
7125  }
7126  goto mapblck_again;
7127  }
7128  else if(first == ':')
7129  {
7130  _c4dbgp("mapblck[RVAL]: start val mapblck");
7131  addrem_flags(RNXT, RVAL);
7132  _handle_annotations_before_start_mapblck(startline);
7133  _handle_colon();
7134  m_evt_handler->begin_map_val_block();
7135  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7136  m_evt_handler->set_key_scalar_plain_empty();
7137  // keep the child state on RVAL
7138  addrem_flags(RVAL, RNXT);
7139  _line_progressed(1);
7140  _maybe_skip_whitespace_tokens();
7141  goto mapblck_again;
7142  }
7143  else
7144  {
7145  _c4err("parse error"); // LCOV_EXCL_LINE
7146  }
7147  }
7148  else if(has_any(RNXT))
7149  {
7150  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7151  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7152  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7153  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7154  //
7155  // handle indentation
7156  //
7157  if(m_evt_handler->m_curr->at_line_beginning())
7158  {
7159  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7160  if(m_evt_handler->m_curr->indentation_eq())
7161  {
7162  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7163  _line_progressed(m_evt_handler->m_curr->indref);
7164  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7165  m_evt_handler->add_sibling();
7166  addrem_flags(RKEY, RNXT);
7167  goto mapblck_again;
7168  }
7169  else if(m_evt_handler->m_curr->indentation_lt())
7170  {
7171  _c4dbgp("mapblck[RNXT]: smaller indentation!");
7172  _handle_indentation_pop_from_block_map();
7173  if(has_all(RMAP|RBLCK))
7174  {
7175  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7176  if(!has_any(RKCL))
7177  {
7178  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7179  m_evt_handler->add_sibling();
7180  addrem_flags(RKEY, RNXT);
7181  }
7182  goto mapblck_again;
7183  }
7184  else
7185  {
7186  goto mapblck_finish;
7187  }
7188  }
7189  }
7190  else
7191  {
7192  _c4dbgp("mapblck[RNXT]: NOT at line begin");
7193  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7194  {
7195  _c4err("parse error");
7196  }
7197  else
7198  {
7199  _skipchars(" \t");
7200  if(!m_evt_handler->m_curr->line_contents.rem.len)
7201  {
7202  _c4dbgp("seqblck[RNXT]: again");
7203  goto mapblck_again; // LCOV_EXCL_LINE
7204  }
7205  }
7206  }
7207  //
7208  // handle tokens
7209  //
7210  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7211  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7212  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7213  if(first == ' ')
7214  {
7215  _c4dbgp("mapblck[RNXT]: skip spaces");
7216  _maybe_skip_whitespace_tokens();
7217  }
7218  else
7219  {
7220  _c4err("parse error");
7221  }
7222  }
7223  else if(has_any(QMRK))
7224  {
7225  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7226  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7227  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7228  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7229  if(_handle_map_block_qmrk())
7230  goto mapblck_again;
7231  else
7232  goto mapblck_finish;
7233  }
7234  else if(has_any(RKCL)) // read the key colon (after QMRK)
7235  {
7236  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7237  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7238  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7239  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7240  if(_handle_map_block_rkcl())
7241  goto mapblck_again;
7242  else
7243  goto mapblck_finish;
7244  }
7245 
7246  mapblck_again:
7247  _c4dbgt("mapblck: again", 0);
7248  if(_finished_line())
7249  {
7250  _line_ended();
7251  _scan_line();
7252  if(_finished_file())
7253  {
7254  _c4dbgp("mapblck: file finished!");
7255  _end_map_blck();
7256  goto mapblck_finish;
7257  }
7258  _c4dbgnextline();
7259  }
7260  goto mapblck_start;
7261 
7262  mapblck_finish:
7263  _c4dbgp("mapblck: finish");
7264 }
7265 
7266 
7267 //-----------------------------------------------------------------------------
7268 
7269 // return true if we should remain in map_block
7270 template<class EventHandler>
7271 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7272 {
7273  //
7274  // handle indentation
7275  //
7276  if(m_evt_handler->m_curr->at_line_beginning())
7277  {
7278  _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7279  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7280  if(m_evt_handler->m_curr->indentation_eq_extra())
7281  {
7282  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7283  _line_progressed(m_evt_handler->m_curr->indref + 1);
7284  if(!m_evt_handler->m_curr->line_contents.rem.len)
7285  return true; // go again
7286  }
7287  // indentation can be larger in QMRK state
7288  else if(m_evt_handler->m_curr->indentation_gt_extra())
7289  {
7290  _c4dbgp("mapblck[QMRK]: larger indentation !");
7291  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7292  if(!m_evt_handler->m_curr->line_contents.rem.len)
7293  return true; // go again
7294  }
7295  else
7296  {
7297  _c4dbgp("mapblck[QMRK]: smaller indentation!");
7298  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7299  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7300  if(m_evt_handler->m_curr->indentation_eq()
7301  // defend against docs or indentless seqs
7302  && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7303  {
7304  _c4dbgp("mapblck[QMRK]: QMRK finished!");
7305  _handle_annotations_before_blck_key_scalar();
7306  m_evt_handler->set_key_scalar_plain_empty();
7307  addrem_flags(RKCL, QMRK);
7308  return true; // go again
7309  }
7310  else if(m_evt_handler->m_curr->indentation_lt())
7311  {
7312  _c4dbgp("mapblck[QMRK]: indentation pop!");
7313  _handle_indentation_pop_from_block_map();
7314  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7315  if(has_all(RMAP|RBLCK))
7316  {
7317  _c4dbgp("mapblck[QMRK]: still mapblck!");
7318  return true; // go again
7319  }
7320  else
7321  {
7322  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7323  return false; // finish mapblck
7324  }
7325  }
7326  }
7327  }
7328  //
7329  // now handle the tokens
7330  //
7331  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7332  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7333  const size_t startline = m_evt_handler->m_curr->pos.line;
7334  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7335  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7336  ScannedScalar sc;
7337  if(first == '\'')
7338  {
7339  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7340  sc = _scan_scalar_squot();
7341  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7342  addrem_flags(RKCL, QMRK);
7343  if(!_maybe_scan_following_colon())
7344  {
7345  _c4dbgp("mapblck[QMRK]: set as key");
7346  _handle_annotations_before_blck_key_scalar();
7347  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7348  }
7349  else
7350  {
7351  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7352  _handle_annotations_before_start_mapblck_as_key();
7353  m_evt_handler->begin_map_key_block();
7354  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7355  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7356  _maybe_skip_whitespace_tokens();
7357  _set_indentation(startindent);
7358  // keep the child state on RVAL
7359  addrem_flags(RVAL, RKCL);
7360  }
7361  }
7362  else if(first == '"')
7363  {
7364  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7365  sc = _scan_scalar_dquot();
7366  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7367  addrem_flags(RKCL, QMRK);
7368  if(!_maybe_scan_following_colon())
7369  {
7370  _c4dbgp("mapblck[QMRK]: set as key");
7371  _handle_annotations_before_blck_key_scalar();
7372  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7373  }
7374  else
7375  {
7376  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7377  _handle_annotations_before_start_mapblck_as_key();
7378  m_evt_handler->begin_map_key_block();
7379  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7380  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7381  _maybe_skip_whitespace_tokens();
7382  _set_indentation(startindent);
7383  // keep the child state on RVAL
7384  addrem_flags(RVAL, RKCL);
7385  }
7386  }
7387  else if(first == '|')
7388  {
7389  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7390  ScannedBlock sb;
7391  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7392  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7393  _handle_annotations_before_blck_key_scalar();
7394  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7395  addrem_flags(RKCL, QMRK);
7396  }
7397  else if(first == '>')
7398  {
7399  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7400  ScannedBlock sb;
7401  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7402  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7403  _handle_annotations_before_blck_key_scalar();
7404  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7405  addrem_flags(RKCL, QMRK);
7406  }
7407  else if(_scan_scalar_plain_map_blck(&sc))
7408  {
7409  _c4dbgp("mapblck[QMRK]: plain scalar");
7410  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7411  addrem_flags(RKCL, QMRK);
7412  if(!_maybe_scan_following_colon())
7413  {
7414  _c4dbgp("mapblck[QMRK]: set as key");
7415  _handle_annotations_before_blck_key_scalar();
7416  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7417  }
7418  else
7419  {
7420  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7421  _handle_annotations_before_start_mapblck_as_key();
7422  m_evt_handler->begin_map_key_block();
7423  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7424  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7425  _maybe_skip_whitespace_tokens();
7426  _set_indentation(startindent);
7427  // keep the child state on RVAL
7428  addrem_flags(RVAL, RKCL);
7429  }
7430  }
7431  else if(first == ':')
7432  {
7433  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7434  addrem_flags(RKCL, QMRK);
7435  _handle_annotations_before_start_mapblck_as_key();
7436  m_evt_handler->begin_map_key_block();
7437  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7438  m_evt_handler->set_key_scalar_plain_empty();
7439  _line_progressed(1);
7440  _maybe_skip_whitespace_tokens();
7441  _set_indentation(startindent);
7442  // keep the child state on RVAL
7443  addrem_flags(RVAL, RKCL);
7444  }
7445  else if(first == '*')
7446  {
7447  csubstr ref = _scan_ref_map();
7448  _c4dbgpf("mapblck[QMRK]: key ref! {}", _prs(ref));
7449  addrem_flags(RKCL, QMRK);
7450  if(!_maybe_scan_following_colon())
7451  {
7452  _c4dbgp("mapblck[QMRK]: set ref as key");
7453  _handle_keyref(ref);
7454  }
7455  else
7456  {
7457  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7458  _handle_annotations_before_start_mapblck_as_key();
7459  m_evt_handler->begin_map_key_block();
7460  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7461  _handle_keyref(ref);
7462  _set_indentation(startindent);
7463  // keep the child state on RVAL
7464  addrem_flags(RVAL, RKCL|QMRK);
7465  }
7466  _maybe_skip_whitespace_tokens();
7467  }
7468  else if(first == '&')
7469  {
7470  csubstr anchor = _scan_anchor();
7471  _c4dbgpf("mapblck[QMRK]: key anchor! {}", _prs(anchor));
7472  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7473  }
7474  else if(first == '!')
7475  {
7476  csubstr tag = _scan_tag();
7477  _c4dbgpf("mapblck[QMRK]: key tag! {}", _prs(tag));
7478  _add_annotation(&m_pending_tags, tag, startindent, startline);
7479  }
7480  else if(first == '-')
7481  {
7482  _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7483  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7484  {
7485  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7486  addrem_flags(RKCL, QMRK);
7487  _handle_annotations_before_blck_key_scalar();
7488  m_evt_handler->begin_seq_key_block();
7489  addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7490  _set_indentation(startindent);
7491  _line_progressed(1);
7492  }
7493  else
7494  {
7495  _c4dbgp("mapblck[QMRK]: end+start doc");
7496  _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7497  _start_doc_suddenly();
7498  _line_progressed(3);
7499  }
7500  _maybe_skip_whitespace_tokens();
7501  return false; // finish mapblck
7502  }
7503  else if(first == '[')
7504  {
7505  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7506  addrem_flags(RKCL, QMRK);
7507  _handle_annotations_before_blck_key_scalar();
7508  m_evt_handler->begin_seq_key_flow();
7509  addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7510  _set_indentation(m_evt_handler->m_parent->indref + 1);
7511  _line_progressed(1);
7512  return false; // finish mapblck
7513  }
7514  else if(first == '{')
7515  {
7516  _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7517  addrem_flags(RKCL, QMRK);
7518  _handle_annotations_before_blck_key_scalar();
7519  m_evt_handler->begin_map_key_flow();
7520  addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7521  _set_indentation(m_evt_handler->m_parent->indref + 1);
7522  _line_progressed(1);
7523  return false; // finish mapblck
7524  }
7525  else if(first == '?')
7526  {
7527  _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7528  _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7529  _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7530  addrem_flags(RKCL, QMRK);
7531  _handle_annotations_before_blck_key_scalar();
7532  m_evt_handler->begin_map_key_block();
7533  addrem_flags(QMRK, RKCL);
7534  _set_indentation(startindent);
7535  // indentation_lt() should be handled elsewhere
7536  _line_progressed(1);
7537  _maybe_skipchars(' ');
7538  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7539  {
7540  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7541  addrem_flags(RKCL, QMRK);
7542  m_evt_handler->begin_seq_key_block();
7543  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7544  _save_indentation();
7545  _line_progressed(1);
7546  _maybe_skipchars(' ');
7547  return false;
7548  }
7549  }
7550  else
7551  {
7552  _c4err("parse error");
7553  }
7554  return true; // continue in mapblck
7555 }
7556 
7557 
7558 //-----------------------------------------------------------------------------
7559 
7560 // return true if we should remain in map_block
7561 template<class EventHandler>
7562 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7563 {
7564  //
7565  // handle indentation
7566  //
7567  if(m_evt_handler->m_curr->at_line_beginning())
7568  {
7569  if(m_evt_handler->m_curr->indentation_eq())
7570  {
7571  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7572  _line_progressed(m_evt_handler->m_curr->indref);
7573  if(!m_evt_handler->m_curr->line_contents.rem.len)
7574  return true; // continue in mapblck
7575  }
7576  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7577  {
7578  _c4err("invalid indentation");
7579  }
7580  }
7581  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7582  _c4dbgpf("mapblck[RKCL]: '{}'", first);
7583  if(first == ':')
7584  {
7585  _c4dbgp("mapblck[RKCL]: found the colon");
7586  _line_progressed(1);
7587  _maybe_skipchars(' ');
7588  #if defined(__GNUC__) && (__GNUC__ >= 12) \
7589  && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7590  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7591  #endif
7592  // sequence is valid after the RKCL ':'
7593  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7594  {
7595  addrem_flags(RVAL, RKCL);
7596  return true; // continue in mapblck
7597  }
7598  else
7599  {
7600  _c4dbgp("mapblck[RKCL]: start val seqblck");
7601  addrem_flags(RNXT, RKCL);
7602  m_evt_handler->begin_seq_val_block();
7603  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7604  _save_indentation();
7605  _line_progressed(1);
7606  _maybe_skipchars(' ');
7607  return false; // finish mapblck
7608  }
7609  }
7610  else if(first == '?')
7611  {
7612  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
7613  m_evt_handler->set_val_scalar_plain_empty();
7614  m_evt_handler->add_sibling();
7615  addrem_flags(QMRK, RKCL);
7616  _line_progressed(1);
7617  _maybe_skipchars(' ');
7618  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7619  {
7620  _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7621  addrem_flags(RKCL, QMRK);
7622  m_evt_handler->begin_seq_key_block();
7623  addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7624  _save_indentation();
7625  _line_progressed(1);
7626  _maybe_skipchars(' ');
7627  return false;
7628  }
7629  }
7630  else if(first == '-')
7631  {
7632  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7633  {
7634  _c4dbgp("mapblck[RKCL]: end+start doc");
7635  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7636  _start_doc_suddenly();
7637  _line_progressed(3);
7638  _maybe_skip_whitespace_tokens();
7639  return false; // finish mapblck
7640  }
7641  else
7642  {
7643  _c4err("parse error"); // LCOV_EXCL_LINE
7644  }
7645  }
7646  else if(first == '.')
7647  {
7648  _c4dbgp("mapblck[RKCL]: maybe end doc?");
7649  csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7650  if(rs == ".." || rs.begins_with(".. "))
7651  {
7652  _c4dbgp("mapblck[RKCL]: end+start doc");
7653  _end_doc_suddenly();
7654  _line_progressed(3);
7655  _maybe_skip_whitespace_tokens();
7656  _check_doc_end_tokens();
7657  return false; // finish mapblck
7658  }
7659  else
7660  {
7661  _c4err("parse error"); // LCOV_EXCL_LINE
7662  }
7663  }
7664  else/* if(m_was_inside_qmrk) */
7665  {
7666  _c4dbgp("mapblck[RKCL]: missing :");
7667  if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7668  _c4err("parse error"); // LCOV_EXCL_LINE
7669  m_evt_handler->set_val_scalar_plain_empty();
7670  m_evt_handler->add_sibling();
7671  addrem_flags(RKEY, RKCL);
7672  }
7673  return true;
7674 }
7675 
7676 
7677 //-----------------------------------------------------------------------------
7678 
7679 template<class EventHandler>
7680 void ParseEngine<EventHandler>::_handle_unk_json()
7681 {
7682  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7683 
7684  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7685  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7686 
7687  _maybe_skip_comment();
7688  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7689  if(!rem.len)
7690  return;
7691 
7692  size_t pos = rem.first_not_of(" \t");
7693  if(pos)
7694  {
7695  pos = pos != npos ? pos : rem.len;
7696  _c4dbgpf("skipping indentation of {}", pos);
7697  _line_progressed(pos);
7698  rem = m_evt_handler->m_curr->line_contents.rem;
7699  if(!rem.len)
7700  return;
7701  _c4dbgpf("rem is now {}", _prs(rem));
7702  }
7703 
7704  if(rem.begins_with('['))
7705  {
7706  _c4dbgp("it's a seq");
7707  _check_trailing_doc_token();
7708  _maybe_begin_doc();
7709  m_evt_handler->begin_seq_val_flow();
7710  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7711  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7712  m_doc_empty = false;
7713  _line_progressed(1);
7714  }
7715  else if(rem.begins_with('{'))
7716  {
7717  _c4dbgp("it's a map");
7718  _check_trailing_doc_token();
7719  _maybe_begin_doc();
7720  m_evt_handler->begin_map_val_flow();
7721  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7722  m_doc_empty = false;
7723  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7724  _line_progressed(1);
7725  }
7726  else if(_handle_bom())
7727  {
7728  _c4dbgp("byte order mark");
7729  }
7730  else
7731  {
7732  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7733  _maybe_skip_whitespace_tokens();
7734  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7735  if(!s.len)
7736  return;
7737  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7738  const char first = s.str[0];
7739  ScannedScalar sc;
7740  if(first == '"')
7741  {
7742  _c4dbgp("runk_json: scanning double-quoted scalar");
7743  _check_trailing_doc_token();
7744  _maybe_begin_doc();
7745  add_flags(RDOC);
7746  m_doc_empty = false;
7747  sc = _scan_scalar_dquot();
7748  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7749  if(!_maybe_scan_following_colon())
7750  {
7751  _c4dbgp("runk_json: set as val");
7752  _handle_annotations_before_blck_val_scalar();
7753  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7754  }
7755  else
7756  {
7757  _c4err("parse error");
7758  }
7759  }
7760  else if(_scan_scalar_plain_unk(&sc))
7761  {
7762  _c4dbgp("runk_json: got a plain scalar");
7763  _check_trailing_doc_token();
7764  _maybe_begin_doc();
7765  add_flags(RDOC);
7766  m_doc_empty = false;
7767  if(!_maybe_scan_following_colon())
7768  {
7769  _c4dbgp("runk_json: set as val");
7770  _handle_annotations_before_blck_val_scalar();
7771  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7772  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7773  }
7774  else
7775  {
7776  _c4err("parse error"); // LCOV_EXCL_LINE
7777  }
7778  }
7779  else
7780  {
7781  _c4err("parse error"); // LCOV_EXCL_LINE
7782  }
7783  }
7784 }
7785 
7786 
7787 //-----------------------------------------------------------------------------
7788 
7789 template<class EventHandler>
7790 void ParseEngine<EventHandler>::_handle_unk()
7791 {
7792  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7793 
7794  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7795  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7796 
7797  _maybe_skipchars(' ');
7798  _maybe_skip_comment();
7799 
7800  if(!m_evt_handler->m_curr->line_contents.rem.len)
7801  return;
7802 
7803  _c4dbgpf("runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
7804 
7805  if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7806  {
7807  _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7808  _c4dbgp("runk: check BOM");
7809  if(_handle_bom())
7810  {
7811  m_bom_line = m_evt_handler->m_curr->pos.line;
7812  _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7813  return;
7814  }
7815  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7816  _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7817  if(first == '-')
7818  {
7819  _c4dbgp("runk: rtop: suspecting doc");
7820  if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7821  {
7822  _c4dbgp("runk: rtop: begin doc");
7823  _maybe_end_doc();
7824  _begin2_doc_expl();
7825  _set_indentation(0);
7826  addrem_flags(RDOC|RUNK, NDOC);
7827  _line_progressed(3u);
7828  _maybe_skip_whitespace_tokens();
7829  return;
7830  }
7831  }
7832  else if(first == '.')
7833  {
7834  _c4dbgp("runk: rtop: suspecting doc end");
7835  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7836  {
7837  _c4dbgp("runk: rtop: end doc");
7838  if(has_any(RDOC))
7839  {
7840  _end2_doc_expl();
7841  }
7842  else
7843  {
7844  _c4dbgp("runk: rtop: ignore end doc");
7845  }
7846  addrem_flags(NDOC|RUNK, RDOC);
7847  _line_progressed(3u);
7848  _maybe_skip_whitespace_tokens();
7849  _check_doc_end_tokens();
7850  return;
7851  }
7852  }
7853  else if(first == '%')
7854  {
7855  _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
7856  if(C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC))))
7857  _c4err("need document footer before directives");
7858  _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7859  return;
7860  }
7861  }
7862 
7863  /* no else-if! */
7864 
7865  size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7866  size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7867  if(m_bom_len)
7868  {
7869  _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7870  if(m_evt_handler->m_curr->pos.line == m_bom_line)
7871  {
7872  _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7873  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7874  remindent -= m_bom_len;
7875  }
7876  else
7877  {
7878  m_bom_len = 0;
7879  }
7880  }
7881 
7882  size_t startcol = _handle_block_skip_leading_whitespace();
7883  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7884 
7885  if(first == '[')
7886  {
7887  _c4dbgp("runk: flow seq?");
7888  _handle_unk_begin_doc();
7889  if(C4_LIKELY( ! _annotations_require_key_container()))
7890  {
7891  _c4dbgp("runk: it's a seq, flow");
7892  _handle_annotations_before_blck_val_scalar();
7893  m_evt_handler->begin_seq_val_flow();
7894  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7895  _set_indentation(0);
7896  }
7897  else
7898  {
7899  _c4dbgp("runk: start new block map, set flow seq as key (!)");
7900  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7901  m_evt_handler->begin_map_val_block();
7902  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7903  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7904  m_evt_handler->begin_seq_key_flow();
7905  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7906  _set_indentation(0);
7907  }
7908  _line_progressed(1);
7909  }
7910  else if(first == '{')
7911  {
7912  _c4dbgp("runk: flow map?");
7913  _handle_unk_begin_doc();
7914  if(C4_LIKELY( ! _annotations_require_key_container()))
7915  {
7916  _c4dbgp("runk: it's a map, flow");
7917  _handle_annotations_before_blck_val_scalar();
7918  m_evt_handler->begin_map_val_flow();
7919  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7920  _set_indentation(0);
7921  }
7922  else
7923  {
7924  _c4dbgp("runk: start new block map, set flow map as key (!)");
7925  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7926  m_evt_handler->begin_map_val_block();
7927  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7928  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7929  m_evt_handler->begin_map_key_flow();
7930  addrem_flags(RMAP|RFLOW, RBLCK);
7931  _set_indentation(0);
7932  }
7933  _line_progressed(1);
7934  }
7935  else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7936  {
7937  _c4dbgp("runk: it's a seq, block");
7938  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7939  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7940  _handle_unk_begin_doc();
7941  _handle_annotations_before_blck_val_scalar();
7942  m_evt_handler->begin_seq_val_block();
7943  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7944  _set_indentation(startindent);
7945  _line_progressed(1);
7946  _maybe_skipchars(' ');
7947  }
7948  else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7949  {
7950  _c4dbgp("runk: it's a map + this key is complex");
7951  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7952  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7953  _handle_block_check_leading_tabs(startcol);
7954  _handle_unk_begin_doc();
7955  _handle_annotations_before_blck_val_scalar();
7956  m_evt_handler->begin_map_val_block();
7957  addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7958  _set_indentation(startindent);
7959  _line_progressed(1);
7960  _maybe_skipchars(' ');
7961  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7962  {
7963  _c4dbgp("runk: seqblck key starts after ?");
7964  addrem_flags(RKCL, QMRK);
7965  m_evt_handler->begin_seq_key_block();
7966  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7967  _save_indentation();
7968  _line_progressed(1);
7969  _maybe_skipchars(' ');
7970  }
7971  }
7972  else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7973  {
7974  if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7975  {
7976  _c4dbgp("runk: it's a map with an empty key");
7977  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7978  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7979  _handle_block_check_leading_tabs(startcol);
7980  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7981  _handle_unk_begin_doc();
7982  _handle_annotations_before_start_mapblck(startline);
7983  _handle_colon();
7984  m_evt_handler->begin_map_val_block();
7985  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7986  m_evt_handler->set_key_scalar_plain_empty();
7987  _set_indentation(startindent);
7988  }
7989  else
7990  {
7991  _c4err("block colon cannot occur on a new line unless ? is used");
7992  }
7993  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7994  _line_progressed(1);
7995  _maybe_skip_whitespace_tokens();
7996  }
7997  else if(first == '&')
7998  {
7999  csubstr anchor = _scan_anchor();
8000  _c4dbgpf("anchor! {}", _prs(anchor));
8001  const size_t line = m_evt_handler->m_curr->pos.line;
8002  _handle_unk_begin_doc();
8003  _add_annotation(&m_pending_anchors, anchor, remindent, line);
8004  _set_indentation(0);
8005  }
8006  else if(first == '*')
8007  {
8008  csubstr ref = _scan_ref_map();
8009  _c4dbgpf("runk: ref! {}", _prs(ref));
8010  _handle_unk_begin_doc();
8011  if(!_maybe_scan_following_colon())
8012  {
8013  _c4dbgp("runk: set val ref");
8014  _handle_valref(ref);
8015  }
8016  else
8017  {
8018  _c4dbgp("runk: start new block map, set ref as key");
8019  _handle_block_check_leading_tabs(startcol);
8020  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8021  _handle_annotations_before_start_mapblck(startline);
8022  m_evt_handler->begin_map_val_block();
8023  _handle_keyref(ref);
8024  _maybe_skip_whitespace_tokens();
8025  _set_indentation(0);
8026  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8027  }
8028  }
8029  else if(first == '!')
8030  {
8031  csubstr tag_orig;
8032  csubstr tag = _scan_tag(&tag_orig);
8033  _c4dbgpf("runk: val tag! {}", _prs(tag));
8034  // we need to buffer the tags, as there may be two
8035  // consecutive tags in here
8036  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8037  const size_t line = m_evt_handler->m_curr->pos.line;
8038  _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8039  }
8040  else
8041  {
8042  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8043  const size_t startscalar = _handle_block_get_whitespace_mark();
8044  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8045  auto beginmap = [&](size_t startindent_){
8046  if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8047  _c4err("multiline scalars cannot be used as implicit keys");
8048  _handle_block_check_leading_tabs(startcol, startscalar);
8049  _handle_annotations_before_start_mapblck(startline);
8050  _handle_colon();
8051  m_evt_handler->begin_map_val_block();
8052  _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8053  };
8054  auto after_beginmap = [&](size_t startindent_){
8055  _maybe_skip_whitespace_tokens();
8056  _set_indentation(startindent_);
8057  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8058  };
8059  if(first == '|')
8060  {
8061  _c4dbgp("runk: block-literal scalar");
8062  _handle_unk_begin_doc();
8063  ScannedBlock sb;
8064  _scan_block(&sb, startindent);
8065  _handle_annotations_before_blck_val_scalar();
8066  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8067  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8068  }
8069  else if(first == '>')
8070  {
8071  _c4dbgp("runk: block-folded scalar");
8072  _handle_unk_begin_doc();
8073  ScannedBlock sb;
8074  _scan_block(&sb, startindent);
8075  _handle_annotations_before_blck_val_scalar();
8076  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8077  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8078  }
8079  else if(first == '\'')
8080  {
8081  _c4dbgp("runk: single-quoted scalar");
8082  _handle_unk_begin_doc();
8083  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8084  size_t col = m_evt_handler->m_curr->pos.col;
8085  ScannedScalar sc = _scan_scalar_squot();
8086  if(!_maybe_scan_following_colon())
8087  {
8088  _c4dbgp("runk: set as val");
8089  _handle_annotations_before_blck_val_scalar();
8090  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8091  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8092  }
8093  else
8094  {
8095  _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8096  if(!firsttoken)
8097  startindent = _handle_unk_check_left_tokens(startindent, col);
8098  beginmap(startindent);
8099  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8100  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8101  after_beginmap(startindent);
8102  }
8103  }
8104  else if(first == '"')
8105  {
8106  _c4dbgp("runk: double-quoted scalar");
8107  _handle_unk_begin_doc();
8108  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8109  size_t col = m_evt_handler->m_curr->pos.col;
8110  ScannedScalar sc = _scan_scalar_dquot();
8111  if(!_maybe_scan_following_colon())
8112  {
8113  _c4dbgp("runk: set as val");
8114  _handle_annotations_before_blck_val_scalar();
8115  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8116  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8117  }
8118  else
8119  {
8120  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8121  if(!firsttoken)
8122  startindent = _handle_unk_check_left_tokens(startindent, col);
8123  beginmap(startindent);
8124  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8125  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126  after_beginmap(startindent);
8127  }
8128  }
8129  else
8130  {
8131  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8132  size_t col = m_evt_handler->m_curr->pos.col;
8133  ScannedScalar sc;
8134  if(_scan_scalar_plain_unk(&sc))
8135  {
8136  _c4dbgp("runk: plain scalar");
8137  _handle_unk_begin_doc();
8138  if(!_maybe_scan_following_colon())
8139  {
8140  _c4dbgp("runk: set as val");
8141  _handle_annotations_before_blck_val_scalar();
8142  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8143  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8144  }
8145  else
8146  {
8147  _c4dbgp("runk: start new block map, set plain scalar as key");
8148  if(!firsttoken)
8149  startindent = _handle_unk_check_left_tokens(startindent, col);
8150  beginmap(startindent);
8151  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8152  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8153  after_beginmap(startindent);
8154  }
8155  }
8156  else
8157  {
8158  _c4err("parse error"); // LCOV_EXCL_LINE
8159  }
8160  }
8161  }
8162 }
8163 
8164 template<class EventHandler>
8165 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8166 {
8167  _c4dbgp("runk: begin doc");
8168  _check_trailing_doc_token();
8169  _maybe_begin_doc();
8170  add_flags(RDOC);
8171  m_doc_empty = false;
8172 }
8173 
8174 template<class EventHandler>
8175 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8176 {
8177  _c4assert(col >= 1);
8178  col -= 1;
8179  _c4assert(col >= m_bom_len);
8180  csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8181  size_t pos = 0;
8182  _c4dbgpf("runk: check left tokens: s={}", _prs(s, /*escape*/true));
8183  if(skip_annotations)
8184  {
8185  _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8186  _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8187  }
8188  size_t firstns = s.first_not_of(' ', pos);
8189  if(firstns == npos)
8190  firstns = s.len;
8191  _c4dbgpf("runk: check left tokens:\n"
8192  " tokens={} skipped={}\n"
8193  " bomlen={} first={} col={}\n"
8194  " (bomlen+first)={} vs {}=col\n"
8195  " startindent={} lineindent={}"
8196  , _prs(s, /*escape*/true), _prs(s.sub(firstns), /*escape*/true)
8197  , m_bom_len, firstns, col
8198  , m_bom_len+firstns, col,
8199  realindent, m_evt_handler->m_curr->line_contents.indentation);
8200  if(m_bom_len + firstns != col)
8201  _c4err("parse error");
8202  if(!skip_annotations)
8203  realindent = firstns;
8204  _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8205  return realindent;
8206 }
8207 
8208 
8209 /** skip annotations which are pending on the same line */
8210 template<class EventHandler>
8211 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8212 {
8213  csubstr first, second;
8214  uint32_t total = _get_annotations_same_line(s, &first, &second);
8215  _c4dbgpf("runk: before skip: {}", _prs(s, true));
8216  size_t pos = s.first_not_of(" \t");
8217  if(pos == npos)
8218  pos = s.len;
8219  if(!total)
8220  {
8221  *indent = *first_non_token_pos = pos;
8222  return;
8223  }
8224  _c4assert(!s.sub(pos).begins_with_any(" \t"));
8225  _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos), true));
8226  _c4dbgpf("runk: first annotation: {}", first);
8227  _c4assert(first.len);
8228  _c4assert(first.is_sub(s));
8229  _c4assert(first.is_sub(s.sub(pos)));
8230  _c4assert(s.sub(pos).begins_with(first));
8231  *indent = pos;
8232  pos += first.len;
8233  _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos), true));
8234  if(total > 1)
8235  {
8236  _c4dbgpf("runk: second annotation: {}", second);
8237  _c4assert(total == 2);
8238  _c4assert(second.len);
8239  _c4assert(second.is_sub(s));
8240  _c4assert(second.is_sub(s.sub(pos)));
8241  csubstr spos = s.sub(pos);
8242  size_t more = spos.first_not_of(" \t");
8243  _c4assert(more != npos); // because the annotations are on the same line
8244  _c4dbgpf("runk: next nonspace: {}", pos + more);
8245  pos += more;
8246  _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos), true));
8247  _c4assert(s.sub(pos).begins_with(second));
8248  pos += second.len;
8249  _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos), true));
8250  }
8251  *first_non_token_pos = pos;
8252 }
8253 
8254 
8255 template<class EventHandler>
8256 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8257 {
8258  _c4assert(!m_evt_handler->m_curr->at_first_token());
8259  (void)token_soup;
8260  using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8261  EntryPtr first = nullptr;
8262  EntryPtr second = nullptr;
8263  uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8264  if(total)
8265  {
8266  _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8267  auto valid_if_same_line = [this](EntryPtr entry){
8268  _c4dbgpf("pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8269  return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8270  };
8271  // now select annotations only on the same line
8272  total = 0;
8273  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8274  total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8275  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8276  total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8277  _c4dbgpf("{} annotations on same line", total);
8278  _c4assert(total > 0); // because this function is only called
8279  // while not at the first token. That
8280  // means we must have same-line
8281  // annotations.
8282  auto get_first_on_same_line = [this](EntryPtr not_this_one){
8283  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8284  if(&m_pending_anchors.annotations[i] != not_this_one
8285  && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8286  return &m_pending_anchors.annotations[i];
8287  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8288  if(&m_pending_tags.annotations[i] != not_this_one
8289  && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8290  return &m_pending_tags.annotations[i];
8291  return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8292  };
8293  _c4assert(total >= 1);
8294  // assign to first
8295  first = get_first_on_same_line(nullptr);
8296  _c4assert(first);
8297  _c4dbgpf("first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8298  if(total > 1)
8299  {
8300  _c4assert(total == 2);
8301  // assign to second
8302  second = get_first_on_same_line(first);
8303  _c4assert(second);
8304  _c4dbgpf("second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
8305  }
8306  auto extract_string = [&](EntryPtr e){
8307  // tags can be null when the arena ran out of space
8308  if(!e->str.str || e->str.begins_with_any("!<"))
8309  {
8310  csubstr tag = e->orig;
8311  _c4assert(tag.str);
8312  _c4assert(tag.len);
8313  _c4assert(tag.is_sub(token_soup));
8314  _c4dbgpf("tag: {} -> {}", _maybe_null_str(e->str), tag);
8315  return tag;
8316  }
8317  csubstr anchor = e->str;
8318  _c4assert(anchor.len);
8319  _c4assert(anchor.str);
8320  _c4assert(anchor.is_sub(token_soup));
8321  _c4assert(!anchor.begins_with('&'));
8322  _c4assert(anchor.str - token_soup.str > 0);
8323  // add back the anchor's &
8324  --anchor.str;
8325  ++anchor.len;
8326  _c4assert(anchor.begins_with('&'));
8327  _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8328  return anchor;
8329  };
8330  *first_ = first ? extract_string(first) : nullptr;
8331  *second_ = second ? extract_string(second) : nullptr;
8332  if(total > 1 && (first_->str > second_->str))
8333  {
8334  csubstr tmp = *first_;
8335  *first_ = *second_;
8336  *second_ = tmp;
8337  _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8338  }
8339  }
8340  return total;
8341 }
8342 
8343 
8344 //-----------------------------------------------------------------------------
8345 
8346 template<class EventHandler>
8347 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8348 {
8349  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8350 
8351  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8352 
8353  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8354  if(has_any(RNXT))
8355  {
8356  _c4dbgp("usty[RNXT]: finishing!");
8357  _end_stream();
8358  }
8359  #endif
8360 
8361  _maybe_skip_comment();
8362  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8363  if(!rem.len)
8364  return;
8365 
8366  size_t pos = rem.first_not_of(" \t");
8367  if(pos)
8368  {
8369  pos = pos != npos ? pos : rem.len;
8370  _c4dbgpf("skipping indentation of {}", pos);
8371  _line_progressed(pos);
8372  rem = m_evt_handler->m_curr->line_contents.rem;
8373  if(!rem.len)
8374  return;
8375  _c4dbgpf("rem is now {}", _prs(rem));
8376  }
8377 
8378  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8379  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8380  char first = rem.str[0];
8381  if(has_any(RSEQ)) // destination is a sequence
8382  {
8383  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8384  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
8385  if(first == '[')
8386  {
8387  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
8388  add_flags(RNXT);
8389  m_evt_handler->_push();
8390  addrem_flags(RFLOW|RVAL, RNXT|USTY);
8391  _set_indentation(startindent);
8392  _line_progressed(1);
8393  _maybe_skip_whitespace_tokens();
8394  }
8395  else if(first == '-' && _is_blck_token(rem))
8396  {
8397  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
8398  add_flags(RNXT);
8399  m_evt_handler->_push();
8400  addrem_flags(RBLCK|RVAL, RNXT|USTY);
8401  _set_indentation(startindent);
8402  _line_progressed(1);
8403  _maybe_skip_whitespace_tokens();
8404  }
8405  else
8406  {
8407  _c4err("can only parse a seq into an existing seq");
8408  }
8409  }
8410  else if(has_any(RMAP)) // destination is a map
8411  {
8412  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8413  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8414  if(first == '{')
8415  {
8416  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8417  add_flags(RNXT);
8418  _handle_annotations_before_blck_val_scalar();
8419  m_evt_handler->_push();
8420  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8421  _set_indentation(startindent);
8422  _line_progressed(1);
8423  _maybe_skip_whitespace_tokens();
8424  }
8425  else if(first == '?' && _is_blck_token(rem))
8426  {
8427  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8428  add_flags(RNXT);
8429  _handle_annotations_before_blck_val_scalar();
8430  m_evt_handler->_push();
8431  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8432  _save_indentation();
8433  _line_progressed(1);
8434  _maybe_skip_whitespace_tokens();
8435  }
8436  else if(first == ':' && _is_blck_token(rem))
8437  {
8438  _c4dbgp("usty[RMAP]: it's a map with an empty key");
8439  add_flags(RNXT);
8440  _handle_annotations_before_blck_val_scalar();
8441  m_evt_handler->_push();
8442  m_evt_handler->set_key_scalar_plain_empty();
8443  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8444  _save_indentation();
8445  _line_progressed(1);
8446  _maybe_skip_whitespace_tokens();
8447  }
8448  else if(rem.begins_with('&'))
8449  {
8450  csubstr anchor = _scan_anchor();
8451  _c4dbgpf("usty[RMAP]: anchor! {}", _prs(anchor));
8452  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8453  const size_t line = m_evt_handler->m_curr->pos.line;
8454  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8455  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8456  }
8457  else if(first == '*')
8458  {
8459  csubstr ref = _scan_ref_map();
8460  _c4dbgpf("usty[RMAP]: ref! {}", _prs(ref));
8461  if(!_maybe_scan_following_colon())
8462  {
8463  _c4err("cannot read a VAL to a map");
8464  }
8465  else
8466  {
8467  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8468  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8469  add_flags(RNXT);
8470  _handle_annotations_before_start_mapblck(startline);
8471  m_evt_handler->_push();
8472  _handle_keyref(ref);
8473  _maybe_skip_whitespace_tokens();
8474  _set_indentation(startindent);
8475  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8476  }
8477  }
8478  else if(first == '!')
8479  {
8480  csubstr tag = _scan_tag();
8481  _c4dbgpf("usty[RMAP]: val tag! {}", _prs(tag));
8482  // we need to buffer the tags, as there may be two
8483  // consecutive tags in here
8484  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8485  const size_t line = m_evt_handler->m_curr->pos.line;
8486  _add_annotation(&m_pending_tags, tag, indentation, line);
8487  }
8488  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8489  {
8490  _c4err("cannot parse a seq into an existing map");
8491  }
8492  else
8493  {
8494  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8495  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8496  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8497  ScannedScalar sc;
8498  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8499  if(first == '\'')
8500  {
8501  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8502  sc = _scan_scalar_squot();
8503  if(!_maybe_scan_following_colon())
8504  {
8505  _c4err("cannot read a VAL to a map");
8506  }
8507  else
8508  {
8509  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8510  add_flags(RNXT);
8511  _handle_annotations_before_start_mapblck(startline);
8512  m_evt_handler->_push();
8513  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8514  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8515  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8516  _set_indentation(startindent);
8517  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8518  _maybe_skip_whitespace_tokens();
8519  }
8520  }
8521  else if(first == '"')
8522  {
8523  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8524  sc = _scan_scalar_dquot();
8525  if(!_maybe_scan_following_colon())
8526  {
8527  _c4err("cannot read a VAL to a map");
8528  }
8529  else
8530  {
8531  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8532  add_flags(RNXT);
8533  _handle_annotations_before_start_mapblck(startline);
8534  m_evt_handler->_push();
8535  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8536  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8537  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8538  _set_indentation(startindent);
8539  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8540  _maybe_skip_whitespace_tokens();
8541  }
8542  }
8543  else if(first == '|')
8544  {
8545  _c4err("block literal keys must be enclosed in '?'");
8546  }
8547  else if(first == '>')
8548  {
8549  _c4err("block literal keys must be enclosed in '?'");
8550  }
8551  else if(_scan_scalar_plain_unk(&sc))
8552  {
8553  _c4dbgp("usty[RMAP]: got a plain scalar");
8554  if(!_maybe_scan_following_colon())
8555  {
8556  _c4err("cannot read a VAL to a map");
8557  }
8558  else
8559  {
8560  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8561  add_flags(RNXT);
8562  _handle_annotations_before_start_mapblck(startline);
8563  m_evt_handler->_push();
8564  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8565  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8566  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8567  _set_indentation(startindent);
8568  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8569  _maybe_skip_whitespace_tokens();
8570  }
8571  }
8572  else
8573  {
8574  _c4err("parse error"); // LCOV_EXCL_LINE
8575  }
8576  }
8577  }
8578  else // destination is unknown
8579  {
8580  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8581  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8582  if(first == '[')
8583  {
8584  _c4dbgp("usty[UNK]: it's a flow seq");
8585  add_flags(RNXT);
8586  _handle_annotations_before_blck_val_scalar();
8587  m_evt_handler->begin_seq_val_flow();
8588  addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8589  _set_indentation(startindent);
8590  _line_progressed(1);
8591  _maybe_skip_whitespace_tokens();
8592  }
8593  else if(first == '-' && _is_blck_token(rem))
8594  {
8595  _c4dbgp("usty[UNK]: it's a block seq");
8596  add_flags(RNXT);
8597  _handle_annotations_before_blck_val_scalar();
8598  m_evt_handler->begin_seq_val_block();
8599  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8600  _set_indentation(startindent);
8601  _line_progressed(1);
8602  _maybe_skip_whitespace_tokens();
8603  }
8604  else if(first == '{')
8605  {
8606  _c4dbgp("usty[UNK]: it's a flow map");
8607  add_flags(RNXT);
8608  _handle_annotations_before_blck_val_scalar();
8609  m_evt_handler->begin_map_val_flow();
8610  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8611  _set_indentation(startindent);
8612  _line_progressed(1);
8613  _maybe_skip_whitespace_tokens();
8614  }
8615  else if(first == '?' && _is_blck_token(rem))
8616  {
8617  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8618  add_flags(RNXT);
8619  _handle_annotations_before_blck_val_scalar();
8620  m_evt_handler->begin_map_val_block();
8621  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8622  _save_indentation();
8623  _line_progressed(1);
8624  _maybe_skip_whitespace_tokens();
8625  }
8626  else if(first == ':' && _is_blck_token(rem))
8627  {
8628  _c4dbgp("usty[UNK]: it's a map with an empty key");
8629  add_flags(RNXT);
8630  _handle_annotations_before_blck_val_scalar();
8631  m_evt_handler->begin_map_val_block();
8632  m_evt_handler->set_key_scalar_plain_empty();
8633  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8634  _save_indentation();
8635  _line_progressed(1);
8636  _maybe_skip_whitespace_tokens();
8637  }
8638  else if(first == '&')
8639  {
8640  csubstr anchor = _scan_anchor();
8641  _c4dbgpf("usty[UNK]: anchor! {}", _prs(anchor));
8642  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8643  const size_t line = m_evt_handler->m_curr->pos.line;
8644  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8645  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8646  }
8647  else if(first == '*')
8648  {
8649  csubstr ref = _scan_ref_map();
8650  _c4dbgpf("usty[UNK]: ref! {}", _prs(ref));
8651  if(!_maybe_scan_following_colon())
8652  {
8653  _c4dbgp("usty[UNK]: set val ref");
8654  _handle_valref(ref);
8655  }
8656  else
8657  {
8658  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8659  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8660  add_flags(RNXT);
8661  _handle_annotations_before_start_mapblck(startline);
8662  m_evt_handler->begin_map_val_block();
8663  _handle_keyref(ref);
8664  _maybe_skip_whitespace_tokens();
8665  _set_indentation(startindent);
8666  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8667  }
8668  }
8669  else if(first == '!')
8670  {
8671  csubstr tag = _scan_tag();
8672  _c4dbgpf("usty[UNK]: val tag! {}", _prs(tag));
8673  // we need to buffer the tags, as there may be two
8674  // consecutive tags in here
8675  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8676  const size_t line = m_evt_handler->m_curr->pos.line;
8677  _add_annotation(&m_pending_tags, tag, indentation, line);
8678  }
8679  else
8680  {
8681  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8682  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8683  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8684  first = rem.str[0];
8685  ScannedScalar sc;
8686  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8687  if(first == '\'')
8688  {
8689  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8690  sc = _scan_scalar_squot();
8691  if(!_maybe_scan_following_colon())
8692  {
8693  _c4dbgp("usty[UNK]: set as val");
8694  _handle_annotations_before_blck_val_scalar();
8695  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8696  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8697  _end_stream();
8698  }
8699  else
8700  {
8701  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8702  add_flags(RNXT);
8703  _handle_annotations_before_start_mapblck(startline);
8704  m_evt_handler->begin_map_val_block();
8705  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8706  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8707  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8708  _set_indentation(startindent);
8709  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8710  _maybe_skip_whitespace_tokens();
8711  }
8712  }
8713  else if(first == '"')
8714  {
8715  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8716  sc = _scan_scalar_dquot();
8717  if(!_maybe_scan_following_colon())
8718  {
8719  _c4dbgp("usty[UNK]: set as val");
8720  _handle_annotations_before_blck_val_scalar();
8721  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8722  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8723  _end_stream();
8724  }
8725  else
8726  {
8727  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8728  add_flags(RNXT);
8729  _handle_annotations_before_start_mapblck(startline);
8730  m_evt_handler->begin_map_val_block();
8731  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8732  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8733  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8734  _set_indentation(startindent);
8735  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8736  _maybe_skip_whitespace_tokens();
8737  }
8738  }
8739  else if(first == '|')
8740  {
8741  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8742  ScannedBlock sb;
8743  _scan_block(&sb, startindent);
8744  _c4dbgp("usty[UNK]: set as val");
8745  _handle_annotations_before_blck_val_scalar();
8746  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8747  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8748  _end_stream();
8749  }
8750  else if(first == '>')
8751  {
8752  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8753  ScannedBlock sb;
8754  _scan_block(&sb, startindent);
8755  _c4dbgp("usty[UNK]: set as val");
8756  _handle_annotations_before_blck_val_scalar();
8757  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8758  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8759  _end_stream();
8760  }
8761  else if(_scan_scalar_plain_unk(&sc))
8762  {
8763  _c4dbgp("usty[UNK]: got a plain scalar");
8764  if(!_maybe_scan_following_colon())
8765  {
8766  _c4dbgp("usty[UNK]: set as val");
8767  _handle_annotations_before_blck_val_scalar();
8768  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8769  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8770  _end_stream();
8771  }
8772  else
8773  {
8774  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8775  add_flags(RNXT);
8776  _handle_annotations_before_start_mapblck(startline);
8777  m_evt_handler->begin_map_val_block();
8778  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8779  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8780  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8781  _set_indentation(startindent);
8782  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8783  _maybe_skip_whitespace_tokens();
8784  }
8785  }
8786  else
8787  {
8788  _c4err("parse error"); // LCOV_EXCL_LINE
8789  }
8790  }
8791  }
8792 }
8793 
8794 
8795 //-----------------------------------------------------------------------------
8796 
8797 template<class EventHandler>
8798 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8799 {
8800  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8801  _RYML_SAVE_TEST_JSON(filename, src);
8802  m_evt_handler->start_parse(filename.str, src);
8803  m_evt_handler->begin_stream();
8804  _reset();
8805  while( ! _finished_file())
8806  {
8807  _scan_line();
8808  while( ! _finished_line())
8809  {
8810  _c4dbgnextline();
8811  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8812  if(has_any(RSEQ))
8813  {
8814  _handle_seq_json();
8815  }
8816  else if(has_any(RMAP))
8817  {
8818  _handle_map_json();
8819  }
8820  else if(has_any(RUNK))
8821  {
8822  _handle_unk_json();
8823  }
8824  else
8825  {
8826  _c4err("internal error"); // LCOV_EXCL_LINE
8827  }
8828  }
8829  if(_finished_file())
8830  break; // it may have finished because of multiline blocks
8831  _line_ended();
8832  }
8833  _end_stream();
8834  m_evt_handler->finish_parse();
8835 }
8836 
8837 
8838 //-----------------------------------------------------------------------------
8839 
8840 template<class EventHandler>
8841 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8842 {
8843  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8844  _RYML_SAVE_TEST_YAML(filename, src);
8845  m_evt_handler->start_parse(filename.str, src);
8846  m_evt_handler->begin_stream();
8847  _reset();
8848  while( ! _finished_file())
8849  {
8850  _scan_line();
8851  while( ! _finished_line())
8852  {
8853  _c4dbgnextline();
8854  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8855  if(has_any(RFLOW))
8856  {
8857  if(has_none(RSEQIMAP))
8858  {
8859  if(has_any(RSEQ))
8860  {
8861  _handle_seq_flow();
8862  }
8863  else
8864  {
8865  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8866  _handle_map_flow();
8867  }
8868  }
8869  else
8870  {
8871  _handle_seq_imap();
8872  }
8873  }
8874  else if(has_any(RBLCK))
8875  {
8876  if(has_any(RSEQ))
8877  {
8878  _handle_seq_block();
8879  }
8880  else
8881  {
8882  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8883  _handle_map_block();
8884  }
8885  }
8886  else if(has_any(RUNK))
8887  {
8888  _handle_unk();
8889  }
8890  else if(has_any(USTY))
8891  {
8892  _handle_usty();
8893  }
8894  else
8895  {
8896  _c4err("internal error"); // LCOV_EXCL_LINE
8897  }
8898  }
8899  if(_finished_file())
8900  break; // it may have finished because of multiline blocks
8901  _line_ended();
8902  }
8903  _end_stream();
8904  m_evt_handler->finish_parse();
8905 }
8906 /** @endcond */
8907 
8908 } // namespace yml
8909 } // namespace c4
8910 
8911 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8912 
8913 #undef _c4dbgnextline
8914 #undef _c4assert
8915 #undef _c4err
8916 
8917 #if defined(_MSC_VER)
8918 # pragma warning(pop)
8919 #elif defined(__clang__)
8920 # pragma clang diagnostic pop
8921 #elif defined(__GNUC__)
8922 # pragma GCC diagnostic pop
8923 #endif
8924 
8925 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:28
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:192
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1546
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition: common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
bool is_valid_tag_handle(csubstr handle)
Definition: tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition: tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ npos
a null string position
Definition: common.hpp:258
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK; reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
Definition: common.hpp:266
@ UTF8
UTF8.
Definition: common.hpp:264
@ UTF16LE
UTF16, Little-Endian.
Definition: common.hpp:265
@ NOBOM
No Byte Order Mark was found.
Definition: common.hpp:263
@ UTF32BE
UTF32, Big-Endian.
Definition: common.hpp:268
@ UTF32LE
UTF32, Little-Endian.
Definition: common.hpp:267
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition: version.cpp:6
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _c4assert(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Definition: common.hpp:287
Options to give to the parser to control its behavior.
Definition: common.hpp:347
utilities for UTF and Byte Order Mark