rapidyaml  0.12.1
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
6 #endif
7 #ifndef _C4_CHARCONV_HPP_
8 #include "c4/charconv.hpp"
9 #endif
10 #ifndef C4_UTF_HPP_
11 #include "c4/utf.hpp"
12 #endif
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
15 #endif
16 #ifndef _C4_YML_TAG_HPP_
17 #include "c4/yml/tag.hpp"
18 #endif
19 #ifndef _C4_YML_NODE_TYPE_HPP_
20 #include "c4/yml/node_type.hpp"
21 #endif
22 
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
25 #endif
26 
// Error/assertion shorthands used by the parse engine member functions.
//
// _c4err(fmt, ...): forwards to this->_err() with the current C++ source
// location (RYML_LOC_HERE()). When RYML_DBG is defined, RYML_DEBUG_BREAK()
// is invoked first, and the expansion is wrapped in do/while(0) so that it
// behaves as a single statement.
27 #ifdef RYML_DBG
28 #ifndef C4_DUMP_HPP_
29 #include <c4/dump.hpp>
30 #endif
31 #define _c4err(...) \
32  do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33 #else
34 #define _c4err(...) \
35  this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36 #endif
// _c4assert(cond): parse-time assertion; passes the handler's callbacks and
// the current parse position to _RYML_ASSERT_PARSE_. Both macros reference
// `this`/m_evt_handler, so they are only usable inside member functions.
37 #define _c4assert(...) \
38  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39 
40 
// Compile-time switches selected by RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(code): expands to `code` only when the option is
//    defined, otherwise to nothing.
//  - _RYML_WITHOUT_TAB_TOKENS(code): the opposite.
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without): picks one of the two
//    arguments.
// In this file they are used to optionally accept '\t' where ' ' is
// accepted after a token.
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
45 #else
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
49 #endif
50 
51 // helper to export cases to the YAML test suite.
// Unless RYML_SAVE_TEST_YAML is defined, both macros expand to nothing.
// When it is defined, they call c4::yml::ryml_save_test_yaml() /
// ryml_save_test_json(), which are only declared here; their definitions
// are not part of this listing.
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
55 #else
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
58 namespace c4 {
59 namespace yml {
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
62 } // namespace yml
63 } // namespace c4
64 #endif
65 
66 
67 // debug scaffold: prints a separator followed by the line number and
// byte offset of the current parser state. Expands to a single statement
// (do/while(0)); it reads m_evt_handler, so it is only usable inside
// member functions. The output goes through _c4dbgq/_c4dbgt.
68 #define _c4dbgnextline() \
69  do { \
70  _c4dbgq("\n-----------"); \
71  _c4dbgt("handling line={}, offset={}B", \
72  m_evt_handler->m_curr->pos.line, \
73  m_evt_handler->m_curr->pos.offset); \
74  } while(0)
75 
76 
77 #if defined(_MSC_VER)
78 # pragma warning(push)
79 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
80 # pragma warning(disable: 4702/*unreachable code*/)
81 #elif defined(__clang__)
82 # pragma clang diagnostic push
83 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
84 # pragma clang diagnostic ignored "-Wformat-nonliteral"
85 # pragma clang diagnostic ignored "-Wold-style-cast"
86 #elif defined(__GNUC__)
87 # pragma GCC diagnostic push
88 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
89 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
90 # pragma GCC diagnostic ignored "-Wold-style-cast"
91 # if __GNUC__ >= 7
92 # pragma GCC diagnostic ignored "-Wduplicated-branches"
93 # endif
94 #endif
95 
96 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
97 
98 namespace c4 {
99 namespace yml {
100 
101 namespace { // NOLINT
102 
103 C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
104 {
105  // avoids reassigning the ptr in substr
106  subject.len = pos != npos ? pos : subject.len;
107 }
108 C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
109 {
110  // avoids reassigning the ptr in substr
111  subject.len = pos != npos ? pos : subject.len;
112 }
113 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
114 {
115  // avoids reassigning the ptr in substr
116  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
117  subject.len = pos;
118 }
119 C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
120 {
121  // avoids reassigning the ptr in substr
122  _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
123  subject.len = pos;
124 }
125 
126 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
127 {
128  _RYML_ASSERT_BASIC(s.len > 0);
129  _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
130  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
131 }
132 
133 C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
134 {
135  return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))));
136 }
137 
138 inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
139 {
140  _RYML_ASSERT_BASIC(s.begins_with('-'));
141  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
142  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
143  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
144  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
145 }
146 
147 inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
148 {
149  _RYML_ASSERT_BASIC(s.begins_with('.'));
150  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
151  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
152  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
153  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
154 }
155 
156 inline bool _is_doc_token(csubstr s) noexcept
157 {
158  if(s.len >= 3)
159  {
160  switch(s.str[0])
161  {
162  case '-':
163  //return _is_doc_begin_token(s); // this was failing with gcc -O2
164  return (s.str[1] == '-' && s.str[2] == '-')
165  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
166  case '.':
167  //return _is_doc_end_token(s); // this was failing with gcc -O2
168  return (s.str[1] == '.' && s.str[2] == '.')
169  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
170  }
171  }
172  return false;
173 }
174 
175 inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
176 {
177  _RYML_ASSERT_BASIC(s.len);
178  switch(s.str[0])
179  {
180  case 'f':
181  return s.begins_with("false") ? 5u : 0u;
182  case 't':
183  return s.begins_with("true") ? 4u : 0u;
184  case 'n':
185  return s.begins_with("null") ? 4u : 0u;
186  }
187  return 0u;
188 }
189 
190 
191 //-----------------------------------------------------------------------------
192 
/** Return 1 when @p nl and @p following form a two-character line break
 * ("\n\r" or "\r\n"), and 0 otherwise. */
C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
{
    const bool lf_then_cr = (nl == '\n') && (following == '\r');
    const bool cr_then_lf = (nl == '\r') && (following == '\n');
    return (lf_then_cr || cr_then_lf) ? size_t(1) : size_t(0);
}
197 
198 //! look for the next newline chars, and jump to the right of those
199 inline substr _from_next_line(substr rem)
200 {
201  size_t nlpos = rem.first_of("\r\n");
202  if(nlpos == csubstr::npos)
203  return {};
204  const char nl = rem[nlpos];
205  rem = rem.right_of(nlpos);
206  if(rem.empty())
207  return {};
208  if(_extend_from_combined_newline(nl, rem.front()))
209  rem = rem.sub(1);
210  return rem;
211 }
212 
213 
214 //-----------------------------------------------------------------------------
215 
216 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
217 {
218  _RYML_ASSERT_BASIC(r[*i] == '\n');
219  size_t numnl_following = 0;
220  ++(*i);
221  for( ; *i < r.len; ++(*i))
222  {
223  if(r.str[*i] == '\n')
224  ++numnl_following;
225  // skip leading whitespace
226  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
227  ;
228  else
229  break;
230  }
231  return numnl_following;
232 }
233 
234 /** @p i is set to the first non whitespace character after the line
235  * @return the number of empty lines after the initial position */
236 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
237 {
238  _RYML_ASSERT_BASIC(r[*i] == '\n');
239  size_t numnl_following = 0;
240  ++(*i);
241  if(indentation == 0)
242  {
243  for( ; *i < r.len; ++(*i))
244  {
245  const char c = r.str[*i];
246  if(c == '\n')
247  ++numnl_following;
248  // skip leading whitespace
249  else if(c != ' ' && c != '\t' && c != '\r')
250  break;
251  }
252  }
253  else
254  {
255  for( ; *i < r.len; ++(*i))
256  {
257  char c = r.str[*i];
258  if(c == '\n')
259  {
260  ++numnl_following;
261  // skip the indentation after the newline
262  size_t stop = *i + indentation;
263  for( ; *i < r.len; ++(*i))
264  {
265  c = r.str[*i];
266  if(c != ' ' && c != '\r')
267  break;
268  _RYML_ASSERT_BASIC(*i < stop); // LCOV_EXCL_LINE
269  }
270  C4_UNUSED(stop);
271  }
272  // skip leading whitespace
273  else if(c != ' ' && c != '\t' && c != '\r')
274  {
275  break;
276  }
277  }
278  }
279  return numnl_following;
280 }
281 
282 } // anon namespace
283 
284 
285 //-----------------------------------------------------------------------------
286 //-----------------------------------------------------------------------------
287 //-----------------------------------------------------------------------------
288 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 289 to 291). Judging by the body this is
// presumably the ParseEngine destructor - confirm against the header.
// The body releases the newline-offsets buffer via _free() and then
// resets the members via _clr().
289 template<class EventHandler>
291 {
292  _free();
293  _clr();
294 }
295 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 296 to 298). Judging by the initializers
// this is presumably the constructor taking `evt_handler` and `opts` -
// confirm against the header.
// Stores the options and the event handler, and starts with no pending
// anchors/tags, no directives, an empty document, no BOM encoding and no
// newline-offsets buffer. A null evt_handler is rejected by
// _RYML_CHECK_BASIC.
296 template<class EventHandler>
298  : m_options(opts)
299  , m_evt_handler(evt_handler)
300  , m_pending_anchors()
301  , m_pending_tags()
302  , m_has_directives_yaml(false)
303  , m_has_directives(false)
304  , m_doc_empty(true)
305  , m_prev_colon(npos)
306  , m_prev_val_end(npos)
307  , m_encoding(NOBOM)
308  , m_newline_offsets()
309  , m_newline_offsets_size(0)
310  , m_newline_offsets_capacity(0)
311 {
312  _RYML_CHECK_BASIC(evt_handler);
313 }
314 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 315 to 317). The body takes over the
// newline-offsets pointer of `that` and then calls that._clr(), so this
// is presumably the move constructor - confirm against the header.
//
// NOTE(review): m_prev_colon, m_prev_val_end and m_encoding are set to
// their defaults here instead of being taken from `that`, whereas both
// assignment operators below copy them. Confirm whether this difference
// is intended.
315 template<class EventHandler>
317  : m_options(that.m_options)
318  , m_evt_handler(that.m_evt_handler)
319  , m_pending_anchors(that.m_pending_anchors)
320  , m_pending_tags(that.m_pending_tags)
321  , m_has_directives_yaml(that.m_has_directives_yaml)
322  , m_has_directives(that.m_has_directives)
323  , m_doc_empty(that.m_doc_empty)
324  , m_prev_colon(npos)
325  , m_prev_val_end(npos)
326  , m_encoding(NOBOM)
327  , m_newline_offsets(that.m_newline_offsets)
328  , m_newline_offsets_size(that.m_newline_offsets_size)
329  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
330 {
// the buffer pointer now lives in *this; clearing `that` keeps it from
// being freed through `that` as well
331  that._clr();
332 }
333 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 334 to 336). The body allocates its own
// newline-offsets buffer and copies the contents of `that` into it, so
// this is presumably the copy constructor - confirm against the header.
//
// As in the constructor above, m_prev_colon, m_prev_val_end and
// m_encoding start from their defaults rather than from `that`.
334 template<class EventHandler>
336  : m_options(that.m_options)
337  , m_evt_handler(that.m_evt_handler)
338  , m_pending_anchors(that.m_pending_anchors)
339  , m_pending_tags(that.m_pending_tags)
340  , m_has_directives_yaml(that.m_has_directives_yaml)
341  , m_has_directives(that.m_has_directives)
342  , m_doc_empty(that.m_doc_empty)
343  , m_prev_colon(npos)
344  , m_prev_val_end(npos)
345  , m_encoding(NOBOM)
346  , m_newline_offsets()
347  , m_newline_offsets_size()
348  , m_newline_offsets_capacity()
349 {
// only allocate and copy when `that` actually has a buffer
350  if(that.m_newline_offsets_capacity)
351  {
352  _resize_locations(that.m_newline_offsets_capacity);
353  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
354  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
355  m_newline_offsets_size = that.m_newline_offsets_size;
356  }
357 }
358 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 359 to 361). The body takes over the
// newline-offsets pointer of `that`, calls that._clr() and returns *this,
// so this is presumably the move assignment operator - confirm against
// the header.
//
// NOTE(review): unlike the copying assignment below, there is no
// `&that != this` guard. On self-assignment _free() releases the buffer
// and that._clr() then resets every member of *this, including
// m_evt_handler. Consider adding the guard.
359 template<class EventHandler>
361 {
// release our own buffer before taking over the one from `that`
362  _free();
363  m_options = (that.m_options);
364  m_evt_handler = that.m_evt_handler;
365  m_pending_anchors = that.m_pending_anchors;
366  m_pending_tags = that.m_pending_tags;
367  m_has_directives_yaml = that.m_has_directives_yaml;
368  m_has_directives = that.m_has_directives;
369  m_doc_empty = that.m_doc_empty;
370  m_prev_colon = that.m_prev_colon;
371  m_prev_val_end = that.m_prev_val_end;
372  m_encoding = that.m_encoding;
373  m_newline_offsets = (that.m_newline_offsets);
374  m_newline_offsets_size = (that.m_newline_offsets_size);
375  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// the buffer pointer now lives in *this; clear it in `that`
376  that._clr();
377  return *this;
378 }
379 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 380 to 382). The body guards against
// self-assignment and deep-copies the newline-offsets buffer, so this is
// presumably the copy assignment operator - confirm against the header.
//
// NOTE(review): when `that` has no newline-offsets buffer, the memcpy
// below is still reached with a zero byte count and with a destination
// that _free() has just set to nullptr. The copying constructor above
// avoids this with an `if(that.m_newline_offsets_capacity)` guard;
// consider doing the same here.
380 template<class EventHandler>
382 {
383  if(&that != this)
384  {
// our own buffer is released first, through the callbacks of the handler
// we had before the assignment
385  _free();
386  m_options = (that.m_options);
387  m_evt_handler = that.m_evt_handler;
388  m_pending_anchors = that.m_pending_anchors;
389  m_pending_tags = that.m_pending_tags;
390  m_has_directives_yaml = that.m_has_directives_yaml;
391  m_has_directives = that.m_has_directives;
392  m_doc_empty = that.m_doc_empty;
393  m_prev_colon = that.m_prev_colon;
394  m_prev_val_end = that.m_prev_val_end;
395  m_encoding = that.m_encoding;
396  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
397  _resize_locations(that.m_newline_offsets_capacity);
398  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
399  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
400  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
401  m_newline_offsets_size = that.m_newline_offsets_size;
402  }
403  return *this;
404 }
405 
// NOTE(review): the declarator line is absent from this listing (the
// source numbering jumps from 406 to 408). This is presumably _clr(),
// which the special members above call - confirm against the header.
//
// Resets the members to their empty/default values. Nothing is freed
// here: the newline-offsets pointer is simply overwritten, so a caller
// that owns a buffer has to call _free() first.
//
// NOTE(review): m_bom_len and m_bom_line, which _reset() clears, are not
// touched here - confirm whether that is intended.
406 template<class EventHandler>
408 {
409  m_options = {};
410  m_evt_handler = {};
411  m_pending_anchors = {};
412  m_pending_tags = {};
413  m_has_directives_yaml = false;
414  m_has_directives = false;
415  m_doc_empty = true;
416  m_prev_colon = npos;
417  m_prev_val_end = npos;
418  m_encoding = NOBOM;
419  m_newline_offsets = {};
420  m_newline_offsets_size = {};
421  m_newline_offsets_capacity = {};
422 }
423 
424 template<class EventHandler>
425 void ParseEngine<EventHandler>::_free()
426 {
427  if(m_newline_offsets)
428  {
429  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
430  m_newline_offsets = nullptr;
431  m_newline_offsets_size = 0u;
432  m_newline_offsets_capacity = 0u;
433  }
434 }
435 
436 
437 //-----------------------------------------------------------------------------
438 
439 template<class EventHandler>
440 void ParseEngine<EventHandler>::_reset()
441 {
442  m_pending_anchors = {};
443  m_pending_tags = {};
444  m_has_directives_yaml = false;
445  m_has_directives = false;
446  m_doc_empty = true;
447  m_prev_colon = npos;
448  m_prev_val_end = npos;
449  m_bom_len = 0;
450  m_encoding = NOBOM;
451  m_bom_line = 0;
452  if(m_options.locations())
453  {
454  _prepare_locations();
455  }
456 }
457 
458 
459 //-----------------------------------------------------------------------------
460 
461 template<class EventHandler>
462 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
463 {
464  _c4dbgp("relocate to new arena");
465  const char *pb = prev_arena.str;
466  const char *pe = prev_arena.str + prev_arena.len;
467  #define _ryml_relocate(s) \
468  if((s).str >= pb && (s).str <= pe) \
469  { \
470  (s).str = next_arena.str + ((s).str - pb); \
471  }
472  for(ParserState &st : m_evt_handler->m_stack)
473  {
474  _ryml_relocate(st.line_contents.rem);
475  _ryml_relocate(st.line_contents.full);
476  }
477  _ryml_relocate(m_evt_handler->m_src);
478  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
479  {
480  _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
481  _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
482  }
483  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
484  {
485  _ryml_relocate(m_pending_anchors.annotations[i].str);
486  _ryml_relocate(m_pending_anchors.annotations[i].orig);
487  }
488  {
489  TagDirectives &tds = m_evt_handler->tag_directives();
490  for(size_t i = 0, sz = tds.size(); i < sz; ++i)
491  {
492  _ryml_relocate(tds.m_directives[i].handle);
493  _ryml_relocate(tds.m_directives[i].prefix);
494  }
495  }
496  {
497  TagCache &tch = m_evt_handler->tag_cache();
498  for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
499  {
500  _ryml_relocate(tch.m_entries[i].tag);
501  _ryml_relocate(tch.m_entries[i].resolved);
502  }
503  }
504  if(other)
505  {
506  _ryml_relocate(*other);
507  }
508  #undef _ryml_relocate
509 }
510 
511 /** @cond dev */
512 template<class EventHandler>
513 substr ParseEngine<EventHandler>::_alloc_arena(size_t len, substr *other)
514 {
515  csubstr prev = m_evt_handler->arena();
516  substr out = m_evt_handler->alloc_arena(len);
517  substr curr = m_evt_handler->arena();
518  if(curr.str != prev.str)
519  _relocate_arena(prev, curr, other);
520  return out;
521 }
522 /** @endcond */
523 
524 
525 //-----------------------------------------------------------------------------
526 
527 #ifdef RYML_DBG
// Debug-only (RYML_DBG) helper: writes a description of the current
// parser state through `dumpfn`. The output has up to three parts:
//   1. "[file:]line:col: <current line, escaped, at most 80 chars>[...] (size=N)"
//   2. a marker line with '^' followed by '~' placed under the remaining
//      (not yet consumed) portion of the line, then "(cols first-last)"
//   3. "top state: <flags>"
// When the current line is empty, parts 1 and 2 are replaced by a single
// newline.
528 template<class EventHandler>
529 template<class DumpFn>
530 C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
531 {
532  ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
533  LineContents const& C4_RESTRICT lc = st->line_contents;
534  csubstr contents = lc.full.first(lc.num_cols);
535  if(contents.len)
536  {
537  // print the yaml src line
// offs is the width of the "[file:]line:col: " prefix, used further down
// to align the marker line. The 3u matches the three literal characters
// of "{}:{}: "; to_chars() on an empty buffer presumably returns the
// number of characters needed - TODO confirm.
538  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
539  csubstr m_file = m_evt_handler->m_curr->pos.name;
540  if(m_file.len)
541  {
542  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
543  offs += m_file.len + 1;
544  }
545  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
// lines of 80 characters or more are cut at 80 and followed by "..."
546  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
547  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
548  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
549  // highlight the remaining portion of the previous line
// columns are adjusted with adjust_pos_with_escapes() because the line
// above was printed escaped
550  size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
551  size_t lastcol = firstcol + lc.rem.len;
552  size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
553  size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
554  for(size_t i = 0; i < offs + firstcol_adj; ++i)
555  std::forward<DumpFn>(dumpfn)(" ");
556  std::forward<DumpFn>(dumpfn)("^");
// the '^' counts as the first marker character, hence i starts at 1
557  for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
558  std::forward<DumpFn>(dumpfn)("~");
559  _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
560  }
561  else
562  {
563  std::forward<DumpFn>(dumpfn)("\n");
564  }
565  // next line: print the state flags
566  {
567  char flagbuf_[128];
568  _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
569  }
570 }
571 
572 template<class EventHandler>
573 void ParseEngine<EventHandler>::_print_state_stack(substr buf) const
574 {
575  if(_dbg_enabled())
576  {
577  for(ParserState const& s : m_evt_handler->m_stack)
578  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
579  }
580 }
581 
582 template<class EventHandler>
583 void ParseEngine<EventHandler>::_print_state_stack() const
584 {
585  char buf[128];
586  _print_state_stack(buf);
587 }
588 #endif
589 
590 
591 //-----------------------------------------------------------------------------
592 
593 template<class EventHandler>
594 template<class ...Args>
595 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
596 {
597  m_evt_handler->cancel_parse();
598  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
599 }
600 
601 template<class EventHandler>
602 template<class ...Args>
603 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
604 {
605  m_evt_handler->cancel_parse();
606  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
607 }
608 
609 
610 //-----------------------------------------------------------------------------
611 #ifdef RYML_DBG
612 template<class EventHandler>
613 template<class ...Args>
614 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
615 {
616  if(_dbg_enabled())
617  {
618  _dbg_printf(fmt, args...);
619  _dbg_dumper("\n");
620  _fmt_msg(_dbg_dumper);
621  }
622 }
623 #endif
624 
625 
626 //-----------------------------------------------------------------------------
627 template<class EventHandler>
628 bool ParseEngine<EventHandler>::_finished_file() const
629 {
630  bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
631  if(ret)
632  {
633  _c4dbgp("finished file!!!");
634  }
635  return ret;
636 }
637 
638 template<class EventHandler>
639 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
640 {
641  return m_evt_handler->m_curr->line_contents.rem.empty();
642 }
643 
644 
645 //-----------------------------------------------------------------------------
646 
647 template<class EventHandler>
648 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
649 {
650  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
651  {
652  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
653  if(pos == npos)
654  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
655  _c4dbgpf("skip {} whitespace characters", pos);
656  _line_progressed(pos);
657  }
658 }
659 
660 template<class EventHandler>
661 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
662 {
663  if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
664  {
665  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
666  if(pos == npos)
667  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
668  _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
669  _line_progressed(pos);
670  }
671 }
672 
673 template<class EventHandler>
674 template<size_t N>
675 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
676 {
677  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
678  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
679  if(pos == npos)
680  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
681  _c4dbgpf("skip {} characters", pos);
682  _line_progressed(pos);
683 }
684 
685 template<class EventHandler>
686 void ParseEngine<EventHandler>::_skip_comment()
687 {
688  LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
689  const size_t col = m_evt_handler->m_curr->pos.col - 1u;
690  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
691  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
692  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
693  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
694  // raise an error if the comment is not preceded by whitespace
695  if(lc.rem.str != lc.full.str) // not at line beginning
696  {
697  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
698  const char prev = lc.full.str[col - 1u];
699  if(C4_UNLIKELY(prev != ' ' && prev != '\t'))
700  _c4err("comment not preceded by whitespace");
701  }
702  _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
703  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
704 }
705 
706 template<class EventHandler>
707 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
708 {
709  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
710  if(pos != npos)
711  {
712  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
713  {
714  _line_progressed(pos);
715  _skip_comment();
716  }
717  }
718 }
719 
720 template<class EventHandler>
721 void ParseEngine<EventHandler>::_maybe_skip_comment()
722 {
723  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
724  if(pos != npos)
725  {
726  if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
727  {
728  _line_progressed(pos);
729  _skip_comment();
730  }
731  }
732  else
733  {
734  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
735  }
736 }
737 
738 template<class EventHandler>
739 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
740 {
741  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
742  if(pos != npos)
743  {
744  if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
745  {
746  // bump pos to skip the colon as well, and check the colon
747  // is followed by space or tab
748  if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
749  {
750  const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
751  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
752  ++pos;
753  else
754  return false;
755  }
756  _line_progressed(pos);
757  return true;
758  }
759  }
760  else
761  {
762  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
763  }
764  return false;
765 }
766 
767 
768 //-----------------------------------------------------------------------------
769 
770 template<class EventHandler>
771 csubstr ParseEngine<EventHandler>::_scan_anchor()
772 {
773  csubstr s = m_evt_handler->m_curr->line_contents.rem;
774  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
775  csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
776  _line_progressed(1u + anchor.len);
777  _maybe_skipchars(' ');
778  return anchor;
779 }
780 
781 template<class EventHandler>
782 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
783 {
784  csubstr s = m_evt_handler->m_curr->line_contents.rem;
785  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
786  _set_first(s, s.first_of(" ,]\t"));
787  _line_progressed(s.len);
788  return s;
789 }
790 
791 template<class EventHandler>
792 csubstr ParseEngine<EventHandler>::_scan_ref_map()
793 {
794  csubstr s = m_evt_handler->m_curr->line_contents.rem;
795  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
796  _set_first(s, s.first_of(" ,}\t"));
797  _line_progressed(s.len);
798  return s;
799 }
800 
801 template<class EventHandler>
802 csubstr ParseEngine<EventHandler>::_scan_tag()
803 {
804  csubstr t = m_evt_handler->m_curr->line_contents.rem;
805  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
806  if(!t.begins_with("!<"))
807  {
808  _c4dbgp("begins with '!'");
809  _set_first(t, t.first_of(" ,]}\t"));
810  if(C4_UNLIKELY(t.first_of("[{") != npos))
811  _c4err("invalid tag");
812  _line_progressed(t.len);
813  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
814  t = _resolve_tag(t);
815  }
816  else
817  {
818  _c4dbgp("begins with '!<'");
819  size_t pos = t.find('>');
820  if(C4_UNLIKELY(pos == npos))
821  _c4err("invalid tag");
822  _set_first_strict(t, pos+1);
823  _line_progressed(t.len);
824  t = t.sub(1);
825  }
826  _maybe_skip_whitespace_tokens();
827  return t;
828 }
829 
830 template<class EventHandler>
831 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
832 {
833  csubstr t = m_evt_handler->m_curr->line_contents.rem;
834  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
835  if(!t.begins_with("!<"))
836  {
837  _c4dbgp("begins with '!'");
838  _set_first(t, t.first_of(" ,\t"));
839  if(C4_UNLIKELY(t.first_of("[{") != npos))
840  _c4err("invalid tag");
841  _line_progressed(t.len);
842  *orig = t;
843  if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
844  t = _resolve_tag(t);
845  }
846  else
847  {
848  _c4dbgp("begins with '!<'");
849  size_t pos = t.find('>');
850  if(C4_UNLIKELY(pos == npos))
851  _c4err("invalid tag");
852  _set_first_strict(t, pos+1);
853  _line_progressed(t.len);
854  *orig = t;
855  t = t.sub(1);
856  }
857  _maybe_skip_whitespace_tokens();
858  return t;
859 }
860 
861 
862 //-----------------------------------------------------------------------------
863 
864 template<class EventHandler>
865 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
866 {
867  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
868  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
869  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
870  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
871  if(s.len > 1)
872  {
873  switch(s.str[1])
874  {
875  case ' ':
876  case ',':
877  case '}':
878  case ']':
879  case '\t':
880  if(s.str[0] == ':')
881  {
882  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
883  return false;
884  }
885  else
886  {
887  _c4err("invalid scalar");
888  }
889  break;
890  case '{':
891  case '[':
892  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
893  break;
894  default:
895  break;
896  }
897  }
898  else
899  {
900  if(s.str[0] == '-')
901  _c4err("invalid scalar");
902  return false;
903  }
904  return true;
905 }
906 
907 template<class EventHandler>
908 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
909 {
910  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
911  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
912  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
913  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
914  if(s.len > 1)
915  {
916  switch(s.str[1])
917  {
918  case ' ':
919  case '\t':
920  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
921  return false;
922  case '{':
923  case '}':
924  case '[':
925  case ']':
926  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
927  break;
928  default:
929  break;
930  }
931  }
932  else
933  {
934  return false;
935  }
936  return true;
937 }
938 
939 
940 template<class EventHandler>
941 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
942 {
943  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
944  // it's not a scalar if it starts with any of these characters:
945  switch(s.str[0])
946  {
947  // these are all legal tokens which mean no scalar is starting:
948  case '[':
949  case ']':
950  case '{':
951  case '}':
952  case '&':
953  case '*':
954  case '!':
955  case '|':
956  case '>':
957  case '#':
958  case ',':
959  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
960  return false;
961  // '-' and ':' are illegal at the beginning if not followed by a scalar character
962  case '-':
963  case ':':
964  _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
965  return _is_valid_start_scalar_plain_flow_check_block_token(s);
966  case '?':
967  _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
968  return _is_valid_start_scalar_plain_flow_check_qmrk(s);
969  // everything else is a legal starting character
970  default:
971  return true;
972  }
973 }
974 
975 
976 template<class EventHandler>
977 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
978 {
979  _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs), true));
980  if(s.len > offs + 1)
981  {
982  _c4dbgp("newl[PLAIN]: buffer continues");
983  csubstr next_line = s.sub(offs + 1);
984  size_t next_line_indentation = next_line.first_not_of(' ');
985  if(next_line_indentation != npos)
986  {
987  _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
988  next_line = next_line.first(next_line.first_of("\n\r"));
989  _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
990  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
991  if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
992  {
993  _c4dbgp("newl[PLAIN]: larger indentation");
994  next_line = next_line.sub(next_line_indentation);
995  }
996  else if(C4_UNLIKELY(next_line.len && next_line.triml(' ').len))
997  {
998  _c4dbgp("newl[PLAIN]: err, smaller indentation");
999  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1000  _line_ended();
1001  _scan_line();
1002  if(m_evt_handler->m_curr->line_contents.indentation != npos)
1003  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1004  _c4err("parse error"); // cannot reduce indentation here
1005  }
1006  _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1007  if(next_line.len)
1008  {
1009  next_line = next_line.triml(" \t");
1010  if(next_line.begins_with_any(",]#:")) // any of the characters we're interested in
1011  {
1012  _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[0]);
1013  return false;
1014  }
1015  }
1016  }
1017  }
1018  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1019  _line_ended();
1020  _scan_line();
1021  return true;
1022 }
1023 
1024 template<class EventHandler>
1025 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1026 {
1027  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1028  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1029  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1030  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1031  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1032 
1033  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1034  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1035 
1036  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1037  return false;
1038 
1039  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1040  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1041 
1042  _c4dbgp("scanning seqflow scalar...");
1043 
1044  bool needs_filter = false;
1045  size_t col = 0; // zero-based column
1046  size_t offs = 0; // offset
1047  for( ; offs < s.len; ++offs, ++col)
1048  {
1049  const char c = s.str[offs];
1050  switch(c)
1051  {
1052  case ',':
1053  case ']':
1054  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1055  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1056  goto ended_scalar;
1057  case '\n':
1058  _c4dbgpf("found '\\n' at col={}", col);
1059  if(!_scan_scalar_plain_handle_newline(s, offs))
1060  goto ended_scalar;
1061  col = (size_t)-1; // so that col is 0 in the next loop iteration
1062  needs_filter = true;
1063  break;
1064  case '\r':
1065  --col; // don't count \r when calling _line_progressed()
1066  needs_filter = true;
1067  break;
1068  case ':':
1069  _c4dbgp("found suspicious ':'");
1070  if(s.len > offs + 1)
1071  {
1072  char next = s.str[offs + 1];
1073  _c4dbgpf("next char is '{}'", _c4prc(next));
1074  if(next == '\r')
1075  {
1076  csubstr after = s.sub(offs + 1).triml('\r');
1077  if(after.len)
1078  {
1079  next = after.str[0];
1080  _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1081  }
1082  }
1083  // no else here.
1084  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1085  {
1086  _c4dbgp("map starting!");
1087  goto ended_scalar;
1088  }
1089  else
1090  {
1091  _c4dbgp("':' nothing to see here");
1092  }
1093  }
1094  else
1095  {
1096  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1097  _line_progressed(col);
1098  _c4err("missing termination: '{}'", c); // noreturn
1099  }
1100  break;
1101  case '#':
1102  {
1103  _c4dbgp("found suspicious '#'");
1104  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1105  char prev = s.str[offs - 1];
1106  if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
1107  {
1108  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1109  goto ended_scalar;
1110  }
1111  }
1112  break;
1113  case '[':
1114  case '{':
1115  case '}':
1116  _line_progressed(col); // advance to report the proper position in the error
1117  _c4err("invalid character: '{}'", c); // noreturn
1118  case '-':
1119  case '.':
1120  _c4dbgpf("doc token character: '{}', offs={}", c, offs);
1121  if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1122  {
1123  _c4dbgp("at line beginning");
1124  if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1125  {
1126  _c4err("parse error"); // no return
1127  }
1128  }
1129  default:
1130  ;
1131  }
1132  }
1133 
1134 ended_scalar:
1135 
1136  _line_progressed(col);
1137  _set_first(s, offs);
1138  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1139  sc->needs_filter = needs_filter;
1140 
1141  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1142 
1143  return true;
1144 }
1145 
1146 template<class EventHandler>
1147 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1148 {
1149  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1150  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1151  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1152  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1153  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1154 
1155  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1156  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1157 
1158  if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1159  return false;
1160 
1161  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1162  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1163 
1164  _c4dbgp("scanning mapflow scalar...");
1165 
1166  bool needs_filter = false;
1167  size_t col = 0; // zero-based column
1168  size_t offs = 0; // offset
1169  for( ; offs < s.len; ++offs, ++col)
1170  {
1171  const char c = s.str[offs];
1172  switch(c)
1173  {
1174  case ',':
1175  case '}':
1176  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1177  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1178  goto ended_scalar;
1179  case '\n':
1180  _c4dbgpf("found '\\n' at col={}", col);
1181  if(!_scan_scalar_plain_handle_newline(s, offs))
1182  goto ended_scalar;
1183  col = (size_t)-1; // so that col is 0 in the next loop iteration
1184  needs_filter = true;
1185  break;
1186  case '\r':
1187  --col; // don't count \r when calling _line_progressed()
1188  needs_filter = true;
1189  break;
1190  case ':':
1191  _c4dbgpf("found ':'", c);
1192  if(s.len == offs+1)
1193  break;
1194  {
1195  const char next = s.str[offs+1];
1196  _c4dbgpf("next='{}'", c);
1197  if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
1198  {
1199  _c4dbgpf("found terminating character: '{}'", c);
1200  goto ended_scalar;
1201  }
1202  }
1203  break;
1204  case '{':
1205  case '[':
1206  _line_progressed(col);
1207  _c4err("invalid character: '{}'", c); // noreturn
1208  break;
1209  case ']':
1210  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1211  goto ended_scalar;
1212  default:
1213  ;
1214  }
1215  }
1216 
1217 ended_scalar:
1218 
1219  _line_progressed(col);
1220  s = s.first(offs);
1221  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1222  sc->needs_filter = needs_filter;
1223 
1224  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1225 
1226  return sc->scalar.len > 0u;
1227 }
1228 
1229 template<class EventHandler>
1230 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1231 {
1232  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1233  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1234  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1235  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1236 
1237  substr s = m_evt_handler->m_curr->line_contents.rem;
1238  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1239  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1240 
1241  _c4dbgp("seq_json: scanning scalar...");
1242 
1243  switch(s.str[0])
1244  {
1245  case ']':
1246  case '{':
1247  case ',':
1248  _c4dbgp("seq_json: not a scalar.");
1249  return false;
1250  }
1251 
1252  {
1253  const size_t len = _begins_with_special_json_scalar(s);
1254  if(len)
1255  {
1256  char c = s.len > len ? s.str[len] : ',';
1257  if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1258  {
1259  sc->scalar = s.first(len);
1260  sc->needs_filter = false;
1261  _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1262  _line_progressed(len);
1263  return true;
1264  }
1265  else
1266  {
1267  return false;
1268  }
1269  }
1270  }
1271 
1272  // must be a number or special scalar
1273  size_t i = 0;
1274  for( ; i < s.len; ++i)
1275  {
1276  const char c = s.str[i];
1277  switch(c)
1278  {
1279  case ',':
1280  case ']':
1281  case ' ':
1282  case '\t':
1283  _c4dbgpf("seq_json: found terminating character: '{}'", c);
1284  goto ended_scalar;
1285  default:
1286  ;
1287  }
1288  }
1289 
1290 ended_scalar:
1291 
1292  if(C4_LIKELY(i > 0))
1293  {
1294  _line_progressed(i);
1295  sc->scalar = s.first(i);
1296  sc->needs_filter = false;
1297  _c4dbgpf("seq_json: scalar was {}", _prs(sc->scalar, /*escape*/true));
1298  }
1299 
1300  return true;
1301 }
1302 
1303 template<class EventHandler>
1304 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1305 {
1306  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1307  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1308  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1309  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1310  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1311 
1312  substr s = m_evt_handler->m_curr->line_contents.rem;
1313  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1314  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1315 
1316  _c4dbgp("scanning scalar...");
1317 
1318  {
1319  const size_t len = _begins_with_special_json_scalar(s);
1320  if(len)
1321  {
1322  char c = s.len > len ? s.str[len] : ',';
1323  _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1324  if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1325  {
1326  sc->scalar = s.first(len);
1327  sc->needs_filter = false;
1328  _c4dbgpf("special json scalar: '{}'", _prs(sc->scalar));
1329  _line_progressed(len);
1330  return true;
1331  }
1332  else
1333  {
1334  return false;
1335  }
1336  }
1337  }
1338 
1339  // must be a number
1340  size_t i = 0;
1341  for( ; i < s.len; ++i)
1342  {
1343  const char c = s.str[i];
1344  switch(c)
1345  {
1346  case ',':
1347  case '}':
1348  case ' ':
1349  case '\t':
1350  _c4dbgpf("found terminating character: '{}'", c);
1351  goto ended_scalar;
1352  default:
1353  ;
1354  }
1355  }
1356 
1357 ended_scalar:
1358 
1359  if(C4_LIKELY(i > 0))
1360  {
1361  _line_progressed(i);
1362  sc->scalar = s.first(i);
1363  sc->needs_filter = false;
1364  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1365  return true;
1366  }
1367 
1368  return false;
1369 }
1370 
1371 template<class EventHandler>
1372 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1373 {
1374  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1375  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1376 }
1377 
1378 template<class EventHandler>
1379 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1380 {
1381  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1382  return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1383 }
1384 
1385 template<class EventHandler>
1386 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1387 {
1388  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1389  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1390  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1391 
1392  substr s = m_evt_handler->m_curr->line_contents.rem;
1393  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1394  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1395 
1396  switch(s.str[0])
1397  {
1398  case '-':
1399  if(_is_blck_token(s))
1400  {
1401  return false;
1402  }
1403  else if(_is_doc_begin(s))
1404  {
1405  _c4dbgp("token is doc start");
1406  return false;
1407  }
1408  break;
1409  case ':':
1410  case '?':
1411  if(_is_blck_token(s))
1412  return false;
1413  break;
1414  case '[':
1415  case '{':
1416  case '&':
1417  case '*':
1418  case '!':
1419  case '\t':
1420  case ',':
1421  case '%':
1422  return false;
1423  case '.':
1424  if(_is_doc_end(s))
1425  {
1426  _c4dbgp("token is doc end");
1427  return false;
1428  }
1429  break;
1430  }
1431 
1432  _c4dbgpf("plain scalar! indentation={}", indentation);
1433 
1434  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1435  const size_t start_line = m_evt_handler->m_curr->pos.line;
1436 
1437  bool needs_filter = false;
1438  while(true)
1439  {
1440  _c4dbgpf("plain scalar line: {}", _prs(s));
1441  for(size_t i = 0; i < s.len; ++i)
1442  {
1443  const char curr = s.str[i];
1444  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1445  switch(curr)
1446  {
1447  case ':':
1448  _c4dbgpf("[{}]: got suspicious ':'", i);
1449  // are there more characters?
1450  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1451  {
1452  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1453  _line_progressed(i);
1454  // ': ' is accepted only on the first line
1455  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1456  {
1457  _c4dbgp("start line. scalar ends here");
1458  goto ended_scalar;
1459  }
1460  else
1461  {
1462  _c4err("multiline scalars cannot be used as implicit keys");
1463  }
1464  }
1465  else
1466  {
1467  size_t j = i;
1468  while(j + 1 < s.len && s.str[j+1] == ':')
1469  {
1470  _c4dbgp("skip colon");
1471  ++j;
1472  }
1473  i = j > i ? j-1 : i;
1474  _c4dbgp("nothing to see here");
1475  }
1476  break;
1477  case '#':
1478  _c4dbgp("got suspicious '#'");
1479  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1480  {
1481  _c4dbgp("comment! scalar ends here");
1482  _line_progressed(i);
1483  goto ended_scalar;
1484  }
1485  else
1486  {
1487  _c4dbgp("nothing to see here");
1488  }
1489  break;
1490  }
1491  }
1492  _line_progressed(s.len);
1493  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1494  next_peeked = next_peeked.trimr("\n\r");
1495  const size_t next_indentation = next_peeked.first_not_of(' ');
1496  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1497  if(next_indentation < indentation)
1498  {
1499  _c4dbgp("smaller indentation! scalar ended");
1500  goto ended_scalar;
1501  }
1502  else if(next_indentation == 0 && next_peeked.len > 0)
1503  {
1504  const char first = next_peeked.str[0];
1505  switch(first)
1506  {
1507  case '-':
1508  _c4dbgpf("doc begin? peeked={}", _prs(next_peeked, size_t(3)));
1509  if(_is_doc_begin_token(next_peeked))
1510  {
1511  _c4dbgp("doc begin! scalar ended");
1512  goto ended_scalar;
1513  }
1514  break;
1515  case '.':
1516  _c4dbgpf("doc end? peeked={}", _prs(next_peeked, size_t(3)));
1517  if(_is_doc_end_token(next_peeked))
1518  {
1519  _c4dbgp("doc end! scalar ended");
1520  goto ended_scalar;
1521  }
1522  break;
1523  }
1524  }
1525  // load with next line
1526  _c4dbgp("next line!");
1527  if(!_finished_file())
1528  {
1529  _c4dbgp("next line!");
1530  _line_ended();
1531  _scan_line();
1532  }
1533  else
1534  {
1535  _c4dbgp("file finished!");
1536  goto ended_scalar;
1537  }
1538  s = m_evt_handler->m_curr->line_contents.rem;
1539  needs_filter = true;
1540  }
1541 
1542 ended_scalar:
1543 
1544  sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1545  sc->needs_filter = needs_filter;
1546 
1547  _c4dbgpf("scalar was {}", _prs(sc->scalar));
1548 
1549  return true;
1550 }
1551 
1552 template<class EventHandler>
1553 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1554 {
1555  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1556  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1557  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1558  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1559  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1560  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1561  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1562 }
1563 
1564 template<class EventHandler>
1565 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1566 {
1567  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1568  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1569  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1570  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1571  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1572  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1573 }
1574 
1575 template<class EventHandler>
1576 C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1577 {
1578  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1579  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1580 }
1581 
1582 
1583 //-----------------------------------------------------------------------------
1584 
1585 template<class EventHandler>
1586 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1587 {
1588  substr rem{}; // declare here because of the goto
1589  size_t nlpos{}; // declare here because of the goto
1590  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1591  if(pos >= _buf().len)
1592  goto next_is_empty;
1593 
1594  // look for the next newline chars, and jump to the right of those
1595  rem = _from_next_line(_buf().sub(pos));
1596  if(rem.empty())
1597  goto next_is_empty;
1598 
1599  // now get everything up to and including the following newline chars
1600  nlpos = rem.first_of("\r\n");
1601  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1602  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1603  rem = rem.left_of(nlpos, /*include_pos*/true);
1604 
1605  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1606  return rem;
1607 
1608 next_is_empty:
1609  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1610  return {};
1611 }
1612 
1613 //-----------------------------------------------------------------------------
1614 
1615 template<class EventHandler>
1616 void ParseEngine<EventHandler>::_scan_line()
1617 {
1618  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1619  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1620  else
1621  m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1622 }
1623 
1624 template<class EventHandler>
1625 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1626 {
1627  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1628  m_evt_handler->m_curr->pos.line,
1629  m_evt_handler->m_curr->line_contents.full.len,
1630  ahead, m_evt_handler->m_curr->pos.col,
1631  m_evt_handler->m_curr->pos.col+ahead,
1632  m_evt_handler->m_curr->pos.offset,
1633  m_evt_handler->m_curr->pos.offset+ahead);
1634  m_evt_handler->m_curr->pos.offset += ahead;
1635  m_evt_handler->m_curr->pos.col += ahead;
1636  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1637  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1638 }
1639 
1640 template<class EventHandler>
1641 void ParseEngine<EventHandler>::_line_ended()
1642 {
1643  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1644  m_evt_handler->m_curr->pos.line,
1645  m_evt_handler->m_curr->line_contents.full.len,
1646  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1647  m_evt_handler->m_curr->pos.col, 1);
1648  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1649  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1650  ++m_evt_handler->m_curr->pos.line;
1651  m_evt_handler->m_curr->pos.col = 1;
1652 }
1653 
1654 template<class EventHandler>
1655 void ParseEngine<EventHandler>::_line_ended_undo()
1656 {
1657  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1658  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1659  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1660  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1661  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1662  m_evt_handler->m_curr->pos.offset -= delta;
1663  --m_evt_handler->m_curr->pos.line;
1664  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1665  // don't forget to undo also the changes to the remainder of the line
1666  //_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1667  m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1668 }
1669 
1670 
1671 //-----------------------------------------------------------------------------
1672 template<class EventHandler>
1673 void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1674 {
1675  m_evt_handler->m_curr->indref = indentation;
1676  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1677 }
1678 
1679 template<class EventHandler>
1680 void ParseEngine<EventHandler>::_save_indentation()
1681 {
1682  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1683  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1684  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1685 }
1686 
1687 template<class EventHandler>
1688 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1689 {
1690  _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1691  m_prev_val_end = m_evt_handler->m_curr->pos.line;
1692 }
1693 
1694 
1695 //-----------------------------------------------------------------------------
1696 
1697 template<class EventHandler>
1698 void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1699 {
1700  _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1701  m_evt_handler->actually_val_is_first_key_of_new_map_block();
1702  addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1703  _set_indentation(orig_indent);
1704  _maybe_skip_whitespace_tokens();
1705 }
1706 
1707 template<class EventHandler>
1708 void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1709 {
1710  // this is called AFTER ending the flow container,
1711  // so now we're at the parent container's scope
1712  if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1713  {
1714  _c4dbgp("flow container: end as vanilla block map key!");
1715  if(C4_UNLIKELY(multiline))
1716  _c4err("multiline key is invalid");
1717  if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1718  _c4err("could not find ':' colon after key");
1719  _maybe_skip_whitespace_tokens();
1720  addrem_flags(RVAL, RKEY|RKCL|RNXT);
1721  }
1722  else if(has_none(RFLOW))
1723  {
1724  _c4dbgp("end_flow_container: now not in flow!");
1725  if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1726  {
1727  if(C4_UNLIKELY(multiline))
1728  _c4err("multiline key is invalid");
1729  _flow_container_was_a_key(orig_indent);
1730  }
1731  else
1732  {
1733  _c4dbgp("end_flow_container: end map as key!");
1734  }
1735  }
1736  else if(has_any(RSEQ))
1737  {
1738  _c4dbgp("end_flow_container: now in a flow seq");
1739  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1740  _mark_seqflow_val_end();
1741  }
1742 }
1743 
1744 template<class EventHandler>
1745 void ParseEngine<EventHandler>::_end_map_flow()
1746 {
1747  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1748  size_t orig_indent = m_evt_handler->m_curr->indref;
1749  _c4dbgpf("mapflow: end, multiline={}", multiline);
1750  m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1751  _end_flow_container(orig_indent, multiline);
1752 }
1753 
1754 template<class EventHandler>
1755 void ParseEngine<EventHandler>::_end_seq_flow()
1756 {
1757  bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758  size_t orig_indent = m_evt_handler->m_curr->indref;
1759  _c4dbgpf("seqflow: end, multiline={}", multiline);
1760  m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1761  _end_flow_container(orig_indent, multiline);
1762 }
1763 
1764 template<class EventHandler>
1765 void ParseEngine<EventHandler>::_end_map_blck()
1766 {
1767  _c4dbgp("mapblck: end");
1768  if(has_any(RKCL|RVAL))
1769  {
1770  _c4dbgp("mapblck: set missing val");
1771  _handle_annotations_before_blck_val_scalar();
1772  m_evt_handler->set_val_scalar_plain_empty();
1773  }
1774  else if(has_any(QMRK))
1775  {
1776  _c4dbgp("mapblck: set missing keyval");
1777  _handle_annotations_before_blck_key_scalar();
1778  m_evt_handler->set_key_scalar_plain_empty();
1779  _handle_annotations_before_blck_val_scalar();
1780  m_evt_handler->set_val_scalar_plain_empty();
1781  }
1782  m_evt_handler->end_map_block();
1783 }
1784 
1785 template<class EventHandler>
1786 void ParseEngine<EventHandler>::_end_seq_blck()
1787 {
1788  if(has_any(RVAL))
1789  {
1790  _c4dbgp("seqblck: set missing val");
1791  _handle_annotations_before_blck_val_scalar();
1792  m_evt_handler->set_val_scalar_plain_empty();
1793  }
1794  m_evt_handler->end_seq_block();
1795 }
1796 
1797 template<class EventHandler>
1798 void ParseEngine<EventHandler>::_end2_map()
1799 {
1800  _c4dbgp("map: end");
1801  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1802  if(has_any(RBLCK))
1803  {
1804  _end_map_blck();
1805  }
1806  else
1807  {
1808  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1809  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1810  m_evt_handler->_pop();
1811  }
1812 }
1813 
1814 template<class EventHandler>
1815 void ParseEngine<EventHandler>::_end2_seq()
1816 {
1817  _c4dbgp("seq: end");
1818  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1819  if(has_any(RBLCK))
1820  {
1821  _end_seq_blck();
1822  }
1823  else
1824  {
1825  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1826  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1827  m_evt_handler->_pop();
1828  }
1829 }
1830 
1831 template<class EventHandler>
1832 void ParseEngine<EventHandler>::_begin2_doc()
1833 {
1834  _c4dbgp("begin_doc");
1835  m_has_directives_yaml = false;
1836  m_has_directives = false;
1837  m_doc_empty = true;
1838  add_flags(RDOC);
1839  m_evt_handler->begin_doc();
1840  m_evt_handler->m_curr->indref = 0; // ?
1841 }
1842 
1843 template<class EventHandler>
1844 void ParseEngine<EventHandler>::_begin2_doc_expl()
1845 {
1846  _c4dbgp("begin_doc_expl");
1847  m_has_directives_yaml = false;
1848  m_has_directives = false;
1849  m_doc_empty = true;
1850  add_flags(RDOC);
1851  m_evt_handler->begin_doc_expl();
1852  m_evt_handler->m_curr->indref = 0; // ?
1853 }
1854 
1855 template<class EventHandler>
1856 void ParseEngine<EventHandler>::_end2_doc()
1857 {
1858  _c4dbgp("doc: end");
1859  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1860  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1861  {
1862  _c4dbgp("doc was empty; add empty val");
1863  _handle_annotations_before_blck_val_scalar();
1864  m_evt_handler->set_val_scalar_plain_empty();
1865  }
1866  m_evt_handler->end_doc();
1867  m_bom_len = 0;
1868 }
1869 
1870 template<class EventHandler>
1871 void ParseEngine<EventHandler>::_end2_doc_expl()
1872 {
1873  _c4dbgp("doc: end");
1874  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1875  {
1876  _c4dbgp("doc: no children; add empty val");
1877  _handle_annotations_before_blck_val_scalar();
1878  m_evt_handler->set_val_scalar_plain_empty();
1879  }
1880  m_evt_handler->end_doc_expl();
1881  m_bom_len = 0;
1882 }
1883 
1884 template<class EventHandler>
1885 void ParseEngine<EventHandler>::_maybe_begin_doc()
1886 {
1887  if(has_none(RDOC))
1888  {
1889  _c4dbgp("doc must be started");
1890  _begin2_doc();
1891  }
1892 }
1893 template<class EventHandler>
1894 void ParseEngine<EventHandler>::_maybe_end_doc()
1895 {
1896  if(has_any(RDOC))
1897  {
1898  _c4dbgp("doc must be finished");
1899  _end2_doc();
1900  }
1901  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1902  {
1903  _c4dbgp("no doc to finish, but pending annotations");
1904  m_evt_handler->begin_doc();
1905  _handle_annotations_before_blck_val_scalar();
1906  m_evt_handler->set_val_scalar_plain_empty();
1907  m_evt_handler->end_doc();
1908  }
1909 }
1910 
1911 template<class EventHandler>
1912 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1913 {
1914  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1915  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1916  _c4dbgp("root is RDOC");
1917  if(m_evt_handler->m_curr->level != 0)
1918  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1919  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1920 }
1921 
1922 /** Check whether the current parse tokens are trailing on the
1923  * previous doc, and raise an error if they are */
1924 template<class EventHandler>
1925 void ParseEngine<EventHandler>::_check_trailing_doc_token()
1926 {
1927  const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1928  const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1929  const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1930  _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1931  if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
1932  _c4err("parse error");
1933 }
1934 
1935 template<class EventHandler>
1936 void ParseEngine<EventHandler>::_end_doc_suddenly()
1937 {
1938  _c4dbgp("end doc suddenly");
1939  _end_doc_suddenly__pop();
1940  _end2_doc_expl();
1941  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1942 }
1943 
1944 template<class EventHandler>
1945 void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1946 {
1947  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1948  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1949  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
1950  {
1951  _c4err("parse error");
1952  }
1953 }
1954 
1955 template<class EventHandler>
1956 void ParseEngine<EventHandler>::_start_doc_suddenly()
1957 {
1958  _c4dbgp("start doc suddenly");
1959  _end_doc_suddenly__pop();
1960  _end2_doc();
1961  _begin2_doc_expl();
1962 }
1963 
1964 template<class EventHandler>
1965 void ParseEngine<EventHandler>::_end_stream()
1966 {
1967  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1968  if(C4_UNLIKELY(has_all(RSEQ|RFLOW)))
1969  _c4err("missing terminating ]");
1970  else if(C4_UNLIKELY(has_all(RMAP|RFLOW)))
1971  _c4err("missing terminating }");
1972  if(m_evt_handler->m_stack.size() > 1)
1973  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1974  if(has_all(RDOC))
1975  {
1976  _end2_doc();
1977  }
1978  else if(has_all(RTOP|RUNK))
1979  {
1980  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1981  {
1982  if(m_doc_empty)
1983  {
1984  m_evt_handler->begin_doc();
1985  _handle_annotations_before_blck_val_scalar();
1986  m_evt_handler->set_val_scalar_plain_empty();
1987  m_evt_handler->end_doc();
1988  }
1989  }
1990  }
1991  m_evt_handler->end_stream();
1992  if(C4_UNLIKELY(m_has_directives))
1993  _c4err("directives cannot be used without a document");
1994 }
1995 
1996 
1997 template<class EventHandler>
1998 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1999 {
2000  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2001  while(m_evt_handler->m_curr != popto)
2002  {
2003  if(has_any(RSEQ))
2004  {
2005  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2006  _end2_seq();
2007  }
2008  else if(has_any(RMAP))
2009  {
2010  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2011  _end2_map();
2012  }
2013  else
2014  {
2015  break;
2016  }
2017  }
2018  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2019 }
2020 
2021 template<class EventHandler>
2022 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2023 {
2024  // search the stack frame to jump to based on its indentation
2025  using state_type = typename EventHandler::state;
2026  state_type const* popto = nullptr;
2027  auto &stack = m_evt_handler->m_stack;
2028  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2029  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2030  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2031  #ifdef RYML_DBG
2032  _print_state_stack();
2033  #endif
2034  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2035  {
2036  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2037  if(s->indref == ind)
2038  {
2039  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2040  popto = s;
2041  break;
2042  }
2043  }
2044  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2045  {
2046  _c4err("parse error: incorrect indentation?");
2047  }
2048  _handle_indentation_pop(popto);
2049 }
2050 
2051 template<class EventHandler>
2052 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2053 {
2054  // search the stack frame to jump to based on its indentation
2055  using state_type = typename EventHandler::state;
2056  auto &stack = m_evt_handler->m_stack;
2057  _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2058  _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2059  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2060  state_type const* popto = nullptr;
2061  #ifdef RYML_DBG
2062  char flagbuf_[128];
2063  _print_state_stack(flagbuf_);
2064  #endif
2065  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2066  {
2067  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2068  if(s->indref < ind)
2069  {
2070  break;
2071  }
2072  else if(s->indref == ind)
2073  {
2074  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
2075  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2076  {
2077  break;
2078  }
2079  popto = s;
2080  if(has_all(RSEQ|RBLCK, s))
2081  {
2082  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2083  const size_t first = rem.first_not_of(' ');
2084  _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2085  rem = rem.right_of(first, true);
2086  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2087  if(rem.begins_with('-') && _is_blck_token(rem))
2088  {
2089  _c4dbgp("parent was indentless seq");
2090  break;
2091  }
2092  }
2093  }
2094  }
2095  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2096  {
2097  _c4err("parse error: incorrect indentation?");
2098  }
2099  _handle_indentation_pop(popto);
2100 }
2101 
2102 
2103 //-----------------------------------------------------------------------------
2104 template<class EventHandler>
2105 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2106 {
2107  if(C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY)))
2108  {
2109  _c4err("multiline quoted keys are invalid");
2110  }
2111  else // check contextual indentation
2112  {
2113  const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2114  _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2115  if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2116  {
2117  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2118  m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2119  m_evt_handler->m_curr->pos);
2120  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2121  _c4dbgpf("trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem, true));
2122  if(C4_UNLIKELY(!!trimmed.len))
2123  {
2124  _c4err("bad indentation");
2125  }
2126  }
2127  }
2128 }
2129 
2130 
2131 //-----------------------------------------------------------------------------
2132 template<class EventHandler>
2133 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2134 {
2135  // quoted scalars can spread over multiple lines!
2136  // nice explanation here: http://yaml-multiline.info/
2137 
2138  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2139 
2140  // a span to the end of the file, skipping the opening quote
2141  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2142  _line_progressed(1); // advance over the opening quote
2143  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2144 
2145  bool needs_filter = false;
2146  size_t pos = npos; // find the pos of the matching quote
2147  while( ! _finished_file())
2148  {
2149  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2150  _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2151  if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2152  _c4err("token can not appear at line begin");
2153  for(size_t i = 0; i < line.len; ++i)
2154  {
2155  const char curr = line.str[i];
2156  if(curr == '\'') // single quotes are escaped with two single quotes
2157  {
2158  const char next = i+1 < line.len ? line.str[i+1] : '~';
2159  if(next != '\'') // so just look for the first quote
2160  { // without another after it
2161  _line_progressed(i + 1); // progress beyond the quote
2162  pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2163  goto found_close;
2164  }
2165  else
2166  {
2167  needs_filter = true; // needs filter to remove escaped quotes
2168  ++i; // skip the escaped quote
2169  }
2170  }
2171  }
2172 
2173  needs_filter = true;
2174  _line_progressed(line.len);
2175  _line_ended();
2176  _scan_line();
2177  _check_valid_newline_in_quoted_scalar();
2178  }
2179 
2180  _c4err("reached end of file while looking for closing quote");
2181 
2182 found_close:
2183 
2184  _c4dbgpf("found closing quote at: {}", pos);
2185  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2186  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2187  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2188  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2189  _set_first_strict(s, pos);
2190 
2191  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2192 
2193  return ScannedScalar { s, needs_filter };
2194 }
2195 
2196 
2197 //-----------------------------------------------------------------------------
2198 template<class EventHandler>
2199 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2200 {
2201  // quoted scalars can spread over multiple lines!
2202  // nice explanation here: http://yaml-multiline.info/
2203 
2204  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2205 
2206  // a span to the end of the file, skipping the opening quote
2207  substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2208  _line_progressed(1); // advance over the opening quote
2209  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2210 
2211  bool needs_filter = false;
2212  size_t pos = npos; // find the pos of the matching quote
2213  while( ! _finished_file())
2214  {
2215  #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2216  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2217  #endif
2218  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2219  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2220  if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2221  _c4err("token can not appear at line begin");
2222  for(size_t i = 0; i < rem.len; ++i)
2223  {
2224  const char curr = rem.str[i];
2225  // every \ is an escape
2226  if(curr == '\\')
2227  {
2228  const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2229  needs_filter = true;
2230  if(next == '"' || next == '\\')
2231  ++i;
2232  }
2233  else if(curr == '"')
2234  {
2235  _line_progressed(i + 1); // progress beyond the quote
2236  pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2237  goto found_close;
2238  }
2239  }
2240 
2241  // leading whitespace also needs filtering
2242  needs_filter = true;
2243  _line_progressed(rem.len);
2244  _line_ended();
2245  _scan_line();
2246  _check_valid_newline_in_quoted_scalar();
2247  }
2248 
2249  _c4err("reached end of file while looking for closing quote");
2250 
2251 found_close:
2252 
2253  _c4dbgpf("found closing quote at: {}", pos);
2254  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2255  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2256  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2257  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2258  _set_first_strict(s, pos);
2259 
2260  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2261 
2262  return ScannedScalar{s, needs_filter};
2263 }
2264 
2265 
2266 //-----------------------------------------------------------------------------
/** Scan a block scalar (introduced by `|` or `>`) into @p sb.
 *
 * The remainder of the current line must start with `|` or `>`. The
 * function:
 *  1. parses the header that follows the indicator: an optional chomp
 *     character (`-` strip, `+` keep; clip otherwise) and an optional
 *     single nonzero indentation digit; when no digit is given, any
 *     other content must be whitespace followed by a `#` comment;
 *  2. consumes the header line and then peeks at each following line,
 *     appending its full length to the raw block while the block
 *     continues;
 *  3. when no indentation digit was given, takes the indentation from
 *     the first non-empty line, tracking a provisional indentation
 *     over leading empty lines;
 *  4. writes the raw (unfiltered) block, the resolved indentation and
 *     the chomp mode to @p sb.
 *
 * @param sb     [out] receives scalar, indentation and chomp
 * @param indref reference indentation of the enclosing context; must
 *               not be npos */
2267 template<class EventHandler>
2268 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2269 {
2270  _c4dbgpf("blck: indref={}", indref);
2271  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2272 
2273  // nice explanation here: http://yaml-multiline.info/
2274  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2275  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2276 
2277  _c4dbgpf("blck: specs={}", _prs(s));
2278 
2279  // parse the spec
2280  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2281  size_t indentation = npos; // have to find out if no spec is given
2282  if(s.len > 1)
2283  {
2284  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
2285  csubstr t = s.sub(1); // everything after the | or > indicator
2286  _c4dbgpf("blck: spec is multichar: '{}'", t);
2287  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2288  size_t pos = t.first_of("-+");
2289  _c4dbgpf("blck: spec chomp char at {}", pos);
2290  if(pos != npos)
2291  {
2292  if(t[pos] == '-')
2293  chomp = CHOMP_STRIP;
2294  else if(t[pos] == '+')
2295  chomp = CHOMP_KEEP;
2296  if(pos == 0) // chomp char comes first: keep what follows it
2297  t = t.sub(1);
2298  else // chomp char comes later: keep what precedes it
2299  t = t.first(pos);
2300  }
2301  // from here to the end, only digits are considered
2302  pos = t.first_not_of("0123456789");
2303  csubstr digits = t.first(pos);
2304  if( ! digits.empty())
2305  {
2306  if(C4_UNLIKELY(digits.len > 1))
2307  _c4err("parse error: invalid indentation");
2308  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2309  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2310  _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2311  if(C4_UNLIKELY( ! indentation))
2312  _c4err("parse error: null indentation");
2313  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2314  indentation += m_evt_handler->m_curr->indref; // NOTE(review): adds m_curr->indref, while the debug message above prints indentation+indref (the parameter) -- confirm both always agree
2315  }
2316  else
2317  {
2318  if(C4_UNLIKELY(t.len && (!t.begins_with_any(" \t") || !t.sub(pos).triml(" \t").begins_with('#')))) // no digits: only whitespace followed by a comment is accepted
2319  _c4err("parse error: invalid token");
2320  }
2321  }
2322 
2323  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2324 
2325  // finish the current line
2326  _line_progressed(s.len);
2327  _line_ended();
2328  _scan_line();
2329 
2330  // start with a zero-length block, already pointing at the right place
2331  substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2332  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2333 
2334  // read every full line into a raw block,
2335  // from which newlines are to be stripped as needed.
2336  //
2337  // If no explicit indentation was given, pick it from the first
2338  // non-empty line. See
2339  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2340  size_t num_lines = 0;
2341  size_t first = m_evt_handler->m_curr->pos.line; // line where the block starts; only used by the assertion after the loop
2342  size_t provisional_indentation = npos; // candidate indentation gathered from leading empty lines
2343  LineContents lc;
2344  while(( ! _finished_file()))
2345  {
2346  // peek next line, but do not advance immediately
2347  lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2348  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2349  C4_DONT_OPTIMIZE(lc.rem);
2350  #endif
2351  _c4dbgpf("blck: peeking at {}", _prs(lc.rem.trimr("\r\n"), true));
2352  // evaluate termination conditions
2353  if(indentation != npos) // the block indentation is already known
2354  {
2355  _c4dbgpf("blck: indentation={}", indentation);
2356  // stop when the line is deindented and not empty
2357  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2358  {
2359  if(raw_block.len)
2360  {
2361  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2362  }
2363  else
2364  {
2365  _c4err("indentation decreased without any scalar");
2366  }
2367  break;
2368  }
2369  else if(indentation == 0)
2370  {
2371  _c4dbgpf("blck: noindent. lc.rem={}", _prs(lc.rem));
2372  if(_is_doc_token(lc.rem))
2373  {
2374  _c4dbgp("blck: stop. indentation=0 and doc ended");
2375  break;
2376  }
2377  }
2378  }
2379  else // the block indentation still has to be determined
2380  {
2381  const size_t fns = lc.rem.first_not_of(' ');
2382  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2383  if(fns != npos) // non-empty line
2384  {
2385  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2386  if(C4_UNLIKELY(lc.full.begins_with('\t')))
2387  _c4err("parse error");
2388  if(provisional_indentation == npos) // no empty lines were seen before this one
2389  {
2390  if(lc.indentation < indref)
2391  {
2392  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2393  if(raw_block.len == 0)
2394  {
2395  _c4dbgp("blck: was empty, undo next line");
2396  _line_ended_undo();
2397  }
2398  break;
2399  }
2400  else if(lc.indentation == m_evt_handler->m_curr->indref)
2401  {
2402  if(has_any(RSEQ|RMAP))
2403  {
2404  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2405  break;
2406  }
2407  }
2408  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2409  indentation = lc.indentation;
2410  }
2411  else
2412  {
2413  if(lc.indentation >= provisional_indentation)
2414  {
2415  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2416  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2417  indentation = lc.indentation;
2418  }
2419  else
2420  {
2421  if(lc.indentation >= indref)
2422  _c4err("parse error: first non-empty block line should have at least the original indentation");
2423  _c4dbgp("blck: finished");
2424  break;
2425  }
2426  }
2427  }
2428  else // empty line
2429  {
2430  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2431  if(provisional_indentation != npos)
2432  {
2433  if(lc.rem.len >= provisional_indentation)
2434  {
2435  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2436  provisional_indentation = lc.rem.len;
2437  }
2438  }
2439  else
2440  {
2441  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); // the bool result of has_any() converts to 0 or 1
2442  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2443  if(provisional_indentation == npos) // reachable only when lc.indentation was npos
2444  {
2445  provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2446  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2447  }
2448  if(provisional_indentation < indref)
2449  {
2450  provisional_indentation = indref;
2451  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2452  }
2453  }
2454  }
2455  }
2456  // advance now that we know the folded scalar continues
2457  m_evt_handler->m_curr->line_contents = lc;
2458  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2459  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2460  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2461  _line_ended();
2462  ++num_lines;
2463  }
2464  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2465  C4_UNUSED(num_lines);
2466  C4_UNUSED(first);
2467 
2468  if(indentation == npos) // no non-empty line fixed the indentation
2469  {
2470  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2471  indentation = provisional_indentation;
2472  }
2473 
2474  if(num_lines) // at least one line was consumed: undo the last line ending
2475  _line_ended_undo();
2476 
2477  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2478 
2479  sb->scalar = raw_block;
2480  sb->indentation = indentation;
2481  sb->chomp = chomp;
2482 }
2483 
2484 
2485 //-----------------------------------------------------------------------------
2486 //-----------------------------------------------------------------------------
2487 //-----------------------------------------------------------------------------
2488 /** @cond dev */
2489 
2490 // a debugging scaffold: change the `#if 0` below to `#if 1` to route
// _c4dbgfws() through _c4dbgpf(), with every message prefixed by the
// read and write positions of the local `proc`. In the default
// configuration the macro expands to nothing.
2491 #if 0
2492 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2493 #else
2494 #define _c4dbgfws(...)
2495 #endif
2496 
2497 template<class EventHandler>
2498 template<class FilterProcessor>
2499 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2500 {
2501  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2502  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2503 
2504  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2505  if(first_pos != npos)
2506  {
2507  const char first_char = proc.src[first_pos];
2508  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2509  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2510  {
2511  _c4dbgfws("whitespace is trailing on line", "");
2512  proc.skip(first_pos - proc.rpos);
2513  }
2514  else // a legit whitespace
2515  {
2516  proc.copy();
2517  _c4dbgfws("legit whitespace. sofar={}", _prs(proc.sofar()));
2518  }
2519  return true;
2520  }
2521  _c4dbgfws("whitespace is trailing on line", "");
2522  return false;
2523 }
2524 
2525 template<class EventHandler>
2526 template<class FilterProcessor>
2527 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2528 {
2529  if(!_filter_ws_handle_to_first_non_space(proc))
2530  {
2531  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2532  proc.copy(proc.src.len - proc.rpos);
2533  }
2534 }
2535 
2536 template<class EventHandler>
2537 template<class FilterProcessor>
2538 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2539 {
2540  if(!_filter_ws_handle_to_first_non_space(proc))
2541  {
2542  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2543  proc.skip(proc.src.len - proc.rpos);
2544  }
2545 }
2546 
2547 #undef _c4dbgfws
2548 
2549 
2550 //-----------------------------------------------------------------------------
2551 //-----------------------------------------------------------------------------
2552 //-----------------------------------------------------------------------------
2553 /* plain scalars */
2554 
2555 // a debugging scaffold: change the `#if 0` below to `#if 1` to route
// _c4dbgfps() through _c4dbgpf(), with every message prefixed by the
// read and write positions of the local `proc`. In the default
// configuration the macro expands to nothing.
2556 #if 0
2557 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2558 #else
2559 #define _c4dbgfps(fmt, ...)
2560 #endif
2561 
2562 template<class EventHandler>
2563 template<class FilterProcessor>
2564 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2565 {
2566  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2567 
2568  _c4dbgfps("found newline. sofar={}", _prs(proc.sofar()));
2569  size_t ii = proc.rpos;
2570  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2571  if(numnl_following)
2572  {
2573  proc.set('\n', numnl_following);
2574  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2575  }
2576  else
2577  {
2578  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2579  if(ret != npos)
2580  {
2581  proc.set(' ');
2582  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2583  }
2584  else
2585  {
2586  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2587  ii = proc.src.len;
2588  }
2589  }
2590  proc.rpos = ii;
2591 }
2592 
2593 template<class EventHandler>
2594 template<class FilterProcessor>
2595 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2596 {
2597  _RYML_ASSERT_PARSE_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2598  _c4dbgfps("before={}", _prs(proc.src));
2599 
2600  while(proc.has_more_chars())
2601  {
2602  const char curr = proc.curr();
2603  _c4dbgfps("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2604  switch(curr)
2605  {
2606  case ' ':
2607  _RYML_WITH_TAB_TOKENS(case '\t':)
2608  _c4dbgfps("whitespace", curr);
2609  _filter_ws_skip_trailing(proc);
2610  break;
2611  case '\n':
2612  _c4dbgfps("newline", curr);
2613  _filter_nl_plain(proc, /*indentation*/indentation);
2614  break;
2615  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2616  _c4dbgfps("carriage return, ignore", curr);
2617  proc.skip();
2618  break;
2619  default:
2620  proc.copy();
2621  break;
2622  }
2623  }
2624 
2625  _c4dbgfps("after={}", _prs(proc.sofar()));
2626 
2627  return proc.result();
2628 }
2629 
2630 #undef _c4dbgfps
2631 
2632 
2633 template<class EventHandler>
2634 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2635 {
2636  FilterProcessorSrcDst proc(scalar, dst);
2637  return _filter_plain(proc, indentation);
2638 }
2639 
2640 template<class EventHandler>
2641 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2642 {
2643  FilterProcessorInplaceEndExtending proc(dst, cap);
2644  return _filter_plain(proc, indentation);
2645 }
2646 
2647 
2648 //-----------------------------------------------------------------------------
2649 //-----------------------------------------------------------------------------
2650 //-----------------------------------------------------------------------------
2651 /* single quoted */
2652 
2653 // a debugging scaffold:
2654 #if 0
2655 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2656 #else
2657 #define _c4dbgfsq(fmt, ...)
2658 #endif
2659 
2660 template<class EventHandler>
2661 template<class FilterProcessor>
2662 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2663 {
2664  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2665 
2666  _c4dbgfsq("found newline. sofar={}", _prs(proc.sofar()));
2667  size_t ii = proc.rpos;
2668  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2669  if(numnl_following)
2670  {
2671  proc.set('\n', numnl_following);
2672  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2673  }
2674  else
2675  {
2676  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2677  if(ret != npos)
2678  {
2679  proc.set(' ');
2680  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2681  }
2682  else
2683  {
2684  proc.set(' ');
2685  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2686  }
2687  }
2688  proc.rpos = ii;
2689 }
2690 
2691 template<class EventHandler>
2692 template<class FilterProcessor>
2693 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2694 {
2695  _c4dbgfsq("before={}", _prs(proc.src));
2696 
2697  // from the YAML spec for double-quoted scalars:
2698  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2699  while(proc.has_more_chars())
2700  {
2701  const char curr = proc.curr();
2702  _c4dbgfsq("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2703  switch(curr)
2704  {
2705  case ' ':
2706  case '\t':
2707  _c4dbgfsq("whitespace", curr);
2708  _filter_ws_copy_trailing(proc);
2709  break;
2710  case '\n':
2711  _c4dbgfsq("newline", curr);
2712  _filter_nl_squoted(proc);
2713  break;
2714  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2715  _c4dbgfsq("skip cr", curr);
2716  proc.skip();
2717  break;
2718  case '\'':
2719  _c4dbgfsq("squote", curr);
2720  if(proc.next() == '\'')
2721  {
2722  _c4dbgfsq("two consecutive squotes", curr);
2723  proc.skip();
2724  proc.copy();
2725  }
2726  else
2727  {
2728  _c4err("filter error");
2729  }
2730  break;
2731  default:
2732  proc.copy();
2733  break;
2734  }
2735  }
2736 
2737  _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2738 
2739  return proc.result();
2740 }
2741 
2742 #undef _c4dbgfsq
2743 
2744 template<class EventHandler>
2745 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2746 {
2747  FilterProcessorSrcDst proc(scalar, dst);
2748  return _filter_squoted(proc);
2749 }
2750 
2751 template<class EventHandler>
2752 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2753 {
2754  FilterProcessorInplaceEndExtending proc(dst, cap);
2755  return _filter_squoted(proc);
2756 }
2757 
2758 
2759 //-----------------------------------------------------------------------------
2760 //-----------------------------------------------------------------------------
2761 //-----------------------------------------------------------------------------
2762 /* double quoted */
2763 
2764 // a debugging scaffold:
2765 #if 0
2766 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2767 #else
2768 #define _c4dbgfdq(...)
2769 #endif
2770 
2771 template<class EventHandler>
2772 template<class FilterProcessor>
2773 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2774 {
2775  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2776 
2777  _c4dbgfdq("found newline. sofar={}", _prs(proc.sofar()));
2778  size_t ii = proc.rpos;
2779  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2780  if(numnl_following)
2781  {
2782  proc.set('\n', numnl_following);
2783  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2784  }
2785  else
2786  {
2787  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2788  if(ret != npos)
2789  {
2790  proc.set(' ');
2791  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2792  }
2793  else
2794  {
2795  proc.set(' ');
2796  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2797  }
2798  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2799  {
2800  _c4dbgfdq("backslash at [{}]", ii);
2801  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2802  if(next == ' ' || next == '\t')
2803  {
2804  _c4dbgfdq("extend skip to backslash", "");
2805  ++ii;
2806  }
2807  }
2808  }
2809  proc.rpos = ii;
2810 }
2811 
2812 template<class EventHandler>
2813 template<class FilterProcessor>
2814 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2815 {
2816  const size_t szp1 = sz + 1u;
2817  if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2818  _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2819  char readbuf[8];
2820  csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2821  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2822  uint32_t codepoint_val = {};
2823  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2824  _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2825  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2826  if(C4_UNLIKELY(numbytes == 0))
2827  _c4err("failed to decode code point={}", proc.rpos);
2828  _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2829  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2830  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2831 }
2832 
2833 template<class EventHandler>
2834 template<class FilterProcessor>
2835 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2836 {
2837  char next = proc.next();
2838  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2839  if(next == '\r')
2840  {
2841  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2842  {
2843  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2844  next = '\n';
2845  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2846  }
2847  }
2848 
2849  if(next == '\n')
2850  {
2851  size_t ii = proc.rpos + 2;
2852  for( ; ii < proc.src.len; ++ii)
2853  {
2854  // skip leading whitespace
2855  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2856  ;
2857  else
2858  break;
2859  }
2860  proc.skip(ii - proc.rpos);
2861  }
2862  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2863  {
2864  // escapes for json compatibility
2865  proc.translate_esc(next);
2866  _c4dbgfdq("here, used '{}'", _c4prc(next));
2867  }
2868  else if(next == '\r')
2869  {
2870  proc.skip();
2871  }
2872  else if(next == 'n')
2873  {
2874  proc.translate_esc('\n');
2875  }
2876  else if(next == 'r')
2877  {
2878  proc.translate_esc('\r');
2879  }
2880  else if(next == 't')
2881  {
2882  proc.translate_esc('\t');
2883  }
2884  else if(next == '\\')
2885  {
2886  proc.translate_esc('\\');
2887  }
2888  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2889  {
2890  _filter_dquoted_backslash_decode(proc, 2u);
2891  }
2892  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2893  {
2894  _filter_dquoted_backslash_decode(proc, 4u);
2895  }
2896  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2897  {
2898  _filter_dquoted_backslash_decode(proc, 8u);
2899  }
2900  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2901  else if(next == '0')
2902  {
2903  proc.translate_esc('\0');
2904  }
2905  else if(next == 'b') // backspace
2906  {
2907  proc.translate_esc('\b');
2908  }
2909  else if(next == 'f') // form feed
2910  {
2911  proc.translate_esc('\f');
2912  }
2913  else if(next == 'a') // bell character
2914  {
2915  proc.translate_esc('\a');
2916  }
2917  else if(next == 'v') // vertical tab
2918  {
2919  proc.translate_esc('\v');
2920  }
2921  else if(next == 'e') // escape character
2922  {
2923  proc.translate_esc('\x1b');
2924  }
2925  else if(next == '_') // unicode non breaking space \u00a0
2926  {
2927  // https://www.compart.com/en/unicode/U+00a0
2928  const char payload[] = {
2929  _RYML_CHCONST(-0x3e, 0xc2),
2930  _RYML_CHCONST(-0x60, 0xa0),
2931  };
2932  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2933  }
2934  else if(next == 'N') // unicode next line \u0085
2935  {
2936  // https://www.compart.com/en/unicode/U+0085
2937  const char payload[] = {
2938  _RYML_CHCONST(-0x3e, 0xc2),
2939  _RYML_CHCONST(-0x7b, 0x85),
2940  };
2941  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2942  }
2943  else if(next == 'L') // unicode line separator \u2028
2944  {
2945  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2946  const char payload[] = {
2947  _RYML_CHCONST(-0x1e, 0xe2),
2948  _RYML_CHCONST(-0x80, 0x80),
2949  _RYML_CHCONST(-0x58, 0xa8),
2950  };
2951  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2952  }
2953  else if(next == 'P') // unicode paragraph separator \u2029
2954  {
2955  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2956  const char payload[] = {
2957  _RYML_CHCONST(-0x1e, 0xe2),
2958  _RYML_CHCONST(-0x80, 0x80),
2959  _RYML_CHCONST(-0x57, 0xa9),
2960  };
2961  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2962  }
2963  else if(next == '\0')
2964  {
2965  proc.skip();
2966  }
2967  else
2968  {
2969  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2970  }
2971  _c4dbgfdq("backslash...sofar={}", _prs(proc.sofar()));
2972 }
2973 
2974 
2975 template<class EventHandler>
2976 template<class FilterProcessor>
2977 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2978 {
2979  _c4dbgfdq("before={}", _prs(proc.src));
2980  // from the YAML spec for double-quoted scalars:
2981  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2982  while(proc.has_more_chars())
2983  {
2984  const char curr = proc.curr();
2985  _c4dbgfdq("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2986  switch(curr)
2987  {
2988  case ' ':
2989  case '\t':
2990  {
2991  _c4dbgfdq("whitespace", curr);
2992  _filter_ws_copy_trailing(proc);
2993  break;
2994  }
2995  case '\n':
2996  {
2997  _c4dbgfdq("newline", curr);
2998  _filter_nl_dquoted(proc);
2999  break;
3000  }
3001  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3002  {
3003  _c4dbgfdq("carriage return, ignore", curr);
3004  proc.skip();
3005  break;
3006  }
3007  case '\\':
3008  {
3009  _filter_dquoted_backslash(proc);
3010  break;
3011  }
3012  default:
3013  {
3014  proc.copy();
3015  break;
3016  }
3017  }
3018  }
3019  _c4dbgfdq("after={}", _prs(proc.sofar()));
3020  return proc.result();
3021 }
3022 
3023 #undef _c4dbgfdq
3024 
3025 
3026 template<class EventHandler>
3027 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
3028 {
3029  FilterProcessorSrcDst proc(scalar, dst);
3030  return _filter_dquoted(proc);
3031 }
3032 
3033 template<class EventHandler>
3034 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
3035 {
3036  FilterProcessorInplaceMidExtending proc(dst, cap);
3037  return _filter_dquoted(proc);
3038 }
3039 
3040 
3041 //-----------------------------------------------------------------------------
3042 //-----------------------------------------------------------------------------
3043 //-----------------------------------------------------------------------------
3044 // block filtering helpers
3045 
3046 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
3047 {
3048  if(indentation + 1 > s.len)
3049  return npos;
3050  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
3051  {
3052  if(s.str[i] == '\n')
3053  {
3054  csubstr rem = s.sub(i + 1);
3055  size_t first = rem.first_not_of(' ');
3056  first = (first != npos) ? first : rem.len;
3057  if(first > indentation)
3058  return i;
3059  }
3060  }
3061  return npos;
3062 }
3063 
3064 template<class EventHandler>
3065 template<class FilterProcessor>
3066 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3067 {
3068  _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3069  _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3070 
3071  // a debugging scaffold:
3072  #if 0
3073  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3074  #else
3075  #define _c4dbgchomp(...)
3076  #endif
3077 
3078  // advance to the last line having spaces beyond the indentation
3079  {
3080  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3081  if(last != npos)
3082  {
3083  _c4dbgchomp("found newline and larger indentation. last={}", last);
3084  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3085  _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3086  // remove indentation spaces, copy the rest
3087  while((proc.rpos < last) && proc.has_more_chars())
3088  {
3089  const char curr = proc.curr();
3090  _c4dbgchomp("curr='{}'", _c4prc(curr));
3091  switch(curr)
3092  {
3093  case '\n':
3094  {
3095  _c4dbgchomp("newline! remlen={}", proc.rem().len);
3096  proc.copy();
3097  // are there spaces after the newline?
3098  csubstr at_next_line = proc.rem();
3099  if(at_next_line.begins_with(' '))
3100  {
3101  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3102  // there are spaces.
3103  size_t first_non_space = at_next_line.first_not_of(' ');
3104  _c4dbgchomp("first_non_space={}", first_non_space);
3105  if(first_non_space == npos)
3106  {
3107  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3108  first_non_space = at_next_line.len;
3109  }
3110  if(first_non_space <= indentation)
3111  {
3112  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3113  proc.skip(first_non_space);
3114  }
3115  else
3116  {
3117  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3118  proc.skip(indentation);
3119  // copy the spaces after the indentation
3120  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3121  proc.copy(first_non_space - indentation);
3122  }
3123  }
3124  break;
3125  }
3126  case '\r':
3127  proc.skip();
3128  break;
3129  }
3130  }
3131  }
3132  }
3133 
3134  // from now on, we only have line ends (or indentation spaces)
3135  switch(chomp)
3136  {
3137  case CHOMP_CLIP:
3138  {
3139  bool had_one = false;
3140  while(proc.has_more_chars())
3141  {
3142  const char curr = proc.curr();
3143  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3144  switch(curr)
3145  {
3146  case '\n':
3147  {
3148  _c4dbgchomp("copy newline!", curr);
3149  proc.copy();
3150  proc.set_at_end();
3151  had_one = true;
3152  break;
3153  }
3154  case ' ':
3155  case '\r':
3156  _c4dbgchomp("skip!", curr);
3157  proc.skip();
3158  break;
3159  }
3160  }
3161  if(!had_one) // there were no newline characters. add one.
3162  {
3163  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3164  proc.set('\n');
3165  }
3166  break;
3167  }
3168  case CHOMP_KEEP:
3169  {
3170  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3171  while(proc.has_more_chars())
3172  {
3173  const char curr = proc.curr();
3174  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3175  switch(curr)
3176  {
3177  case '\n':
3178  _c4dbgchomp("copy newline!", curr);
3179  proc.copy();
3180  break;
3181  case ' ':
3182  case '\r':
3183  _c4dbgchomp("skip!", curr);
3184  proc.skip();
3185  break;
3186  }
3187  }
3188  break;
3189  }
3190  case CHOMP_STRIP:
3191  {
3192  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3193  // nothing to do!
3194  break;
3195  }
3196  }
3197 
3198  #undef _c4dbgchomp
3199 }
3200 
3201 
3202 // a debugging scaffold:
3203 #if 0
3204 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3205 #else
3206 #define _c4dbgfb(...)
3207 #endif
3208 
3209 template<class EventHandler>
3210 template<class FilterProcessor>
3211 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3212 {
3213  csubstr rem = proc.rem(); // remaining
3214  if(rem.len)
3215  {
3216  size_t first = rem.first_not_of(' ');
3217  if(first != npos)
3218  {
3219  _c4dbgfb("{} spaces follow before next nonws character", first);
3220  if(first < indentation)
3221  {
3222  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3223  proc.skip(first);
3224  }
3225  else
3226  {
3227  _c4dbgfb("skip {} spaces from indentation", indentation);
3228  proc.skip(indentation);
3229  }
3230  }
3231  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3232  else
3233  {
3234  _c4dbgfb("all spaces to the end: {} spaces", first);
3235  first = rem.len;
3236  if(first)
3237  {
3238  if(first < indentation)
3239  {
3240  _c4dbgfb("skip everything", first);
3241  proc.skip(proc.src.len - proc.rpos);
3242  }
3243  else
3244  {
3245  _c4dbgfb("skip {} spaces from indentation", indentation);
3246  proc.skip(indentation);
3247  }
3248  }
3249  }
3250  #endif
3251  }
3252 }
3253 
3254 template<class EventHandler>
3255 template<class FilterProcessor>
3256 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3257 {
3258  csubstr contents = proc.src.trimr(" \n\r");
3259  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3260  if(!contents.len)
3261  {
3262  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3263  if(chomp == CHOMP_KEEP && proc.src.len)
3264  {
3265  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3266  while(proc.has_more_chars())
3267  {
3268  const char curr = proc.curr();
3269  if(curr == '\n')
3270  proc.copy();
3271  else
3272  proc.skip();
3273  }
3274  if(!proc.wpos)
3275  {
3276  proc.set('\n');
3277  }
3278  }
3279  }
3280  return contents.len;
3281 }
3282 
3283 template<class EventHandler>
3284 template<class FilterProcessor>
3285 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3286 {
3287  _c4dbgfb("contents_len={}", contents_len);
3288 
3289  _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3290 
3291  // extend contents to just before the first newline at the end,
3292  // in case it is preceded by spaces
3293  size_t firstnewl = proc.src.first_of('\n', contents_len);
3294  if(firstnewl != npos)
3295  {
3296  contents_len = firstnewl;
3297  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3298  }
3299  else
3300  {
3301  contents_len = proc.src.len;
3302  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3303  }
3304 
3305  return contents_len;
3306 }
3307 
3308 #undef _c4dbgfb
3309 
3310 
3311 //-----------------------------------------------------------------------------
3312 //-----------------------------------------------------------------------------
3313 //-----------------------------------------------------------------------------
3314 
3315 // a debugging scaffold:
3316 #if 0
3317 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3318 #else
3319 #define _c4dbgfbl(...)
3320 #endif
3321 
3322 template<class EventHandler>
3323 template<class FilterProcessor>
3324 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3325 {
3326  _c4dbgfbl("indentation={} before={}", indentation, _prs(proc.src));
3327 
3328  size_t contents_len = _handle_all_whitespace(proc, chomp);
3329  if(!contents_len)
3330  return proc.result();
3331 
3332  contents_len = _extend_to_chomp(proc, contents_len);
3333 
3334  _c4dbgfbl("to filter={}", _prs(proc.src.first(contents_len)));
3335 
3336  _filter_block_indentation(proc, indentation);
3337 
3338  // now filter the bulk
3339  while(proc.has_more_chars(/*maxpos*/contents_len))
3340  {
3341  const char curr = proc.curr();
3342  _c4dbgfbl("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3343  switch(curr)
3344  {
3345  case '\n':
3346  {
3347  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3348  proc.copy(); // copy the newline
3349  _filter_block_indentation(proc, indentation);
3350  break;
3351  }
3352  case '\r':
3353  proc.skip();
3354  break;
3355  default:
3356  proc.copy();
3357  break;
3358  }
3359  }
3360 
3361  _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3362 
3363  _filter_chomp(proc, chomp, indentation);
3364 
3365  _c4dbgfbl("final={}", _prs(proc.sofar()));
3366 
3367  return proc.result();
3368 }
3369 
3370 #undef _c4dbgfbl
3371 
3372 template<class EventHandler>
3373 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3374 {
3375  FilterProcessorSrcDst proc(scalar, dst);
3376  return _filter_block_literal(proc, indentation, chomp);
3377 }
3378 
3379 template<class EventHandler>
3380 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3381 {
3382  FilterProcessorInplaceEndExtending proc(scalar, cap);
3383  return _filter_block_literal(proc, indentation, chomp);
3384 }
3385 
3386 
3387 //-----------------------------------------------------------------------------
3388 //-----------------------------------------------------------------------------
3389 //-----------------------------------------------------------------------------
3390 
3391 // a debugging scaffold:
3392 #if 0
3393 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3394 #else
3395 #define _c4dbgfbf(...)
3396 #endif
3397 
3398 
3399 template<class EventHandler>
3400 template<class FilterProcessor>
3401 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3402 {
3403  _filter_block_indentation(proc, indentation);
3404  while(proc.has_more_chars(len))
3405  {
3406  const char curr = proc.curr();
3407  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3408  switch(curr)
3409  {
3410  case '\n':
3411  _c4dbgfbf("newline.", curr);
3412  proc.copy();
3413  _filter_block_indentation(proc, indentation);
3414  break;
3415  case '\r':
3416  proc.skip();
3417  break;
3418  case ' ':
3419  case '\t':
3420  {
3421  size_t first = proc.rem().first_not_of(" \t");
3422  _c4dbgfbf("space. first={}", first);
3423  if(first == npos)
3424  first = proc.rem().len;
3425  _c4dbgfbf("... indentation increased to {}", first);
3426  _filter_block_folded_indented_block(proc, indentation, len, first);
3427  break;
3428  }
3429  default:
3430  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3431  return;
3432  }
3433  }
3434 }
3435 
3436 template<class EventHandler>
3437 template<class FilterProcessor>
3438 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3439 {
3440  switch(num_newl)
3441  {
3442  case 1u:
3443  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3444  wpos_at_first_newl = proc.wpos;
3445  proc.skip();
3446  proc.set(' ');
3447  break;
3448  case 2u:
3449  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3450  _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
3451  _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
3452  _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3453  proc.skip();
3454  proc.set_at(wpos_at_first_newl, '\n');
3455  _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
3456  break;
3457  default:
3458  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3459  proc.copy();
3460  break;
3461  }
3462  return wpos_at_first_newl;
3463 }
3464 
3465 template<class EventHandler>
3466 template<class FilterProcessor>
3467 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3468 {
3469  _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
3470  size_t num_newl = 0;
3471  size_t wpos_at_first_newl = npos;
3472  while(proc.has_more_chars(len))
3473  {
3474  const char curr = proc.curr();
3475  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3476  switch(curr)
3477  {
3478  case '\n':
3479  {
3480  _c4dbgfbf("newline. sofar={}", num_newl);
3481  // NOTE: vs2022-32bit-release builds were giving wrong
3482  // results in this block, if it was written as either
3483  // as a switch(num_newl) or its equivalent if-form.
3484  //
3485  // For this reason, we're using a dedicated function
3486  // (**_compress), which seems to work around the issue.
3487  //
3488  // The manifested problem was that somewhere between the
3489  // assignment to curr and this point, proc.wpos (the
3490  // write-position of the processor) jumped to npos, which
3491  // made the write wrap-around! To make things worse,
3492  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3493  // problem go away!
3494  //
3495  // The only way to make the problem appear with prints
3496  // enabled was by disabling all prints in this function
3497  // (including in the block which was moved to the compress
3498  // function) and then selectively enabling only some of
3499  // those prints.
3500  //
3501  // This may be due to some bug in the cl-x86 optimizer; or
3502  // it may be triggered by some UB which may be
3503  // inadvertedly present in this function or in the filter
3504  // processor. This is despite our best efforts to weed out
3505  // any such UB problem: neither clang-tidy nor none of the
3506  // sanitizers, or gcc's -fanalyzer pointed to any problems
3507  // in this code.
3508  //
3509  // In the end, moving this block to a separate function
3510  // was the only way to bury the problem. But it may
3511  // resurface again, as The Undead, rising to from the
3512  // grave to haunt us with his terrible presence.
3513  //
3514  // We may have to revisit this. With a stake, and lots of
3515  // garlic.
3516  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3517  _filter_block_indentation(proc, indentation);
3518  break;
3519  }
3520  case ' ':
3521  case '\t':
3522  {
3523  size_t first = proc.rem().first_not_of(" \t");
3524  _c4dbgfbf("space. first={}", first);
3525  if(first == npos)
3526  first = proc.rem().len;
3527  _c4dbgfbf("... indentation increased to {}", first);
3528  if(num_newl)
3529  {
3530  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3531  proc.set_at(wpos_at_first_newl, '\n');
3532  }
3533  if(num_newl > 1u)
3534  {
3535  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3536  proc.set('\n');
3537  }
3538  _filter_block_folded_indented_block(proc, indentation, len, first);
3539  num_newl = 0;
3540  wpos_at_first_newl = npos;
3541  break;
3542  }
3543  case '\r':
3544  proc.skip();
3545  break;
3546  default:
3547  _c4dbgfbf("not space, not newline. stop.", 0);
3548  return;
3549  }
3550  }
3551 }
3552 
3553 
3554 template<class EventHandler>
3555 template<class FilterProcessor>
3556 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3557 {
3558  _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3559  if(curr_indentation)
3560  proc.copy(curr_indentation);
3561  while(proc.has_more_chars(len))
3562  {
3563  const char curr = proc.curr();
3564  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3565  switch(curr)
3566  {
3567  case '\n':
3568  {
3569  proc.copy();
3570  _filter_block_indentation(proc, indentation);
3571  csubstr rem = proc.rem();
3572  const size_t first = rem.first_not_of(' ');
3573  _c4dbgfbf("newline. firstns={}", first);
3574  if(first == 0)
3575  {
3576  const char c = rem[first];
3577  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3578  if(c == '\n' || c == '\r')
3579  {
3580  ;
3581  }
3582  else
3583  {
3584  _c4dbgfbf("done with indented block", first);
3585  goto endloop;
3586  }
3587  }
3588  else if(first != npos)
3589  {
3590  proc.copy(first);
3591  _c4dbgfbf("copy all {} spaces", first);
3592  }
3593  break;
3594  }
3595  break;
3596  case '\r':
3597  proc.skip();
3598  break;
3599  default:
3600  proc.copy();
3601  break;
3602  }
3603  }
3604  endloop:
3605  return;
3606 }
3607 
3608 
3609 template<class EventHandler>
3610 template<class FilterProcessor>
3611 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3612 {
3613  _c4dbgfbf("indentation={} before={}", indentation, _prs(proc.src));
3614 
3615  size_t contents_len = _handle_all_whitespace(proc, chomp);
3616  if(!contents_len)
3617  return proc.result();
3618 
3619  contents_len = _extend_to_chomp(proc, contents_len);
3620 
3621  _c4dbgfbf("to filter={}", _prs(proc.src.first(contents_len)));
3622 
3623  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3624 
3625  // now filter the bulk
3626  while(proc.has_more_chars(/*maxpos*/contents_len))
3627  {
3628  const char curr = proc.curr();
3629  _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3630  switch(curr)
3631  {
3632  case '\n':
3633  {
3634  _c4dbgfbf("found newline", curr);
3635  _filter_block_folded_newlines(proc, indentation, contents_len);
3636  break;
3637  }
3638  case '\r':
3639  proc.skip();
3640  break;
3641  default:
3642  proc.copy();
3643  break;
3644  }
3645  }
3646 
3647  _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3648 
3649  _filter_chomp(proc, chomp, indentation);
3650 
3651  _c4dbgfbf("final={}", proc.sofar().len, _prs(proc.sofar()));
3652 
3653  return proc.result();
3654 }
3655 
3656 #undef _c4dbgfbf
3657 
3658 template<class EventHandler>
3659 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3660 {
3661  FilterProcessorSrcDst proc(scalar, dst);
3662  return _filter_block_folded(proc, indentation, chomp);
3663 }
3664 
3665 template<class EventHandler>
3666 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3667 {
3668  FilterProcessorInplaceEndExtending proc(scalar, cap);
3669  return _filter_block_folded(proc, indentation, chomp);
3670 }
3671 
3672 
3673 //-----------------------------------------------------------------------------
3674 //-----------------------------------------------------------------------------
3675 //-----------------------------------------------------------------------------
3676 
3677 template<class EventHandler>
3678 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3679 {
3680  _c4dbgpf("filtering plain scalar: s={}", _prs(s));
3681  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3682  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3683  _c4dbgpf("filtering plain scalar: success! s={}", _prs(r.get()));
3684  return r.get();
3685 }
3686 
3687 //-----------------------------------------------------------------------------
3688 
3689 template<class EventHandler>
3690 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3691 {
3692  _c4dbgpf("filtering squo scalar: s={}", _prs(s));
3693  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3694  _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3695  _c4dbgpf("filtering squo scalar: success! s={}", _prs(r.get()));
3696  return r.get();
3697 }
3698 
3699 
3700 //-----------------------------------------------------------------------------
3701 
3702 template<class EventHandler>
3703 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3704 {
3705  _c4dbgpf("filtering dquo scalar: s={}", _prs(s));
3706  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3707  if(C4_LIKELY(r.valid()))
3708  {
3709  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(r.get()));
3710  return r.get();
3711  }
3712  else
3713  {
3714  const size_t len = r.required_len();
3715  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3716  substr dst = _alloc_arena(len, &s);
3717  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3718  if(dst.str)
3719  {
3720  _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3721  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3722  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3723  _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3724  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3725  _c4dbgpf("filtering dquo scalar: success! s={}", _prs(rsd.get()));
3726  return rsd.get();
3727  }
3728  return dst;
3729  }
3730 }
3731 
3732 
3733 //-----------------------------------------------------------------------------
3734 
3735 template<class EventHandler>
3736 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3737 {
3738  if(s.is_sub(_buf()))
3739  {
3740  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3741  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3742  if(s.len)
3743  memmove(s.str - 1, s.str, s.len);
3744  --s.str;
3745  s.str[s.len] = '\n';
3746  ++s.len;
3747  return s;
3748  }
3749  else
3750  {
3751  substr dst = _alloc_arena(s.len + 1, &s);
3752  if(s.len)
3753  memcpy(dst.str, s.str, s.len);
3754  dst[s.len] = '\n';
3755  return dst;
3756  }
3757 }
3758 
3759 template<class EventHandler>
3760 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3761 {
3762  _c4dbgpf("filtering block literal scalar: s={}", _prs(s));
3763  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3764  csubstr result;
3765  if(C4_LIKELY(r.valid()))
3766  {
3767  result = r.get();
3768  }
3769  else
3770  {
3771  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3772  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3773  // this can only happen when adding a single newline in clip mode.
3774  // so we shift left the scalar by one place
3775  result = _move_scalar_left_and_add_newline(s);
3776  }
3777  _c4dbgpf("filtering block literal scalar: success! s={}", _prs(result));
3778  return result;
3779 }
3780 
3781 
3782 //-----------------------------------------------------------------------------
3783 template<class EventHandler>
3784 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3785 {
3786  _c4dbgpf("filtering block folded scalar: s={}", _prs(s));
3787  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3788  csubstr result;
3789  if(C4_LIKELY(r.valid()))
3790  {
3791  result = r.get();
3792  }
3793  else
3794  {
3795  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3796  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3797  // this can only happen when adding a single newline in clip mode.
3798  // so we shift left the scalar by one place
3799  result = _move_scalar_left_and_add_newline(s);
3800  }
3801  _c4dbgpf("filtering block folded scalar: success! s={}", _prs(result));
3802  return result;
3803 }
3804 
3805 
3806 //-----------------------------------------------------------------------------
3807 
3808 template<class EventHandler>
3809 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3810 {
3811  if(sc.needs_filter)
3812  {
3813  if(m_options.scalar_filtering())
3814  {
3815  return _filter_scalar_plain(sc.scalar, indentation);
3816  }
3817  else
3818  {
3819  _c4dbgp("plain scalar left unfiltered");
3820  m_evt_handler->mark_key_scalar_unfiltered();
3821  }
3822  }
3823  else
3824  {
3825  _c4dbgp("plain scalar doesn't need filtering");
3826  }
3827  return sc.scalar;
3828 }
3829 
3830 template<class EventHandler>
3831 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3832 {
3833  if(sc.needs_filter)
3834  {
3835  if(m_options.scalar_filtering())
3836  {
3837  return _filter_scalar_plain(sc.scalar, indentation);
3838  }
3839  else
3840  {
3841  _c4dbgp("plain scalar left unfiltered");
3842  m_evt_handler->mark_val_scalar_unfiltered();
3843  }
3844  }
3845  else
3846  {
3847  _c4dbgp("plain scalar doesn't need filtering");
3848  }
3849  return sc.scalar;
3850 }
3851 
3852 
3853 //-----------------------------------------------------------------------------
3854 
3855 template<class EventHandler>
3856 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3857 {
3858  if(sc.needs_filter)
3859  {
3860  if(m_options.scalar_filtering())
3861  {
3862  return _filter_scalar_squot(sc.scalar);
3863  }
3864  else
3865  {
3866  _c4dbgp("squo key scalar left unfiltered");
3867  m_evt_handler->mark_key_scalar_unfiltered();
3868  }
3869  }
3870  else
3871  {
3872  _c4dbgp("squo key scalar doesn't need filtering");
3873  }
3874  return sc.scalar;
3875 }
3876 
3877 template<class EventHandler>
3878 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3879 {
3880  if(sc.needs_filter)
3881  {
3882  if(m_options.scalar_filtering())
3883  {
3884  return _filter_scalar_squot(sc.scalar);
3885  }
3886  else
3887  {
3888  _c4dbgp("squo val scalar left unfiltered");
3889  m_evt_handler->mark_val_scalar_unfiltered();
3890  }
3891  }
3892  else
3893  {
3894  _c4dbgp("squo val scalar doesn't need filtering");
3895  }
3896  return sc.scalar;
3897 }
3898 
3899 
3900 //-----------------------------------------------------------------------------
3901 
3902 template<class EventHandler>
3903 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3904 {
3905  if(sc.needs_filter)
3906  {
3907  if(m_options.scalar_filtering())
3908  {
3909  return _filter_scalar_dquot(sc.scalar);
3910  }
3911  else
3912  {
3913  _c4dbgp("dquo scalar left unfiltered");
3914  m_evt_handler->mark_key_scalar_unfiltered();
3915  }
3916  }
3917  else
3918  {
3919  _c4dbgp("dquo scalar doesn't need filtering");
3920  }
3921  return sc.scalar;
3922 }
3923 
3924 template<class EventHandler>
3925 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3926 {
3927  if(sc.needs_filter)
3928  {
3929  if(m_options.scalar_filtering())
3930  {
3931  return _filter_scalar_dquot(sc.scalar);
3932  }
3933  else
3934  {
3935  _c4dbgp("dquo scalar left unfiltered");
3936  m_evt_handler->mark_val_scalar_unfiltered();
3937  }
3938  }
3939  else
3940  {
3941  _c4dbgp("dquo scalar doesn't need filtering");
3942  }
3943  return sc.scalar;
3944 }
3945 
3946 
3947 //-----------------------------------------------------------------------------
3948 
3949 template<class EventHandler>
3950 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3951 {
3952  if(m_options.scalar_filtering())
3953  {
3954  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3955  }
3956  else
3957  {
3958  _c4dbgp("literal scalar left unfiltered");
3959  m_evt_handler->mark_key_scalar_unfiltered();
3960  }
3961  return sb.scalar;
3962 }
3963 
3964 template<class EventHandler>
3965 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3966 {
3967  if(m_options.scalar_filtering())
3968  {
3969  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3970  }
3971  else
3972  {
3973  _c4dbgp("literal scalar left unfiltered");
3974  m_evt_handler->mark_val_scalar_unfiltered();
3975  }
3976  return sb.scalar;
3977 }
3978 
3979 
3980 //-----------------------------------------------------------------------------
3981 
3982 template<class EventHandler>
3983 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3984 {
3985  if(m_options.scalar_filtering())
3986  {
3987  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3988  }
3989  else
3990  {
3991  _c4dbgp("folded scalar left unfiltered");
3992  m_evt_handler->mark_key_scalar_unfiltered();
3993  }
3994  return sb.scalar;
3995 }
3996 
3997 template<class EventHandler>
3998 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3999 {
4000  if(m_options.scalar_filtering())
4001  {
4002  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4003  }
4004  else
4005  {
4006  _c4dbgp("folded scalar left unfiltered");
4007  m_evt_handler->mark_val_scalar_unfiltered();
4008  }
4009  return sb.scalar;
4010 }
4011 
4012 
4013 //-----------------------------------------------------------------------------
4014 //-----------------------------------------------------------------------------
4015 //-----------------------------------------------------------------------------
4016 
4017 #ifdef RYML_DBG // !!! <----------------------------------
4018 
4019 template<class EventHandler>
4020 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4021 {
4022  ParserState *s = m_evt_handler->m_curr;
4023  char buf1_[64], buf2_[64], buf3_[64];
4024  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4025  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4026  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4027  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4028  s->flags |= on;
4029 }
4030 
4031 template<class EventHandler>
4032 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4033 {
4034  ParserState *s = m_evt_handler->m_curr;
4035  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4036  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4037  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4038  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4039  csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4040  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4041  _RYML_ASSERT_BASIC((on & off) == ParserFlag_t(0));
4042  s->flags &= ~off;
4043  s->flags |= on;
4044 }
4045 
4046 template<class EventHandler>
4047 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4048 {
4049  ParserState *s = m_evt_handler->m_curr;
4050  char buf1_[64], buf2_[64], buf3_[64];
4051  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4052  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4053  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4054  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4055  s->flags &= ~off;
4056 }
4057 
4058 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4059 {
4060  size_t pos = 0;
4061  bool gotone = false;
4062 
4063  #define _prflag(fl) \
4064  if((flags & fl) == (fl)) \
4065  { \
4066  if(gotone) \
4067  { \
4068  if(pos + 1 < buf.len) \
4069  buf[pos] = '|'; \
4070  ++pos; \
4071  } \
4072  csubstr fltxt = #fl; \
4073  if(pos + fltxt.len <= buf.len) \
4074  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4075  pos += fltxt.len; \
4076  gotone = true; \
4077  }
4078 
4079  _prflag(RTOP);
4080  _prflag(RUNK);
4081  _prflag(RMAP);
4082  _prflag(RSEQ);
4083  _prflag(RFLOW);
4084  _prflag(RBLCK);
4085  _prflag(QMRK);
4086  _prflag(RKEY);
4087  _prflag(RVAL);
4088  _prflag(RKCL);
4089  _prflag(RNXT);
4090  _prflag(SSCL);
4091  _prflag(QSCL);
4092  _prflag(RSET);
4093  _prflag(RDOC);
4094  _prflag(NDOC);
4095  _prflag(USTY);
4096  _prflag(RSEQIMAP);
4097 
4098  #undef _prflag
4099 
4100  if(pos == 0)
4101  if(buf.len > 0)
4102  buf[pos++] = '0';
4103 
4104  _RYML_CHECK_BASIC(pos <= buf.len);
4105 
4106  return buf.first(pos);
4107 }
4108 
4109 #endif // RYML_DBG !!! <----------------------------------
4110 
4111 
4112 //-----------------------------------------------------------------------------
4113 //-----------------------------------------------------------------------------
4114 //-----------------------------------------------------------------------------
4115 
4116 template<class EventHandler>
4117 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
4118 {
4119  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4120  return _buf().sub(loc.offset);
4121 }
4122 
4123 template<class EventHandler>
4124 Location ParseEngine<EventHandler>::val_location(const char *val) const
4125 {
4126  if(C4_UNLIKELY(val == nullptr))
4127  return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4128  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4129  // NOTE: if any of these checks fails, the parser needs to be
4130  // instantiated with locations enabled.
4131  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4132  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4133  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4134  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4135  // NOTE: the pointer needs to belong to the buffer that was used to parse.
4136  csubstr src = _buf();
4137  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4138  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4139  // ok. search the first stored newline after the given ptr
4140  using lineptr_type = size_t const* C4_RESTRICT;
4141  lineptr_type lineptr = nullptr;
4142  size_t offset = (size_t)(val - src.begin());
4143  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4144  {
4145  // just do a linear search if the size is small.
4146  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4147  {
4148  if(*curr > offset)
4149  {
4150  lineptr = curr;
4151  break;
4152  }
4153  }
4154  }
4155  else
4156  {
4157  // do a bisection search if the size is not small.
4158  //
4159  // We could use std::lower_bound but this is simple enough and
4160  // spares the costly include of <algorithm>.
4161  size_t count = m_newline_offsets_size;
4162  lineptr = m_newline_offsets;
4163  while(count)
4164  {
4165  size_t step = count >> 1;
4166  lineptr_type it = lineptr + step;
4167  if(*it < offset)
4168  {
4169  lineptr = ++it;
4170  count -= step + 1;
4171  }
4172  else
4173  {
4174  count = step;
4175  }
4176  }
4177  }
4178  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4179  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4180  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4181  Location loc;
4182  loc.name = m_evt_handler->m_curr->pos.name;
4183  loc.offset = offset;
4184  loc.line = (size_t)(lineptr - m_newline_offsets);
4185  if(lineptr > m_newline_offsets)
4186  loc.col = (offset - *(lineptr-1) - 1u);
4187  else
4188  loc.col = offset;
4189  return loc;
4190 }
4191 
4192 template<class EventHandler>
4193 void ParseEngine<EventHandler>::_prepare_locations()
4194 {
4195  csubstr src = _buf();
4196  size_t numnewlines = 1u + src.count('\n');
4197  _resize_locations(numnewlines);
4198  m_newline_offsets_size = 0;
4199  for(size_t i = 0; i < src.len; i++)
4200  if(src.str[i] == '\n')
4201  m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4202  m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4203  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4204 }
4205 
4206 template<class EventHandler>
4207 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4208 {
4209  numnewlines = numnewlines >= 16 ? numnewlines : 16;
4210  if(numnewlines > m_newline_offsets_capacity)
4211  {
4212  if(m_newline_offsets)
4213  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4214  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4215  m_newline_offsets_capacity = numnewlines;
4216  }
4217 }
4218 
4219 template<class EventHandler>
4220 bool ParseEngine<EventHandler>::_locations_dirty() const
4221 {
4222  return !m_newline_offsets_size;
4223 }
4224 
4225 
4226 //-----------------------------------------------------------------------------
4227 //-----------------------------------------------------------------------------
4228 //-----------------------------------------------------------------------------
4229 
4230 template<class EventHandler>
4231 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4232 {
4233  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4234  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4235  {
4236  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4237  {
4238  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4239  _skipchars(" \t");
4240  }
4241  // comments
4242  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4243  {
4244  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4245  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4246  }
4247  }
4248 }
4249 
4250 
4251 template<class EventHandler>
4252 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4253 {
4254  _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4255  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4256  if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4257  {
4258  csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4259  _c4dbgpf("flow: after indentation={}", _prs(trimmed));
4260  if(trimmed.len && trimmed.triml(" \t").len)
4261  {
4262  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4263  _c4err("bad indentation");
4264  }
4265  }
4266 }
4267 
4268 template<class EventHandler>
4269 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4270 {
4271  const size_t mark = m_evt_handler->m_curr->pos.offset;
4272  const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4273  _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4274  if(firstpos != npos)
4275  {
4276  _c4dbgp("block: non empty line");
4277  _line_progressed(firstpos);
4278  return mark;
4279  }
4280  else
4281  {
4282  _c4dbgp("block: rest of line is whitespace");
4283  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4284  return npos;
4285  }
4286 }
4287 
4288 template<class EventHandler>
4289 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4290 {
4291  _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4292  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4293  if(end_mark != start_mark)
4294  {
4295  csubstr leading = _buf().range(start_mark, end_mark);
4296  _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading, true));
4297  if(leading.find('\t') != npos)
4298  _c4err("invalid tab character to the left");
4299  }
4300 }
4301 
4302 
4303 //-----------------------------------------------------------------------------
4304 
4305 
4306 template<class EventHandler>
4307 void ParseEngine<EventHandler>::_handle_colon()
4308 {
4309  size_t curr = m_evt_handler->m_curr->pos.line;
4310  if(C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon))
4311  {
4312  _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4313  _c4err("two colons on same line");
4314  }
4315  _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4316  m_prev_colon = curr;
4317 }
4318 
4319 template<class EventHandler>
4320 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4321 {
4322  _c4dbgpf("store annotation[{}]: {}", dst->num_entries, _prs(str));
4323  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4324  dst->annotations[dst->num_entries].str = str;
4325  dst->annotations[dst->num_entries].indentation = {};
4326  dst->annotations[dst->num_entries].line = {};
4327  dst->annotations[dst->num_entries].orig = {};
4328  ++dst->num_entries;
4329 }
4330 
4331 template<class EventHandler>
4332 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4333 {
4334  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4335  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4336  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4337  {
4338  _c4err("parse error");
4339  }
4340  dst->annotations[dst->num_entries].str = str;
4341  dst->annotations[dst->num_entries].indentation = indentation;
4342  dst->annotations[dst->num_entries].line = line;
4343  dst->annotations[dst->num_entries].orig = {};
4344  ++dst->num_entries;
4345 }
4346 
4347 template<class EventHandler>
4348 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4349 {
4350  _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4351  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4352  if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4353  {
4354  _c4err("parse error");
4355  }
4356  dst->annotations[dst->num_entries].str = str;
4357  dst->annotations[dst->num_entries].indentation = indentation;
4358  dst->annotations[dst->num_entries].line = line;
4359  dst->annotations[dst->num_entries].orig = orig;
4360  ++dst->num_entries;
4361 }
4362 
4363 template<class EventHandler>
4364 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4365 {
4366  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4367 }
4368 
4369 template<class EventHandler>
4370 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4371 {
4372  if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4373  return false;
4374  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4375  if(m_pending_tags.num_entries)
4376  {
4377  _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4378  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4379  {
4380  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4381  _clear_annotations(&m_pending_tags);
4382  }
4383  else
4384  {
4385  _c4err("too many tags");
4386  }
4387  }
4388  if(m_pending_anchors.num_entries)
4389  {
4390  _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4391  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4392  {
4393  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4394  _clear_annotations(&m_pending_anchors);
4395  }
4396  else
4397  {
4398  _c4err("too many anchors");
4399  }
4400  }
4401  m_evt_handler->set_key_scalar_plain_empty();
4402  m_evt_handler->set_val_scalar_plain_empty();
4403  return true;
4404 }
4405 
4406 template<class EventHandler>
4407 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4408 {
4409  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4410  if(m_pending_tags.num_entries)
4411  {
4412  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4413  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4414  {
4415  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4416  _clear_annotations(&m_pending_tags);
4417  }
4418  else
4419  {
4420  _c4err("too many tags"); // LCOV_EXCL_LINE
4421  }
4422  }
4423  if(m_pending_anchors.num_entries)
4424  {
4425  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4426  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4427  {
4428  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4429  _clear_annotations(&m_pending_anchors);
4430  }
4431  else
4432  {
4433  _c4err("too many anchors"); // LCOV_EXCL_LINE
4434  }
4435  }
4436 }
4437 
4438 template<class EventHandler>
4439 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4440 {
4441  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4442  if(m_pending_tags.num_entries)
4443  {
4444  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4445  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4446  {
4447  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4448  _clear_annotations(&m_pending_tags);
4449  }
4450  else
4451  {
4452  _c4err("too many tags");
4453  }
4454  }
4455  if(m_pending_anchors.num_entries)
4456  {
4457  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4458  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4459  {
4460  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4461  _clear_annotations(&m_pending_anchors);
4462  }
4463  else
4464  {
4465  _c4err("too many anchors");
4466  }
4467  }
4468 }
4469 
4470 template<class EventHandler>
4471 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4472 {
4473  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4474  if(m_pending_tags.num_entries == 2)
4475  {
4476  _c4dbgp("2 tags, setting entry 0");
4477  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4478  }
4479  else if(m_pending_tags.num_entries == 1)
4480  {
4481  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4482  if(m_pending_tags.annotations[0].line < current_line)
4483  {
4484  _c4dbgp("...tag is for the map. setting it.");
4485  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4486  _clear_annotations(&m_pending_tags);
4487  }
4488  }
4489  //
4490  if(m_pending_anchors.num_entries == 2)
4491  {
4492  _c4dbgp("2 anchors, setting entry 0");
4493  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4494  }
4495  else if(m_pending_anchors.num_entries == 1)
4496  {
4497  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4498  if(m_pending_anchors.annotations[0].line < current_line)
4499  {
4500  _c4dbgp("...anchor is for the map. setting it.");
4501  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4502  _clear_annotations(&m_pending_anchors);
4503  }
4504  }
4505 }
4506 
4507 template<class EventHandler>
4508 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4509 {
4510  _c4dbgp("annotations_before_start_mapblck_as_key");
4511  switch(m_pending_tags.num_entries)
4512  {
4513  case 1u:
4514  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4515  if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4516  {
4517  _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4518  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4519  _clear_annotations(&m_pending_tags);
4520  }
4521  break;
4522  case 2u:
4523  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4524  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4525  break;
4526  }
4527  switch(m_pending_anchors.num_entries)
4528  {
4529  case 1u:
4530  _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4531  if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4532  {
4533  _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4534  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4535  _clear_annotations(&m_pending_anchors);
4536  }
4537  break;
4538  case 2u:
4539  _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4540  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4541  break;
4542  }
4543 }
4544 
4545 template<class EventHandler>
4546 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4547 {
4548  _c4dbgp("annotations_after_start_mapblck");
4549  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4550  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4551  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4552  {
4553  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4554  switch(m_pending_tags.num_entries)
4555  {
4556  case 1u:
4557  _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4558  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4559  _clear_annotations(&m_pending_tags);
4560  break;
4561  case 2u:
4562  _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4563  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4564  _clear_annotations(&m_pending_tags);
4565  break;
4566  }
4567  switch(m_pending_anchors.num_entries)
4568  {
4569  case 1u:
4570  _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4571  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4572  _clear_annotations(&m_pending_anchors);
4573  break;
4574  case 2u:
4575  _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4576  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4577  _clear_annotations(&m_pending_anchors);
4578  break;
4579  }
4580  }
4581  _set_indentation(key_indentation);
4582 }
4583 
4584 template<class EventHandler>
4585 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4586 {
4587  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4588  // select the left-most annotation on the max line
4589  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4590  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4591  {
4592  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4593  if(ann.line > curr->line)
4594  curr = &ann;
4595  else if(ann.indentation < curr->indentation)
4596  curr = &ann;
4597  }
4598  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4599  {
4600  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4601  if(ann.line > curr->line)
4602  curr = &ann;
4603  else if(ann.indentation < curr->indentation)
4604  curr = &ann;
4605  }
4606  return curr->line < val_line ? val_indentation : curr->indentation;
4607 }
4608 
4609 template<class EventHandler>
4610 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4611 {
4612  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4613  m_evt_handler->set_key_ref(alias);
4614  else
4615  _c4err("aliases cannot have anchors or tags");
4616 }
4617 
4618 template<class EventHandler>
4619 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4620 {
4621  if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4622  m_evt_handler->set_val_ref(alias);
4623  else
4624  _c4err("aliases cannot have anchors or tags");
4625 }
4626 
4627 template<class EventHandler>
4628 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4629 {
4630  _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4631  _c4assert(tag.is_sub(_buf()));
4632  TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4633  if(ret)
4634  {
4635  _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4636  return ret.resolved;
4637  }
4638  _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4639  size_t bufsz = 0;
4640  substr buf = m_evt_handler->arena_rem();
4641  TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4642  csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4643  m_evt_handler->m_curr->pos,
4644  m_evt_handler->m_stack.m_callbacks);
4645  _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4646  _c4assert((bufsz > buf.len) == (!ttag.str));
4647  _c4assert(!!bufsz == (ttag.len == bufsz));
4648  // try again if the arena size was not enough
4649  if(!ttag.str)
4650  {
4651  _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4652  _c4assert(ttag.len == bufsz);
4653  buf = _alloc_arena(bufsz, &tag);
4654  if(buf.str) // the alloc may fail eg with the ints handler
4655  {
4656  ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4657  m_evt_handler->m_curr->pos,
4658  m_evt_handler->m_stack.m_callbacks);
4659  }
4660  _c4assert(ttag.len == bufsz);
4661  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4662  }
4663  else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4664  {
4665  _c4dbgp("tag required arena. update size");
4666  _c4assert(ttag.len == bufsz);
4667  _c4assert(ttag.is_sub(buf));
4668  (void)_alloc_arena(bufsz);
4669  }
4670  C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4671  if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4672  {
4673  _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4674  // is the resolved tag not in any of those buffers?
4675  if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4676  {
4677  _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4678  buf = _alloc_arena(ttag.len, &tag);
4679  if(buf.str) // the alloc may fail eg with the ints handler
4680  memcpy(buf.str, ttag.str, ttag.len);
4681  ttag.str = buf.str; // keep the current len!
4682  _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4683  }
4684  }
4685  C4_SUPPRESS_WARNING_MSVC_POP
4686  _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4687  _c4assert(ttag.len > 0);
4688  // cache the hard-earned result!
4689  m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4690  return ttag;
4691 }
4692 
4693 template<class EventHandler>
4694 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4695 {
4696  _c4assert(directive->begins_with("%YAML"));
4697  size_t version_start = directive->first_not_of(" \t", 5);
4698  if(version_start != npos)
4699  {
4700  csubstr digits = "0123456789";
4701  size_t major_end = directive->first_not_of(digits, version_start);
4702  if(major_end != npos && directive->str[major_end] == '.') // single dot
4703  {
4704  size_t minor_end = directive->first_not_of(digits, major_end + 1);
4705  if(minor_end == npos)
4706  minor_end = directive->len;
4707  _set_first_strict(*directive, minor_end);
4708  *version = directive->range(version_start, minor_end);
4709  _c4dbgpf("%YAML: version={} full={}", *version, _prs(*directive, true));
4710  return true;
4711  }
4712  }
4713  return false;
4714 }
4715 
4716 template<class EventHandler>
4717 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4718 {
4719  _c4assert(directive->begins_with("%TAG"));
4720  csubstr whitespace = " \t";
4721  size_t handle_start = directive->first_not_of(whitespace, 4);
4722  if(handle_start != npos && directive->str[handle_start] == '!')
4723  {
4724  size_t handle_end = directive->first_of(whitespace, handle_start);
4725  if(handle_end != npos)
4726  {
4727  size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4728  if(prefix_start != npos)
4729  {
4730  size_t prefix_end = directive->first_of(whitespace, prefix_start);
4731  if(prefix_end == npos)
4732  prefix_end = directive->len;
4733  _set_first_strict(*directive, prefix_end);
4734  *handle = directive->range(handle_start, handle_end);
4735  *prefix = directive->range(prefix_start, prefix_end);
4736  _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive, true));
4737  if(is_valid_tag_handle(*handle))
4738  return true;
4739  }
4740  }
4741  }
4742  return false;
4743 }
4744 
4745 template<class EventHandler>
4746 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4747 {
4748  _c4dbgpf("handle_directive: rem={}", _prs(directive, true));
4749  _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4750  _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4751  const char *err = nullptr;
4752  csubstr rem;
4753  size_t pos;
4754  auto isdirective = [](csubstr str, csubstr dir) {
4755  if(str.begins_with(dir))
4756  {
4757  csubstr rest = str.sub(dir.len);
4758  return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4759  }
4760  return false;
4761  };
4762  if(isdirective(directive, "%TAG"))
4763  {
4764  csubstr handle;
4765  csubstr prefix;
4766  if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4767  {
4768  err = "invalid %TAG directive";
4769  goto directive_error; // NOLINT
4770  }
4771  m_evt_handler->add_directive_tag(handle, prefix);
4772  }
4773  else if(isdirective(directive, "%YAML"))
4774  {
4775  csubstr version;
4776  if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &version)))
4777  {
4778  err = "invalid %YAML directive";
4779  goto directive_error; // NOLINT
4780  }
4781  if(C4_UNLIKELY(m_has_directives_yaml))
4782  {
4783  err = "multiple %YAML directives";
4784  goto directive_error; // NOLINT
4785  }
4786  m_has_directives_yaml = true;
4787  m_evt_handler->add_directive_yaml(version);
4788  }
4789  m_has_directives = true;
4790  rem = m_evt_handler->m_curr->line_contents.rem;
4791  pos = rem.first_not_of(" \t", directive.len);
4792  pos = pos != npos ? pos : rem.len;
4793  _line_progressed(pos);
4794  rem = rem.sub(pos);
4795  _c4dbgpf("handle_directive: rest={}", _prs(rem));
4796  if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
4797  {
4798  err = "invalid tokens after directive";
4799  goto directive_error; // NOLINT
4800  }
4801 directive_error:
4802  if(C4_UNLIKELY(err != nullptr))
4803  _c4err(err);
4804 }
4805 
4806 template<class EventHandler>
4807 bool ParseEngine<EventHandler>::_handle_bom()
4808 {
4809  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4810  if(rem.len)
4811  {
4812  const csubstr rest = rem.sub(1);
4813  // https://yaml.org/spec/1.2.2/#52-character-encodings
4814  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4815  if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4816  {
4817  _c4dbgp("byte order mark: UTF32BE");
4818  _handle_bom(UTF32BE);
4819  _line_progressed(4);
4820  m_bom_len = 4;
4821  return true;
4822  }
4823  else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4824  {
4825  _c4dbgp("byte order mark: UTF32LE");
4826  _handle_bom(UTF32LE);
4827  _line_progressed(4);
4828  m_bom_len = 4;
4829  return true;
4830  }
4831  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4832  {
4833  _c4dbgp("byte order mark: UTF16BE");
4834  _handle_bom(UTF16BE);
4835  _line_progressed(2);
4836  m_bom_len = 2;
4837  return true;
4838  }
4839  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4840  {
4841  _c4dbgp("byte order mark: UTF16LE");
4842  _handle_bom(UTF16LE);
4843  _line_progressed(2);
4844  m_bom_len = 2;
4845  return true;
4846  }
4847  else if(rem.begins_with("\xef\xbb\xbf"))
4848  {
4849  _c4dbgp("byte order mark: UTF8");
4850  _handle_bom(UTF8);
4851  _line_progressed(3);
4852  m_bom_len = 3;
4853  return true;
4854  }
4855  #undef _rymlisascii
4856  }
4857  return false;
4858 }
4859 
4860 template<class EventHandler>
4861 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4862 {
4863  if(m_encoding == NOBOM)
4864  {
4865  if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4866  m_encoding = enc;
4867  else
4868  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4869  }
4870  else if(enc != m_encoding)
4871  {
4872  _c4err("byte order mark can only be set once");
4873  }
4874 }
4875 
4876 
4877 //-----------------------------------------------------------------------------
4878 
4879 template<class EventHandler>
4880 void ParseEngine<EventHandler>::_handle_seq_json()
4881 {
4882 seqjson_start:
4883  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4884 
4885  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4886  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4887  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4888  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4889  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4890 
4891  _handle_flow_skip_whitespace();
4892  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4893  if(!rem.len)
4894  goto seqjson_again;
4895 
4896  if(has_any(RVAL))
4897  {
4898  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4899  const char first = rem.str[0];
4900  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4901  switch(first)
4902  {
4903  case '"':
4904  {
4905  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4906  ScannedScalar sc = _scan_scalar_dquot();
4907  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4908  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4909  addrem_flags(RNXT, RVAL);
4910  break;
4911  }
4912  case '[':
4913  {
4914  _c4dbgp("seqjson[RVAL]: start child seqjson");
4915  addrem_flags(RNXT, RVAL);
4916  m_evt_handler->begin_seq_val_flow();
4917  addrem_flags(RVAL, RNXT);
4918  _line_progressed(1);
4919  break;
4920  }
4921  case '{':
4922  {
4923  _c4dbgp("seqjson[RVAL]: start child mapjson");
4924  addrem_flags(RNXT, RVAL);
4925  m_evt_handler->begin_map_val_flow();
4926  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4927  _line_progressed(1);
4928  goto seqjson_finish;
4929  }
4930  case ']': // this happens on a trailing comma like ", ]"
4931  {
4932  _c4dbgp("seqjson[RVAL]: end!");
4933  rem_flags(RSEQ);
4934  _end_seq_flow();
4935  _line_progressed(1);
4936  if(!has_all(RSEQ|RFLOW))
4937  goto seqjson_finish;
4938  break;
4939  }
4940  default:
4941  {
4942  ScannedScalar sc;
4943  if(_scan_scalar_seq_json(&sc))
4944  {
4945  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4946  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4947  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4948  addrem_flags(RNXT, RVAL);
4949  }
4950  else
4951  {
4952  _c4err("parse error");
4953  }
4954  }
4955  }
4956  }
4957  else // RNXT
4958  {
4959  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4960  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4961  const char first = rem.str[0];
4962  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4963  switch(first)
4964  {
4965  case ',':
4966  {
4967  _c4dbgp("seqjson[RNXT]: expect next val");
4968  addrem_flags(RVAL, RNXT);
4969  m_evt_handler->add_sibling();
4970  _line_progressed(1);
4971  break;
4972  }
4973  case ']':
4974  {
4975  _c4dbgp("seqjson[RNXT]: end!");
4976  _end_seq_flow();
4977  _line_progressed(1);
4978  goto seqjson_finish;
4979  }
4980  default:
4981  _c4err("parse error");
4982  }
4983  }
4984 
4985  seqjson_again:
4986  _c4dbgt("seqjson: go again", 0);
4987  if(_finished_line())
4988  {
4989  if(C4_LIKELY(!_finished_file()))
4990  {
4991  _line_ended();
4992  _scan_line();
4993  _c4dbgnextline();
4994  }
4995  else
4996  {
4997  _c4err("missing terminating ]");
4998  }
4999  }
5000  goto seqjson_start;
5001 
5002  seqjson_finish:
5003  _c4dbgp("seqjson: finish");
5004 }
5005 
5006 
5007 //-----------------------------------------------------------------------------
5008 
5009 template<class EventHandler>
5010 void ParseEngine<EventHandler>::_handle_map_json()
5011 {
5012 mapjson_start:
5013  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5014 
5015  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5016  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5017  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5018  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
5019  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5020 
5021  _handle_flow_skip_whitespace();
5022  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5023  if(!rem.len)
5024  goto mapjson_again;
5025 
5026  if(has_any(RKEY))
5027  {
5028  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5029  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5030  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5031  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5032  const char first = rem.str[0];
5033  _c4dbgpf("mapjson[RKEY]: '{}'", first);
5034  switch(first)
5035  {
5036  case '"':
5037  {
5038  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5039  ScannedScalar sc = _scan_scalar_dquot();
5040  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5041  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5042  addrem_flags(RKCL, RKEY);
5043  break;
5044  }
5045  case '}': // this happens on a trailing comma like ", }"
5046  {
5047  _c4dbgp("mapjson[RKEY]: end!");
5048  _end_map_flow();
5049  _line_progressed(1);
5050  goto mapjson_finish;
5051  }
5052  default:
5053  _c4err("parse error");
5054  }
5055  }
5056  else if(has_any(RVAL))
5057  {
5058  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5059  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5060  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5061  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5062  const char first = rem.str[0];
5063  _c4dbgpf("mapjson[RVAL]: '{}'", first);
5064  switch(first)
5065  {
5066  case '"':
5067  {
5068  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5069  ScannedScalar sc = _scan_scalar_dquot();
5070  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5071  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5072  addrem_flags(RNXT, RVAL);
5073  break;
5074  }
5075  case '[':
5076  {
5077  _c4dbgp("mapjson[RVAL]: start val seqjson");
5078  addrem_flags(RNXT, RVAL);
5079  m_evt_handler->begin_seq_val_flow();
5080  _set_indentation(m_evt_handler->m_parent->indref);
5081  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5082  _line_progressed(1);
5083  goto mapjson_finish;
5084  }
5085  case '{':
5086  {
5087  _c4dbgp("mapjson[RVAL]: start val mapjson");
5088  addrem_flags(RNXT, RVAL);
5089  m_evt_handler->begin_map_val_flow();
5090  _set_indentation(m_evt_handler->m_parent->indref);
5091  addrem_flags(RKEY, RNXT);
5092  _line_progressed(1);
5093  // keep going in this function
5094  break;
5095  }
5096  default:
5097  {
5098  ScannedScalar sc;
5099  if(_scan_scalar_map_json(&sc))
5100  {
5101  _c4dbgp("mapjson[RVAL]: plain scalar.");
5102  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5103  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5104  addrem_flags(RNXT, RVAL);
5105  }
5106  else
5107  {
5108  _c4err("parse error");
5109  }
5110  break;
5111  }
5112  }
5113  }
5114  else if(has_any(RKCL)) // read the key colon
5115  {
5116  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5117  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5118  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5119  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5120  const char first = rem.str[0];
5121  _c4dbgpf("mapjson[RKCL]: '{}'", first);
5122  if(first == ':')
5123  {
5124  _c4dbgp("mapjson[RKCL]: found the colon");
5125  addrem_flags(RVAL, RKCL);
5126  _line_progressed(1);
5127  }
5128  else
5129  {
5130  _c4err("parse error");
5131  }
5132  }
5133  else if(has_any(RNXT))
5134  {
5135  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5136  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5137  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5138  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5139  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
5140  if(rem.begins_with(','))
5141  {
5142  _c4dbgp("mapjson[RNXT]: expect next keyval");
5143  m_evt_handler->add_sibling();
5144  addrem_flags(RKEY, RNXT);
5145  _line_progressed(1);
5146  }
5147  else if(rem.begins_with('}'))
5148  {
5149  _c4dbgp("mapjson[RNXT]: end!");
5150  _end_map_flow();
5151  _line_progressed(1);
5152  goto mapjson_finish;
5153  }
5154  else
5155  {
5156  _c4err("parse error"); // LCOV_EXCL_LINE
5157  }
5158  }
5159 
5160  mapjson_again:
5161  _c4dbgt("mapjson: go again", 0);
5162  if(_finished_line())
5163  {
5164  if(C4_LIKELY(!_finished_file()))
5165  {
5166  _line_ended();
5167  _scan_line();
5168  _c4dbgnextline();
5169  }
5170  else
5171  {
5172  _c4err("missing terminating }");
5173  }
5174  }
5175  goto mapjson_start;
5176 
5177  mapjson_finish:
5178  _c4dbgp("mapjson: finish");
5179 }
5180 
5181 
5182 //-----------------------------------------------------------------------------
5183 
5184 template<class EventHandler>
5185 void ParseEngine<EventHandler>::_handle_seq_imap()
5186 {
5187 seqimap_start:
5188  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5189 
5190  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5191  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5192  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
5193  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5194  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5195 
5196  _handle_flow_skip_whitespace();
5197  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5198  if(!rem.len)
5199  goto seqimap_again;
5200 
5201  if(has_any(RVAL))
5202  {
5203  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5204  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5205  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5206  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5207  const char first = rem.str[0];
5208  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5209  ScannedScalar sc;
5210  if(first == '\'')
5211  {
5212  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5213  sc = _scan_scalar_squot();
5214  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5215  _handle_annotations_before_blck_val_scalar();
5216  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5217  _end_map_flow();
5218  goto seqimap_finish;
5219  }
5220  else if(first == '"')
5221  {
5222  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5223  sc = _scan_scalar_dquot();
5224  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5225  _handle_annotations_before_blck_val_scalar();
5226  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5227  _end_map_flow();
5228  goto seqimap_finish;
5229  }
5230  // block scalars (ie | and >) cannot appear in flow containers
5231  else if(_scan_scalar_plain_map_flow(&sc))
5232  {
5233  _c4dbgp("seqimap[RVAL]: it's a scalar.");
5234  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5235  _handle_annotations_before_blck_val_scalar();
5236  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5237  _end_map_flow();
5238  goto seqimap_finish;
5239  }
5240  else if(first == '[')
5241  {
5242  _c4dbgp("seqimap[RVAL]: start child seqflow");
5243  addrem_flags(RNXT, RVAL);
5244  _handle_annotations_before_blck_val_scalar();
5245  m_evt_handler->begin_seq_val_flow();
5246  addrem_flags(RVAL, RNXT|RSEQIMAP);
5247  _set_indentation(m_evt_handler->m_parent->indref);
5248  _line_progressed(1);
5249  goto seqimap_finish;
5250  }
5251  else if(first == '{')
5252  {
5253  _c4dbgp("seqimap[RVAL]: start child mapflow");
5254  addrem_flags(RNXT, RVAL);
5255  _handle_annotations_before_blck_val_scalar();
5256  m_evt_handler->begin_map_val_flow();
5257  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5258  _set_indentation(m_evt_handler->m_parent->indref);
5259  _line_progressed(1);
5260  goto seqimap_finish;
5261  }
5262  else if(first == ',' || first == ']')
5263  {
5264  _c4dbgp("seqimap[RVAL]: finish without val.");
5265  _handle_annotations_before_blck_val_scalar();
5266  m_evt_handler->set_val_scalar_plain_empty();
5267  _end_map_flow();
5268  goto seqimap_finish;
5269  }
5270  else if(first == '*')
5271  {
5272  csubstr ref = _scan_ref_seq();
5273  _c4dbgpf("seqimap[RVAL]: ref! {}", _prs(ref));
5274  _handle_valref(ref);
5275  addrem_flags(RNXT, RVAL);
5276  }
5277  else if(first == '&')
5278  {
5279  csubstr anchor = _scan_anchor();
5280  _c4dbgpf("seqimap[RVAL]: anchor! {}", _prs(anchor));
5281  _add_annotation(&m_pending_anchors, anchor);
5282  }
5283  else if(first == '!')
5284  {
5285  csubstr tag = _scan_tag();
5286  _c4dbgpf("seqimap[RVAL]: tag! {}", _prs(tag));
5287  _add_annotation(&m_pending_tags, tag);
5288  }
5289  else
5290  {
5291  _c4err("parse error"); // LCOV_EXCL_LINE
5292  }
5293  }
5294  else if(has_any(RNXT))
5295  {
5296  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5297  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5298  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5299  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5300  const char first = rem.str[0];
5301  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
5302  if(first == ',' || first == ']')
5303  {
5304  // we may get here because a map or a seq started and we
5305  // return later
5306  _c4dbgp("seqimap: done");
5307  _end_map_flow();
5308  goto seqimap_finish;
5309  }
5310  else
5311  {
5312  _c4err("parse error"); // LCOV_EXCL_LINE
5313  }
5314  }
5315  else if(has_any(QMRK))
5316  {
5317  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5318  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5319  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5320  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5321  const char first = rem.str[0];
5322  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5323  ScannedScalar sc;
5324  if(first == '\'')
5325  {
5326  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5327  sc = _scan_scalar_squot();
5328  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5329  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5330  addrem_flags(RKCL, QMRK);
5331  goto seqimap_again;
5332  }
5333  else if(first == '"')
5334  {
5335  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5336  sc = _scan_scalar_dquot();
5337  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5338  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5339  addrem_flags(RKCL, QMRK);
5340  goto seqimap_again;
5341  }
5342  // block scalars (ie | and >) cannot appear in flow containers
5343  else if(_scan_scalar_plain_map_flow(&sc))
5344  {
5345  _c4dbgp("seqimap[QMRK]: it's a scalar.");
5346  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5347  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5348  addrem_flags(RKCL, QMRK);
5349  goto seqimap_again;
5350  }
5351  else if(first == '[')
5352  {
5353  _c4dbgp("seqimap[QMRK]: start child seqflow");
5354  addrem_flags(RKCL, QMRK);
5355  m_evt_handler->begin_seq_key_flow();
5356  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5357  _set_indentation(m_evt_handler->m_parent->indref);
5358  _line_progressed(1);
5359  goto seqimap_finish;
5360  }
5361  else if(first == '{')
5362  {
5363  _c4dbgp("seqimap[QMRK]: start child mapflow");
5364  addrem_flags(RKCL, QMRK);
5365  m_evt_handler->begin_map_key_flow();
5366  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5367  _set_indentation(m_evt_handler->m_parent->indref);
5368  _line_progressed(1);
5369  goto seqimap_finish;
5370  }
5371  else if(first == ',' || first == ']')
5372  {
5373  _c4dbgp("seqimap[QMRK]: finish without key.");
5374  m_evt_handler->set_key_scalar_plain_empty();
5375  m_evt_handler->set_val_scalar_plain_empty();
5376  _end_map_flow();
5377  goto seqimap_finish;
5378  }
5379  else if(first == '&')
5380  {
5381  csubstr anchor = _scan_anchor();
5382  _c4dbgp("seqimap[QMRK]: anchor!");
5383  m_evt_handler->set_key_anchor(anchor);
5384  }
5385  else if(first == '*')
5386  {
5387  csubstr ref = _scan_ref_seq();
5388  _c4dbgp("seqimap[QMRK]: ref!");
5389  _handle_keyref(ref);
5390  addrem_flags(RKCL, QMRK);
5391  }
5392  else
5393  {
5394  _c4err("parse error"); // LCOV_EXCL_LINE
5395  }
5396  }
5397  else if(has_any(RKCL))
5398  {
5399  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5400  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5401  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5402  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5403  const char first = rem.str[0];
5404  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5405  if(first == ':')
5406  {
5407  _c4dbgp("seqimap[RKCL]: found ':'");
5408  addrem_flags(RVAL, RKCL);
5409  _line_progressed(1);
5410  goto seqimap_again;
5411  }
5412  else if(first == ',' || first == ']')
5413  {
5414  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5415  m_evt_handler->set_val_scalar_plain_empty();
5416  _end_map_flow();
5417  goto seqimap_finish;
5418  }
5419  else
5420  {
5421  _c4err("parse error"); // LCOV_EXCL_LINE
5422  }
5423  }
5424 
5425  seqimap_again:
5426  _c4dbgt("seqimap: go again", 0);
5427  if(_finished_line())
5428  {
5429  if(C4_LIKELY(!_finished_file()))
5430  {
5431  _line_ended();
5432  _scan_line();
5433  _c4dbgnextline();
5434  }
5435  else
5436  {
5437  _c4err("parse error");
5438  }
5439  }
5440  goto seqimap_start;
5441 
5442  seqimap_finish:
5443  _c4dbgp("seqimap: finish");
5444 }
5445 
5446 
5447 //-----------------------------------------------------------------------------
5448 
5449 template<class EventHandler>
5450 void ParseEngine<EventHandler>::_handle_seq_flow()
5451 {
5452 seqflow_start:
5453  _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5454 
5455  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5456  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5457  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5458  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5459  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5460  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5461 
5462  if(m_evt_handler->m_curr->at_line_beginning())
5463  {
5464  _handle_flow_line_beginning();
5465  }
5466 
5467  _handle_flow_skip_whitespace();
5468  if(!m_evt_handler->m_curr->line_contents.rem.len)
5469  goto seqflow_again;
5470 
5471  if(has_any(RVAL))
5472  {
5473  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5474  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5475  ScannedScalar sc;
5476  if(first == '\'')
5477  {
5478  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5479  sc = _scan_scalar_squot();
5480  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5481  _handle_annotations_before_blck_val_scalar();
5482  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5483  addrem_flags(RNXT, RVAL);
5484  _mark_seqflow_val_end();
5485  }
5486  else if(first == '"')
5487  {
5488  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5489  sc = _scan_scalar_dquot();
5490  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5491  _handle_annotations_before_blck_val_scalar();
5492  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5493  addrem_flags(RNXT, RVAL);
5494  _mark_seqflow_val_end();
5495  }
5496  // block scalars (ie | and >) cannot appear in flow containers
5497  else if(_scan_scalar_plain_seq_flow(&sc))
5498  {
5499  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5500  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5501  _handle_annotations_before_blck_val_scalar();
5502  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5503  addrem_flags(RNXT, RVAL);
5504  _mark_seqflow_val_end();
5505  }
5506  else if(first == '[')
5507  {
5508  _c4dbgp("seqflow[RVAL]: start child seqflow");
5509  addrem_flags(RNXT, RVAL);
5510  _handle_annotations_before_blck_val_scalar();
5511  m_evt_handler->begin_seq_val_flow();
5512  _set_indentation(m_evt_handler->m_parent->indref);
5513  addrem_flags(RVAL, RNXT);
5514  _line_progressed(1);
5515  }
5516  else if(first == '{')
5517  {
5518  _c4dbgp("seqflow[RVAL]: start child mapflow");
5519  addrem_flags(RNXT, RVAL);
5520  _handle_annotations_before_blck_val_scalar();
5521  m_evt_handler->begin_map_val_flow();
5522  _set_indentation(m_evt_handler->m_parent->indref);
5523  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5524  _line_progressed(1);
5525  goto seqflow_finish;
5526  }
5527  else if(first == ']') // this happens on cases such as [] or [.., ]
5528  {
5529  _c4dbgp("seqflow[RVAL]: end!");
5530  if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5531  {
5532  _c4dbgp("seqflow[RVAL]: add pending annotations");
5533  _handle_annotations_before_blck_val_scalar();
5534  m_evt_handler->set_val_scalar_plain_empty();
5535  }
5536  _line_progressed(1);
5537  _end_seq_flow();
5538  goto seqflow_finish;
5539  }
5540  else if(first == '*')
5541  {
5542  csubstr ref = _scan_ref_seq();
5543  _c4dbgpf("seqflow[RVAL]: ref! {}", _prs(ref));
5544  _handle_valref(ref);
5545  addrem_flags(RNXT, RVAL);
5546  }
5547  else if(first == '&')
5548  {
5549  csubstr anchor = _scan_anchor();
5550  _c4dbgpf("seqflow[RVAL]: anchor! {}", _prs(anchor));
5551  _add_annotation(&m_pending_anchors, anchor);
5552  }
5553  else if(first == '!')
5554  {
5555  csubstr tag = _scan_tag();
5556  _c4dbgpf("seqflow[RVAL]: tag! {}", _prs(tag));
5557  _add_annotation(&m_pending_tags, tag);
5558  }
5559  else if(first == ':')
5560  {
5561  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5562  addrem_flags(RNXT, RVAL);
5563  m_evt_handler->begin_map_val_flow();
5564  _set_indentation(m_evt_handler->m_parent->indref);
5565  _handle_annotations_before_blck_key_scalar();
5566  m_evt_handler->set_key_scalar_plain_empty();
5567  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5568  _line_progressed(1);
5569  goto seqflow_finish;
5570  }
5571  else if(first == '?')
5572  {
5573  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5574  addrem_flags(RNXT, RVAL);
5575  m_evt_handler->begin_map_val_flow();
5576  _set_indentation(m_evt_handler->m_parent->indref);
5577  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5578  _line_progressed(1);
5579  _maybe_skip_whitespace_tokens();
5580  goto seqflow_finish;
5581  }
5582  else if(first == ',')
5583  {
5584  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5585  {
5586  _c4dbgp("seqflow[RVAL]: add pending annotations");
5587  _handle_annotations_before_blck_val_scalar();
5588  m_evt_handler->set_val_scalar_plain_empty();
5589  addrem_flags(RNXT, RVAL);
5590  _mark_seqflow_val_end();
5591  }
5592  else
5593  {
5594  _c4err("parse error");
5595  }
5596  }
5597  else
5598  {
5599  _c4err("parse error");
5600  }
5601  }
5602  else // RNXT
5603  {
5604  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5605  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5606  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5607  if(first == ',')
5608  {
5609  _c4dbgp("seqflow[RNXT]: expect next val");
5610  addrem_flags(RVAL, RNXT);
5611  m_evt_handler->add_sibling();
5612  _line_progressed(1);
5613  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5614  {
5615  _c4err("parse error: invalid comment after comma");
5616  }
5617  _mark_seqflow_val_end();
5618  }
5619  else if(first == ']')
5620  {
5621  _c4dbgp("seqflow[RNXT]: end!");
5622  _line_progressed(1);
5623  _end_seq_flow();
5624  goto seqflow_finish;
5625  }
5626  else if(first == ':')
5627  {
5628  _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5629  if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5630  {
5631  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5632  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5633  _set_indentation(m_evt_handler->m_parent->indref);
5634  _line_progressed(1);
5635  addrem_flags(RSEQIMAP|RVAL, RNXT);
5636  goto seqflow_finish;
5637  }
5638  else
5639  {
5640  _c4err("parse error");
5641  }
5642  }
5643  else
5644  {
5645  _c4err("parse error");
5646  }
5647  }
5648 
5649  seqflow_again:
5650  _c4dbgt("seqflow: go again", 0);
5651  if(_finished_line())
5652  {
5653  if(C4_LIKELY(!_finished_file()))
5654  {
5655  _line_ended();
5656  _scan_line();
5657  _c4dbgnextline();
5658  }
5659  else
5660  {
5661  _c4err("missing terminating ]");
5662  }
5663  }
5664  goto seqflow_start;
5665 
5666  seqflow_finish:
5667  _c4dbgp("seqflow: finish");
5668 }
5669 
5670 
5671 //-----------------------------------------------------------------------------
5672 
5673 template<class EventHandler>
5674 void ParseEngine<EventHandler>::_handle_map_flow()
5675 {
5676 mapflow_start:
5677  _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5678 
5679  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5680  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5681  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5682  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5683 
5684  if(m_evt_handler->m_curr->at_line_beginning())
5685  {
5686  _handle_flow_line_beginning();
5687  }
5688 
5689  _handle_flow_skip_whitespace();
5690  if(!m_evt_handler->m_curr->line_contents.rem.len)
5691  goto mapflow_again;
5692 
5693  if(has_any(RKEY))
5694  {
5695  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5696  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5697  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5698  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5699  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5700  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5701  ScannedScalar sc;
5702  if(first == '\'')
5703  {
5704  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5705  sc = _scan_scalar_squot();
5706  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5707  _handle_annotations_before_blck_key_scalar();
5708  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5709  addrem_flags(RKCL, RKEY|QMRK);
5710  }
5711  else if(first == '"')
5712  {
5713  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5714  sc = _scan_scalar_dquot();
5715  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5716  _handle_annotations_before_blck_key_scalar();
5717  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5718  addrem_flags(RKCL, RKEY|QMRK);
5719  }
5720  // block scalars (ie | and >) cannot appear in flow containers
5721  else if(_scan_scalar_plain_map_flow(&sc))
5722  {
5723  _c4dbgp("mapflow[RKEY]: plain scalar");
5724  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5725  _handle_annotations_before_blck_key_scalar();
5726  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5727  addrem_flags(RKCL, RKEY|QMRK);
5728  }
5729  else if(first == '?')
5730  {
5731  _c4dbgp("mapflow[RKEY]: explicit key");
5732  _handle_annotations_before_blck_key_scalar();
5733  addrem_flags(QMRK, RKEY);
5734  _line_progressed(1);
5735  _maybe_skip_whitespace_tokens();
5736  }
5737  else if(first == ':')
5738  {
5739  _c4dbgp("mapflow[RKEY]: setting empty key");
5740  _handle_annotations_before_blck_key_scalar();
5741  m_evt_handler->set_key_scalar_plain_empty();
5742  addrem_flags(RVAL, RKEY|QMRK);
5743  _line_progressed(1);
5744  _maybe_skip_whitespace_tokens();
5745  }
5746  else if(first == ',')
5747  {
5748  _c4dbgp("mapflow[RKEY]: comma!");
5749  if(!_handle_annotations_before_unexpected_flow_token_rkey())
5750  _c4err("unexpected comma");
5751  addrem_flags(RNXT, RKEY|QMRK);
5752  // keep going in this function
5753  }
5754  else if(first == '}') // this happens on a trailing comma like ", }"
5755  {
5756  _c4dbgp("mapflow[RKEY]: end!");
5757  (void)_handle_annotations_before_unexpected_flow_token_rkey();
5758  _line_progressed(1);
5759  _end_map_flow();
5760  goto mapflow_finish;
5761  }
5762  else if(first == '&')
5763  {
5764  csubstr anchor = _scan_anchor();
5765  _c4dbgpf("mapflow[RKEY]: key anchor! {}", _prs(anchor));
5766  _add_annotation(&m_pending_anchors, anchor);
5767  }
5768  else if(first == '!')
5769  {
5770  csubstr tag = _scan_tag();
5771  _c4dbgpf("mapflow[RKEY]: tag! {}", _prs(tag));
5772  _add_annotation(&m_pending_tags, tag);
5773  }
5774  else if(first == '*')
5775  {
5776  csubstr ref = _scan_ref_map();
5777  _c4dbgpf("mapflow[RKEY]: key ref! {}", _prs(ref));
5778  _handle_keyref(ref);
5779  addrem_flags(RKCL, RKEY);
5780  }
5781  else if(first == '[')
5782  {
5783  // RYML's tree cannot store container keys, but that's
5784  // handled inside the tree event handler. Other handler
5785  // types may be able to handle it.
5786  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5787  _handle_annotations_before_blck_key_scalar();
5788  addrem_flags(RKCL, RKEY);
5789  m_evt_handler->begin_seq_key_flow();
5790  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5791  _set_indentation(m_evt_handler->m_parent->indref);
5792  _line_progressed(1);
5793  goto mapflow_finish;
5794  }
5795  else if(first == '{')
5796  {
5797  // RYML's tree cannot store container keys, but that's
5798  // handled inside the tree event handler. Other handler
5799  // types may be able to handle it.
5800  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5801  _handle_annotations_before_blck_key_scalar();
5802  addrem_flags(RKCL, RKEY);
5803  m_evt_handler->begin_map_key_flow();
5804  addrem_flags(RKEY, RVAL|RKCL);
5805  _set_indentation(m_evt_handler->m_parent->indref);
5806  _line_progressed(1);
5807  // keep going in this function
5808  }
5809  else
5810  {
5811  _c4err("parse error"); // LCOV_EXCL_LINE
5812  }
5813  }
5814  else if(has_any(RKCL)) // read the key colon
5815  {
5816  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5817  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5818  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5819  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5820  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5821  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5822  if(first == ':')
5823  {
5824  _c4dbgp("mapflow[RKCL]: found the colon");
5825  addrem_flags(RVAL, RKCL);
5826  _line_progressed(1);
5827  }
5828  else if(first == '}')
5829  {
5830  _c4dbgp("mapflow[RKCL]: end with missing val!");
5831  addrem_flags(RVAL, RKCL);
5832  m_evt_handler->set_val_scalar_plain_empty();
5833  _line_progressed(1);
5834  _end_map_flow();
5835  goto mapflow_finish;
5836  }
5837  else if(first == ',')
5838  {
5839  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5840  m_evt_handler->set_val_scalar_plain_empty();
5841  m_evt_handler->add_sibling();
5842  addrem_flags(RKEY, RKCL);
5843  _line_progressed(1);
5844  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5845  {
5846  _c4err("parse error: invalid comment after comma");
5847  }
5848  }
5849  else
5850  {
5851  _c4err("parse error");
5852  }
5853  }
5854  else if(has_any(RVAL))
5855  {
5856  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5857  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5858  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5859  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5860  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5861  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5862  ScannedScalar sc;
5863  if(first == '\'')
5864  {
5865  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5866  sc = _scan_scalar_squot();
5867  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5868  _handle_annotations_before_blck_val_scalar();
5869  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5870  addrem_flags(RNXT, RVAL);
5871  }
5872  else if(first == '"')
5873  {
5874  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5875  sc = _scan_scalar_dquot();
5876  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5877  _handle_annotations_before_blck_val_scalar();
5878  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5879  addrem_flags(RNXT, RVAL);
5880  }
5881  // block scalars (ie | and >) cannot appear in flow containers
5882  else if(_scan_scalar_plain_map_flow(&sc))
5883  {
5884  _c4dbgp("mapflow[RVAL]: plain scalar.");
5885  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5886  _handle_annotations_before_blck_val_scalar();
5887  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5888  addrem_flags(RNXT, RVAL);
5889  }
5890  else if(first == '[')
5891  {
5892  _c4dbgp("mapflow[RVAL]: start val seqflow");
5893  addrem_flags(RNXT, RVAL);
5894  _handle_annotations_before_blck_val_scalar();
5895  m_evt_handler->begin_seq_val_flow();
5896  _set_indentation(m_evt_handler->m_parent->indref);
5897  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5898  _line_progressed(1);
5899  goto mapflow_finish;
5900  }
5901  else if(first == '{')
5902  {
5903  _c4dbgp("mapflow[RVAL]: start val mapflow");
5904  addrem_flags(RNXT, RVAL);
5905  _handle_annotations_before_blck_val_scalar();
5906  m_evt_handler->begin_map_val_flow();
5907  _set_indentation(m_evt_handler->m_parent->indref);
5908  addrem_flags(RKEY, RNXT);
5909  _line_progressed(1);
5910  // keep going in this function
5911  }
5912  else if(first == '}')
5913  {
5914  _c4dbgp("mapflow[RVAL]: end!");
5915  _handle_annotations_before_blck_val_scalar();
5916  m_evt_handler->set_val_scalar_plain_empty();
5917  _line_progressed(1);
5918  _end_map_flow();
5919  goto mapflow_finish;
5920  }
5921  else if(first == ',')
5922  {
5923  _c4dbgp("mapflow[RVAL]: empty val!");
5924  _handle_annotations_before_blck_val_scalar();
5925  m_evt_handler->set_val_scalar_plain_empty();
5926  addrem_flags(RNXT, RVAL);
5927  // keep going in this function
5928  }
5929  else if(first == '*')
5930  {
5931  csubstr ref = _scan_ref_map();
5932  _c4dbgpf("mapflow[RVAL]: key ref! {}", _prs(ref));
5933  _handle_valref(ref);
5934  addrem_flags(RNXT, RVAL);
5935  }
5936  else if(first == '&')
5937  {
5938  csubstr anchor = _scan_anchor();
5939  _c4dbgpf("mapflow[RVAL]: key anchor! {}", _prs(anchor));
5940  _add_annotation(&m_pending_anchors, anchor);
5941  }
5942  else if(first == '!')
5943  {
5944  csubstr tag = _scan_tag();
5945  _c4dbgpf("mapflow[RVAL]: tag! {}", _prs(tag));
5946  _add_annotation(&m_pending_tags, tag);
5947  }
5948  else
5949  {
5950  _c4err("parse error");
5951  }
5952  }
5953  else if(has_any(RNXT))
5954  {
5955  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5956  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5957  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5958  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5959  _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5960  if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5961  {
5962  _c4dbgp("mapflow[RNXT]: expect next keyval");
5963  m_evt_handler->add_sibling();
5964  addrem_flags(RKEY, RNXT);
5965  _line_progressed(1);
5966  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5967  {
5968  _c4err("parse error: invalid comment after comma");
5969  }
5970  }
5971  else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5972  {
5973  _c4dbgp("mapflow[RNXT]: end!");
5974  _line_progressed(1);
5975  _end_map_flow();
5976  goto mapflow_finish;
5977  }
5978  else
5979  {
5980  _c4err("parse error");
5981  }
5982  }
5983  else if(has_any(QMRK))
5984  {
5985  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5986  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5987  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5988  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5989  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5990  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5991  ScannedScalar sc;
5992  if(first == '\'')
5993  {
5994  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5995  sc = _scan_scalar_squot();
5996  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5997  _handle_annotations_before_blck_key_scalar();
5998  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5999  addrem_flags(RKCL, QMRK);
6000  }
6001  else if(first == '"')
6002  {
6003  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6004  sc = _scan_scalar_dquot();
6005  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6006  _handle_annotations_before_blck_key_scalar();
6007  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6008  addrem_flags(RKCL, QMRK);
6009  }
6010  // block scalars (ie | and >) cannot appear in flow containers
6011  else if(_scan_scalar_plain_map_flow(&sc))
6012  {
6013  _c4dbgp("mapflow[QMRK]: plain scalar");
6014  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6015  _handle_annotations_before_blck_key_scalar();
6016  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6017  addrem_flags(RKCL, QMRK);
6018  }
6019  else if(first == ':')
6020  {
6021  _c4dbgp("mapflow[QMRK]: setting empty key");
6022  _handle_annotations_before_blck_key_scalar();
6023  m_evt_handler->set_key_scalar_plain_empty();
6024  addrem_flags(RVAL, QMRK);
6025  _line_progressed(1);
6026  _maybe_skip_whitespace_tokens();
6027  }
6028  else if(first == '}') // this happens on a trailing comma like ", }"
6029  {
6030  _c4dbgp("mapflow[QMRK]: end!");
6031  _handle_annotations_before_blck_key_scalar();
6032  m_evt_handler->set_key_scalar_plain_empty();
6033  m_evt_handler->set_val_scalar_plain_empty();
6034  _end_map_flow();
6035  _line_progressed(1);
6036  goto mapflow_finish;
6037  }
6038  else if(first == ',')
6039  {
6040  _c4dbgp("mapflow[QMRK]: empty key+val!");
6041  _handle_annotations_before_blck_key_scalar();
6042  m_evt_handler->set_key_scalar_plain_empty();
6043  m_evt_handler->set_val_scalar_plain_empty();
6044  addrem_flags(RNXT, QMRK);
6045  }
6046  else if(first == '&')
6047  {
6048  csubstr anchor = _scan_anchor();
6049  _c4dbgpf("mapflow[QMRK]: key anchor! {}", _prs(anchor));
6050  _add_annotation(&m_pending_anchors, anchor);
6051  }
6052  else if(first == '*')
6053  {
6054  csubstr ref = _scan_ref_map();
6055  _c4dbgpf("mapflow[QMRK]: key ref! {}", _prs(ref));
6056  _handle_keyref(ref);
6057  addrem_flags(RKCL, QMRK);
6058  }
6059  else if(first == '[')
6060  {
6061  // RYML's tree cannot store container keys, but that's
6062  // handled inside the tree sink. Other sink types may be
6063  // able to handle it.
6064  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
6065  addrem_flags(RKCL, QMRK);
6066  _handle_annotations_before_blck_key_scalar();
6067  m_evt_handler->begin_seq_key_flow();
6068  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6069  _set_indentation(m_evt_handler->m_parent->indref);
6070  _line_progressed(1);
6071  goto mapflow_finish;
6072  }
6073  else if(first == '{')
6074  {
6075  // RYML's tree cannot store container keys, but that's
6076  // handled inside the tree sink. Other sink types may be
6077  // able to handle it.
6078  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6079  addrem_flags(RKCL, QMRK);
6080  _handle_annotations_before_blck_key_scalar();
6081  m_evt_handler->begin_map_key_flow();
6082  _set_indentation(m_evt_handler->m_parent->indref);
6083  addrem_flags(RKEY, RKCL);
6084  _line_progressed(1);
6085  // keep going in this function
6086  }
6087  else if(first == '!')
6088  {
6089  csubstr tag = _scan_tag();
6090  _c4dbgpf("mapflow[QMRK]: tag! {}", _prs(tag));
6091  _add_annotation(&m_pending_tags, tag);
6092  }
6093  else
6094  {
6095  _c4err("parse error"); // LCOV_EXCL_LINE
6096  }
6097  }
6098 
6099  mapflow_again:
6100  _c4dbgt("mapflow: go again", 0);
6101  if(_finished_line())
6102  {
6103  if(C4_LIKELY(!_finished_file()))
6104  {
6105  _line_ended();
6106  _scan_line();
6107  _c4dbgnextline();
6108  }
6109  else
6110  {
6111  _c4err("missing terminating }");
6112  }
6113  }
6114  goto mapflow_start;
6115 
6116  mapflow_finish:
6117  _c4dbgp("mapflow: finish");
6118 }
6119 
6120 
6121 //-----------------------------------------------------------------------------
6122 
6123 template<class EventHandler>
6124 void ParseEngine<EventHandler>::_handle_seq_block()
6125 {
6126 seqblck_start:
6127  _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6128 
6129  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6130  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6131  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6132  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6133 
6134  _maybe_skip_comment_strict();
6135  if(!m_evt_handler->m_curr->line_contents.rem.len)
6136  goto seqblck_again;
6137 
6138  if(has_any(RVAL))
6139  {
6140  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6141  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6142  if(m_evt_handler->m_curr->at_line_beginning())
6143  {
6144  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6145  if(m_evt_handler->m_curr->indentation_ge_extra())
6146  {
6147  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6148  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6149  if(!m_evt_handler->m_curr->line_contents.rem.len)
6150  goto seqblck_again;
6151  }
6152  else if(m_evt_handler->m_curr->indentation_lt_extra())
6153  {
6154  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6155  if(m_evt_handler->m_curr->indentation_eq())
6156  {
6157  _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6158  _handle_annotations_before_blck_val_scalar();
6159  m_evt_handler->set_val_scalar_plain_empty();
6160  addrem_flags(RNXT, RVAL);
6161  goto seqblck_again;
6162  }
6163  else
6164  {
6165  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6166  _c4dbgp("seqblck[RVAL]: smaller indentation!");
6167  _handle_indentation_pop_from_block_seq();
6168  goto seqblck_finish;
6169  }
6170  }
6171  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6172  {
6173  _c4dbgp("seqblck[RVAL]: empty line!");
6174  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6175  goto seqblck_again;
6176  }
6177  }
6178  _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6179  const size_t startmark = _handle_block_skip_leading_whitespace();
6180  _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6181  if(startmark == npos)
6182  {
6183  _c4dbgp("seqblck[RVAL]: whitespace only");
6184  goto seqblck_again;
6185  }
6186  const size_t tabmark = _handle_block_get_whitespace_mark();
6187  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6188  _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6189  const size_t startline = m_evt_handler->m_curr->pos.line;
6190  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6191  ScannedScalar sc;
6192  if(first == '\'')
6193  {
6194  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6195  sc = _scan_scalar_squot();
6196  if(!_maybe_scan_following_colon())
6197  {
6198  _c4dbgp("seqblck[RVAL]: set as val");
6199  _handle_annotations_before_blck_val_scalar();
6200  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6201  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6202  addrem_flags(RNXT, RVAL);
6203  }
6204  else
6205  {
6206  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6207  _handle_block_check_leading_tabs(startmark);
6208  addrem_flags(RNXT, RVAL);
6209  _handle_annotations_before_start_mapblck(startline);
6210  _handle_colon();
6211  m_evt_handler->begin_map_val_block();
6212  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6213  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6214  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6215  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6216  _maybe_skip_whitespace_tokens();
6217  goto seqblck_finish;
6218  }
6219  }
6220  else if(first == '"')
6221  {
6222  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6223  sc = _scan_scalar_dquot();
6224  if(!_maybe_scan_following_colon())
6225  {
6226  _c4dbgp("seqblck[RVAL]: set as val");
6227  _handle_annotations_before_blck_val_scalar();
6228  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6229  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6230  addrem_flags(RNXT, RVAL);
6231  }
6232  else
6233  {
6234  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6235  addrem_flags(RNXT, RVAL);
6236  _handle_block_check_leading_tabs(startmark);
6237  _handle_annotations_before_start_mapblck(startline);
6238  _handle_colon();
6239  m_evt_handler->begin_map_val_block();
6240  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6241  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6242  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6243  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6244  _maybe_skip_whitespace_tokens();
6245  goto seqblck_finish;
6246  }
6247  }
6248  // block scalars can only appear as keys when in QMRK scope
6249  // (ie, after ? tokens), so no need to scan following colon in
6250  // here.
6251  else if(first == '|')
6252  {
6253  _c4dbgp("seqblck[RVAL]: block-literal scalar");
6254  ScannedBlock sb;
6255  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6256  _handle_annotations_before_blck_val_scalar();
6257  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6258  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6259  addrem_flags(RNXT, RVAL);
6260  }
6261  else if(first == '>')
6262  {
6263  _c4dbgp("seqblck[RVAL]: block-folded scalar");
6264  ScannedBlock sb;
6265  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6266  _handle_annotations_before_blck_val_scalar();
6267  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6268  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6269  addrem_flags(RNXT, RVAL);
6270  }
6271  else if(_scan_scalar_plain_seq_blck(&sc))
6272  {
6273  _c4dbgp("seqblck[RVAL]: plain scalar.");
6274  if(!_maybe_scan_following_colon())
6275  {
6276  _c4dbgp("seqblck[RVAL]: set as val");
6277  _handle_annotations_before_blck_val_scalar();
6278  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6279  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6280  addrem_flags(RNXT, RVAL);
6281  }
6282  else
6283  {
6284  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6285  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6286  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6287  _handle_block_check_leading_tabs(startmark, tabmark);
6288  addrem_flags(RNXT, RVAL);
6289  _handle_annotations_before_start_mapblck(startline);
6290  _handle_colon();
6291  m_evt_handler->begin_map_val_block();
6292  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6293  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6294  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6295  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6296  _maybe_skip_whitespace_tokens();
6297  goto seqblck_finish;
6298  }
6299  }
6300  else if(first == '[')
6301  {
6302  _c4dbgp("seqblck[RVAL]: start child seqflow");
6303  addrem_flags(RNXT, RVAL);
6304  _handle_annotations_before_blck_val_scalar();
6305  m_evt_handler->begin_seq_val_flow();
6306  addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6307  _line_progressed(1);
6308  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6309  goto seqblck_finish;
6310  }
6311  else if(first == '{')
6312  {
6313  _c4dbgp("seqblck[RVAL]: start child mapflow");
6314  addrem_flags(RNXT, RVAL);
6315  _handle_annotations_before_blck_val_scalar();
6316  m_evt_handler->begin_map_val_flow();
6317  addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6318  _line_progressed(1);
6319  _set_indentation(m_evt_handler->m_parent->indref + 1u);
6320  goto seqblck_finish;
6321  }
6322  else if(first == '-')
6323  {
6324  _c4dbgp("seqblck[RVAL]: dash");
6325  _handle_block_check_leading_tabs(startmark);
6326  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6327  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6328  _c4dbgp("seqblck[RVAL]: start child seqblck");
6329  _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6330  addrem_flags(RNXT, RVAL);
6331  _handle_annotations_before_blck_val_scalar();
6332  m_evt_handler->begin_seq_val_block();
6333  addrem_flags(RVAL, RNXT);
6334  _set_indentation(startindent);
6335  // keep going on inside this function
6336  _line_progressed(1);
6337  }
6338  else if(first == ':')
6339  {
6340  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6341  addrem_flags(RNXT, RVAL);
6342  _handle_annotations_before_start_mapblck(startline);
6343  _handle_colon();
6344  m_evt_handler->begin_map_val_block();
6345  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6346  m_evt_handler->set_key_scalar_plain_empty();
6347  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6348  _line_progressed(1);
6349  _maybe_skip_whitespace_tokens();
6350  goto seqblck_finish;
6351  }
6352  else if(first == '&')
6353  {
6354  const csubstr anchor = _scan_anchor();
6355  _c4dbgpf("seqblck[RVAL]: anchor! {}", _prs(anchor));
6356  // we need to buffer the anchors, as there may be two
6357  // consecutive anchors in here
6358  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6359  }
6360  else if(first == '*')
6361  {
6362  csubstr ref = _scan_ref_seq();
6363  _c4dbgpf("seqblck[RVAL]: ref! {}", _prs(ref));
6364  if(!_maybe_scan_following_colon())
6365  {
6366  _c4dbgp("seqblck[RVAL]: set ref as val!");
6367  _handle_valref(ref);
6368  addrem_flags(RNXT, RVAL);
6369  }
6370  else
6371  {
6372  _c4dbgp("seqblck[RVAL]: ref is key of map");
6373  addrem_flags(RNXT, RVAL);
6374  _handle_annotations_before_start_mapblck(startline);
6375  m_evt_handler->begin_map_val_block();
6376  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6377  _handle_keyref(ref);
6378  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6379  _set_indentation(startindent);
6380  _maybe_skip_whitespace_tokens();
6381  goto seqblck_finish;
6382  }
6383  }
6384  else if(first == '!')
6385  {
6386  csubstr tag = _scan_tag();
6387  _c4dbgpf("seqblck[RVAL]: val tag! {}", _prs(tag));
6388  // we need to buffer the tags, as there may be two
6389  // consecutive tags in here
6390  _add_annotation(&m_pending_tags, tag, startindent, startline);
6391  }
6392  else if(first == '?')
6393  {
6394  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6395  addrem_flags(RNXT, RVAL);
6396  m_evt_handler->begin_map_val_block();
6397  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6398  _set_indentation(startindent);
6399  _line_progressed(1);
6400  _maybe_skipchars(' ');
6401  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6402  {
6403  _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6404  addrem_flags(RKCL, QMRK);
6405  m_evt_handler->begin_seq_key_block();
6406  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6407  _save_indentation();
6408  _line_progressed(1);
6409  _maybe_skipchars(' ');
6410  }
6411  goto seqblck_finish;
6412  }
6413  else
6414  {
6415  _c4err("parse error");
6416  }
6417  }
6418  else // RNXT
6419  {
6420  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6421  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6422  //
6423  // handle indentation
6424  //
6425  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6426  if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6427  {
6428  _c4dbgp("seqblck[RNXT]: at line begin");
6429  if(m_evt_handler->m_curr->indentation_ge())
6430  {
6431  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6432  _line_progressed(m_evt_handler->m_curr->indref);
6433  if(!m_evt_handler->m_curr->line_contents.rem.len)
6434  goto seqblck_again;
6435  }
6436  else if(m_evt_handler->m_curr->indentation_lt())
6437  {
6438  _c4dbgp("seqblck[RNXT]: smaller indentation!");
6439  _handle_indentation_pop_from_block_seq();
6440  if(has_all(RSEQ|RBLCK))
6441  {
6442  _c4dbgp("seqblck[RNXT]: still seqblck!");
6443  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6444  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6445  if(!m_evt_handler->m_curr->line_contents.rem.len)
6446  goto seqblck_again; // LCOV_EXCL_LINE
6447  }
6448  else
6449  {
6450  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6451  goto seqblck_finish;
6452  }
6453  }
6454  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6455  {
6456  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6457  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6458  if(!m_evt_handler->m_curr->line_contents.rem.len)
6459  goto seqblck_again; // LCOV_EXCL_LINE
6460  }
6461  }
6462  else
6463  {
6464  _c4dbgp("seqblck[RNXT]: NOT at line begin");
6465  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6466  {
6467  _c4err("parse error");
6468  }
6469  else
6470  {
6471  _skipchars(" \t");
6472  if(!m_evt_handler->m_curr->line_contents.rem.len)
6473  {
6474  _c4dbgp("seqblck[RNXT]: again");
6475  goto seqblck_again; // LCOV_EXCL_LINE
6476  }
6477  }
6478  }
6479  //
6480  // now handle the tokens
6481  //
6482  _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6483  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6484  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6485  if(first == '-')
6486  {
6487  if(m_evt_handler->m_curr->indref > 0
6488  || m_evt_handler->m_curr->line_contents.indentation > 0
6489  || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6490  {
6491  if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6492  {
6493  _c4dbgp("seqblck[RNXT]: expect next val");
6494  addrem_flags(RVAL, RNXT);
6495  m_evt_handler->add_sibling();
6496  _line_progressed(1);
6497  }
6498  else
6499  {
6500  _c4err("parse error");
6501  }
6502  }
6503  else
6504  {
6505  _c4dbgp("seqblck[RNXT]: start doc");
6506  _start_doc_suddenly();
6507  _line_progressed(3);
6508  _maybe_skip_whitespace_tokens();
6509  goto seqblck_finish;
6510  }
6511  }
6512  else if(first == ':')
6513  {
6514  // This happens for example in `- [a: b]: c` (after
6515  // terminating the seq, ie, after `]`). All other cases
6516  // (ie colon after scalars) are caught elsewhere (ie, in
6517  // RVAL state).
6518  if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP)))
6519  {
6520  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6521  m_evt_handler->end_seq_block();
6522  goto seqblck_finish;
6523  }
6524  else
6525  {
6526  _c4err("parse error");
6527  }
6528  }
6529  else if(first == '.')
6530  {
6531  _c4dbgp("seqblck[RNXT]: maybe doc?");
6532  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6533  {
6534  _c4dbgp("seqblck[RNXT]: end doc");
6535  _end_doc_suddenly();
6536  _line_progressed(3);
6537  _maybe_skip_whitespace_tokens();
6538  _check_doc_end_tokens();
6539  goto seqblck_finish;
6540  }
6541  else
6542  {
6543  _c4err("parse error");
6544  }
6545  }
6546  else
6547  {
6548  // may be an indentless sequence nested in a map...
6549  #ifdef RYML_DBG
6550  _print_state_stack();
6551  #endif
6552  if(m_evt_handler->m_parent
6553  && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6554  && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6555  {
6556  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6557  _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6558  _handle_indentation_pop(m_evt_handler->m_parent);
6559  _RYML_ASSERT_PARSE_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6560  m_evt_handler->add_sibling();
6561  addrem_flags(RKEY, RNXT);
6562  goto seqblck_finish;
6563  }
6564  else if(first == '\t')
6565  {
6566  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6567  if(pos == npos)
6568  {
6569  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6570  goto seqblck_again;
6571  }
6572  }
6573  _c4err("parse error");
6574  }
6575  }
6576 
6577  seqblck_again:
6578  _c4dbgt("seqblck: go again", 0);
6579  if(_finished_line())
6580  {
6581  m_bom_len = 0;
6582  _line_ended();
6583  _scan_line();
6584  if(_finished_file())
6585  {
6586  _c4dbgp("seqblck: finish!");
6587  _end_seq_blck();
6588  goto seqblck_finish;
6589  }
6590  _c4dbgnextline();
6591  }
6592  goto seqblck_start;
6593 
6594  seqblck_finish:
6595  _c4dbgp("seqblck: finish");
6596 }
6597 
6598 
6599 //-----------------------------------------------------------------------------
6600 
6601 template<class EventHandler>
6602 void ParseEngine<EventHandler>::_handle_map_block()
6603 {
6604 mapblck_start:
6605  _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6606 
6607  // states: RKEY -> RVAL -> RNXT
6608  // states: QMRK -> RKCL -> RVAL -> RNXT
6609  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6610  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6611  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6612  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6613 
6614  _maybe_skip_comment();
6615  if(!m_evt_handler->m_curr->line_contents.rem.len)
6616  goto mapblck_again;
6617 
6618  if(has_any(RKEY))
6619  {
6620  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6621  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6622  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6623  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6624  //
6625  // handle indentation
6626  //
6627  if(m_evt_handler->m_curr->at_line_beginning())
6628  {
6629  if(m_evt_handler->m_curr->indentation_eq())
6630  {
6631  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6632  _line_progressed(m_evt_handler->m_curr->indref);
6633  if(!m_evt_handler->m_curr->line_contents.rem.len)
6634  goto mapblck_again;
6635  }
6636  else if(m_evt_handler->m_curr->indentation_lt())
6637  {
6638  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6639  _handle_indentation_pop_from_block_map();
6640  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6641  if(has_all(RMAP|RBLCK))
6642  {
6643  _c4dbgp("mapblck[RKEY]: still mapblck!");
6644  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6645  if(!m_evt_handler->m_curr->line_contents.rem.len)
6646  goto mapblck_again;
6647  }
6648  else
6649  {
6650  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6651  goto mapblck_finish;
6652  }
6653  }
6654  else
6655  {
6656  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6657  _c4err("invalid indentation");
6658  }
6659  }
6660  //
6661  // now handle the tokens
6662  //
6663  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6664  const size_t startline = m_evt_handler->m_curr->pos.line;
6665  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6666  _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6667  ScannedScalar sc;
6668  if(first == '\'')
6669  {
6670  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6671  sc = _scan_scalar_squot();
6672  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6673  _handle_annotations_before_blck_key_scalar();
6674  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6675  addrem_flags(RVAL, RKEY);
6676  if(!_maybe_scan_following_colon())
6677  _c4err("could not find ':' colon after key");
6678  _handle_colon();
6679  _maybe_skip_whitespace_tokens();
6680  }
6681  else if(first == '"')
6682  {
6683  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6684  sc = _scan_scalar_dquot();
6685  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6686  _handle_annotations_before_blck_key_scalar();
6687  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6688  addrem_flags(RVAL, RKEY);
6689  if(!_maybe_scan_following_colon())
6690  _c4err("could not find ':' colon after key");
6691  _handle_colon();
6692  _maybe_skip_whitespace_tokens();
6693  }
6694  // block scalars (| and >) cannot be used as keys unless they
6695  // appear in an explicit QMRK scope (ie, after the ? token).
6696  else if(C4_UNLIKELY(first == '|'))
6697  {
6698  _c4err("block map: literal keys must be enclosed in '?'");
6699  }
6700  else if(C4_UNLIKELY(first == '>'))
6701  {
6702  _c4err("block map: folded keys must be enclosed in '?'");
6703  }
6704  else if(_scan_scalar_plain_map_blck(&sc))
6705  {
6706  _c4dbgp("mapblck[RKEY]: plain scalar");
6707  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6708  _handle_annotations_before_blck_key_scalar();
6709  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6710  addrem_flags(RVAL, RKEY);
6711  if(!_maybe_scan_following_colon())
6712  _c4err("could not find ':' colon after key");
6713  _handle_colon();
6714  _maybe_skip_whitespace_tokens();
6715  }
6716  else if(first == '?')
6717  {
6718  _c4dbgp("mapblck[RKEY]: key token!");
6719  addrem_flags(QMRK, RKEY);
6720  _line_progressed(1);
6721  _maybe_skipchars(' ');
6722  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6723  {
6724  _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6725  addrem_flags(RKCL, QMRK);
6726  m_evt_handler->begin_seq_key_block();
6727  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6728  _save_indentation();
6729  _line_progressed(1);
6730  _maybe_skipchars(' ');
6731  goto mapblck_finish;
6732  }
6733  goto mapblck_again;
6734  }
6735  else if(first == ':')
6736  {
6737  _c4dbgp("mapblck[RKEY]: setting empty key");
6738  _handle_annotations_before_blck_key_scalar();
6739  m_evt_handler->set_key_scalar_plain_empty();
6740  addrem_flags(RVAL, RKEY);
6741  _line_progressed(1);
6742  _handle_colon();
6743  _maybe_skip_whitespace_tokens();
6744  }
6745  else if(first == '*')
6746  {
6747  csubstr ref = _scan_ref_map();
6748  _c4dbgpf("mapblck[RKEY]: key ref! {}", _prs(ref));
6749  _handle_keyref(ref);
6750  addrem_flags(RVAL, RKEY);
6751  if(!_maybe_scan_following_colon())
6752  _c4err("could not find ':' colon after key");
6753  _handle_colon();
6754  _maybe_skip_whitespace_tokens();
6755  }
6756  else if(first == '&')
6757  {
6758  csubstr anchor = _scan_anchor();
6759  _c4dbgpf("mapblck[RKEY]: key anchor! {}", _prs(anchor));
6760  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6761  }
6762  else if(first == '!')
6763  {
6764  csubstr tag = _scan_tag();
6765  _c4dbgpf("mapblck[RKEY]: key tag! {}", _prs(tag));
6766  _add_annotation(&m_pending_tags, tag, startindent, startline);
6767  }
6768  else if(first == '[')
6769  {
6770  // RYML's tree cannot store container keys, but that's
6771  // handled inside the tree handler. Other handlers may be
6772  // able to handle it.
6773  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6774  _handle_annotations_before_blck_key_scalar();
6775  m_evt_handler->begin_seq_key_flow();
6776  addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6777  _line_progressed(1);
6778  _set_indentation(startindent);
6779  goto mapblck_finish;
6780  }
6781  else if(first == '{')
6782  {
6783  // RYML's tree cannot store container keys, but that's
6784  // handled inside the tree handler. Other handlers may be
6785  // able to handle it.
6786  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6787  _handle_annotations_before_blck_key_scalar();
6788  m_evt_handler->begin_map_key_flow();
6789  addrem_flags(RFLOW|RKEY, RBLCK);
6790  _line_progressed(1);
6791  _set_indentation(startindent);
6792  goto mapblck_finish;
6793  }
6794  else if(first == '-')
6795  {
6796  _c4dbgp("mapblck[RKEY]: maybe doc?");
6797  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6798  {
6799  _c4dbgp("mapblck[RKEY]: end+start doc");
6800  _start_doc_suddenly();
6801  _line_progressed(3);
6802  _maybe_skip_whitespace_tokens();
6803  goto mapblck_finish;
6804  }
6805  else
6806  {
6807  _c4err("parse error");
6808  }
6809  }
6810  else if(first == '.')
6811  {
6812  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6813  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6814  {
6815  _c4dbgp("mapblck[RKEY]: end doc");
6816  _end_doc_suddenly();
6817  _line_progressed(3);
6818  _maybe_skip_whitespace_tokens();
6819  _check_doc_end_tokens();
6820  goto mapblck_finish;
6821  }
6822  else
6823  {
6824  _c4err("parse error"); // LCOV_EXCL_LINE
6825  }
6826  }
6827  else
6828  {
6829  _c4err("parse error");
6830  }
6831  }
6832  else if(has_any(RVAL))
6833  {
6834  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6835  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6836  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6837  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6838  //
6839  // handle indentation
6840  //
6841  if(m_evt_handler->m_curr->at_line_beginning())
6842  {
6843  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6844  m_evt_handler->m_curr->more_indented = false;
6845  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6846  if(m_evt_handler->m_curr->indentation_eq_extra())
6847  {
6848  _c4dbgp("mapblck[RVAL]: skip indentation!");
6849  _line_progressed(m_evt_handler->m_curr->indref + 1);
6850  if(!m_evt_handler->m_curr->line_contents.rem.len)
6851  goto mapblck_again;
6852  }
6853  else if(m_evt_handler->m_curr->indentation_gt_extra())
6854  {
6855  _c4dbgp("mapblck[RVAL]: more indented!");
6856  m_evt_handler->m_curr->more_indented = true;
6857  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6858  if(!m_evt_handler->m_curr->line_contents.rem.len)
6859  goto mapblck_again; // LCOV_EXCL_LINE
6860  }
6861  else if(m_evt_handler->m_curr->indentation_lt_extra())
6862  {
6863  if(m_evt_handler->m_curr->indentation_eq())
6864  {
6865  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6866  // watchout for indentless seqs
6867  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6868  {
6869  _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6870  _handle_annotations_before_blck_val_scalar();
6871  m_evt_handler->set_val_scalar_plain_empty();
6872  addrem_flags(RNXT, RVAL);
6873  goto mapblck_again;
6874  }
6875  }
6876  else
6877  {
6878  _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6879  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6880  _handle_indentation_pop_from_block_map();
6881  if(has_all(RMAP|RBLCK))
6882  {
6883  _c4dbgp("mapblck[RVAL]: still mapblck!");
6884  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6885  if(has_any(RNXT))
6886  {
6887  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6888  m_evt_handler->add_sibling();
6889  addrem_flags(RKEY, RNXT);
6890  }
6891  goto mapblck_again;
6892  }
6893  else
6894  {
6895  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6896  goto mapblck_finish;
6897  }
6898  }
6899  }
6900  }
6901  const size_t startcol = _handle_block_skip_leading_whitespace();
6902  if(startcol == npos)
6903  {
6904  _c4dbgp("mapblck[RVAL]: whitespace only");
6905  goto mapblck_again; // LCOV_EXCL_LINE
6906  }
6907  const size_t tabmark = _handle_block_get_whitespace_mark();
6908  //
6909  // now handle the tokens
6910  //
6911  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6912  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6913  const size_t startline = m_evt_handler->m_curr->pos.line;
6914  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6915  _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6916  ScannedScalar sc;
6917  if(first == '\'')
6918  {
6919  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6920  sc = _scan_scalar_squot();
6921  if(!_maybe_scan_following_colon())
6922  {
6923  _c4dbgp("mapblck[RVAL]: set as val");
6924  _handle_annotations_before_blck_val_scalar();
6925  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6926  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6927  addrem_flags(RNXT, RVAL);
6928  }
6929  else
6930  {
6931  _c4assert(m_evt_handler->m_curr->indref != npos);
6932  _c4assert(startindent > m_evt_handler->m_curr->indref);
6933  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6934  _handle_block_check_leading_tabs(startcol);
6935  _handle_annotations_before_start_mapblck(startline);
6936  addrem_flags(RNXT, RVAL);
6937  _handle_colon();
6938  m_evt_handler->begin_map_val_block();
6939  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6940  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6941  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6942  _maybe_skip_whitespace_tokens();
6943  // keep the child state on RVAL
6944  addrem_flags(RVAL, RNXT);
6945  }
6946  }
6947  else if(first == '"')
6948  {
6949  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6950  sc = _scan_scalar_dquot();
6951  if(!_maybe_scan_following_colon())
6952  {
6953  _c4dbgp("mapblck[RVAL]: set as val");
6954  _handle_annotations_before_blck_val_scalar();
6955  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6956  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6957  addrem_flags(RNXT, RVAL);
6958  }
6959  else
6960  {
6961  _c4assert(m_evt_handler->m_curr->indref != npos);
6962  _c4assert(startindent > m_evt_handler->m_curr->indref);
6963  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6964  _handle_block_check_leading_tabs(startcol);
6965  _handle_annotations_before_start_mapblck(startline);
6966  addrem_flags(RNXT, RVAL);
6967  _handle_colon();
6968  m_evt_handler->begin_map_val_block();
6969  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6970  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6971  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6972  _maybe_skip_whitespace_tokens();
6973  // keep the child state on RVAL
6974  addrem_flags(RVAL, RNXT);
6975  }
6976  }
6977  // block scalars can only appear as keys when in QMRK scope
6978  // (ie, after ? tokens), so no need to scan following colon in here.
6979  else if(first == '|')
6980  {
6981  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6982  ScannedBlock sb;
6983  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6984  _handle_annotations_before_blck_val_scalar();
6985  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6986  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6987  addrem_flags(RNXT, RVAL);
6988  }
6989  else if(first == '>')
6990  {
6991  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6992  ScannedBlock sb;
6993  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6994  _handle_annotations_before_blck_val_scalar();
6995  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6996  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6997  addrem_flags(RNXT, RVAL);
6998  }
6999  else if(_scan_scalar_plain_map_blck(&sc))
7000  {
7001  _c4dbgp("mapblck[RVAL]: plain scalar.");
7002  if(!_maybe_scan_following_colon())
7003  {
7004  _c4dbgp("mapblck[RVAL]: set as val");
7005  _handle_annotations_before_blck_val_scalar();
7006  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7007  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7008  addrem_flags(RNXT, RVAL);
7009  }
7010  else
7011  {
7012  _c4assert(m_evt_handler->m_curr->indref != npos);
7013  _c4assert(startindent > m_evt_handler->m_curr->indref);
7014  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7015  _handle_block_check_leading_tabs(startcol, tabmark);
7016  addrem_flags(RNXT, RVAL);
7017  _handle_annotations_before_start_mapblck(startline);
7018  _handle_colon();
7019  m_evt_handler->begin_map_val_block();
7020  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7021  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7022  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7023  _maybe_skip_whitespace_tokens();
7024  // keep the child state on RVAL
7025  addrem_flags(RVAL, RNXT);
7026  }
7027  }
7028  else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7029  {
7030  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7031  _c4err("parse error");
7032  _c4dbgp("mapblck[RVAL]: start val seqblck");
7033  _handle_block_check_leading_tabs(startcol);
7034  addrem_flags(RNXT, RVAL);
7035  _handle_annotations_before_blck_val_scalar();
7036  m_evt_handler->begin_seq_val_block();
7037  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7038  _set_indentation(startindent);
7039  _line_progressed(1);
7040  _maybe_skip_whitespace_tokens();
7041  goto mapblck_finish;
7042  }
7043  else if(first == '[')
7044  {
7045  _c4dbgp("mapblck[RVAL]: start val seqflow");
7046  addrem_flags(RNXT, RVAL);
7047  _handle_annotations_before_blck_val_scalar();
7048  m_evt_handler->begin_seq_val_flow();
7049  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7050  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7051  _line_progressed(1);
7052  goto mapblck_finish;
7053  }
7054  else if(first == '{')
7055  {
7056  _c4dbgp("mapblck[RVAL]: start val mapflow");
7057  addrem_flags(RNXT, RVAL);
7058  _handle_annotations_before_blck_val_scalar();
7059  m_evt_handler->begin_map_val_flow();
7060  addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7061  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7062  _set_indentation(m_evt_handler->m_parent->indref + 1u);
7063  _line_progressed(1);
7064  goto mapblck_finish;
7065  }
7066  else if(first == '*')
7067  {
7068  csubstr ref = _scan_ref_map();
7069  _c4dbgpf("mapblck[RVAL]: ref! {}", _prs(ref));
7070  if(_maybe_scan_following_colon())
7071  {
7072  _c4dbgp("mapblck[RVAL]: start child map, block");
7073  addrem_flags(RNXT, RVAL);
7074  _handle_annotations_before_blck_val_scalar();
7075  m_evt_handler->begin_map_val_block();
7076  _handle_keyref(ref);
7077  _set_indentation(startindent);
7078  // keep going in RVAL
7079  addrem_flags(RVAL, RNXT);
7080  }
7081  else
7082  {
7083  _c4dbgp("mapblck[RVAL]: was val ref");
7084  _handle_valref(ref);
7085  addrem_flags(RNXT, RVAL);
7086  }
7087  _maybe_skip_whitespace_tokens();
7088  }
7089  else if(first == '&')
7090  {
7091  csubstr anchor = _scan_anchor();
7092  _c4dbgpf("mapblck[RVAL]: anchor! {}", _prs(anchor));
7093  // we need to buffer the anchors, as there may be two
7094  // consecutive anchors in here
7095  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7096  }
7097  else if(first == '!')
7098  {
7099  csubstr tag = _scan_tag();
7100  _c4dbgpf("mapblck[RVAL]: tag! {}", _prs(tag));
7101  // we need to buffer the tags, as there may be two
7102  // consecutive tags in here
7103  _add_annotation(&m_pending_tags, tag, startindent, startline);
7104  }
7105  else if(first == '?')
7106  {
7107  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7108  _c4err("parse error");
7109  _c4dbgp("mapblck[RVAL]: start val mapblck");
7110  addrem_flags(RNXT, RVAL);
7111  _handle_annotations_before_blck_val_scalar();
7112  m_evt_handler->begin_map_val_block();
7113  addrem_flags(QMRK, RNXT);
7114  _set_indentation(startindent);
7115  _line_progressed(1);
7116  _maybe_skipchars(' ');
7117  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7118  {
7119  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7120  addrem_flags(RKCL, QMRK);
7121  m_evt_handler->begin_seq_key_block();
7122  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7123  _save_indentation();
7124  _line_progressed(1);
7125  _maybe_skipchars(' ');
7126  goto mapblck_finish;
7127  }
7128  goto mapblck_again;
7129  }
7130  else if(first == ':')
7131  {
7132  _c4dbgp("mapblck[RVAL]: start val mapblck");
7133  addrem_flags(RNXT, RVAL);
7134  _handle_annotations_before_start_mapblck(startline);
7135  _handle_colon();
7136  m_evt_handler->begin_map_val_block();
7137  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7138  m_evt_handler->set_key_scalar_plain_empty();
7139  // keep the child state on RVAL
7140  addrem_flags(RVAL, RNXT);
7141  _line_progressed(1);
7142  _maybe_skip_whitespace_tokens();
7143  goto mapblck_again;
7144  }
7145  else
7146  {
7147  _c4err("parse error"); // LCOV_EXCL_LINE
7148  }
7149  }
7150  else if(has_any(RNXT))
7151  {
7152  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7153  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7154  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7155  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7156  //
7157  // handle indentation
7158  //
7159  if(m_evt_handler->m_curr->at_line_beginning())
7160  {
7161  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7162  if(m_evt_handler->m_curr->indentation_eq())
7163  {
7164  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7165  _line_progressed(m_evt_handler->m_curr->indref);
7166  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7167  m_evt_handler->add_sibling();
7168  addrem_flags(RKEY, RNXT);
7169  goto mapblck_again;
7170  }
7171  else if(m_evt_handler->m_curr->indentation_lt())
7172  {
7173  _c4dbgp("mapblck[RNXT]: smaller indentation!");
7174  _handle_indentation_pop_from_block_map();
7175  if(has_all(RMAP|RBLCK))
7176  {
7177  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7178  if(!has_any(RKCL))
7179  {
7180  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7181  m_evt_handler->add_sibling();
7182  addrem_flags(RKEY, RNXT);
7183  }
7184  goto mapblck_again;
7185  }
7186  else
7187  {
7188  goto mapblck_finish;
7189  }
7190  }
7191  }
7192  else
7193  {
7194  _c4dbgp("mapblck[RNXT]: NOT at line begin");
7195  if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7196  {
7197  _c4err("parse error");
7198  }
7199  else
7200  {
7201  _skipchars(" \t");
7202  if(!m_evt_handler->m_curr->line_contents.rem.len)
7203  {
7204  _c4dbgp("seqblck[RNXT]: again");
7205  goto mapblck_again; // LCOV_EXCL_LINE
7206  }
7207  }
7208  }
7209  //
7210  // handle tokens
7211  //
7212  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7213  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7214  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7215  if(first == ' ')
7216  {
7217  _c4dbgp("mapblck[RNXT]: skip spaces");
7218  _maybe_skip_whitespace_tokens();
7219  }
7220  else
7221  {
7222  _c4err("parse error");
7223  }
7224  }
7225  else if(has_any(QMRK))
7226  {
7227  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7228  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7229  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7230  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7231  if(_handle_map_block_qmrk())
7232  goto mapblck_again;
7233  else
7234  goto mapblck_finish;
7235  }
7236  else if(has_any(RKCL)) // read the key colon (after QMRK)
7237  {
7238  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7239  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7240  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7241  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7242  if(_handle_map_block_rkcl())
7243  goto mapblck_again;
7244  else
7245  goto mapblck_finish;
7246  }
7247 
7248  mapblck_again:
7249  _c4dbgt("mapblck: again", 0);
7250  if(_finished_line())
7251  {
7252  _line_ended();
7253  _scan_line();
7254  if(_finished_file())
7255  {
7256  _c4dbgp("mapblck: file finished!");
7257  _end_map_blck();
7258  goto mapblck_finish;
7259  }
7260  _c4dbgnextline();
7261  }
7262  goto mapblck_start;
7263 
7264  mapblck_finish:
7265  _c4dbgp("mapblck: finish");
7266 }
7267 
7268 
7269 //-----------------------------------------------------------------------------
7270 
7271 // return true if we should remain in map_block
7272 template<class EventHandler>
7273 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7274 {
7275  //
7276  // handle indentation
7277  //
7278  if(m_evt_handler->m_curr->at_line_beginning())
7279  {
7280  _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7281  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7282  if(m_evt_handler->m_curr->indentation_eq_extra())
7283  {
7284  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7285  _line_progressed(m_evt_handler->m_curr->indref + 1);
7286  if(!m_evt_handler->m_curr->line_contents.rem.len)
7287  return true; // go again
7288  }
7289  // indentation can be larger in QMRK state
7290  else if(m_evt_handler->m_curr->indentation_gt_extra())
7291  {
7292  _c4dbgp("mapblck[QMRK]: larger indentation !");
7293  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7294  if(!m_evt_handler->m_curr->line_contents.rem.len)
7295  return true; // go again
7296  }
7297  else
7298  {
7299  _c4dbgp("mapblck[QMRK]: smaller indentation!");
7300  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7301  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7302  if(m_evt_handler->m_curr->indentation_eq()
7303  // defend against docs or indentless seqs
7304  && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7305  {
7306  _c4dbgp("mapblck[QMRK]: QMRK finished!");
7307  _handle_annotations_before_blck_key_scalar();
7308  m_evt_handler->set_key_scalar_plain_empty();
7309  addrem_flags(RKCL, QMRK);
7310  return true; // go again
7311  }
7312  else if(m_evt_handler->m_curr->indentation_lt())
7313  {
7314  _c4dbgp("mapblck[QMRK]: indentation pop!");
7315  _handle_indentation_pop_from_block_map();
7316  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7317  if(has_all(RMAP|RBLCK))
7318  {
7319  _c4dbgp("mapblck[QMRK]: still mapblck!");
7320  return true; // go again
7321  }
7322  else
7323  {
7324  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7325  return false; // finish mapblck
7326  }
7327  }
7328  }
7329  }
7330  //
7331  // now handle the tokens
7332  //
7333  _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7334  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7335  const size_t startline = m_evt_handler->m_curr->pos.line;
7336  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7337  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7338  ScannedScalar sc;
7339  if(first == '\'')
7340  {
7341  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7342  sc = _scan_scalar_squot();
7343  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7344  addrem_flags(RKCL, QMRK);
7345  if(!_maybe_scan_following_colon())
7346  {
7347  _c4dbgp("mapblck[QMRK]: set as key");
7348  _handle_annotations_before_blck_key_scalar();
7349  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7350  }
7351  else
7352  {
7353  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7354  _handle_annotations_before_start_mapblck_as_key();
7355  m_evt_handler->begin_map_key_block();
7356  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7357  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7358  _maybe_skip_whitespace_tokens();
7359  _set_indentation(startindent);
7360  // keep the child state on RVAL
7361  addrem_flags(RVAL, RKCL);
7362  }
7363  }
7364  else if(first == '"')
7365  {
7366  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7367  sc = _scan_scalar_dquot();
7368  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7369  addrem_flags(RKCL, QMRK);
7370  if(!_maybe_scan_following_colon())
7371  {
7372  _c4dbgp("mapblck[QMRK]: set as key");
7373  _handle_annotations_before_blck_key_scalar();
7374  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7375  }
7376  else
7377  {
7378  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7379  _handle_annotations_before_start_mapblck_as_key();
7380  m_evt_handler->begin_map_key_block();
7381  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7382  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7383  _maybe_skip_whitespace_tokens();
7384  _set_indentation(startindent);
7385  // keep the child state on RVAL
7386  addrem_flags(RVAL, RKCL);
7387  }
7388  }
7389  else if(first == '|')
7390  {
7391  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7392  ScannedBlock sb;
7393  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7394  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7395  _handle_annotations_before_blck_key_scalar();
7396  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7397  addrem_flags(RKCL, QMRK);
7398  }
7399  else if(first == '>')
7400  {
7401  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7402  ScannedBlock sb;
7403  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7404  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7405  _handle_annotations_before_blck_key_scalar();
7406  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7407  addrem_flags(RKCL, QMRK);
7408  }
7409  else if(_scan_scalar_plain_map_blck(&sc))
7410  {
7411  _c4dbgp("mapblck[QMRK]: plain scalar");
7412  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7413  addrem_flags(RKCL, QMRK);
7414  if(!_maybe_scan_following_colon())
7415  {
7416  _c4dbgp("mapblck[QMRK]: set as key");
7417  _handle_annotations_before_blck_key_scalar();
7418  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7419  }
7420  else
7421  {
7422  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7423  _handle_annotations_before_start_mapblck_as_key();
7424  m_evt_handler->begin_map_key_block();
7425  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7426  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7427  _maybe_skip_whitespace_tokens();
7428  _set_indentation(startindent);
7429  // keep the child state on RVAL
7430  addrem_flags(RVAL, RKCL);
7431  }
7432  }
7433  else if(first == ':')
7434  {
7435  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7436  addrem_flags(RKCL, QMRK);
7437  _handle_annotations_before_start_mapblck_as_key();
7438  m_evt_handler->begin_map_key_block();
7439  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7440  m_evt_handler->set_key_scalar_plain_empty();
7441  _line_progressed(1);
7442  _maybe_skip_whitespace_tokens();
7443  _set_indentation(startindent);
7444  // keep the child state on RVAL
7445  addrem_flags(RVAL, RKCL);
7446  }
7447  else if(first == '*')
7448  {
7449  csubstr ref = _scan_ref_map();
7450  _c4dbgpf("mapblck[QMRK]: key ref! {}", _prs(ref));
7451  addrem_flags(RKCL, QMRK);
7452  if(!_maybe_scan_following_colon())
7453  {
7454  _c4dbgp("mapblck[QMRK]: set ref as key");
7455  _handle_keyref(ref);
7456  }
7457  else
7458  {
7459  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7460  _handle_annotations_before_start_mapblck_as_key();
7461  m_evt_handler->begin_map_key_block();
7462  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7463  _handle_keyref(ref);
7464  _set_indentation(startindent);
7465  // keep the child state on RVAL
7466  addrem_flags(RVAL, RKCL|QMRK);
7467  }
7468  _maybe_skip_whitespace_tokens();
7469  }
7470  else if(first == '&')
7471  {
7472  csubstr anchor = _scan_anchor();
7473  _c4dbgpf("mapblck[QMRK]: key anchor! {}", _prs(anchor));
7474  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7475  }
7476  else if(first == '!')
7477  {
7478  csubstr tag = _scan_tag();
7479  _c4dbgpf("mapblck[QMRK]: key tag! {}", _prs(tag));
7480  _add_annotation(&m_pending_tags, tag, startindent, startline);
7481  }
7482  else if(first == '-')
7483  {
7484  _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7485  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7486  {
7487  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7488  addrem_flags(RKCL, QMRK);
7489  _handle_annotations_before_blck_key_scalar();
7490  m_evt_handler->begin_seq_key_block();
7491  addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7492  _set_indentation(startindent);
7493  _line_progressed(1);
7494  }
7495  else
7496  {
7497  _c4dbgp("mapblck[QMRK]: end+start doc");
7498  _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7499  _start_doc_suddenly();
7500  _line_progressed(3);
7501  }
7502  _maybe_skip_whitespace_tokens();
7503  return false; // finish mapblck
7504  }
7505  else if(first == '[')
7506  {
7507  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7508  addrem_flags(RKCL, QMRK);
7509  _handle_annotations_before_blck_key_scalar();
7510  m_evt_handler->begin_seq_key_flow();
7511  addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7512  _set_indentation(m_evt_handler->m_parent->indref + 1);
7513  _line_progressed(1);
7514  return false; // finish mapblck
7515  }
7516  else if(first == '{')
7517  {
7518  _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7519  addrem_flags(RKCL, QMRK);
7520  _handle_annotations_before_blck_key_scalar();
7521  m_evt_handler->begin_map_key_flow();
7522  addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7523  _set_indentation(m_evt_handler->m_parent->indref + 1);
7524  _line_progressed(1);
7525  return false; // finish mapblck
7526  }
7527  else if(first == '?')
7528  {
7529  _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7530  _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7531  _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7532  addrem_flags(RKCL, QMRK);
7533  _handle_annotations_before_blck_key_scalar();
7534  m_evt_handler->begin_map_key_block();
7535  addrem_flags(QMRK, RKCL);
7536  _set_indentation(startindent);
7537  // indentation_lt() should be handled elsewhere
7538  _line_progressed(1);
7539  _maybe_skipchars(' ');
7540  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7541  {
7542  _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7543  addrem_flags(RKCL, QMRK);
7544  m_evt_handler->begin_seq_key_block();
7545  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7546  _save_indentation();
7547  _line_progressed(1);
7548  _maybe_skipchars(' ');
7549  return false;
7550  }
7551  }
7552  else
7553  {
7554  _c4err("parse error");
7555  }
7556  return true; // continue in mapblck
7557 }
7558 
7559 
7560 //-----------------------------------------------------------------------------
7561 
7562 // return true if we should remain in map_block
7563 template<class EventHandler>
7564 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7565 {
7566  //
7567  // handle indentation
7568  //
7569  if(m_evt_handler->m_curr->at_line_beginning())
7570  {
7571  if(m_evt_handler->m_curr->indentation_eq())
7572  {
7573  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7574  _line_progressed(m_evt_handler->m_curr->indref);
7575  if(!m_evt_handler->m_curr->line_contents.rem.len)
7576  return true; // continue in mapblck
7577  }
7578  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7579  {
7580  _c4err("invalid indentation");
7581  }
7582  }
7583  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7584  _c4dbgpf("mapblck[RKCL]: '{}'", first);
7585  if(first == ':')
7586  {
7587  _c4dbgp("mapblck[RKCL]: found the colon");
7588  _line_progressed(1);
7589  _maybe_skipchars(' ');
7590  #if defined(__GNUC__) && (__GNUC__ >= 12) \
7591  && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7592  C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7593  #endif
7594  // sequence is valid after the RKCL ':'
7595  if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7596  {
7597  addrem_flags(RVAL, RKCL);
7598  return true; // continue in mapblck
7599  }
7600  else
7601  {
7602  _c4dbgp("mapblck[RKCL]: start val seqblck");
7603  addrem_flags(RNXT, RKCL);
7604  m_evt_handler->begin_seq_val_block();
7605  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7606  _save_indentation();
7607  _line_progressed(1);
7608  _maybe_skipchars(' ');
7609  return false; // finish mapblck
7610  }
7611  }
7612  else if(first == '?')
7613  {
7614  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
7615  m_evt_handler->set_val_scalar_plain_empty();
7616  m_evt_handler->add_sibling();
7617  addrem_flags(QMRK, RKCL);
7618  _line_progressed(1);
7619  _maybe_skipchars(' ');
7620  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7621  {
7622  _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7623  addrem_flags(RKCL, QMRK);
7624  m_evt_handler->begin_seq_key_block();
7625  addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7626  _save_indentation();
7627  _line_progressed(1);
7628  _maybe_skipchars(' ');
7629  return false;
7630  }
7631  }
7632  else if(first == '-')
7633  {
7634  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7635  {
7636  _c4dbgp("mapblck[RKCL]: end+start doc");
7637  _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7638  _start_doc_suddenly();
7639  _line_progressed(3);
7640  _maybe_skip_whitespace_tokens();
7641  return false; // finish mapblck
7642  }
7643  else
7644  {
7645  _c4err("parse error"); // LCOV_EXCL_LINE
7646  }
7647  }
7648  else if(first == '.')
7649  {
7650  _c4dbgp("mapblck[RKCL]: maybe end doc?");
7651  csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7652  if(rs == ".." || rs.begins_with(".. "))
7653  {
7654  _c4dbgp("mapblck[RKCL]: end+start doc");
7655  _end_doc_suddenly();
7656  _line_progressed(3);
7657  _maybe_skip_whitespace_tokens();
7658  _check_doc_end_tokens();
7659  return false; // finish mapblck
7660  }
7661  else
7662  {
7663  _c4err("parse error"); // LCOV_EXCL_LINE
7664  }
7665  }
7666  else/* if(m_was_inside_qmrk) */
7667  {
7668  _c4dbgp("mapblck[RKCL]: missing :");
7669  if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7670  _c4err("parse error"); // LCOV_EXCL_LINE
7671  m_evt_handler->set_val_scalar_plain_empty();
7672  m_evt_handler->add_sibling();
7673  addrem_flags(RKEY, RKCL);
7674  }
7675  return true;
7676 }
7677 
7678 
7679 //-----------------------------------------------------------------------------
7680 
// Handles the top-level unknown state (RTOP set; none of
// RNXT|RSEQ|RMAP) when parsing JSON. After skipping a comment and any
// leading spaces/tabs, the remaining line decides what happens:
//  - '[' starts a flow seq as the val
//  - '{' starts a flow map as the val
//  - a byte order mark is consumed by _handle_bom()
//  - a double-quoted or plain scalar is set as the val; a colon
//    following the scalar is a parse error
// Anything else is a parse error. Returns without doing anything when
// there is nothing left in the line.
7681 template<class EventHandler>
7682 void ParseEngine<EventHandler>::_handle_unk_json()
7683 {
7684  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7685 
7686  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7687  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7688 
7689  _maybe_skip_comment();
7690  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7691  if(!rem.len)
7692  return;
7693 
      // skip leading spaces and tabs; when the result is npos, all of
      // rem is skipped
7694  size_t pos = rem.first_not_of(" \t");
7695  if(pos)
7696  {
7697  pos = pos != npos ? pos : rem.len;
7698  _c4dbgpf("skipping indentation of {}", pos);
7699  _line_progressed(pos);
7700  rem = m_evt_handler->m_curr->line_contents.rem;
7701  if(!rem.len)
7702  return;
7703  _c4dbgpf("rem is now {}", _prs(rem));
7704  }
7705 
7706  if(rem.begins_with('['))
7707  {
7708  _c4dbgp("it's a seq");
7709  _check_trailing_doc_token();
7710  _maybe_begin_doc();
7711  m_evt_handler->begin_seq_val_flow();
7712  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7713  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7714  m_doc_empty = false;
7715  _line_progressed(1);
7716  }
7717  else if(rem.begins_with('{'))
7718  {
7719  _c4dbgp("it's a map");
7720  _check_trailing_doc_token();
7721  _maybe_begin_doc();
7722  m_evt_handler->begin_map_val_flow();
7723  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7724  m_doc_empty = false;
7725  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7726  _line_progressed(1);
7727  }
7728  else if(_handle_bom())
7729  {
7730  _c4dbgp("byte order mark");
7731  }
7732  else
7733  {
      // not a container and not a BOM: it must be a scalar
7734  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7735  _maybe_skip_whitespace_tokens();
7736  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7737  if(!s.len)
7738  return;
7739  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7740  const char first = s.str[0];
7741  ScannedScalar sc;
7742  if(first == '"')
7743  {
7744  _c4dbgp("runk_json: scanning double-quoted scalar");
7745  _check_trailing_doc_token();
7746  _maybe_begin_doc();
7747  add_flags(RDOC);
7748  m_doc_empty = false;
7749  sc = _scan_scalar_dquot();
7750  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7751  if(!_maybe_scan_following_colon())
7752  {
7753  _c4dbgp("runk_json: set as val");
7754  _handle_annotations_before_blck_val_scalar();
7755  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7756  }
7757  else
7758  {
      // a colon after the scalar is not accepted here
7759  _c4err("parse error");
7760  }
7761  }
7762  else if(_scan_scalar_plain_unk(&sc))
7763  {
7764  _c4dbgp("runk_json: got a plain scalar");
7765  _check_trailing_doc_token();
7766  _maybe_begin_doc();
7767  add_flags(RDOC);
7768  m_doc_empty = false;
7769  if(!_maybe_scan_following_colon())
7770  {
7771  _c4dbgp("runk_json: set as val");
7772  _handle_annotations_before_blck_val_scalar();
7773  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7774  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7775  }
7776  else
7777  {
7778  _c4err("parse error"); // LCOV_EXCL_LINE
7779  }
7780  }
7781  else
7782  {
7783  _c4err("parse error"); // LCOV_EXCL_LINE
7784  }
7785  }
7786 }
7787 
7788 
7789 //-----------------------------------------------------------------------------
7790 
7791 template<class EventHandler>
7792 void ParseEngine<EventHandler>::_handle_unk()
7793 {
7794  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7795 
7796  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7797  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7798 
7799  _maybe_skipchars(' ');
7800  _maybe_skip_comment();
7801 
7802  if(!m_evt_handler->m_curr->line_contents.rem.len)
7803  return;
7804 
7805  _c4dbgpf("runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
7806 
7807  if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7808  {
7809  _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7810  _c4dbgp("runk: check BOM");
7811  if(_handle_bom())
7812  {
7813  m_bom_line = m_evt_handler->m_curr->pos.line;
7814  _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7815  return;
7816  }
7817  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7818  _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7819  if(first == '-')
7820  {
7821  _c4dbgp("runk: rtop: suspecting doc");
7822  if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7823  {
7824  _c4dbgp("runk: rtop: begin doc");
7825  _maybe_end_doc();
7826  _begin2_doc_expl();
7827  _set_indentation(0);
7828  addrem_flags(RDOC|RUNK, NDOC);
7829  _line_progressed(3u);
7830  _maybe_skip_whitespace_tokens();
7831  return;
7832  }
7833  }
7834  else if(first == '.')
7835  {
7836  _c4dbgp("runk: rtop: suspecting doc end");
7837  if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7838  {
7839  _c4dbgp("runk: rtop: end doc");
7840  if(has_any(RDOC))
7841  {
7842  _end2_doc_expl();
7843  }
7844  else
7845  {
7846  _c4dbgp("runk: rtop: ignore end doc");
7847  }
7848  addrem_flags(NDOC|RUNK, RDOC);
7849  _line_progressed(3u);
7850  _maybe_skip_whitespace_tokens();
7851  _check_doc_end_tokens();
7852  return;
7853  }
7854  }
7855  else if(first == '%')
7856  {
7857  _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
7858  if(C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC))))
7859  _c4err("need document footer before directives");
7860  _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7861  return;
7862  }
7863  }
7864 
7865  /* no else-if! */
7866 
7867  size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7868  size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7869  if(m_bom_len)
7870  {
7871  _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7872  if(m_evt_handler->m_curr->pos.line == m_bom_line)
7873  {
7874  _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7875  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7876  remindent -= m_bom_len;
7877  }
7878  else
7879  {
7880  m_bom_len = 0;
7881  }
7882  }
7883 
7884  size_t startcol = _handle_block_skip_leading_whitespace();
7885  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7886 
7887  if(first == '[')
7888  {
7889  _c4dbgp("runk: flow seq?");
7890  _handle_unk_begin_doc();
7891  if(C4_LIKELY( ! _annotations_require_key_container()))
7892  {
7893  _c4dbgp("runk: it's a seq, flow");
7894  _handle_annotations_before_blck_val_scalar();
7895  m_evt_handler->begin_seq_val_flow();
7896  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7897  _set_indentation(0);
7898  }
7899  else
7900  {
7901  _c4dbgp("runk: start new block map, set flow seq as key (!)");
7902  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7903  m_evt_handler->begin_map_val_block();
7904  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7905  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7906  m_evt_handler->begin_seq_key_flow();
7907  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7908  _set_indentation(0);
7909  }
7910  _line_progressed(1);
7911  }
7912  else if(first == '{')
7913  {
7914  _c4dbgp("runk: flow map?");
7915  _handle_unk_begin_doc();
7916  if(C4_LIKELY( ! _annotations_require_key_container()))
7917  {
7918  _c4dbgp("runk: it's a map, flow");
7919  _handle_annotations_before_blck_val_scalar();
7920  m_evt_handler->begin_map_val_flow();
7921  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7922  _set_indentation(0);
7923  }
7924  else
7925  {
7926  _c4dbgp("runk: start new block map, set flow map as key (!)");
7927  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7928  m_evt_handler->begin_map_val_block();
7929  addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7930  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7931  m_evt_handler->begin_map_key_flow();
7932  addrem_flags(RMAP|RFLOW, RBLCK);
7933  _set_indentation(0);
7934  }
7935  _line_progressed(1);
7936  }
7937  else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7938  {
7939  _c4dbgp("runk: it's a seq, block");
7940  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7941  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7942  _handle_unk_begin_doc();
7943  _handle_annotations_before_blck_val_scalar();
7944  m_evt_handler->begin_seq_val_block();
7945  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7946  _set_indentation(startindent);
7947  _line_progressed(1);
7948  _maybe_skipchars(' ');
7949  }
7950  else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7951  {
7952  _c4dbgp("runk: it's a map + this key is complex");
7953  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7954  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7955  _handle_block_check_leading_tabs(startcol);
7956  _handle_unk_begin_doc();
7957  _handle_annotations_before_blck_val_scalar();
7958  m_evt_handler->begin_map_val_block();
7959  addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7960  _set_indentation(startindent);
7961  _line_progressed(1);
7962  _maybe_skipchars(' ');
7963  if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7964  {
7965  _c4dbgp("runk: seqblck key starts after ?");
7966  addrem_flags(RKCL, QMRK);
7967  m_evt_handler->begin_seq_key_block();
7968  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7969  _save_indentation();
7970  _line_progressed(1);
7971  _maybe_skipchars(' ');
7972  }
7973  }
7974  else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7975  {
7976  if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7977  {
7978  _c4dbgp("runk: it's a map with an empty key");
7979  if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7980  startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7981  _handle_block_check_leading_tabs(startcol);
7982  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7983  _handle_unk_begin_doc();
7984  _handle_annotations_before_start_mapblck(startline);
7985  _handle_colon();
7986  m_evt_handler->begin_map_val_block();
7987  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7988  m_evt_handler->set_key_scalar_plain_empty();
7989  _set_indentation(startindent);
7990  }
7991  else
7992  {
7993  _c4err("block colon cannot occur on a new line unless ? is used");
7994  }
7995  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7996  _line_progressed(1);
7997  _maybe_skip_whitespace_tokens();
7998  }
7999  else if(first == '&')
8000  {
8001  csubstr anchor = _scan_anchor();
8002  _c4dbgpf("anchor! {}", _prs(anchor));
8003  const size_t line = m_evt_handler->m_curr->pos.line;
8004  _handle_unk_begin_doc();
8005  _add_annotation(&m_pending_anchors, anchor, remindent, line);
8006  _set_indentation(0);
8007  }
8008  else if(first == '*')
8009  {
8010  csubstr ref = _scan_ref_map();
8011  _c4dbgpf("runk: ref! {}", _prs(ref));
8012  _handle_unk_begin_doc();
8013  if(!_maybe_scan_following_colon())
8014  {
8015  _c4dbgp("runk: set val ref");
8016  _handle_valref(ref);
8017  }
8018  else
8019  {
8020  _c4dbgp("runk: start new block map, set ref as key");
8021  _handle_block_check_leading_tabs(startcol);
8022  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8023  _handle_annotations_before_start_mapblck(startline);
8024  m_evt_handler->begin_map_val_block();
8025  _handle_keyref(ref);
8026  _maybe_skip_whitespace_tokens();
8027  _set_indentation(0);
8028  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8029  }
8030  }
8031  else if(first == '!')
8032  {
8033  csubstr tag_orig;
8034  csubstr tag = _scan_tag(&tag_orig);
8035  _c4dbgpf("runk: val tag! {}", _prs(tag));
8036  // we need to buffer the tags, as there may be two
8037  // consecutive tags in here
8038  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8039  const size_t line = m_evt_handler->m_curr->pos.line;
8040  _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8041  }
8042  else
8043  {
8044  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8045  const size_t startscalar = _handle_block_get_whitespace_mark();
8046  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8047  auto beginmap = [&](size_t startindent_){
8048  if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8049  _c4err("multiline scalars cannot be used as implicit keys");
8050  _handle_block_check_leading_tabs(startcol, startscalar);
8051  _handle_annotations_before_start_mapblck(startline);
8052  _handle_colon();
8053  m_evt_handler->begin_map_val_block();
8054  _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8055  };
8056  auto after_beginmap = [&](size_t startindent_){
8057  _maybe_skip_whitespace_tokens();
8058  _set_indentation(startindent_);
8059  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8060  };
8061  if(first == '|')
8062  {
8063  _c4dbgp("runk: block-literal scalar");
8064  _handle_unk_begin_doc();
8065  ScannedBlock sb;
8066  _scan_block(&sb, startindent);
8067  _handle_annotations_before_blck_val_scalar();
8068  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8069  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8070  }
8071  else if(first == '>')
8072  {
8073  _c4dbgp("runk: block-folded scalar");
8074  _handle_unk_begin_doc();
8075  ScannedBlock sb;
8076  _scan_block(&sb, startindent);
8077  _handle_annotations_before_blck_val_scalar();
8078  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8079  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8080  }
8081  else if(first == '\'')
8082  {
8083  _c4dbgp("runk: single-quoted scalar");
8084  _handle_unk_begin_doc();
8085  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8086  size_t col = m_evt_handler->m_curr->pos.col;
8087  ScannedScalar sc = _scan_scalar_squot();
8088  if(!_maybe_scan_following_colon())
8089  {
8090  _c4dbgp("runk: set as val");
8091  _handle_annotations_before_blck_val_scalar();
8092  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8093  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8094  }
8095  else
8096  {
8097  _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8098  if(!firsttoken)
8099  startindent = _handle_unk_check_left_tokens(startindent, col);
8100  beginmap(startindent);
8101  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8102  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8103  after_beginmap(startindent);
8104  }
8105  }
8106  else if(first == '"')
8107  {
8108  _c4dbgp("runk: double-quoted scalar");
8109  _handle_unk_begin_doc();
8110  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8111  size_t col = m_evt_handler->m_curr->pos.col;
8112  ScannedScalar sc = _scan_scalar_dquot();
8113  if(!_maybe_scan_following_colon())
8114  {
8115  _c4dbgp("runk: set as val");
8116  _handle_annotations_before_blck_val_scalar();
8117  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8118  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8119  }
8120  else
8121  {
8122  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8123  if(!firsttoken)
8124  startindent = _handle_unk_check_left_tokens(startindent, col);
8125  beginmap(startindent);
8126  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8127  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8128  after_beginmap(startindent);
8129  }
8130  }
8131  else
8132  {
8133  bool firsttoken = m_evt_handler->m_curr->at_first_token();
8134  size_t col = m_evt_handler->m_curr->pos.col;
8135  ScannedScalar sc;
8136  if(_scan_scalar_plain_unk(&sc))
8137  {
8138  _c4dbgp("runk: plain scalar");
8139  _handle_unk_begin_doc();
8140  if(!_maybe_scan_following_colon())
8141  {
8142  _c4dbgp("runk: set as val");
8143  _handle_annotations_before_blck_val_scalar();
8144  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8145  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8146  }
8147  else
8148  {
8149  _c4dbgp("runk: start new block map, set plain scalar as key");
8150  if(!firsttoken)
8151  startindent = _handle_unk_check_left_tokens(startindent, col);
8152  beginmap(startindent);
8153  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8154  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8155  after_beginmap(startindent);
8156  }
8157  }
8158  else
8159  {
8160  _c4err("parse error"); // LCOV_EXCL_LINE
8161  }
8162  }
8163  }
8164 }
8165 
8166 template<class EventHandler>
8167 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8168 {
8169  _c4dbgp("runk: begin doc");
8170  _check_trailing_doc_token();
8171  _maybe_begin_doc();
8172  add_flags(RDOC);
8173  m_doc_empty = false;
8174 }
8175 
8176 template<class EventHandler>
8177 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8178 {
8179  _c4assert(col >= 1);
8180  col -= 1;
8181  _c4assert(col >= m_bom_len);
8182  csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8183  size_t pos = 0;
8184  _c4dbgpf("runk: check left tokens: s={}", _prs(s, /*escape*/true));
8185  if(skip_annotations)
8186  {
8187  _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8188  _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8189  }
8190  size_t firstns = s.first_not_of(' ', pos);
8191  if(firstns == npos)
8192  firstns = s.len;
8193  _c4dbgpf("runk: check left tokens:\n"
8194  " tokens={} skipped={}\n"
8195  " bomlen={} first={} col={}\n"
8196  " (bomlen+first)={} vs {}=col\n"
8197  " startindent={} lineindent={}"
8198  , _prs(s, /*escape*/true), _prs(s.sub(firstns), /*escape*/true)
8199  , m_bom_len, firstns, col
8200  , m_bom_len+firstns, col,
8201  realindent, m_evt_handler->m_curr->line_contents.indentation);
8202  if(m_bom_len + firstns != col)
8203  _c4err("parse error");
8204  if(!skip_annotations)
8205  realindent = firstns;
8206  _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8207  return realindent;
8208 }
8209 
8210 
8211 /** skip annotations which are pending on the same line */
8212 template<class EventHandler>
8213 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8214 {
8215  csubstr first, second;
8216  uint32_t total = _get_annotations_same_line(s, &first, &second);
8217  _c4dbgpf("runk: before skip: {}", _prs(s, true));
8218  size_t pos = s.first_not_of(" \t");
8219  if(pos == npos)
8220  pos = s.len;
8221  if(!total)
8222  {
8223  *indent = *first_non_token_pos = pos;
8224  return;
8225  }
8226  _c4assert(!s.sub(pos).begins_with_any(" \t"));
8227  _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos), true));
8228  _c4dbgpf("runk: first annotation: {}", first);
8229  _c4assert(first.len);
8230  _c4assert(first.is_sub(s));
8231  _c4assert(first.is_sub(s.sub(pos)));
8232  _c4assert(s.sub(pos).begins_with(first));
8233  *indent = pos;
8234  pos += first.len;
8235  _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos), true));
8236  if(total > 1)
8237  {
8238  _c4dbgpf("runk: second annotation: {}", second);
8239  _c4assert(total == 2);
8240  _c4assert(second.len);
8241  _c4assert(second.is_sub(s));
8242  _c4assert(second.is_sub(s.sub(pos)));
8243  csubstr spos = s.sub(pos);
8244  size_t more = spos.first_not_of(" \t");
8245  _c4assert(more != npos); // because the annotations are on the same line
8246  _c4dbgpf("runk: next nonspace: {}", pos + more);
8247  pos += more;
8248  _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos), true));
8249  _c4assert(s.sub(pos).begins_with(second));
8250  pos += second.len;
8251  _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos), true));
8252  }
8253  *first_non_token_pos = pos;
8254 }
8255 
8256 
8257 template<class EventHandler>
8258 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8259 {
8260  _c4assert(!m_evt_handler->m_curr->at_first_token());
8261  (void)token_soup;
8262  using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8263  EntryPtr first = nullptr;
8264  EntryPtr second = nullptr;
8265  uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8266  if(total)
8267  {
8268  _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8269  auto valid_if_same_line = [this](EntryPtr entry){
8270  _c4dbgpf("pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8271  return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8272  };
8273  // now select annotations only on the same line
8274  total = 0;
8275  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8276  total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8277  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8278  total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8279  _c4dbgpf("{} annotations on same line", total);
8280  _c4assert(total > 0); // because this function is only called
8281  // while not at the first token. That
8282  // means we must have same-line
8283  // annotations.
8284  auto get_first_on_same_line = [this](EntryPtr not_this_one){
8285  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8286  if(&m_pending_anchors.annotations[i] != not_this_one
8287  && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8288  return &m_pending_anchors.annotations[i];
8289  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8290  if(&m_pending_tags.annotations[i] != not_this_one
8291  && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8292  return &m_pending_tags.annotations[i];
8293  return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8294  };
8295  _c4assert(total >= 1);
8296  // assign to first
8297  first = get_first_on_same_line(nullptr);
8298  _c4assert(first);
8299  _c4dbgpf("first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8300  if(total > 1)
8301  {
8302  _c4assert(total == 2);
8303  // assign to second
8304  second = get_first_on_same_line(first);
8305  _c4assert(second);
8306  _c4dbgpf("second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
8307  }
8308  auto extract_string = [&](EntryPtr e){
8309  // tags can be null when the arena ran out of space
8310  if(!e->str.str || e->str.begins_with_any("!<"))
8311  {
8312  csubstr tag = e->orig;
8313  _c4assert(tag.str);
8314  _c4assert(tag.len);
8315  _c4assert(tag.is_sub(token_soup));
8316  _c4dbgpf("tag: {} -> {}", _maybe_null_str(e->str), tag);
8317  return tag;
8318  }
8319  csubstr anchor = e->str;
8320  _c4assert(anchor.len);
8321  _c4assert(anchor.str);
8322  _c4assert(anchor.is_sub(token_soup));
8323  _c4assert(!anchor.begins_with('&'));
8324  _c4assert(anchor.str - token_soup.str > 0);
8325  // add back the anchor's &
8326  --anchor.str;
8327  ++anchor.len;
8328  _c4assert(anchor.begins_with('&'));
8329  _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8330  return anchor;
8331  };
8332  *first_ = first ? extract_string(first) : nullptr;
8333  *second_ = second ? extract_string(second) : nullptr;
8334  if(total > 1 && (first_->str > second_->str))
8335  {
8336  csubstr tmp = *first_;
8337  *first_ = *second_;
8338  *second_ = tmp;
8339  _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8340  }
8341  }
8342  return total;
8343 }
8344 
8345 
8346 //-----------------------------------------------------------------------------
8347 
/** Handle the current line while the USTY flag is set and no container
 * style (RBLCK/RFLOW) has been selected yet (this is asserted).
 *
 * After skipping a comment and any leading spaces/tabs, the first
 * remaining character selects the action. There are three variants,
 * chosen by the current flags:
 *
 *  - RSEQ set ("destination is a sequence"): only `[` or a block `-`
 *    token is accepted; the handler's _push() is called and parsing
 *    continues in RFLOW|RVAL or RBLCK|RVAL. Anything else is an error.
 *  - RMAP set ("destination is a map"): `{`, a block `?`, a block `:`,
 *    or a ref/scalar followed by a colon is accepted; the handler's
 *    _push() is called. A ref/scalar not followed by a colon, a block
 *    scalar, or a seq token is an error.
 *  - neither ("destination is unknown"): the container is started with
 *    the handler's begin_*() calls; a scalar not followed by a colon is
 *    set as the val and _end_stream() is called.
 *
 * In the RMAP and unknown variants, anchors (`&`) and tags (`!`) are
 * buffered with _add_annotation() and the function returns to be called
 * again for the rest of the line.
 *
 * NOTE(review): USTY presumably stands for "unknown style" - confirm
 * against the flag's declaration. */
8348 template<class EventHandler>
8349 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8350 {
      // NOTE(review): the format string below has a single {} placeholder
      // but two arguments are passed - confirm which one is meant.
8351  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8352 
8353  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8354 
      // compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined
8355  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8356  if(has_any(RNXT))
8357  {
8358  _c4dbgp("usty[RNXT]: finishing!");
8359  _end_stream();
8360  }
8361  #endif
8362 
8363  _maybe_skip_comment();
8364  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8365  if(!rem.len)
8366  return;
8367 
      // skip leading spaces/tabs; nothing to do if the line is exhausted
8368  size_t pos = rem.first_not_of(" \t");
8369  if(pos)
8370  {
8371  pos = pos != npos ? pos : rem.len;
8372  _c4dbgpf("skipping indentation of {}", pos);
8373  _line_progressed(pos);
8374  rem = m_evt_handler->m_curr->line_contents.rem;
8375  if(!rem.len)
8376  return;
8377  _c4dbgpf("rem is now {}", _prs(rem));
8378  }
8379 
8380  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8381  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
      // the first character of the remainder drives all the dispatch below
8382  char first = rem.str[0];
8383  if(has_any(RSEQ)) // destination is a sequence
8384  {
8385  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8386  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
8387  if(first == '[')
8388  {
8389  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
8390  add_flags(RNXT);
8391  m_evt_handler->_push();
8392  addrem_flags(RFLOW|RVAL, RNXT|USTY);
8393  _set_indentation(startindent);
8394  _line_progressed(1);
8395  _maybe_skip_whitespace_tokens();
8396  }
8397  else if(first == '-' && _is_blck_token(rem))
8398  {
8399  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
8400  add_flags(RNXT);
8401  m_evt_handler->_push();
8402  addrem_flags(RBLCK|RVAL, RNXT|USTY);
8403  _set_indentation(startindent);
8404  _line_progressed(1);
8405  _maybe_skip_whitespace_tokens();
8406  }
8407  else
8408  {
8409  _c4err("can only parse a seq into an existing seq");
8410  }
8411  }
8412  else if(has_any(RMAP)) // destination is a map
8413  {
8414  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8415  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8416  if(first == '{')
8417  {
8418  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8419  add_flags(RNXT);
8420  _handle_annotations_before_blck_val_scalar();
8421  m_evt_handler->_push();
8422  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8423  _set_indentation(startindent);
8424  _line_progressed(1);
8425  _maybe_skip_whitespace_tokens();
8426  }
8427  else if(first == '?' && _is_blck_token(rem))
8428  {
8429  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8430  add_flags(RNXT);
8431  _handle_annotations_before_blck_val_scalar();
8432  m_evt_handler->_push();
8433  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8434  _save_indentation();
8435  _line_progressed(1);
8436  _maybe_skip_whitespace_tokens();
8437  }
8438  else if(first == ':' && _is_blck_token(rem))
8439  {
8440  _c4dbgp("usty[RMAP]: it's a map with an empty key");
8441  add_flags(RNXT);
8442  _handle_annotations_before_blck_val_scalar();
8443  m_evt_handler->_push();
8444  m_evt_handler->set_key_scalar_plain_empty();
8445  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8446  _save_indentation();
8447  _line_progressed(1);
8448  _maybe_skip_whitespace_tokens();
8449  }
      // same test as first == '&', since first is rem.str[0]
8450  else if(rem.begins_with('&'))
8451  {
8452  csubstr anchor = _scan_anchor();
8453  _c4dbgpf("usty[RMAP]: anchor! {}", _prs(anchor));
      // rem here is still the local copy taken before _scan_anchor()
8454  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8455  const size_t line = m_evt_handler->m_curr->pos.line;
8456  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8457  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8458  }
8459  else if(first == '*')
8460  {
8461  csubstr ref = _scan_ref_map();
8462  _c4dbgpf("usty[RMAP]: ref! {}", _prs(ref));
8463  if(!_maybe_scan_following_colon())
8464  {
8465  _c4err("cannot read a VAL to a map");
8466  }
8467  else
8468  {
8469  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8470  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8471  add_flags(RNXT);
8472  _handle_annotations_before_start_mapblck(startline);
8473  m_evt_handler->_push();
8474  _handle_keyref(ref);
8475  _maybe_skip_whitespace_tokens();
8476  _set_indentation(startindent);
8477  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8478  }
8479  }
8480  else if(first == '!')
8481  {
8482  csubstr tag = _scan_tag();
8483  _c4dbgpf("usty[RMAP]: val tag! {}", _prs(tag));
8484  // we need to buffer the tags, as there may be two
8485  // consecutive tags in here
8486  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8487  const size_t line = m_evt_handler->m_curr->pos.line;
8488  _add_annotation(&m_pending_tags, tag, indentation, line);
8489  }
8490  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8491  {
8492  _c4err("cannot parse a seq into an existing map");
8493  }
8494  else
8495  {
      // scalar candidates: in a map destination they are only valid
      // as keys, ie when followed by a colon
8496  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
      // NOTE(review): re-reads the member already saved into startindent
      // above; looks redundant - confirm.
8497  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8498  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8499  ScannedScalar sc;
8500  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8501  if(first == '\'')
8502  {
8503  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8504  sc = _scan_scalar_squot();
8505  if(!_maybe_scan_following_colon())
8506  {
8507  _c4err("cannot read a VAL to a map");
8508  }
8509  else
8510  {
8511  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8512  add_flags(RNXT);
8513  _handle_annotations_before_start_mapblck(startline);
8514  m_evt_handler->_push();
8515  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8516  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8517  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8518  _set_indentation(startindent);
8519  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8520  _maybe_skip_whitespace_tokens();
8521  }
8522  }
8523  else if(first == '"')
8524  {
8525  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8526  sc = _scan_scalar_dquot();
8527  if(!_maybe_scan_following_colon())
8528  {
8529  _c4err("cannot read a VAL to a map");
8530  }
8531  else
8532  {
8533  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8534  add_flags(RNXT);
8535  _handle_annotations_before_start_mapblck(startline);
8536  m_evt_handler->_push();
8537  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8538  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8539  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8540  _set_indentation(startindent);
8541  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8542  _maybe_skip_whitespace_tokens();
8543  }
8544  }
8545  else if(first == '|')
8546  {
8547  _c4err("block literal keys must be enclosed in '?'");
8548  }
8549  else if(first == '>')
8550  {
      // NOTE(review): the message says "block literal", but this is the
      // '>' branch, which the debug text further below in this function
      // calls "block-folded".
8551  _c4err("block literal keys must be enclosed in '?'");
8552  }
8553  else if(_scan_scalar_plain_unk(&sc))
8554  {
8555  _c4dbgp("usty[RMAP]: got a plain scalar");
8556  if(!_maybe_scan_following_colon())
8557  {
8558  _c4err("cannot read a VAL to a map");
8559  }
8560  else
8561  {
8562  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8563  add_flags(RNXT);
8564  _handle_annotations_before_start_mapblck(startline);
8565  m_evt_handler->_push();
8566  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8567  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8568  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8569  _set_indentation(startindent);
8570  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8571  _maybe_skip_whitespace_tokens();
8572  }
8573  }
8574  else
8575  {
8576  _c4err("parse error"); // LCOV_EXCL_LINE
8577  }
8578  }
8579  }
8580  else // destination is unknown
8581  {
      // here containers are started with the handler's begin_*() calls
      // instead of _push()
8582  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8583  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8584  if(first == '[')
8585  {
8586  _c4dbgp("usty[UNK]: it's a flow seq");
8587  add_flags(RNXT);
8588  _handle_annotations_before_blck_val_scalar();
8589  m_evt_handler->begin_seq_val_flow();
8590  addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8591  _set_indentation(startindent);
8592  _line_progressed(1);
8593  _maybe_skip_whitespace_tokens();
8594  }
8595  else if(first == '-' && _is_blck_token(rem))
8596  {
8597  _c4dbgp("usty[UNK]: it's a block seq");
8598  add_flags(RNXT);
8599  _handle_annotations_before_blck_val_scalar();
8600  m_evt_handler->begin_seq_val_block();
8601  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8602  _set_indentation(startindent);
8603  _line_progressed(1);
8604  _maybe_skip_whitespace_tokens();
8605  }
8606  else if(first == '{')
8607  {
8608  _c4dbgp("usty[UNK]: it's a flow map");
8609  add_flags(RNXT);
8610  _handle_annotations_before_blck_val_scalar();
8611  m_evt_handler->begin_map_val_flow();
8612  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8613  _set_indentation(startindent);
8614  _line_progressed(1);
8615  _maybe_skip_whitespace_tokens();
8616  }
8617  else if(first == '?' && _is_blck_token(rem))
8618  {
8619  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8620  add_flags(RNXT);
8621  _handle_annotations_before_blck_val_scalar();
8622  m_evt_handler->begin_map_val_block();
8623  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8624  _save_indentation();
8625  _line_progressed(1);
8626  _maybe_skip_whitespace_tokens();
8627  }
8628  else if(first == ':' && _is_blck_token(rem))
8629  {
8630  _c4dbgp("usty[UNK]: it's a map with an empty key");
8631  add_flags(RNXT);
8632  _handle_annotations_before_blck_val_scalar();
8633  m_evt_handler->begin_map_val_block();
8634  m_evt_handler->set_key_scalar_plain_empty();
8635  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8636  _save_indentation();
8637  _line_progressed(1);
8638  _maybe_skip_whitespace_tokens();
8639  }
8640  else if(first == '&')
8641  {
8642  csubstr anchor = _scan_anchor();
8643  _c4dbgpf("usty[UNK]: anchor! {}", _prs(anchor));
8644  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8645  const size_t line = m_evt_handler->m_curr->pos.line;
8646  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8647  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8648  }
8649  else if(first == '*')
8650  {
8651  csubstr ref = _scan_ref_map();
8652  _c4dbgpf("usty[UNK]: ref! {}", _prs(ref));
8653  if(!_maybe_scan_following_colon())
8654  {
8655  _c4dbgp("usty[UNK]: set val ref");
8656  _handle_valref(ref);
8657  }
8658  else
8659  {
8660  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8661  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8662  add_flags(RNXT);
8663  _handle_annotations_before_start_mapblck(startline);
8664  m_evt_handler->begin_map_val_block();
8665  _handle_keyref(ref);
8666  _maybe_skip_whitespace_tokens();
8667  _set_indentation(startindent);
8668  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8669  }
8670  }
8671  else if(first == '!')
8672  {
8673  csubstr tag = _scan_tag();
8674  _c4dbgpf("usty[UNK]: val tag! {}", _prs(tag));
8675  // we need to buffer the tags, as there may be two
8676  // consecutive tags in here
8677  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8678  const size_t line = m_evt_handler->m_curr->pos.line;
8679  _add_annotation(&m_pending_tags, tag, indentation, line);
8680  }
8681  else
8682  {
      // scalar candidates: a scalar followed by a colon starts a block
      // map with that scalar as key; otherwise the scalar is set as the
      // val and _end_stream() is called
8683  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
      // NOTE(review): re-reads the member already saved into startindent
      // above; looks redundant - confirm.
8684  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8685  const size_t startline = m_evt_handler->m_curr->pos.line; // save
      // rem was not reassigned since first was read, so this is the same value
8686  first = rem.str[0];
8687  ScannedScalar sc;
8688  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8689  if(first == '\'')
8690  {
8691  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8692  sc = _scan_scalar_squot();
8693  if(!_maybe_scan_following_colon())
8694  {
8695  _c4dbgp("usty[UNK]: set as val");
8696  _handle_annotations_before_blck_val_scalar();
8697  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8698  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8699  _end_stream();
8700  }
8701  else
8702  {
8703  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8704  add_flags(RNXT);
8705  _handle_annotations_before_start_mapblck(startline);
8706  m_evt_handler->begin_map_val_block();
8707  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8708  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8709  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8710  _set_indentation(startindent);
8711  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8712  _maybe_skip_whitespace_tokens();
8713  }
8714  }
8715  else if(first == '"')
8716  {
8717  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8718  sc = _scan_scalar_dquot();
8719  if(!_maybe_scan_following_colon())
8720  {
8721  _c4dbgp("usty[UNK]: set as val");
8722  _handle_annotations_before_blck_val_scalar();
8723  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8724  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8725  _end_stream();
8726  }
8727  else
8728  {
8729  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8730  add_flags(RNXT);
8731  _handle_annotations_before_start_mapblck(startline);
8732  m_evt_handler->begin_map_val_block();
8733  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8734  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8735  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8736  _set_indentation(startindent);
8737  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8738  _maybe_skip_whitespace_tokens();
8739  }
8740  }
8741  else if(first == '|')
8742  {
8743  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8744  ScannedBlock sb;
8745  _scan_block(&sb, startindent);
8746  _c4dbgp("usty[UNK]: set as val");
8747  _handle_annotations_before_blck_val_scalar();
8748  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8749  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8750  _end_stream();
8751  }
8752  else if(first == '>')
8753  {
8754  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8755  ScannedBlock sb;
8756  _scan_block(&sb, startindent);
8757  _c4dbgp("usty[UNK]: set as val");
8758  _handle_annotations_before_blck_val_scalar();
8759  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8760  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8761  _end_stream();
8762  }
8763  else if(_scan_scalar_plain_unk(&sc))
8764  {
8765  _c4dbgp("usty[UNK]: got a plain scalar");
8766  if(!_maybe_scan_following_colon())
8767  {
8768  _c4dbgp("usty[UNK]: set as val");
8769  _handle_annotations_before_blck_val_scalar();
8770  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8771  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8772  _end_stream();
8773  }
8774  else
8775  {
8776  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8777  add_flags(RNXT);
8778  _handle_annotations_before_start_mapblck(startline);
8779  m_evt_handler->begin_map_val_block();
8780  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8781  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8782  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8783  _set_indentation(startindent);
8784  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8785  _maybe_skip_whitespace_tokens();
8786  }
8787  }
8788  else
8789  {
8790  _c4err("parse error"); // LCOV_EXCL_LINE
8791  }
8792  }
8793  }
8794 }
8795 
8796 
8797 //-----------------------------------------------------------------------------
8798 
8799 template<class EventHandler>
8800 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8801 {
8802  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8803  _RYML_SAVE_TEST_JSON(filename, src);
8804  m_evt_handler->start_parse(filename.str, src);
8805  m_evt_handler->begin_stream();
8806  _reset();
8807  while( ! _finished_file())
8808  {
8809  _scan_line();
8810  while( ! _finished_line())
8811  {
8812  _c4dbgnextline();
8813  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8814  if(has_any(RSEQ))
8815  {
8816  _handle_seq_json();
8817  }
8818  else if(has_any(RMAP))
8819  {
8820  _handle_map_json();
8821  }
8822  else if(has_any(RUNK))
8823  {
8824  _handle_unk_json();
8825  }
8826  else
8827  {
8828  _c4err("internal error"); // LCOV_EXCL_LINE
8829  }
8830  }
8831  if(_finished_file())
8832  break; // it may have finished because of multiline blocks
8833  _line_ended();
8834  }
8835  _end_stream();
8836  m_evt_handler->finish_parse();
8837 }
8838 
8839 
8840 //-----------------------------------------------------------------------------
8841 
8842 template<class EventHandler>
8843 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8844 {
8845  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8846  _RYML_SAVE_TEST_YAML(filename, src);
8847  m_evt_handler->start_parse(filename.str, src);
8848  m_evt_handler->begin_stream();
8849  _reset();
8850  while( ! _finished_file())
8851  {
8852  _scan_line();
8853  while( ! _finished_line())
8854  {
8855  _c4dbgnextline();
8856  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8857  if(has_any(RFLOW))
8858  {
8859  if(has_none(RSEQIMAP))
8860  {
8861  if(has_any(RSEQ))
8862  {
8863  _handle_seq_flow();
8864  }
8865  else
8866  {
8867  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8868  _handle_map_flow();
8869  }
8870  }
8871  else
8872  {
8873  _handle_seq_imap();
8874  }
8875  }
8876  else if(has_any(RBLCK))
8877  {
8878  if(has_any(RSEQ))
8879  {
8880  _handle_seq_block();
8881  }
8882  else
8883  {
8884  _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8885  _handle_map_block();
8886  }
8887  }
8888  else if(has_any(RUNK))
8889  {
8890  _handle_unk();
8891  }
8892  else if(has_any(USTY))
8893  {
8894  _handle_usty();
8895  }
8896  else
8897  {
8898  _c4err("internal error"); // LCOV_EXCL_LINE
8899  }
8900  }
8901  if(_finished_file())
8902  break; // it may have finished because of multiline blocks
8903  _line_ended();
8904  }
8905  _end_stream();
8906  m_evt_handler->finish_parse();
8907 }
8908 /** @endcond */
8909 
8910 } // namespace yml
8911 } // namespace c4
8912 
8913 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8914 
8915 #undef _c4dbgnextline
8916 #undef _c4assert
8917 #undef _c4err
8918 
8919 #if defined(_MSC_VER)
8920 # pragma warning(pop)
8921 #elif defined(__clang__)
8922 # pragma clang diagnostic pop
8923 #elif defined(__GNUC__)
8924 # pragma GCC diagnostic pop
8925 #endif
8926 
8927 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:28
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:192
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1546
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition: common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
bool is_valid_tag_handle(csubstr handle)
Definition: tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition: tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ npos
a null string position
Definition: common.hpp:258
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK. reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
Definition: common.hpp:266
@ UTF8
UTF8.
Definition: common.hpp:264
@ UTF16LE
UTF16, Little-Endian.
Definition: common.hpp:265
@ NOBOM
No Byte Order Mark was found.
Definition: common.hpp:263
@ UTF32BE
UTF32, Big-Endian.
Definition: common.hpp:268
@ UTF32LE
UTF32, Little-Endian.
Definition: common.hpp:267
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition: version.cpp:6
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _c4assert(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Definition: common.hpp:287
Options to give to the parser to control its behavior.
Definition: common.hpp:347
utilities for UTF and Byte Order Mark