rapidyaml  0.11.1
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 
9 #include <ctype.h>
10 
11 #include "c4/yml/detail/dbgprint.hpp"
// _c4err(...): report a parse error through this->_err(), passing
// RYML_LOC_HERE() ahead of the caller-supplied arguments. When RYML_DBG is
// defined, RYML_DEBUG_BREAK() is invoked before the error is reported, and
// the extra debug headers below are pulled in.
13 #ifdef RYML_DBG
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
16 #define _c4err(...) \
17  do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
18 #else
19 #define _c4err(...) \
20  this->_err(RYML_LOC_HERE(), __VA_ARGS__)
21 #endif
22 
23 
// Compile-time selection of code depending on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments only when
//    RYML_WITH_TAB_TOKENS is defined, and to nothing otherwise.
//  - _RYML_WITHOUT_TAB_TOKENS(...) is the opposite.
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to `with`
//    when RYML_WITH_TAB_TOKENS is defined, and to `without` otherwise.
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
28 #else
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
32 #endif
33 
34 
35 // scaffold:
// _c4dbgnextline(): debug helper emitting a separator followed by the line
// number and byte offset stored in the event handler's current state
// (m_evt_handler->m_curr->pos). Must be used where m_evt_handler is in scope.
36 #define _c4dbgnextline() \
37  do { \
38  _c4dbgq("\n-----------"); \
39  _c4dbgt("handling line={}, offset={}B", \
40  m_evt_handler->m_curr->pos.line, \
41  m_evt_handler->m_curr->pos.offset); \
42  } while(0)
43 
44 
45 #if defined(_MSC_VER)
46 # pragma warning(push)
47 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
48 # pragma warning(disable: 4702/*unreachable code*/)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
59 # if __GNUC__ >= 7
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
61 # endif
62 #endif
63 
64 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
65 
66 namespace c4 {
67 namespace yml {
68 
69 namespace { // NOLINT
70 
71 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
72 {
73  _RYML_ASSERT_BASIC(s.len > 0);
74  _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
75  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
76 }
77 
78 inline bool _is_doc_begin_token(csubstr s)
79 {
80  _RYML_ASSERT_BASIC(s.begins_with('-'));
81  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
82  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
83  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
84  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
85 }
86 
87 inline bool _is_doc_end_token(csubstr s)
88 {
89  _RYML_ASSERT_BASIC(s.begins_with('.'));
90  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
91  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
92  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
93  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
94 }
95 
// Tell whether s starts with a document marker: "---" or "...", either as
// the whole string or followed by a space (or by a tab, when tab tokens are
// enabled). Unlike _is_doc_begin_token()/_is_doc_end_token(), there is no
// precondition on the first character: strings shorter than 3 characters,
// or starting with any other character, simply yield false.
//
// The checks are deliberately written inline instead of delegating to the
// two functions above; see the note below before restructuring this code.
96 inline bool _is_doc_token(csubstr s) noexcept
97 {
98  //
99  // NOTE: this function was failing under some scenarios when
100  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
101  // related to optimizer assumptions on the input string and
102  // possibly caused from UB around assignment to that string (the
103  // call site was in _scan_block()). For more details see:
104  //
105  // https://github.com/biojppm/rapidyaml/issues/440
106  //
107  // The current version does not suffer this problem, but it may
108  // appear again.
109  //
110  //
111  // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
112  // (but not any other optimization level, nor any other compiler
113  // or version), because the assignment to s is being hoisted out
114  // of the loop which calls this function. Then the length doesn't
115  // enter the s.len >= 3 when it should. Adding a
116  // C4_DONT_OPTIMIZE(var) makes the problem go away.
117  //
118  if(s.len >= 3)
119  {
120  switch(s.str[0])
121  {
122  case '-':
123  //return _is_doc_begin_token(s); // this was failing with gcc -O2
124  return (s.str[1] == '-' && s.str[2] == '-')
125  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
126  case '.':
127  //return _is_doc_end_token(s); // this was failing with gcc -O2
128  return (s.str[1] == '.' && s.str[2] == '.')
129  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
130  }
131  }
132  return false;
133 }
134 
135 inline size_t _is_special_json_scalar(csubstr s)
136 {
137  _RYML_ASSERT_BASIC(s.len);
138  switch(s.str[0])
139  {
140  case 'f':
141  if(s.len >= 5 && s.begins_with("false"))
142  return 5u;
143  break;
144  case 't':
145  if(s.len >= 4 && s.begins_with("true"))
146  return 4u;
147  break;
148  case 'n':
149  if(s.len >= 4 && s.begins_with("null"))
150  return 4u;
151  break;
152  }
153  return 0u;
154 }
155 
156 
157 //-----------------------------------------------------------------------------
158 
159 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
160 {
161  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
162 }
163 
164 //! look for the next newline chars, and jump to the right of those
165 inline substr from_next_line(substr rem)
166 {
167  size_t nlpos = rem.first_of("\r\n");
168  if(nlpos == csubstr::npos)
169  return {};
170  const char nl = rem[nlpos];
171  rem = rem.right_of(nlpos);
172  if(rem.empty())
173  return {};
174  if(_extend_from_combined_newline(nl, rem.front()))
175  rem = rem.sub(1);
176  return rem;
177 }
178 
179 
180 //-----------------------------------------------------------------------------
181 
182 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
183 {
184  _RYML_ASSERT_BASIC(r[*i] == '\n');
185  size_t numnl_following = 0;
186  ++(*i);
187  for( ; *i < r.len; ++(*i))
188  {
189  if(r.str[*i] == '\n')
190  ++numnl_following;
191  // skip leading whitespace
192  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
193  ;
194  else
195  break;
196  }
197  return numnl_following;
198 }
199 
200 /** @p i is set to the first non whitespace character after the line
201  * @return the number of empty lines after the initial position */
202 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
203 {
204  _RYML_ASSERT_BASIC(r[*i] == '\n');
205  size_t numnl_following = 0;
206  ++(*i);
207  if(indentation == 0)
208  {
209  for( ; *i < r.len; ++(*i))
210  {
211  if(r.str[*i] == '\n')
212  ++numnl_following;
213  // skip leading whitespace
214  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
215  ;
216  else
217  break;
218  }
219  }
220  else
221  {
222  for( ; *i < r.len; ++(*i))
223  {
224  if(r.str[*i] == '\n')
225  {
226  ++numnl_following;
227  // skip the indentation after the newline
228  size_t stop = *i + indentation;
229  for( ; *i < r.len; ++(*i))
230  {
231  if(r.str[*i] != ' ' && r.str[*i] != '\r')
232  break;
233  _RYML_ASSERT_BASIC(*i < stop);
234  }
235  C4_UNUSED(stop);
236  }
237  // skip leading whitespace
238  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
239  ;
240  else
241  break;
242  }
243  }
244  return numnl_following;
245 }
246 
247 } // anon namespace
248 
249 
250 //-----------------------------------------------------------------------------
251 //-----------------------------------------------------------------------------
252 //-----------------------------------------------------------------------------
253 
// NOTE(review): the declarator line between the template header and the
// body is absent from this listing; presumably this is the destructor of
// ParseEngine<EventHandler> -- confirm against the class declaration.
// The body releases the newline-offsets storage through _free() and then
// resets the members through _clr().
254 template<class EventHandler>
256 {
257  _free();
258  _clr();
259 }
260 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is the constructor taking the event handler (evt_handler) and the
// parser options (opts) -- confirm against the class declaration.
// The options and handler are stored, every other member listed here starts
// out empty/zero, and no newline-offsets storage is allocated.
261 template<class EventHandler>
263  : m_options(opts)
264  , m_file()
265  , m_buf()
266  , m_evt_handler(evt_handler)
267  , m_pending_anchors()
268  , m_pending_tags()
269  , m_was_inside_qmrk(false)
270  , m_doc_empty(false)
271  , m_prev_colon(npos)
272  , m_encoding(NOBOM)
273  , m_newline_offsets()
274  , m_newline_offsets_size(0)
275  , m_newline_offsets_capacity(0)
276  , m_newline_offsets_buf()
277 {
// a handler is mandatory
278  _RYML_CHECK_BASIC(evt_handler);
279 }
280 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is the move constructor, since the newline-offsets pointer is taken
// over from `that` and `that` is cleared afterwards -- confirm against the
// class declaration.
// Note that m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding are
// set to fixed values instead of being taken from `that`.
281 template<class EventHandler>
283  : m_options(that.m_options)
284  , m_file(that.m_file)
285  , m_buf(that.m_buf)
286  , m_evt_handler(that.m_evt_handler)
287  , m_pending_anchors(that.m_pending_anchors)
288  , m_pending_tags(that.m_pending_tags)
289  , m_was_inside_qmrk(false)
290  , m_doc_empty(false)
291  , m_prev_colon(npos)
292  , m_encoding(NOBOM)
293  , m_newline_offsets(that.m_newline_offsets)
294  , m_newline_offsets_size(that.m_newline_offsets_size)
295  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296  , m_newline_offsets_buf(that.m_newline_offsets_buf)
297 {
// `that` must no longer refer to the storage which was taken over
298  that._clr();
299 }
300 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is the copy constructor, since `that` is only read and the
// newline offsets are duplicated into newly sized storage -- confirm against
// the class declaration.
// Note that m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding are
// set to fixed values instead of being copied, and that m_newline_offsets_buf
// is left empty.
301 template<class EventHandler>
303  : m_options(that.m_options)
304  , m_file(that.m_file)
305  , m_buf(that.m_buf)
306  , m_evt_handler(that.m_evt_handler)
307  , m_pending_anchors(that.m_pending_anchors)
308  , m_pending_tags(that.m_pending_tags)
309  , m_was_inside_qmrk(false)
310  , m_doc_empty(false)
311  , m_prev_colon(npos)
312  , m_encoding(NOBOM)
313  , m_newline_offsets()
314  , m_newline_offsets_size()
315  , m_newline_offsets_capacity()
316  , m_newline_offsets_buf()
317 {
// only duplicate the offsets when `that` has storage for them
318  if(that.m_newline_offsets_capacity)
319  {
320  _resize_locations(that.m_newline_offsets_capacity);
321  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
322  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
323  m_newline_offsets_size = that.m_newline_offsets_size;
324  }
325 }
326 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is the move assignment operator, since the newline-offsets pointer is
// taken over from `that`, `that` is cleared afterwards and *this is
// returned -- confirm against the class declaration.
// NOTE(review): there is no check for self-assignment here (the assignment
// from a const reference further below has one): when `that` is *this, the
// storage is freed by _free() and the object is then cleared by _clr().
327 template<class EventHandler>
329 {
// release the storage currently owned, before taking over that of `that`
330  _free();
331  m_options = (that.m_options);
332  m_file = (that.m_file);
333  m_buf = (that.m_buf);
334  m_evt_handler = that.m_evt_handler;
335  m_pending_anchors = that.m_pending_anchors;
336  m_pending_tags = that.m_pending_tags;
337  m_was_inside_qmrk = that.m_was_inside_qmrk;
338  m_doc_empty = that.m_doc_empty;
339  m_prev_colon = that.m_prev_colon;
340  m_encoding = that.m_encoding;
341  m_newline_offsets = (that.m_newline_offsets);
342  m_newline_offsets_size = (that.m_newline_offsets_size);
343  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344  m_newline_offsets_buf = (that.m_newline_offsets_buf);
// `that` must no longer refer to the storage which was taken over
345  that._clr();
346  return *this;
347 }
348 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is the copy assignment operator, since `that` is only read and
// *this is returned -- confirm against the class declaration.
// Self-assignment is a no-op.
349 template<class EventHandler>
351 {
352  if(&that != this)
353  {
// NOTE(review): _free() runs while m_evt_handler still has its previous
// value, ie the previous handler's callbacks release the storage.
354  _free();
355  m_options = (that.m_options);
356  m_file = (that.m_file);
357  m_buf = (that.m_buf);
358  m_evt_handler = that.m_evt_handler;
359  m_pending_anchors = that.m_pending_anchors;
360  m_pending_tags = that.m_pending_tags;
361  m_was_inside_qmrk = that.m_was_inside_qmrk;
362  m_doc_empty = that.m_doc_empty;
363  m_prev_colon = that.m_prev_colon;
364  m_encoding = that.m_encoding;
// make room for the offsets of `that`, then duplicate them
365  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366  _resize_locations(that.m_newline_offsets_capacity);
367  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): unlike in the copying constructor, this memcpy() is not
// guarded by a capacity check; when `that` has no offsets storage, both
// pointers are presumably null here (with a zero size) -- confirm that this
// is acceptable for memcpy().
369  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
370  m_newline_offsets_size = that.m_newline_offsets_size;
371  m_newline_offsets_buf = that.m_newline_offsets_buf;
372  }
373  return *this;
374 }
375 
// NOTE(review): the declarator line is absent from this listing; presumably
// this is ParseEngine<EventHandler>::_clr(), which the blocks above call
// after _free() or on a moved-from object -- confirm against the class
// declaration.
// Puts every member listed here back to an empty state. Nothing is released:
// the newline-offsets pointer is merely overwritten, so storage which is
// still owned must have been freed (or handed over) beforehand.
376 template<class EventHandler>
378 {
379  m_options = {};
380  m_file = {};
381  m_buf = {};
382  m_evt_handler = {};
383  m_pending_anchors = {};
384  m_pending_tags = {};
385  m_was_inside_qmrk = false;
386  m_doc_empty = true;
387  m_prev_colon = npos;
388  m_encoding = NOBOM;
389  m_newline_offsets = {};
390  m_newline_offsets_size = {};
391  m_newline_offsets_capacity = {};
392  m_newline_offsets_buf = {};
393 }
394 
395 template<class EventHandler>
396 void ParseEngine<EventHandler>::_free()
397 {
398  if(m_newline_offsets)
399  {
400  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
401  m_newline_offsets = nullptr;
402  m_newline_offsets_size = 0u;
403  m_newline_offsets_capacity = 0u;
404  m_newline_offsets_buf = nullptr;
405  }
406 }
407 
408 
409 //-----------------------------------------------------------------------------
410 
411 template<class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
413 {
414  m_pending_anchors = {};
415  m_pending_tags = {};
416  m_doc_empty = true;
417  m_was_inside_qmrk = false;
418  m_prev_colon = npos;
419  m_bom_len = 0;
420  m_encoding = NOBOM;
421  m_bom_line = 0;
422  if(m_options.locations())
423  {
424  _prepare_locations();
425  }
426 }
427 
428 
429 //-----------------------------------------------------------------------------
430 
431 template<class EventHandler>
432 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
433 {
434  #define _ryml_relocate(s) \
435  if((s).is_sub(prev_arena)) \
436  { \
437  (s).str = next_arena.str + ((s).str - prev_arena.str); \
438  }
439  _ryml_relocate(m_buf);
440  _ryml_relocate(m_newline_offsets_buf);
441  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
442  _ryml_relocate(m_pending_tags.annotations[i].str);
443  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
444  _ryml_relocate(m_pending_anchors.annotations[i].str);
445  #undef _ryml_relocate
446 }
447 
448 template<class EventHandler>
449 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
450 {
451  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
452 }
453 
454 
455 //-----------------------------------------------------------------------------
456 
457 #ifdef RYML_DBG
// Debug helper: dump, through dumpfn, a description of the current parse
// position:
//  - when the current line has contents: a header with the file name (if
//    set), line and column, followed by at most 80 characters of the line
//    (with "..." appended when the line was cut) and its size; then a second
//    line with a '^' and '~' marker placed under the part of the line still
//    to be handled (the marker is also capped at 80 characters), followed by
//    its column range;
//  - otherwise, just a newline;
//  - finally, the flags of the top parser state.
// dumpfn is called with the pieces of text to output.
458 template<class EventHandler>
459 template<class DumpFn>
460 C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
461 {
462  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
463  auto const& lc = st->line_contents;
464  csubstr contents = lc.full.first(lc.num_cols);
465  if(contents.len)
466  {
467  // print the yaml src line
// offs accumulates the width of the header printed before the line
// contents, so that the marker below starts under the right column
// (presumably to_chars() on an empty buffer yields the number of
// characters needed -- confirm)
468  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
469  if(m_file.len)
470  {
471  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
472  offs += m_file.len + 1;
473  }
474  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
475  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
476  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
477  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
478  // highlight the remaining portion of the previous line
479  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
480  size_t lastcol = firstcol + lc.rem.len;
481  for(size_t i = 0; i < offs + firstcol; ++i)
482  std::forward<DumpFn>(dumpfn)(" ");
483  std::forward<DumpFn>(dumpfn)("^");
484  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
485  std::forward<DumpFn>(dumpfn)("~");
486  _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
487  }
488  else
489  {
490  std::forward<DumpFn>(dumpfn)("\n");
491  }
492  // next line: print the state flags
493  {
494  char flagbuf_[128];
495  _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
496  }
497 }
498 
499 template<class EventHandler>
500 void ParseEngine<EventHandler>::_print_state_stack(substr buf) const
501 {
502  if(_dbg_enabled())
503  {
504  for(typename EventHandler::state const& s : m_evt_handler->m_stack)
505  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
506  }
507 }
508 
509 template<class EventHandler>
510 void ParseEngine<EventHandler>::_print_state_stack() const
511 {
512  char buf[128];
513  _print_state_stack(buf);
514 }
515 #endif
516 
517 
518 //-----------------------------------------------------------------------------
519 
// Report a parse error with an explicitly given YAML location.
// The parse is first cancelled on the event handler; the error is then
// handed to err_parse() together with the callbacks of the handler's stack.
// Declared C4_NORETURN: control is not expected to come back to the caller.
//   cpploc: location in the C++ source raising the error
//   ymlloc: location in the YAML source which the error refers to
//   fmt, args: format string and its arguments, passed on to err_parse()
520 template<class EventHandler>
521 template<class ...Args>
522 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
523 {
524  m_evt_handler->cancel_parse();
525  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
526 }
527 
// Report a parse error at the current parse position: same as the overload
// taking an explicit YAML location, but using the position of the event
// handler's current state (m_evt_handler->m_curr->pos) as that location.
// The parse is first cancelled on the event handler. Declared C4_NORETURN.
//   cpploc: location in the C++ source raising the error
//   fmt, args: format string and its arguments, passed on to err_parse()
528 template<class EventHandler>
529 template<class ...Args>
530 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
531 {
532  m_evt_handler->cancel_parse();
533  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
534 }
535 
536 
537 //-----------------------------------------------------------------------------
538 #ifdef RYML_DBG
539 template<class EventHandler>
540 template<class ...Args>
541 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
542 {
543  if(_dbg_enabled())
544  {
545  _dbg_printf(fmt, args...);
546  _dbg_dumper("\n");
547  _fmt_msg(_dbg_dumper);
548  }
549 }
550 #endif
551 
552 
553 //-----------------------------------------------------------------------------
554 template<class EventHandler>
555 bool ParseEngine<EventHandler>::_finished_file() const
556 {
557  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
558  if(ret)
559  {
560  _c4dbgp("finished file!!!");
561  }
562  return ret;
563 }
564 
565 template<class EventHandler>
566 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
567 {
568  return m_evt_handler->m_curr->line_contents.rem.empty();
569 }
570 
571 
572 //-----------------------------------------------------------------------------
573 
574 template<class EventHandler>
575 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
576 {
577  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
578  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
579  {
580  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
581  if(pos == npos)
582  pos = rem.len; // maybe the line is just all whitespace
583  _c4dbgpf("skip {} whitespace characters", pos);
584  _line_progressed(pos);
585  }
586 }
587 
588 template<class EventHandler>
589 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
590 {
591  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
592  if(rem.len && rem.str[0] == c)
593  {
594  size_t pos = rem.first_not_of(c);
595  if(pos == npos)
596  pos = rem.len; // maybe the line is just all c
597  _c4dbgpf("skip {}x'{}'", pos, c);
598  _line_progressed(pos);
599  }
600 }
601 
602 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
603 template<class EventHandler>
604 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
605 {
606  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
607  if(rem.len && rem.str[0] == c)
608  {
609  size_t pos = rem.first_not_of(c);
610  if(pos == npos)
611  pos = rem.len; // maybe the line is just all c
612  if(pos > max_to_skip)
613  pos = max_to_skip;
614  _c4dbgpf("skip {}x'{}'", pos, c);
615  _line_progressed(pos);
616  }
617 }
618 #endif
619 
620 template<class EventHandler>
621 template<size_t N>
622 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
623 {
624  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
625  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
626  if(pos == npos)
627  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
628  _c4dbgpf("skip {} characters", pos);
629  _line_progressed(pos);
630 }
631 
632 template<class EventHandler>
633 void ParseEngine<EventHandler>::_skip_comment()
634 {
635  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
636  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
637  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
638  csubstr line = m_evt_handler->m_curr->line_contents.full;
639  // raise an error if the comment is not preceded by whitespace
640  if(!line.begins_with('#'))
641  {
642  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str > line.str);
643  const char c = line[(size_t)(rem.str - line.str - 1)];
644  if(C4_UNLIKELY(c != ' ' && c != '\t'))
645  _c4err("comment not preceded by whitespace");
646  }
647  else
648  {
649  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str == line.str);
650  }
651  _c4dbgpf("comment was '{}'", rem);
652  _line_progressed(rem.len);
653 }
654 
655 template<class EventHandler>
656 void ParseEngine<EventHandler>::_maybe_skip_comment()
657 {
658  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
659  if(s.begins_with('#'))
660  {
661  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
662  _skip_comment();
663  }
664 }
665 
666 template<class EventHandler>
667 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
668 {
669  if(m_evt_handler->m_curr->line_contents.rem.len)
670  {
671  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
672  {
673  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
674  if(pos == npos)
675  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
676  _c4dbgpf("skip {}x'{}'", pos, ' ');
677  _line_progressed(pos);
678  }
679  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
680  {
681  if(m_evt_handler->m_curr->line_contents.rem.len == 1
682  || m_evt_handler->m_curr->line_contents.rem.str[1] == ' '
683  _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[1] == '\t')
684  )
685  {
686  _c4dbgp("found ':' colon next");
687  _line_progressed(1);
688  return true;
689  }
690  }
691  }
692  return false;
693 }
694 
695 template<class EventHandler>
696 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
697 {
698  if(m_evt_handler->m_curr->line_contents.rem.len)
699  {
700  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
701  {
702  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
703  if(pos == npos)
704  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
705  _c4dbgpf("skip {}x'{}'", pos, ' ');
706  _line_progressed(pos);
707  }
708  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
709  {
710  _c4dbgp("found ',' comma next");
711  _line_progressed(1);
712  return true;
713  }
714  }
715  return false;
716 }
717 
718 
719 //-----------------------------------------------------------------------------
720 
721 template<class EventHandler>
722 csubstr ParseEngine<EventHandler>::_scan_anchor()
723 {
724  csubstr s = m_evt_handler->m_curr->line_contents.rem;
725  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
726  csubstr anchor = s.range(1, s.first_of(' '));
727  _line_progressed(1u + anchor.len);
728  _maybe_skipchars(' ');
729  return anchor;
730 }
731 
732 template<class EventHandler>
733 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
734 {
735  csubstr s = m_evt_handler->m_curr->line_contents.rem;
736  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
737  csubstr ref = s.first(s.first_of(",] :"));
738  _line_progressed(ref.len);
739  return ref;
740 }
741 
742 template<class EventHandler>
743 csubstr ParseEngine<EventHandler>::_scan_ref_map()
744 {
745  csubstr s = m_evt_handler->m_curr->line_contents.rem;
746  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
747  csubstr ref = s.first(s.first_of(",} "));
748  _line_progressed(ref.len);
749  return ref;
750 }
751 
752 template<class EventHandler>
753 csubstr ParseEngine<EventHandler>::_scan_tag()
754 {
755  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
756  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
757  csubstr t;
758  if(rem.begins_with("!!"))
759  {
760  _c4dbgp("begins with '!!'");
761  if(has_any(RFLOW))
762  t = rem.left_of(rem.first_of(" ,"));
763  else
764  t = rem.left_of(rem.first_of(' '));
765  }
766  else if(rem.begins_with("!<"))
767  {
768  _c4dbgp("begins with '!<'");
769  t = rem.left_of(rem.first_of('>'), true);
770  }
771  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
772  else if(rem.begins_with("!h!"))
773  {
774  _c4dbgp("begins with '!h!'");
775  t = rem.left_of(rem.first_of(' '));
776  }
777  #endif
778  else
779  {
780  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
781  _c4dbgp("begins with '!'");
782  if(has_any(RFLOW))
783  t = rem.left_of(rem.first_of(" ,"));
784  else
785  t = rem.left_of(rem.first_of(' '));
786  }
787  _line_progressed(t.len);
788  _maybe_skip_whitespace_tokens();
789  return t;
790 }
791 
792 
793 //-----------------------------------------------------------------------------
794 
795 template<class EventHandler>
796 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
797 {
798  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.empty());
799 
800  // it's not a scalar if it starts with any of these characters:
801  switch(s.str[0])
802  {
803  // these are all legal tokens which mean no scalar is starting:
804  case '[':
805  case ']':
806  case '{':
807  case '}':
808  case '!':
809  case '&':
810  case '*':
811  case '|':
812  case '>':
813  case '#':
814  case ',':
815  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
816  return false;
817  // '-' and ':' are illegal at the beginning if not followed by a scalar character
818  case '-':
819  case ':':
820  if(s.len > 1)
821  {
822  switch(s.str[1])
823  {
824  case ' ':
825  case '\n':
826  case '}':
827  case ']':
828  case '\r':
829  _RYML_WITH_TAB_TOKENS(case '\t':)
830  if(s.str[0] == ':')
831  {
832  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
833  return false;
834  }
835  break;
836  case '{':
837  case '[':
838  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
839  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
840  break;
841  default:
842  break;
843  }
844  }
845  else
846  {
847  return false;
848  }
849  break;
850  case '?':
851  if(s.len > 1)
852  {
853  switch(s.str[1])
854  {
855  case ' ':
856  case '\n':
857  case '\r':
858  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
859  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
860  return false;
861  case '{':
862  case '}':
863  case '[':
864  case ']':
865  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
866  break;
867  default:
868  break;
869  }
870  }
871  else
872  {
873  return false;
874  }
875  break;
876  // everything else is a legal starting character
877  default:
878  break;
879  }
880 
881  return true;
882 }
883 
// Scan a plain scalar which is a value in a flow sequence.
//
// Scanning starts at the current offset in the buffer and may extend over
// several lines: at each newline the current line is ended and the next
// line is scanned, unless that next line (leading whitespace aside) starts
// with one of ",]#".
//
// The scalar ends at the first of:
//  - ',' or ']';
//  - a newline followed by a line starting with one of ",]#";
//  - ':' followed by a space (or a tab, when tab tokens are enabled), or by
//    ',', '\n' or ']' (any '\r' directly after the ':' is looked through);
//  - '#' preceded by a space (or a tab, when tab tokens are enabled);
//  - the end of the buffer.
//
// A parse error is raised on '[', '{' or '}', and on a ':' which is the very
// last character of the buffer.
//
//   sc: receives the scanned scalar (right-trimmed of whitespace) and
//       whether it needs filtering, which is the case when a '\n' or '\r'
//       was found inside of it. Only written when true is returned.
//   returns false when there is no input left or no plain scalar starts at
//       the current position (see _is_valid_start_scalar_plain_flow()),
//       true otherwise.
884 template<class EventHandler>
885 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
886 {
887  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
888  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
889  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
890  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
891  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
892 
// s is everything from the current position to the end of the buffer,
// not just the rest of the current line
893  substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset);
894  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
895  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
896 
897  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem));
898 
899  if(!s.len || !_is_valid_start_scalar_plain_flow(s))
900  return false;
901 
902  _c4dbgp("scanning seqflow scalar...");
903 
// offs is the position in s; col counts the characters consumed in the
// current line, and is what gets passed to _line_progressed()
904  bool needs_filter = false;
905  size_t col = 0; // zero-based column
906  size_t offs = 0;
907  size_t offsp1;
908  for( ; offs < s.len; ++offs, ++col)
909  {
910  const char c = s.str[offs];
911  switch(c)
912  {
913  case ',':
914  case ']':
915  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
916  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
917  goto ended_scalar;
918  case '\n':
919  _c4dbgpf("found newline. offs={} col={}", offs, col);
920  offsp1 = offs + 1;
921  if(s.len > offsp1)
922  {
// peek at the next line before committing to it
923  csubstr next_line = s.sub(offsp1).triml(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
924  if(next_line.begins_with_any(",]#")) // any of the characters we're interested in
925  {
926  _c4dbgpf("found terminating character beginning next line: '{}'", next_line.str[0]);
927  goto ended_scalar;
928  }
929  }
// the scalar goes on in the next line: finish the current line and move
// the parser state to the next one
930  col = (size_t)-1; // so that col is 0 in the next loop iteration
931  needs_filter = true;
932  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
933  _line_ended();
934  _scan_line();
935  break;
936  case '\r':
// cancels out the ++col of the loop increment (relies on unsigned
// wraparound when col is 0)
937  --col; // don't count \r when calling _line_progressed()
938  needs_filter = true;
939  break;
940  case ':':
941  _c4dbgp("found suspicious ':'");
942  offsp1 = offs + 1;
943  if(s.len > offsp1)
944  {
945  char next = s.str[offsp1];
946  _c4dbgpf("next char is '{}'", _c4prc(next));
947  if(next == '\r')
948  {
949  csubstr after = s.sub(offsp1).triml('\r');
950  if(after.len)
951  {
952  next = after.str[0];
953  _c4dbgpf("skip \\r to '{}'", _c4prc(next));
954  }
955  }
956  // no else here.
957  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
958  {
959  _c4dbgp("map starting!");
960  goto ended_scalar;
961  }
962  else
963  {
964  _c4dbgp("':' nothing to see here");
965  }
966  }
967  else
968  {
// the ':' is the last character of the buffer
969  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.len == offsp1);
970  _line_progressed(col);
971  _c4err("missing termination: '{}'", c); // noreturn
972  }
973  break;
974  case '#':
975  {
976  _c4dbgp("found suspicious '#'");
977  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
// a '#' only starts a comment when preceded by whitespace
978  char prev = s.str[offs - 1];
979  if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
980  {
981  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
982  goto ended_scalar;
983  }
984  }
985  break;
986  case '[':
987  case '{':
988  case '}':
// move the position to the offending character before reporting
989  _line_progressed(col);
990  _c4err("invalid character: '{}'", c); // noreturn
991  default:
992  ;
993  }
994  }
995 
996 ended_scalar:
997 
// also reached when the loop runs to the end of the buffer
998  _line_progressed(col);
999  s = s.first(offs);
1000  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1001  sc->needs_filter = needs_filter;
1002 
1003  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1004 
1005  return true;
1006 }
1007 
1008 template<class EventHandler>
1009 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1010 {
1011  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
1012  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
1013  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
1014  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
1015  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1016 
1017  substr s = m_evt_handler->m_curr->line_contents.rem;
1018  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1019 
1020  if(!s.len)
1021  return false;
1022 
1023  if(!_is_valid_start_scalar_plain_flow(s))
1024  return false;
1025 
1026  _c4dbgp("scanning scalar...");
1027 
1028  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1029  bool needs_filter = false;
1030  while(true)
1031  {
1032  for(size_t i = 0; i < s.len; ++i)
1033  {
1034  const char c = s.str[i];
1035  switch(c)
1036  {
1037  case ',':
1038  case '}':
1039  _line_progressed(i);
1040  _c4dbgpf("found terminating character: '{}'", c);
1041  goto ended_scalar;
1042  case ':':
1043  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
1044  {
1045  _line_progressed(i);
1046  _c4dbgpf("found terminating character: '{}'", c);
1047  goto ended_scalar;
1048  }
1049  break;
1050  case '{':
1051  case '[':
1052  _line_progressed(i);
1053  _c4err("invalid character: '{}'", c); // noreturn
1054  break;
1055  case ']':
1056  _line_progressed(i);
1057  if(has_any(RSEQIMAP))
1058  goto ended_scalar;
1059  else
1060  _c4err("invalid character: '{}'", c); // noreturn
1061  break;
1062  case '#':
1063  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
1064  {
1065  _line_progressed(i);
1066  _c4dbgpf("found terminating character: '{}'", c);
1067  goto ended_scalar;
1068  }
1069  break;
1070  default:
1071  ;
1072  }
1073  }
1074  _c4dbgp("next line!");
1075  _line_progressed(s.len);
1076  if(!_finished_file())
1077  {
1078  _c4dbgp("next line!");
1079  _line_ended();
1080  _scan_line();
1081  }
1082  else
1083  {
1084  _c4dbgp("file finished!");
1085  goto ended_scalar;
1086  }
1087  s = m_evt_handler->m_curr->line_contents.rem;
1088  needs_filter = true;
1089  }
1090 
1091 ended_scalar:
1092 
1093  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1094  sc->needs_filter = needs_filter;
1095 
1096  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1097 
1098  return sc->scalar.len > 0u;
1099 }
1100 
1101 template<class EventHandler>
1102 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1103 {
1104  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1105  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
1106  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1107  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
1108 
1109  substr s = m_evt_handler->m_curr->line_contents.rem;
1110  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1111 
1112  if(!s.len)
1113  return false;
1114 
1115  _c4dbgp("scanning scalar...");
1116 
1117  switch(s.str[0])
1118  {
1119  case ']':
1120  case '{':
1121  case ',':
1122  _c4dbgp("not a scalar.");
1123  return false;
1124  }
1125 
1126  {
1127  const size_t len = _is_special_json_scalar(s);
1128  if(len)
1129  {
1130  sc->scalar = s.first(len);
1131  sc->needs_filter = false;
1132  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1133  _line_progressed(len);
1134  return true;
1135  }
1136  }
1137 
1138  // must be a number
1139  size_t i = 0;
1140  for( ; i < s.len; ++i)
1141  {
1142  const char c = s.str[i];
1143  switch(c)
1144  {
1145  case ',':
1146  case ']':
1147  case ' ':
1148  case '\t':
1149  _c4dbgpf("found terminating character: '{}'", c);
1150  goto ended_scalar;
1151  case '#':
1152  if(!i || s.str[i-1] == ' ')
1153  {
1154  _c4dbgpf("found terminating character: '{}'", c);
1155  goto ended_scalar;
1156  }
1157  break;
1158  default:
1159  ;
1160  }
1161  }
1162 
1163 ended_scalar:
1164 
1165  if(C4_LIKELY(i > 0))
1166  {
1167  _line_progressed(i);
1168  sc->scalar = s.first(i);
1169  sc->needs_filter = false;
1170  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1171  return true;
1172  }
1173 
1174  return false;
1175 }
1176 
1177 template<class EventHandler>
1178 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1179 {
1180  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1181  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
1182  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1183  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
1184  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1185 
1186  substr s = m_evt_handler->m_curr->line_contents.rem;
1187  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1188 
1189  if(!s.len)
1190  return false;
1191 
1192  _c4dbgp("scanning scalar...");
1193 
1194  {
1195  const size_t len = _is_special_json_scalar(s);
1196  if(len)
1197  {
1198  sc->scalar = s.first(len);
1199  sc->needs_filter = false;
1200  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1201  _line_progressed(len);
1202  return true;
1203  }
1204  }
1205 
1206  // must be a number
1207  size_t i = 0;
1208  for( ; i < s.len; ++i)
1209  {
1210  const char c = s.str[i];
1211  switch(c)
1212  {
1213  case ',':
1214  case '}':
1215  case ' ':
1216  case '\t':
1217  _c4dbgpf("found terminating character: '{}'", c);
1218  goto ended_scalar;
1219  case '#':
1220  if(!i || s.str[i-1] == ' ')
1221  {
1222  _c4dbgpf("found terminating character: '{}'", c);
1223  goto ended_scalar;
1224  }
1225  break;
1226  default:
1227  ;
1228  }
1229  }
1230 
1231 ended_scalar:
1232 
1233  if(C4_LIKELY(i > 0))
1234  {
1235  _line_progressed(i);
1236  sc->scalar = s.first(i);
1237  sc->needs_filter = false;
1238  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1239  return true;
1240  }
1241 
1242  return false;
1243 }
1244 
1245 template<class EventHandler>
1246 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1247 {
1248  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1249  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1250 }
1251 
1252 template<class EventHandler>
1253 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1254 {
1255  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1256  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1257 }
1258 
/** Scan a plain (unquoted) scalar outside of flow containers.
 *
 * Returns false, consuming nothing, when the remainder of the current line
 * is empty or begins with something that cannot start a plain scalar here:
 * '-', ':' or '?' accepted by _is_blck_token(), a document begin/end
 * marker, or one of '[', '{', '&', '*', '!' (also a tab when tab tokens
 * are enabled).
 *
 * Otherwise the scalar is scanned over as many lines as belong to it. It
 * ends at:
 *   - a ':' that is last in the line or followed by ' ' (or tab, with tab
 *     tokens); this is accepted only on the starting line and is a parse
 *     error on later lines
 *   - a '#' that is first in the line or preceded by ' ' or tab
 *   - a following line indented less than @p indentation
 *   - a following unindented line that is a document begin/end token
 *   - the end of the file
 *
 * @param sc receives the scanned range (from the starting offset to the
 *        final offset, right-trimmed of " \n\r\t") and the needs_filter
 *        flag, which is set once a further line has been loaded
 * @param indentation minimum indentation a following line must have to be
 *        taken as a continuation of the scalar
 * @return true when a scalar was scanned
 */
1259 template<class EventHandler>
1260 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1261 {
1262  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1263  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1264  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY));
1265 
1266  substr s = m_evt_handler->m_curr->line_contents.rem;
1267  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1268 
1269  if(!s.len)
1270  return false;
1271 
// reject leading characters that cannot begin a plain scalar in this context
1272  switch(s.str[0])
1273  {
1274  case '-':
1275  if(_is_blck_token(s))
1276  {
1277  return false;
1278  }
1279  else if(_is_doc_begin(s))
1280  {
1281  _c4dbgp("token is doc start");
1282  return false;
1283  }
1284  break;
1285  case ':':
1286  case '?':
1287  if(_is_blck_token(s))
1288  return false;
1289  break;
1290  case '[':
1291  case '{':
1292  case '&':
1293  case '*':
1294  case '!':
1295  _RYML_WITH_TAB_TOKENS(case '\t':)
1296  return false;
1297  case '.':
1298  if(_is_doc_end(s))
1299  {
1300  _c4dbgp("token is doc end");
1301  return false;
1302  }
1303  break;
1304  }
1305 
1306  _c4dbgpf("plain scalar! indentation={}", indentation);
1307 
// remember where the scalar starts; the line is used to tell the first
// line apart from continuation lines
1308  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1309  const size_t start_line = m_evt_handler->m_curr->pos.line;
1310 
1311  bool needs_filter = false;
// one iteration per line of the scalar
1312  while(true)
1313  {
1314  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1315  for(size_t i = 0; i < s.len; ++i)
1316  {
1317  const char curr = s.str[i];
1318  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1319  switch(curr)
1320  {
1321  case ':':
1322  _c4dbgpf("[{}]: got suspicious ':'", i);
1323  // are there more characters?
1324  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1325  {
1326  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1327  _line_progressed(i);
1328  // ': ' is accepted only on the first line
1329  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1330  {
1331  _c4dbgp("start line. scalar ends here");
1332  goto ended_scalar;
1333  }
1334  else
1335  {
1336  _c4err("parse error");
1337  }
1338  }
1339  else
1340  {
// the colon is part of the scalar. j is moved to the last colon of a run
// of consecutive colons; i then resumes at j-1, so that after the loop's
// ++i the last colon of the run is examined again by this same case
1341  size_t j = i;
1342  while(j + 1 < s.len && s.str[j+1] == ':')
1343  {
1344  _c4dbgp("skip colon");
1345  ++j;
1346  }
1347  i = j > i ? j-1 : i;
1348  _c4dbgp("nothing to see here");
1349  }
1350  break;
1351  case '#':
1352  _c4dbgp("got suspicious '#'");
1353  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1354  {
1355  _c4dbgp("comment! scalar ends here");
1356  _line_progressed(i);
1357  goto ended_scalar;
1358  }
1359  else
1360  {
1361  _c4dbgp("nothing to see here");
1362  }
1363  break;
1364  }
1365  }
// the whole line belongs to the scalar; look at the next line, without
// consuming it, to decide whether the scalar continues there
1366  _line_progressed(s.len);
1367  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1368  next_peeked = next_peeked.trimr("\n\r");
1369  const size_t next_indentation = next_peeked.first_not_of(' ');
1370  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1371  if(next_indentation < indentation)
1372  {
1373  _c4dbgp("smaller indentation! scalar ended");
1374  goto ended_scalar;
1375  }
1376  else if(next_indentation == 0 && next_peeked.len > 0)
1377  {
// an unindented next line may be a document marker, which ends the scalar
1378  const char first = next_peeked.str[0];
1379  switch(first)
1380  {
1381  case '-':
1382  next_peeked = next_peeked.trimr("\n\r");
1383  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1384  if(_is_doc_begin_token(next_peeked))
1385  {
1386  _c4dbgp("doc begin! scalar ended");
1387  goto ended_scalar;
1388  }
1389  break;
1390  case '.':
1391  next_peeked = next_peeked.trimr("\n\r");
1392  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1393  if(_is_doc_end_token(next_peeked))
1394  {
1395  _c4dbgp("doc end! scalar ended");
1396  goto ended_scalar;
1397  }
1398  break;
1399  }
1400  }
1401  // load with next line
1402  _c4dbgp("next line!");
1403  if(!_finished_file())
1404  {
1405  _c4dbgp("next line!");
1406  _line_ended();
1407  _scan_line();
1408  }
1409  else
1410  {
1411  _c4dbgp("file finished!");
1412  goto ended_scalar;
1413  }
1414  s = m_evt_handler->m_curr->line_contents.rem;
// a scalar spanning more than one line is flagged as needing filtering
1415  needs_filter = true;
1416  }
1417 
1418 ended_scalar:
1419 
// the scalar is everything between the starting offset and the current
// offset, minus trailing whitespace and newlines
1420  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1421  sc->needs_filter = needs_filter;
1422 
1423  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1424 
1425  return true;
1426 }
1427 
1428 template<class EventHandler>
1429 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1430 {
1431  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1432  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1433  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1434  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1435  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK));
1436  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1437  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1438 }
1439 
1440 template<class EventHandler>
1441 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1442 {
1443  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1444  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1445  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1446  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK));
1447  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1448  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1449 }
1450 
1451 template<class EventHandler>
1452 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1453 {
1454  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1455  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1456 }
1457 
1458 
1459 //-----------------------------------------------------------------------------
1460 
1461 template<class EventHandler>
1462 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1463 {
1464  substr rem{}; // declare here because of the goto
1465  size_t nlpos{}; // declare here because of the goto
1466  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1467  if(pos >= m_buf.len)
1468  goto next_is_empty;
1469 
1470  // look for the next newline chars, and jump to the right of those
1471  rem = from_next_line(m_buf.sub(pos));
1472  if(rem.empty())
1473  goto next_is_empty;
1474 
1475  // now get everything up to and including the following newline chars
1476  nlpos = rem.first_of("\r\n");
1477  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1478  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1479  rem = rem.left_of(nlpos, /*include_pos*/true);
1480 
1481  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1482  return rem;
1483 
1484 next_is_empty:
1485  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1486  return {};
1487 }
1488 
1489 //-----------------------------------------------------------------------------
1490 
1491 template<class EventHandler>
1492 void ParseEngine<EventHandler>::_scan_line()
1493 {
1494  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1495  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1496  else
1497  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf.last(0), 0);
1498 }
1499 
1500 template<class EventHandler>
1501 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1502 {
1503  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1504  m_evt_handler->m_curr->pos.line,
1505  m_evt_handler->m_curr->line_contents.full.len,
1506  ahead, m_evt_handler->m_curr->pos.col,
1507  m_evt_handler->m_curr->pos.col+ahead,
1508  m_evt_handler->m_curr->pos.offset,
1509  m_evt_handler->m_curr->pos.offset+ahead);
1510  m_evt_handler->m_curr->pos.offset += ahead;
1511  m_evt_handler->m_curr->pos.col += ahead;
1512  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1);
1513  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1514 }
1515 
1516 template<class EventHandler>
1517 void ParseEngine<EventHandler>::_line_ended()
1518 {
1519  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1520  m_evt_handler->m_curr->pos.line,
1521  m_evt_handler->m_curr->line_contents.full.len,
1522  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1523  m_evt_handler->m_curr->pos.col, 1);
1524  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1);
1525  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1526  ++m_evt_handler->m_curr->pos.line;
1527  m_evt_handler->m_curr->pos.col = 1;
1528 }
1529 
1530 template<class EventHandler>
1531 void ParseEngine<EventHandler>::_line_ended_undo()
1532 {
1533  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1534  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1535  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols);
1536  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1537  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1538  m_evt_handler->m_curr->pos.offset -= delta;
1539  --m_evt_handler->m_curr->pos.line;
1540  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1541  // don't forget to undo also the changes to the remainder of the line
1542  //_RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1543  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1544 }
1545 
1546 
1547 //-----------------------------------------------------------------------------
1548 template<class EventHandler>
1549 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1550 {
1551  m_evt_handler->m_curr->indref = indentation;
1552  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1553 }
1554 
1555 template<class EventHandler>
1556 void ParseEngine<EventHandler>::_save_indentation()
1557 {
1558  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
1559  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1560  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1561 }
1562 
1563 
1564 //-----------------------------------------------------------------------------
1565 
1566 template<class EventHandler>
1567 void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1568 {
1569  if(_maybe_scan_following_colon())
1570  {
1571  _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1572  m_evt_handler->actually_val_is_first_key_of_new_map_block();
1573  addrem_flags(RMAP|RVAL|RBLCK, RKCL|RUNK);
1574  _set_indentation(orig_indent);
1575  _maybe_skip_whitespace_tokens();
1576  }
1577 }
1578 
1579 template<class EventHandler>
1580 void ParseEngine<EventHandler>::_end_map_flow()
1581 {
1582  bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1583  size_t orig_indent = m_evt_handler->m_curr->indref;
1584  _c4dbgpf("mapflow: end, multiline={}", multiline);
1585  m_evt_handler->end_map_flow(multiline);
1586  if(has_none(RFLOW) && (has_any(RUNK|RSEQ) || m_was_inside_qmrk))
1587  _flow_container_was_a_key(orig_indent);
1588 }
1589 
1590 template<class EventHandler>
1591 void ParseEngine<EventHandler>::_end_seq_flow()
1592 {
1593  bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1594  size_t orig_indent = m_evt_handler->m_curr->indref;
1595  _c4dbgpf("seqflow: end, multiline={}", multiline);
1596  m_evt_handler->end_seq_flow(multiline);
1597  if(has_none(RFLOW) && (has_any(RUNK|RSEQ) || m_was_inside_qmrk))
1598  _flow_container_was_a_key(orig_indent);
1599 }
1600 
1601 template<class EventHandler>
1602 void ParseEngine<EventHandler>::_end_map_blck()
1603 {
1604  _c4dbgp("mapblck: end");
1605  if(has_any(RKCL|RVAL))
1606  {
1607  _c4dbgp("mapblck: set missing val");
1608  _handle_annotations_before_blck_val_scalar();
1609  m_evt_handler->set_val_scalar_plain_empty();
1610  }
1611  else if(has_any(QMRK))
1612  {
1613  _c4dbgp("mapblck: set missing keyval");
1614  _handle_annotations_before_blck_key_scalar();
1615  m_evt_handler->set_key_scalar_plain_empty();
1616  _handle_annotations_before_blck_val_scalar();
1617  m_evt_handler->set_val_scalar_plain_empty();
1618  }
1619  m_evt_handler->end_map_block();
1620 }
1621 
1622 template<class EventHandler>
1623 void ParseEngine<EventHandler>::_end_seq_blck()
1624 {
1625  if(has_any(RVAL))
1626  {
1627  _c4dbgp("seqblck: set missing val");
1628  _handle_annotations_before_blck_val_scalar();
1629  m_evt_handler->set_val_scalar_plain_empty();
1630  }
1631  m_evt_handler->end_seq_block();
1632 }
1633 
1634 template<class EventHandler>
1635 void ParseEngine<EventHandler>::_end2_map()
1636 {
1637  _c4dbgp("map: end");
1638  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1639  if(has_any(RBLCK))
1640  {
1641  _end_map_blck();
1642  }
1643  else
1644  {
1645  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1646  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1647  m_evt_handler->_pop();
1648  }
1649 }
1650 
1651 template<class EventHandler>
1652 void ParseEngine<EventHandler>::_end2_seq()
1653 {
1654  _c4dbgp("seq: end");
1655  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1656  if(has_any(RBLCK))
1657  {
1658  _end_seq_blck();
1659  }
1660  else
1661  {
1662  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1663  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1664  m_evt_handler->_pop();
1665  }
1666 }
1667 
/** Begin a document: mark the document as empty so far, add the RDOC
 * flag, notify the handler through begin_doc(), and reset the reference
 * indentation of the (then) current state to 0.
 */
1668 template<class EventHandler>
1669 void ParseEngine<EventHandler>::_begin2_doc()
1670 {
1671  _c4dbgp("begin_doc");
1672  m_doc_empty = true;
1673  add_flags(RDOC);
1674  m_evt_handler->begin_doc();
// m_curr is read again here, after the handler call
1675  m_evt_handler->m_curr->indref = 0; // ? (the original author marked this reset as uncertain)
1676 }
1677 
/** Begin an explicit document: mark the document as empty so far, add the
 * RDOC flag, notify the handler through begin_doc_expl(), and reset the
 * reference indentation of the (then) current state to 0.
 */
1678 template<class EventHandler>
1679 void ParseEngine<EventHandler>::_begin2_doc_expl()
1680 {
1681  _c4dbgp("begin_doc_expl");
1682  m_doc_empty = true;
1683  add_flags(RDOC);
1684  m_evt_handler->begin_doc_expl();
// m_curr is read again here, after the handler call
1685  m_evt_handler->m_curr->indref = 0; // ? (the original author marked this reset as uncertain)
1686 }
1687 
1688 template<class EventHandler>
1689 void ParseEngine<EventHandler>::_end2_doc()
1690 {
1691  _c4dbgp("doc: end");
1692  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1693  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1694  {
1695  _c4dbgp("doc was empty; add empty val");
1696  _handle_annotations_before_blck_val_scalar();
1697  m_evt_handler->set_val_scalar_plain_empty();
1698  }
1699  m_evt_handler->end_doc();
1700  m_bom_len = 0;
1701 }
1702 
1703 template<class EventHandler>
1704 void ParseEngine<EventHandler>::_end2_doc_expl()
1705 {
1706  _c4dbgp("doc: end");
1707  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1708  {
1709  _c4dbgp("doc: no children; add empty val");
1710  _handle_annotations_before_blck_val_scalar();
1711  m_evt_handler->set_val_scalar_plain_empty();
1712  }
1713  m_evt_handler->end_doc_expl();
1714  m_bom_len = 0;
1715 }
1716 
1717 template<class EventHandler>
1718 void ParseEngine<EventHandler>::_maybe_begin_doc()
1719 {
1720  if(has_none(RDOC))
1721  {
1722  _c4dbgp("doc must be started");
1723  _begin2_doc();
1724  }
1725 }
1726 template<class EventHandler>
1727 void ParseEngine<EventHandler>::_maybe_end_doc()
1728 {
1729  if(has_any(RDOC))
1730  {
1731  _c4dbgp("doc must be finished");
1732  _end2_doc();
1733  }
1734  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1735  {
1736  _c4dbgp("no doc to finish, but pending annotations");
1737  m_evt_handler->begin_doc();
1738  _handle_annotations_before_blck_val_scalar();
1739  m_evt_handler->set_val_scalar_plain_empty();
1740  m_evt_handler->end_doc();
1741  }
1742 }
1743 
1744 template<class EventHandler>
1745 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1746 {
1747  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1748  if(m_evt_handler->m_stack[0].flags & RDOC)
1749  {
1750  _c4dbgp("root is RDOC");
1751  if(m_evt_handler->m_curr->level != 0)
1752  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1753  }
1754  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1755  {
1756  _c4dbgp("root is STREAM");
1757  if(m_evt_handler->m_curr->level != 1)
1758  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1759  }
1760  else
1761  {
1762  _c4err("internal error");
1763  }
1764  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1765 }
1766 
/** End the current document from wherever the parser is: pop the stack
 * down to the document state, end the document explicitly, and then set
 * the flags RUNK|RTOP|NDOC while removing RMAP|RSEQ|RDOC.
 */
1767 template<class EventHandler>
1768 void ParseEngine<EventHandler>::_end_doc_suddenly()
1769 {
1770  _c4dbgp("end doc suddenly");
1771  _end_doc_suddenly__pop();
1772  _end2_doc_expl();
1773  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1774 }
1775 
/** Start a new explicit document from wherever the parser is: pop the
 * stack down to the document state, end that document, and begin a new
 * explicit document.
 */
1776 template<class EventHandler>
1777 void ParseEngine<EventHandler>::_start_doc_suddenly()
1778 {
1779  _c4dbgp("start doc suddenly");
1780  _end_doc_suddenly__pop();
1781  _end2_doc();
1782  _begin2_doc_expl();
1783 }
1784 
1785 template<class EventHandler>
1786 void ParseEngine<EventHandler>::_end_stream()
1787 {
1788  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1789  if(has_all(RSEQ|RFLOW))
1790  _c4err("missing terminating ]");
1791  else if(has_all(RMAP|RFLOW))
1792  _c4err("missing terminating }");
1793  if(m_evt_handler->m_stack.size() > 1)
1794  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1795  if(has_all(RDOC))
1796  {
1797  _end2_doc();
1798  }
1799  else if(has_all(RTOP|RUNK))
1800  {
1801  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1802  {
1803  if(m_doc_empty)
1804  {
1805  m_evt_handler->begin_doc();
1806  _handle_annotations_before_blck_val_scalar();
1807  m_evt_handler->set_val_scalar_plain_empty();
1808  m_evt_handler->end_doc();
1809  }
1810  }
1811  }
1812  m_evt_handler->end_stream();
1813 }
1814 
1815 
1816 template<class EventHandler>
1817 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1818 {
1819  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1820  while(m_evt_handler->m_curr != popto)
1821  {
1822  if(has_any(RSEQ))
1823  {
1824  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1825  _end2_seq();
1826  }
1827  else if(has_any(RMAP))
1828  {
1829  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1830  _end2_map();
1831  }
1832  else
1833  {
1834  break;
1835  }
1836  }
1837  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1838 }
1839 
1840 template<class EventHandler>
1841 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1842 {
1843  // search the stack frame to jump to based on its indentation
1844  using state_type = typename EventHandler::state;
1845  state_type const* popto = nullptr;
1846  auto &stack = m_evt_handler->m_stack;
1847  _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1848  _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1849  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1850  #ifdef RYML_DBG
1851  _print_state_stack();
1852  #endif
1853  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1854  {
1855  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1856  if(s->indref == ind)
1857  {
1858  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1859  popto = s;
1860  break;
1861  }
1862  }
1863  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1864  {
1865  _c4err("parse error: incorrect indentation?");
1866  }
1867  _handle_indentation_pop(popto);
1868 }
1869 
/** Pop out of a block map to an enclosing state whose reference
 * indentation equals the indentation of the current line.
 *
 * The stack is searched from the state just below the current one towards
 * the bottom, excluding the bottom state itself. The search stops at the
 * first state with a smaller indentation. Among states with the same
 * indentation, the search keeps going towards the bottom (so a deeper
 * state can replace an earlier candidate), except that it stops:
 *   - before taking a top-level state that is neither map nor sequence,
 *     when a candidate was already found
 *   - at a block sequence, when the rest of the current line starts with
 *     a '-' block token (the sequence is then an indentless parent)
 *
 * It is a parse error when no candidate was found, or when the candidate
 * is not strictly below the current state (by address and level).
 */
1870 template<class EventHandler>
1871 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1872 {
1873  // search the stack frame to jump to based on its indentation
1874  using state_type = typename EventHandler::state;
1875  auto &stack = m_evt_handler->m_stack;
1876  _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1877  _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1878  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1879  state_type const* popto = nullptr;
// flagbuf_ exists only in debug builds; it is referenced below only from
// within a debug-print macro
1880  #ifdef RYML_DBG
1881  char flagbuf_[128];
1882  _print_state_stack(flagbuf_);
1883  #endif
1884  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1885  {
1886  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1887  if(s->indref < ind)
1888  {
// states from here down are less indented than the line: stop searching
1889  break;
1890  }
1891  else if(s->indref == ind)
1892  {
1893  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
// keep the candidate found so far rather than a top-level state that is
// neither a map nor a sequence
1894  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1895  {
1896  break;
1897  }
1898  popto = s;
1899  if(has_all(RSEQ|RBLCK, s))
1900  {
// a block sequence at this indentation is the final target when the
// rest of the line is a '-' entry of it
1901  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1902  const size_t first = rem.first_not_of(' ');
1903  _RYML_ASSERT_BASIC_(stack.m_callbacks, first == ind || first == npos);
1904  rem = rem.right_of(first, true);
1905  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1906  if(rem.begins_with('-') && _is_blck_token(rem))
1907  {
1908  _c4dbgp("parent was indentless seq");
1909  break;
1910  }
1911  }
1912  }
1913  }
1914  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1915  {
1916  _c4err("parse error: incorrect indentation?");
1917  }
1918  _handle_indentation_pop(popto);
1919 }
1920 
1921 
1922 //-----------------------------------------------------------------------------
1923 template<class EventHandler>
1924 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1925 {
1926  // quoted scalars can spread over multiple lines!
1927  // nice explanation here: http://yaml-multiline.info/
1928 
1929  // a span to the end of the file
1930  size_t b = m_evt_handler->m_curr->pos.offset;
1931  substr s = m_buf.sub(b);
1932  if(s.begins_with(' '))
1933  {
1934  s = s.triml(' ');
1935  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1936  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1937  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1938  }
1939  b = m_evt_handler->m_curr->pos.offset; // take this into account
1940  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1941 
1942  // skip the opening quote
1943  _line_progressed(1);
1944  s = s.sub(1);
1945 
1946  bool needs_filter = false;
1947 
1948  size_t numlines = 1; // we already have one line
1949  size_t pos = npos; // find the pos of the matching quote
1950  while( ! _finished_file())
1951  {
1952  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1953  bool line_is_blank = true;
1954  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1955  for(size_t i = 0; i < line.len; ++i)
1956  {
1957  const char curr = line.str[i];
1958  if(curr == '\'') // single quotes are escaped with two single quotes
1959  {
1960  const char next = i+1 < line.len ? line.str[i+1] : '~';
1961  if(next != '\'') // so just look for the first quote
1962  { // without another after it
1963  pos = i;
1964  break;
1965  }
1966  else
1967  {
1968  needs_filter = true; // needs filter to remove escaped quotes
1969  ++i; // skip the escaped quote
1970  }
1971  }
1972  else if(curr != ' ')
1973  {
1974  line_is_blank = false;
1975  }
1976  }
1977 
1978  // leading whitespace also needs filtering
1979  needs_filter = needs_filter
1980  || (numlines > 1)
1981  || line_is_blank
1982  || (_at_line_begin() && line.begins_with(' '));
1983 
1984  if(pos == npos)
1985  {
1986  _line_progressed(line.len);
1987  ++numlines;
1988  }
1989  else
1990  {
1991  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1992  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1993  _line_progressed(pos + 1); // progress beyond the quote
1994  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1995  break;
1996  }
1997 
1998  _line_ended();
1999  _scan_line();
2000  }
2001 
2002  if(pos == npos)
2003  {
2004  _c4err("reached end of file while looking for closing quote");
2005  }
2006  else
2007  {
2008  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
2009  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2010  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
2011  s = s.sub(0, pos-1);
2012  }
2013 
2014  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2015 
2016  return ScannedScalar { s, needs_filter };
2017 }
2018 
2019 
2020 //-----------------------------------------------------------------------------
// Scan a double-quoted scalar starting at the current parse position.
//
// Spaces before the opening quote are consumed, then lines are read until
// a '"' that is not part of a backslash escape is found. The returned
// ScannedScalar holds the raw span between the quotes and a flag telling
// whether it still has to be filtered (any backslash, more than one line,
// an all-space line, or leading spaces at the start of a line). Reaching
// the end of the file without a closing quote is reported through _c4err.
2021 template<class EventHandler>
2022 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2023 {
2024  // quoted scalars can spread over multiple lines!
2025  // nice explanation here: http://yaml-multiline.info/
2026 
2027  // a span to the end of the file
2028  size_t b = m_evt_handler->m_curr->pos.offset;
2029  substr s = m_buf.sub(b);
2030  if(s.begins_with(' '))
2031  {
  // drop the spaces before the opening quote and advance the line position by the same amount
2032  s = s.triml(' ');
2033  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
2034  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
2035  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
2036  }
  // from here on b is the offset of the opening quote
2037  b = m_evt_handler->m_curr->pos.offset; // take this into account
2038  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
2039 
2040  // skip the opening quote
2041  _line_progressed(1);
2042  s = s.sub(1);
2043 
2044  bool needs_filter = false;
2045 
2046  size_t numlines = 1; // we already have one line
2047  size_t pos = npos; // find the pos of the matching quote
  // NOTE(review): st caches m_curr for the whole loop; this presumes the calls below do not reseat m_curr -- confirm
2048  auto *st = m_evt_handler->m_curr; // prevent erroneous hoist of the assignment out of the loop
2049  while( ! _finished_file())
2050  {
2051  const csubstr line = st->line_contents.rem;
2052  #if defined(__GNUC__) && (__GNUC__ == 11 || __GNUC__ == 8)
2053  C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
2054  #endif
2055  bool line_is_blank = true;
2056  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", st->pos.line, line);
2057  for(size_t i = 0; i < line.len; ++i)
2058  {
2059  const char curr = line.str[i];
2060  if(curr != ' ')
2061  line_is_blank = false;
2062  // every \ is an escape
2063  if(curr == '\\')
2064  {
  // '~' is a placeholder for "no next character on this line"
2065  const char next = i+1 < line.len ? line.str[i+1] : '~';
2066  needs_filter = true;
  // step over an escaped quote or backslash so it is not examined again as a terminator or as a new escape
2067  if(next == '"' || next == '\\')
2068  ++i;
2069  }
2070  else if(curr == '"')
2071  {
  // first quote that is not escaped: this closes the scalar
2072  pos = i;
2073  break;
2074  }
2075  }
2076 
2077  // leading whitespace also needs filtering
2078  needs_filter = needs_filter
2079  || (numlines > 1)
2080  || line_is_blank
2081  || (_at_line_begin() && line.begins_with(' '));
2082 
2083  if(pos == npos)
2084  {
  // no closing quote on this line: consume all of it
2085  _line_progressed(line.len);
2086  ++numlines;
2087  }
2088  else
2089  {
  // pos is unsigned, so the first half of this check always holds
2090  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2091  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[st->pos.offset + pos] == '"');
2092  _line_progressed(pos + 1); // progress beyond the quote
  // pos was line-relative; it becomes the distance from the opening quote (offset b) to the closing quote
2093  pos = st->pos.offset - b - 1; // but we stop before it
2094  break;
2095  }
2096 
  // move on to the next line of the scalar
2097  _line_ended();
2098  _scan_line();
2099  }
2100 
2101  if(pos == npos)
2102  {
2103  _c4err("reached end of file looking for closing quote");
2104  }
2105  else
2106  {
2107  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
2108  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
2109  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
  // s starts right after the opening quote, so the content is pos-1 characters long
2110  s = s.sub(0, pos-1);
2111  }
2112 
2113  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2114 
2115  return ScannedScalar{s, needs_filter};
2116 }
2117 
2118 
2119 //-----------------------------------------------------------------------------
// Scan a block scalar (literal '|' or folded '>') and store it in *sb.
//
// The header on the current line is parsed first: an optional chomping
// indicator ('-' strip, '+' keep, clip otherwise) and an optional
// single-digit indentation indicator, to which the current state's indref
// is added. Whole lines are then appended to a raw block until one of the
// termination conditions below is met or the file ends. When the header
// gives no indentation, it is taken from the first non-empty line, with a
// provisional value tracked across leading empty lines.
//
// @param sb     receives the raw (unfiltered) block, its indentation and
//               its chomping mode
// @param indref reference indentation used to decide where the block
//               ends; asserted to differ from npos
2120 template<class EventHandler>
2121 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2122 {
2123  _c4dbgpf("blck: indref={}", indref);
2124  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, indref != npos);
2125 
2126  // nice explanation here: http://yaml-multiline.info/
2127  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2128  csubstr trimmed = s.triml(' ');
2129  if(trimmed.str > s.str)
2130  {
2131  _c4dbgp("skipping whitespace");
2132  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2133  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2134  s = trimmed;
2135  }
2136  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2137 
2138  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2139 
2140  // parse the spec
2141  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2142  size_t indentation = npos; // have to find out if no spec is given
2143  csubstr digits;
2144  if(s.len > 1)
2145  {
2146  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
  // t is everything after the '|' or '>'
2147  csubstr t = s.sub(1);
2148  _c4dbgpf("blck: spec is multichar: '{}'", t);
2149  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2150  size_t pos = t.first_of("-+");
2151  _c4dbgpf("blck: spec chomp char at {}", pos);
2152  if(pos != npos)
2153  {
2154  if(t[pos] == '-')
2155  chomp = CHOMP_STRIP;
2156  else if(t[pos] == '+')
2157  chomp = CHOMP_KEEP;
  // chomp char first: look for digits after it; otherwise look for digits before it
2158  if(pos == 0)
2159  t = t.sub(1);
2160  else
2161  t = t.first(pos);
2162  }
2163  // from here to the end, only digits are considered
2164  digits = t.left_of(t.first_not_of("0123456789"));
2165  if( ! digits.empty())
2166  {
  // only a single digit is accepted as indentation indicator
2167  if(C4_UNLIKELY(digits.len > 1))
2168  _c4err("parse error: invalid indentation");
2169  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2170  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2171  _c4err("parse error: could not read indentation as decimal");
2172  if(C4_UNLIKELY( ! indentation))
2173  _c4err("parse error: null indentation");
  // NOTE(review): the debug message shows indentation+indref (the parameter) while the statement below adds m_curr->indref -- confirm both are meant to be the same value
2174  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2175  indentation += m_evt_handler->m_curr->indref;
2176  }
2177  }
2178 
2179  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2180 
2181  // finish the current line
2182  _line_progressed(s.len);
2183  _line_ended();
2184  _scan_line();
2185 
2186  // start with a zero-length block, already pointing at the right place
2187  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2188  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str);
2189 
2190  // read every full line into a raw block,
2191  // from which newlines are to be stripped as needed.
2192  //
2193  // If no explicit indentation was given, pick it from the first
2194  // non-empty line. See
2195  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2196  size_t num_lines = 0;
2197  size_t first = m_evt_handler->m_curr->pos.line;
2198  size_t provisional_indentation = npos;
2199  LineContents lc;
2200  while(( ! _finished_file()))
2201  {
2202  // peek next line, but do not advance immediately
2203  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2204  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2205  C4_DONT_OPTIMIZE(lc.rem);
2206  #endif
2207  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.rem.trimr("\r\n").len, lc.rem.trimr("\r\n"));
2208  // evaluate termination conditions
2209  if(indentation != npos)
2210  {
  // the block indentation is already known (from the header or an earlier line)
2211  _c4dbgpf("blck: indentation={}", indentation);
2212  // stop when the line is deindented and not empty
2213  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2214  {
2215  if(raw_block.len)
2216  {
2217  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2218  }
2219  else
2220  {
2221  _c4err("indentation decreased without any scalar");
2222  }
2223  break;
2224  }
2225  else if(indentation == 0)
2226  {
  // at zero indentation nothing is deindented, so a document token is what ends the block
2227  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2228  if(_is_doc_token(lc.rem))
2229  {
2230  _c4dbgp("blck: stop. indentation=0 and doc ended");
2231  break;
2232  }
2233  }
2234  }
2235  else
2236  {
  // the block indentation is still unknown: try to derive it from this line
2237  const size_t fns = lc.rem.first_not_of(' ');
2238  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2239  if(fns != npos) // non-empty line
2240  {
  // NOTE(review): the ')' two lines below has no visible opening and this listing's numbering skips a line here; presumably a macro invocation (one of the _RYML_WITH*_TAB_TOKENS family) opens on the missing line -- confirm against the original file
2242  if(C4_UNLIKELY(lc.full.begins_with('\t')))
2243  _c4err("parse error");
2244  )
2245  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2246  if(provisional_indentation == npos)
2247  {
  // no empty lines were seen before this first non-empty line
2248  if(lc.indentation < indref)
2249  {
2250  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2251  if(raw_block.len == 0)
2252  {
2253  _c4dbgp("blck: was empty, undo next line");
2254  _line_ended_undo();
2255  }
2256  break;
2257  }
2258  else if(lc.indentation == m_evt_handler->m_curr->indref)
2259  {
  // a line at the container's own indentation ends the block when reading a seq or map
2260  if(has_any(RSEQ|RMAP))
2261  {
2262  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2263  break;
2264  }
2265  }
2266  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2267  indentation = lc.indentation;
2268  }
2269  else
2270  {
  // empty lines came first: this line must be at least as indented as the provisional value
2271  if(lc.indentation >= provisional_indentation)
2272  {
2273  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2274  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2275  indentation = lc.indentation;
2276  }
2277  else
2278  {
2279  break;
2280  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2281  }
2282  }
2283  }
2284  else // empty line
2285  {
2286  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2287  if(provisional_indentation != npos)
2288  {
  // a longer all-space line raises the provisional indentation
2289  if(lc.rem.len >= provisional_indentation)
2290  {
2291  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2292  provisional_indentation = lc.rem.len;
2293  }
2294  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2295  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2296  {
2297  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2298  provisional_indentation = lc.indentation;
2299  }
2300  #endif
2301  }
2302  else
2303  {
  // first empty line: seed the provisional value; has_any() yields a bool that converts to 0 or 1
2304  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2305  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2306  if(provisional_indentation == npos)
2307  {
  // lc.indentation was npos: fall back to the length of the line
2308  provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2309  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2310  }
  // never go below the reference indentation
2311  if(provisional_indentation < indref)
2312  {
2313  provisional_indentation = indref;
2314  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2315  }
2316  }
2317  }
2318  }
2319  // advance now that we know the folded scalar continues
2320  m_evt_handler->m_curr->line_contents = lc;
2321  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
  // the block grows by the full line length, not just by its rem part
2322  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2323  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2324  _line_ended();
2325  ++num_lines;
2326  }
2327  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2328  C4_UNUSED(num_lines);
2329  C4_UNUSED(first);
2330 
2331  if(indentation == npos)
2332  {
  // no non-empty line fixed the indentation: use the provisional value
2333  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2334  indentation = provisional_indentation;
2335  }
2336 
  // undo the last _line_ended() issued inside the loop when at least one line was appended
2337  if(num_lines)
2338  _line_ended_undo();
2339 
2340  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2341 
  // hand the results back to the caller
2342  sb->scalar = raw_block;
2343  sb->indentation = indentation;
2344  sb->chomp = chomp;
2345 }
2346 
2347 
2348 //-----------------------------------------------------------------------------
2349 //-----------------------------------------------------------------------------
2350 //-----------------------------------------------------------------------------
2351 /** @cond dev */
2352 
2353 // a debugging scaffold:
2354 #if 0
2355 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2356 #else
2357 #define _c4dbgfws(...)
2358 #endif
2359 
2360 template<class EventHandler>
2361 template<class FilterProcessor>
2362 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2363 {
2364  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2365  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2366 
2367  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2368  if(first_pos != npos)
2369  {
2370  const char first_char = proc.src[first_pos];
2371  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2372  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2373  {
2374  _c4dbgfws("whitespace is trailing on line", "");
2375  proc.skip(first_pos - proc.rpos);
2376  }
2377  else // a legit whitespace
2378  {
2379  proc.copy();
2380  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2381  }
2382  return true;
2383  }
2384  _c4dbgfws("whitespace is trailing on line", "");
2385  return false;
2386 }
2387 
2388 template<class EventHandler>
2389 template<class FilterProcessor>
2390 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2391 {
2392  if(!_filter_ws_handle_to_first_non_space(proc))
2393  {
2394  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2395  proc.copy(proc.src.len - proc.rpos);
2396  }
2397 }
2398 
2399 template<class EventHandler>
2400 template<class FilterProcessor>
2401 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2402 {
2403  if(!_filter_ws_handle_to_first_non_space(proc))
2404  {
2405  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2406  proc.skip(proc.src.len - proc.rpos);
2407  }
2408 }
2409 
2410 #undef _c4dbgfws
2411 
2412 
2413 //-----------------------------------------------------------------------------
2414 //-----------------------------------------------------------------------------
2415 //-----------------------------------------------------------------------------
2416 /* plain scalars */
2417 
2418 // a debugging scaffold:
2419 #if 0
2420 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2421 #else
2422 #define _c4dbgfps(fmt, ...)
2423 #endif
2424 
2425 template<class EventHandler>
2426 template<class FilterProcessor>
2427 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2428 {
2429  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2430 
2431  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2432  size_t ii = proc.rpos;
2433  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2434  if(numnl_following)
2435  {
2436  proc.set('\n', numnl_following);
2437  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2438  }
2439  else
2440  {
2441  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2442  if(ret != npos)
2443  {
2444  proc.set(' ');
2445  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2446  }
2447  else
2448  {
2449  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2450  ii = proc.src.len;
2451  }
2452  }
2453  proc.rpos = ii;
2454 }
2455 
2456 template<class EventHandler>
2457 template<class FilterProcessor>
2458 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2459 {
2460  _RYML_ASSERT_BASIC_(this->callbacks(), indentation != npos);
2461  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2462 
2463  while(proc.has_more_chars())
2464  {
2465  const char curr = proc.curr();
2466  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2467  switch(curr)
2468  {
2469  case ' ':
2470  _RYML_WITH_TAB_TOKENS(case '\t':)
2471  _c4dbgfps("whitespace", curr);
2472  _filter_ws_skip_trailing(proc);
2473  break;
2474  case '\n':
2475  _c4dbgfps("newline", curr);
2476  _filter_nl_plain(proc, /*indentation*/indentation);
2477  break;
2478  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2479  _c4dbgfps("carriage return, ignore", curr);
2480  proc.skip();
2481  break;
2482  default:
2483  proc.copy();
2484  break;
2485  }
2486  }
2487 
2488  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2489 
2490  return proc.result();
2491 }
2492 
2493 #undef _c4dbgfps
2494 
2495 
2496 template<class EventHandler>
2497 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2498 {
2499  FilterProcessorSrcDst proc(scalar, dst);
2500  return _filter_plain(proc, indentation);
2501 }
2502 
2503 template<class EventHandler>
2504 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2505 {
2506  FilterProcessorInplaceEndExtending proc(dst, cap);
2507  return _filter_plain(proc, indentation);
2508 }
2509 
2510 
2511 //-----------------------------------------------------------------------------
2512 //-----------------------------------------------------------------------------
2513 //-----------------------------------------------------------------------------
2514 /* single quoted */
2515 
2516 // a debugging scaffold:
2517 #if 0
2518 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2519 #else
2520 #define _c4dbgfsq(fmt, ...)
2521 #endif
2522 
2523 template<class EventHandler>
2524 template<class FilterProcessor>
2525 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2526 {
2527  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2528 
2529  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2530  size_t ii = proc.rpos;
2531  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2532  if(numnl_following)
2533  {
2534  proc.set('\n', numnl_following);
2535  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2536  }
2537  else
2538  {
2539  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2540  if(ret != npos)
2541  {
2542  proc.set(' ');
2543  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2544  }
2545  else
2546  {
2547  proc.set(' ');
2548  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2549  }
2550  }
2551  proc.rpos = ii;
2552 }
2553 
2554 template<class EventHandler>
2555 template<class FilterProcessor>
2556 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2557 {
2558  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2559 
2560  // from the YAML spec for double-quoted scalars:
2561  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2562  while(proc.has_more_chars())
2563  {
2564  const char curr = proc.curr();
2565  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2566  switch(curr)
2567  {
2568  case ' ':
2569  case '\t':
2570  _c4dbgfsq("whitespace", curr);
2571  _filter_ws_copy_trailing(proc);
2572  break;
2573  case '\n':
2574  _c4dbgfsq("newline", curr);
2575  _filter_nl_squoted(proc);
2576  break;
2577  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2578  _c4dbgfsq("skip cr", curr);
2579  proc.skip();
2580  break;
2581  case '\'':
2582  _c4dbgfsq("squote", curr);
2583  if(proc.next() == '\'')
2584  {
2585  _c4dbgfsq("two consecutive squotes", curr);
2586  proc.skip();
2587  proc.copy();
2588  }
2589  else
2590  {
2591  _c4err("filter error");
2592  }
2593  break;
2594  default:
2595  proc.copy();
2596  break;
2597  }
2598  }
2599 
2600  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2601 
2602  return proc.result();
2603 }
2604 
2605 #undef _c4dbgfsq
2606 
2607 template<class EventHandler>
2608 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2609 {
2610  FilterProcessorSrcDst proc(scalar, dst);
2611  return _filter_squoted(proc);
2612 }
2613 
2614 template<class EventHandler>
2615 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2616 {
2617  FilterProcessorInplaceEndExtending proc(dst, cap);
2618  return _filter_squoted(proc);
2619 }
2620 
2621 
2622 //-----------------------------------------------------------------------------
2623 //-----------------------------------------------------------------------------
2624 //-----------------------------------------------------------------------------
2625 /* double quoted */
2626 
2627 // a debugging scaffold:
2628 #if 0
2629 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2630 #else
2631 #define _c4dbgfdq(...)
2632 #endif
2633 
2634 template<class EventHandler>
2635 template<class FilterProcessor>
2636 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2637 {
2638  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2639 
2640  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2641  size_t ii = proc.rpos;
2642  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2643  if(numnl_following)
2644  {
2645  proc.set('\n', numnl_following);
2646  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2647  }
2648  else
2649  {
2650  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2651  if(ret != npos)
2652  {
2653  proc.set(' ');
2654  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2655  }
2656  else
2657  {
2658  proc.set(' ');
2659  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2660  }
2661  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2662  {
2663  _c4dbgfdq("backslash at [{}]", ii);
2664  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2665  if(next == ' ' || next == '\t')
2666  {
2667  _c4dbgfdq("extend skip to backslash", "");
2668  ++ii;
2669  }
2670  }
2671  }
2672  proc.rpos = ii;
2673 }
2674 
2675 template<class EventHandler>
2676 template<class FilterProcessor>
2677 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2678 {
2679  char next = proc.next();
2680  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2681  if(next == '\r')
2682  {
2683  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2684  {
2685  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2686  next = '\n';
2687  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2688  }
2689  }
2690 
2691  if(next == '\n')
2692  {
2693  size_t ii = proc.rpos + 2;
2694  for( ; ii < proc.src.len; ++ii)
2695  {
2696  // skip leading whitespace
2697  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2698  ;
2699  else
2700  break;
2701  }
2702  proc.skip(ii - proc.rpos);
2703  }
2704  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2705  {
2706  // escapes for json compatibility
2707  proc.translate_esc(next);
2708  _c4dbgfdq("here, used '{}'", _c4prc(next));
2709  }
2710  else if(next == '\r')
2711  {
2712  proc.skip();
2713  }
2714  else if(next == 'n')
2715  {
2716  proc.translate_esc('\n');
2717  }
2718  else if(next == 'r')
2719  {
2720  proc.translate_esc('\r');
2721  }
2722  else if(next == 't')
2723  {
2724  proc.translate_esc('\t');
2725  }
2726  else if(next == '\\')
2727  {
2728  proc.translate_esc('\\');
2729  }
2730  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2731  {
2732  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2733  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2734  char readbuf[8];
2735  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2736  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2737  uint32_t codepoint_val = {};
2738  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2739  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2740  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2741  if(C4_UNLIKELY(numbytes == 0))
2742  _c4err("failed to decode code point={}", proc.rpos);
2743  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2744  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/3u);
2745  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2746  }
2747  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2748  {
2749  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2750  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2751  char readbuf[8];
2752  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2753  uint32_t codepoint_val = {};
2754  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2755  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2756  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2757  if(C4_UNLIKELY(numbytes == 0))
2758  _c4err("failed to decode code point={}", proc.rpos);
2759  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2760  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2761  }
2762  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2763  {
2764  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2765  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2766  char readbuf[8];
2767  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2768  uint32_t codepoint_val = {};
2769  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2770  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2771  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2772  if(C4_UNLIKELY(numbytes == 0))
2773  _c4err("failed to decode code point={}", proc.rpos);
2774  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2775  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2776  }
2777  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2778  else if(next == '0')
2779  {
2780  proc.translate_esc('\0');
2781  }
2782  else if(next == 'b') // backspace
2783  {
2784  proc.translate_esc('\b');
2785  }
2786  else if(next == 'f') // form feed
2787  {
2788  proc.translate_esc('\f');
2789  }
2790  else if(next == 'a') // bell character
2791  {
2792  proc.translate_esc('\a');
2793  }
2794  else if(next == 'v') // vertical tab
2795  {
2796  proc.translate_esc('\v');
2797  }
2798  else if(next == 'e') // escape character
2799  {
2800  proc.translate_esc('\x1b');
2801  }
2802  else if(next == '_') // unicode non breaking space \u00a0
2803  {
2804  // https://www.compart.com/en/unicode/U+00a0
2805  const char payload[] = {
2806  _RYML_CHCONST(-0x3e, 0xc2),
2807  _RYML_CHCONST(-0x60, 0xa0),
2808  };
2809  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2810  }
2811  else if(next == 'N') // unicode next line \u0085
2812  {
2813  // https://www.compart.com/en/unicode/U+0085
2814  const char payload[] = {
2815  _RYML_CHCONST(-0x3e, 0xc2),
2816  _RYML_CHCONST(-0x7b, 0x85),
2817  };
2818  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2819  }
2820  else if(next == 'L') // unicode line separator \u2028
2821  {
2822  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2823  const char payload[] = {
2824  _RYML_CHCONST(-0x1e, 0xe2),
2825  _RYML_CHCONST(-0x80, 0x80),
2826  _RYML_CHCONST(-0x58, 0xa8),
2827  };
2828  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2829  }
2830  else if(next == 'P') // unicode paragraph separator \u2029
2831  {
2832  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2833  const char payload[] = {
2834  _RYML_CHCONST(-0x1e, 0xe2),
2835  _RYML_CHCONST(-0x80, 0x80),
2836  _RYML_CHCONST(-0x57, 0xa9),
2837  };
2838  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2839  }
2840  else if(next == '\0')
2841  {
2842  proc.skip();
2843  }
2844  else
2845  {
2846  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2847  }
2848  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2849 }
2850 
2851 
2852 template<class EventHandler>
2853 template<class FilterProcessor>
2854 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2855 {
2856  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2857  // from the YAML spec for double-quoted scalars:
2858  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2859  while(proc.has_more_chars())
2860  {
2861  const char curr = proc.curr();
2862  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2863  switch(curr)
2864  {
2865  case ' ':
2866  case '\t':
2867  {
2868  _c4dbgfdq("whitespace", curr);
2869  _filter_ws_copy_trailing(proc);
2870  break;
2871  }
2872  case '\n':
2873  {
2874  _c4dbgfdq("newline", curr);
2875  _filter_nl_dquoted(proc);
2876  break;
2877  }
2878  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2879  {
2880  _c4dbgfdq("carriage return, ignore", curr);
2881  proc.skip();
2882  break;
2883  }
2884  case '\\':
2885  {
2886  _filter_dquoted_backslash(proc);
2887  break;
2888  }
2889  default:
2890  {
2891  proc.copy();
2892  break;
2893  }
2894  }
2895  }
2896  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2897  return proc.result();
2898 }
2899 
2900 #undef _c4dbgfdq
2901 
2902 
2903 template<class EventHandler>
2904 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2905 {
2906  FilterProcessorSrcDst proc(scalar, dst);
2907  return _filter_dquoted(proc);
2908 }
2909 
2910 template<class EventHandler>
2911 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2912 {
2913  FilterProcessorInplaceMidExtending proc(dst, cap);
2914  return _filter_dquoted(proc);
2915 }
2916 
2917 
2918 //-----------------------------------------------------------------------------
2919 //-----------------------------------------------------------------------------
2920 //-----------------------------------------------------------------------------
2921 // block filtering helpers
2922 
2923 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
2924 {
2925  if(indentation + 1 > s.len)
2926  return npos;
2927  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
2928  {
2929  if(s.str[i] == '\n')
2930  {
2931  csubstr rem = s.sub(i + 1);
2932  size_t first = rem.first_not_of(' ');
2933  first = (first != npos) ? first : rem.len;
2934  if(first > indentation)
2935  return i;
2936  }
2937  }
2938  return npos;
2939 }
2940 
2941 template<class EventHandler>
2942 template<class FilterProcessor>
2943 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2944 {
2945  _RYML_ASSERT_BASIC_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2946  _RYML_ASSERT_BASIC_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2947 
2948  // a debugging scaffold:
2949  #if 0
2950  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2951  #else
2952  #define _c4dbgchomp(...)
2953  #endif
2954 
2955  // advance to the last line having spaces beyond the indentation
2956  {
2957  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2958  if(last != npos)
2959  {
2960  _c4dbgchomp("found newline and larger indentation. last={}", last);
2961  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2962  _RYML_ASSERT_BASIC_(this->callbacks(), last <= proc.src.len);
2963  // remove indentation spaces, copy the rest
2964  while((proc.rpos < last) && proc.has_more_chars())
2965  {
2966  const char curr = proc.curr();
2967  _c4dbgchomp("curr='{}'", _c4prc(curr));
2968  switch(curr)
2969  {
2970  case '\n':
2971  {
2972  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2973  proc.copy();
2974  // are there spaces after the newline?
2975  csubstr at_next_line = proc.rem();
2976  if(at_next_line.begins_with(' '))
2977  {
2978  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2979  // there are spaces.
2980  size_t first_non_space = at_next_line.first_not_of(' ');
2981  _c4dbgchomp("first_non_space={}", first_non_space);
2982  if(first_non_space == npos)
2983  {
2984  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2985  first_non_space = at_next_line.len;
2986  }
2987  if(first_non_space <= indentation)
2988  {
2989  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2990  proc.skip(first_non_space);
2991  }
2992  else
2993  {
2994  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2995  proc.skip(indentation);
2996  // copy the spaces after the indentation
2997  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2998  proc.copy(first_non_space - indentation);
2999  }
3000  }
3001  break;
3002  }
3003  case '\r':
3004  proc.skip();
3005  break;
3006  default:
3007  _c4err("parse error");
3008  break;
3009  }
3010  }
3011  }
3012  }
3013 
3014  // from now on, we only have line ends (or indentation spaces)
3015  switch(chomp)
3016  {
3017  case CHOMP_CLIP:
3018  {
3019  bool had_one = false;
3020  while(proc.has_more_chars())
3021  {
3022  const char curr = proc.curr();
3023  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3024  switch(curr)
3025  {
3026  case '\n':
3027  {
3028  _c4dbgchomp("copy newline!", curr);
3029  proc.copy();
3030  proc.set_at_end();
3031  had_one = true;
3032  break;
3033  }
3034  case ' ':
3035  case '\r':
3036  _c4dbgchomp("skip!", curr);
3037  proc.skip();
3038  break;
3039  }
3040  }
3041  if(!had_one) // there were no newline characters. add one.
3042  {
3043  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3044  proc.set('\n');
3045  }
3046  break;
3047  }
3048  case CHOMP_KEEP:
3049  {
3050  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3051  while(proc.has_more_chars())
3052  {
3053  const char curr = proc.curr();
3054  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3055  switch(curr)
3056  {
3057  case '\n':
3058  _c4dbgchomp("copy newline!", curr);
3059  proc.copy();
3060  break;
3061  case ' ':
3062  case '\r':
3063  _c4dbgchomp("skip!", curr);
3064  proc.skip();
3065  break;
3066  }
3067  }
3068  break;
3069  }
3070  case CHOMP_STRIP:
3071  {
3072  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3073  // nothing to do!
3074  break;
3075  }
3076  }
3077 
3078  #undef _c4dbgchomp
3079 }
3080 
3081 
3082 // a debugging scaffold:
3083 #if 0
3084 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3085 #else
3086 #define _c4dbgfb(...)
3087 #endif
3088 
3089 template<class EventHandler>
3090 template<class FilterProcessor>
3091 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3092 {
3093  csubstr rem = proc.rem(); // remaining
3094  if(rem.len)
3095  {
3096  size_t first = rem.first_not_of(' ');
3097  if(first != npos)
3098  {
3099  _c4dbgfb("{} spaces follow before next nonws character", first);
3100  if(first < indentation)
3101  {
3102  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3103  proc.skip(first);
3104  }
3105  else
3106  {
3107  _c4dbgfb("skip {} spaces from indentation", indentation);
3108  proc.skip(indentation);
3109  }
3110  }
3111  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3112  else
3113  {
3114  _c4dbgfb("all spaces to the end: {} spaces", first);
3115  first = rem.len;
3116  if(first)
3117  {
3118  if(first < indentation)
3119  {
3120  _c4dbgfb("skip everything", first);
3121  proc.skip(proc.src.len - proc.rpos);
3122  }
3123  else
3124  {
3125  _c4dbgfb("skip {} spaces from indentation", indentation);
3126  proc.skip(indentation);
3127  }
3128  }
3129  }
3130  #endif
3131  }
3132 }
3133 
3134 template<class EventHandler>
3135 template<class FilterProcessor>
3136 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3137 {
3138  csubstr contents = proc.src.trimr(" \n\r");
3139  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3140  if(!contents.len)
3141  {
3142  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3143  if(chomp == CHOMP_KEEP && proc.src.len)
3144  {
3145  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3146  while(proc.has_more_chars())
3147  {
3148  const char curr = proc.curr();
3149  if(curr == '\n')
3150  proc.copy();
3151  else
3152  proc.skip();
3153  }
3154  if(!proc.wpos)
3155  {
3156  proc.set('\n');
3157  }
3158  }
3159  }
3160  return contents.len;
3161 }
3162 
3163 template<class EventHandler>
3164 template<class FilterProcessor>
3165 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3166 {
3167  _c4dbgfb("contents_len={}", contents_len);
3168 
3169  _RYML_ASSERT_BASIC_(this->callbacks(), contents_len > 0u);
3170 
3171  // extend contents to just before the first newline at the end,
3172  // in case it is preceded by spaces
3173  size_t firstnewl = proc.src.first_of('\n', contents_len);
3174  if(firstnewl != npos)
3175  {
3176  contents_len = firstnewl;
3177  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3178  }
3179  else
3180  {
3181  contents_len = proc.src.len;
3182  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3183  }
3184 
3185  return contents_len;
3186 }
3187 
3188 #undef _c4dbgfb
3189 
3190 
3191 //-----------------------------------------------------------------------------
3192 //-----------------------------------------------------------------------------
3193 //-----------------------------------------------------------------------------
3194 
3195 // a debugging scaffold:
3196 #if 0
3197 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3198 #else
3199 #define _c4dbgfbl(...)
3200 #endif
3201 
3202 template<class EventHandler>
3203 template<class FilterProcessor>
3204 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3205 {
3206  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3207 
3208  size_t contents_len = _handle_all_whitespace(proc, chomp);
3209  if(!contents_len)
3210  return proc.result();
3211 
3212  contents_len = _extend_to_chomp(proc, contents_len);
3213 
3214  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3215 
3216  _filter_block_indentation(proc, indentation);
3217 
3218  // now filter the bulk
3219  while(proc.has_more_chars(/*maxpos*/contents_len))
3220  {
3221  const char curr = proc.curr();
3222  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3223  switch(curr)
3224  {
3225  case '\n':
3226  {
3227  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3228  proc.copy(); // copy the newline
3229  _filter_block_indentation(proc, indentation);
3230  break;
3231  }
3232  case '\r':
3233  proc.skip();
3234  break;
3235  default:
3236  proc.copy();
3237  break;
3238  }
3239  }
3240 
3241  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3242 
3243  _filter_chomp(proc, chomp, indentation);
3244 
3245  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3246 
3247  return proc.result();
3248 }
3249 
3250 #undef _c4dbgfbl
3251 
3252 template<class EventHandler>
3253 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3254 {
3255  FilterProcessorSrcDst proc(scalar, dst);
3256  return _filter_block_literal(proc, indentation, chomp);
3257 }
3258 
3259 template<class EventHandler>
3260 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3261 {
3262  FilterProcessorInplaceEndExtending proc(scalar, cap);
3263  return _filter_block_literal(proc, indentation, chomp);
3264 }
3265 
3266 
3267 //-----------------------------------------------------------------------------
3268 //-----------------------------------------------------------------------------
3269 //-----------------------------------------------------------------------------
3270 
3271 // a debugging scaffold:
3272 #if 0
3273 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3274 #else
3275 #define _c4dbgfbf(...)
3276 #endif
3277 
3278 
3279 template<class EventHandler>
3280 template<class FilterProcessor>
3281 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3282 {
3283  _filter_block_indentation(proc, indentation);
3284  while(proc.has_more_chars(len))
3285  {
3286  const char curr = proc.curr();
3287  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3288  switch(curr)
3289  {
3290  case '\n':
3291  _c4dbgfbf("newline.", curr);
3292  proc.copy();
3293  _filter_block_indentation(proc, indentation);
3294  break;
3295  case '\r':
3296  proc.skip();
3297  break;
3298  case ' ':
3299  case '\t':
3300  {
3301  size_t first = proc.rem().first_not_of(" \t");
3302  _c4dbgfbf("space. first={}", first);
3303  if(first == npos)
3304  first = proc.rem().len;
3305  _c4dbgfbf("... indentation increased to {}", first);
3306  _filter_block_folded_indented_block(proc, indentation, len, first);
3307  break;
3308  }
3309  default:
3310  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3311  return;
3312  }
3313  }
3314 }
3315 
3316 template<class EventHandler>
3317 template<class FilterProcessor>
3318 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3319 {
3320  switch(num_newl)
3321  {
3322  case 1u:
3323  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3324  wpos_at_first_newl = proc.wpos;
3325  proc.skip();
3326  proc.set(' ');
3327  break;
3328  case 2u:
3329  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3330  _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl != npos);
3331  _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3332  _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3333  proc.skip();
3334  proc.set_at(wpos_at_first_newl, '\n');
3335  _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3336  break;
3337  default:
3338  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3339  proc.copy();
3340  break;
3341  }
3342  return wpos_at_first_newl;
3343 }
3344 
3345 template<class EventHandler>
3346 template<class FilterProcessor>
3347 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3348 {
3349  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
3350  size_t num_newl = 0;
3351  size_t wpos_at_first_newl = npos;
3352  while(proc.has_more_chars(len))
3353  {
3354  const char curr = proc.curr();
3355  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3356  switch(curr)
3357  {
3358  case '\n':
3359  {
3360  _c4dbgfbf("newline. sofar={}", num_newl);
3361  // NOTE: vs2022-32bit-release builds were giving wrong
3362  // results in this block, if it was written as either
3363  // as a switch(num_newl) or its equivalent if-form.
3364  //
3365  // For this reason, we're using a dedicated function
3366  // (**_compress), which seems to work around the issue.
3367  //
3368  // The manifested problem was that somewhere between the
3369  // assignment to curr and this point, proc.wpos (the
3370  // write-position of the processor) jumped to npos, which
3371  // made the write wrap-around! To make things worse,
3372  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3373  // problem go away!
3374  //
3375  // The only way to make the problem appear with prints
3376  // enabled was by disabling all prints in this function
3377  // (including in the block which was moved to the compress
3378  // function) and then selectively enabling only some of
3379  // those prints.
3380  //
3381  // This may be due to some bug in the cl-x86 optimizer; or
3382  // it may be triggered by some UB which may be
3383  // inadvertedly present in this function or in the filter
3384  // processor. This is despite our best efforts to weed out
3385  // any such UB problem: neither clang-tidy nor none of the
3386  // sanitizers, or gcc's -fanalyzer pointed to any problems
3387  // in this code.
3388  //
3389  // In the end, moving this block to a separate function
3390  // was the only way to bury the problem. But it may
3391  // resurface again, as The Undead, rising to from the
3392  // grave to haunt us with his terrible presence.
3393  //
3394  // We may have to revisit this. With a stake, and lots of
3395  // garlic.
3396  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3397  _filter_block_indentation(proc, indentation);
3398  break;
3399  }
3400  case ' ':
3401  case '\t':
3402  {
3403  size_t first = proc.rem().first_not_of(" \t");
3404  _c4dbgfbf("space. first={}", first);
3405  if(first == npos)
3406  first = proc.rem().len;
3407  _c4dbgfbf("... indentation increased to {}", first);
3408  if(num_newl)
3409  {
3410  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3411  proc.set_at(wpos_at_first_newl, '\n');
3412  }
3413  if(num_newl > 1u)
3414  {
3415  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3416  proc.set('\n');
3417  }
3418  _filter_block_folded_indented_block(proc, indentation, len, first);
3419  num_newl = 0;
3420  wpos_at_first_newl = npos;
3421  break;
3422  }
3423  case '\r':
3424  proc.skip();
3425  break;
3426  default:
3427  _c4dbgfbf("not space, not newline. stop.", 0);
3428  return;
3429  }
3430  }
3431 }
3432 
3433 
3434 template<class EventHandler>
3435 template<class FilterProcessor>
3436 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3437 {
3438  _RYML_ASSERT_BASIC_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3439  if(curr_indentation)
3440  proc.copy(curr_indentation);
3441  while(proc.has_more_chars(len))
3442  {
3443  const char curr = proc.curr();
3444  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3445  switch(curr)
3446  {
3447  case '\n':
3448  {
3449  proc.copy();
3450  _filter_block_indentation(proc, indentation);
3451  csubstr rem = proc.rem();
3452  const size_t first = rem.first_not_of(' ');
3453  _c4dbgfbf("newline. firstns={}", first);
3454  if(first == 0)
3455  {
3456  const char c = rem[first];
3457  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3458  if(c == '\n' || c == '\r')
3459  {
3460  ;
3461  }
3462  else
3463  {
3464  _c4dbgfbf("done with indented block", first);
3465  goto endloop;
3466  }
3467  }
3468  else if(first != npos)
3469  {
3470  proc.copy(first);
3471  _c4dbgfbf("copy all {} spaces", first);
3472  }
3473  break;
3474  }
3475  break;
3476  case '\r':
3477  proc.skip();
3478  break;
3479  default:
3480  proc.copy();
3481  break;
3482  }
3483  }
3484  endloop:
3485  return;
3486 }
3487 
3488 
3489 template<class EventHandler>
3490 template<class FilterProcessor>
3491 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3492 {
3493  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3494 
3495  size_t contents_len = _handle_all_whitespace(proc, chomp);
3496  if(!contents_len)
3497  return proc.result();
3498 
3499  contents_len = _extend_to_chomp(proc, contents_len);
3500 
3501  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3502 
3503  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3504 
3505  // now filter the bulk
3506  while(proc.has_more_chars(/*maxpos*/contents_len))
3507  {
3508  const char curr = proc.curr();
3509  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3510  switch(curr)
3511  {
3512  case '\n':
3513  {
3514  _c4dbgfbf("found newline", curr);
3515  _filter_block_folded_newlines(proc, indentation, contents_len);
3516  break;
3517  }
3518  case '\r':
3519  proc.skip();
3520  break;
3521  default:
3522  proc.copy();
3523  break;
3524  }
3525  }
3526 
3527  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3528 
3529  _filter_chomp(proc, chomp, indentation);
3530 
3531  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3532 
3533  return proc.result();
3534 }
3535 
3536 #undef _c4dbgfbf
3537 
3538 template<class EventHandler>
3539 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3540 {
3541  FilterProcessorSrcDst proc(scalar, dst);
3542  return _filter_block_folded(proc, indentation, chomp);
3543 }
3544 
3545 template<class EventHandler>
3546 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3547 {
3548  FilterProcessorInplaceEndExtending proc(scalar, cap);
3549  return _filter_block_folded(proc, indentation, chomp);
3550 }
3551 
3552 
3553 //-----------------------------------------------------------------------------
3554 //-----------------------------------------------------------------------------
3555 //-----------------------------------------------------------------------------
3556 
3557 template<class EventHandler>
3558 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3559 {
3560  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3561  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3562  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.valid());
3563  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3564  return r.get();
3565 }
3566 
3567 //-----------------------------------------------------------------------------
3568 
3569 template<class EventHandler>
3570 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3571 {
3572  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3573  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3574  _RYML_ASSERT_BASIC_(this->callbacks(), r.valid());
3575  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3576  return r.get();
3577 }
3578 
3579 
3580 //-----------------------------------------------------------------------------
3581 
3582 template<class EventHandler>
3583 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3584 {
3585  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3586  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3587  if(C4_LIKELY(r.valid()))
3588  {
3589  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3590  return r.get();
3591  }
3592  else
3593  {
3594  const size_t len = r.required_len();
3595  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3596  substr dst = m_evt_handler->alloc_arena(len, &s);
3597  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3598  if(dst.str)
3599  {
3600  _RYML_ASSERT_BASIC_(this->callbacks(), dst.len == len);
3601  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3602  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3603  _RYML_ASSERT_BASIC_(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3604  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3605  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3606  return rsd.get();
3607  }
3608  return dst;
3609  }
3610 }
3611 
3612 
3613 //-----------------------------------------------------------------------------
3614 
3615 template<class EventHandler>
3616 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3617 {
3618  if(s.is_sub(m_buf))
3619  {
3620  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3621  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
3622  if(s.len)
3623  memmove(s.str - 1, s.str, s.len);
3624  --s.str;
3625  s.str[s.len] = '\n';
3626  ++s.len;
3627  return s;
3628  }
3629  else
3630  {
3631  substr dst = m_evt_handler->alloc_arena(s.len + 1);
3632  if(s.len)
3633  memcpy(dst.str, s.str, s.len);
3634  dst[s.len] = '\n';
3635  return dst;
3636  }
3637 }
3638 
3639 template<class EventHandler>
3640 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3641 {
3642  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3643  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3644  csubstr result;
3645  if(C4_LIKELY(r.valid()))
3646  {
3647  result = r.get();
3648  }
3649  else
3650  {
3651  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3652  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3653  // this can only happen when adding a single newline in clip mode.
3654  // so we shift left the scalar by one place
3655  result = _move_scalar_left_and_add_newline(s);
3656  }
3657  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
3658  return result;
3659 }
3660 
3661 
3662 //-----------------------------------------------------------------------------
3663 template<class EventHandler>
3664 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3665 {
3666  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3667  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3668  csubstr result;
3669  if(C4_LIKELY(r.valid()))
3670  {
3671  result = r.get();
3672  }
3673  else
3674  {
3675  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3676  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3677  // this can only happen when adding a single newline in clip mode.
3678  // so we shift left the scalar by one place
3679  result = _move_scalar_left_and_add_newline(s);
3680  }
3681  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
3682  return result;
3683 }
3684 
3685 
3686 //-----------------------------------------------------------------------------
3687 
3688 template<class EventHandler>
3689 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3690 {
3691  if(sc.needs_filter)
3692  {
3693  if(m_options.scalar_filtering())
3694  {
3695  return _filter_scalar_plain(sc.scalar, indentation);
3696  }
3697  else
3698  {
3699  _c4dbgp("plain scalar left unfiltered");
3700  m_evt_handler->mark_key_scalar_unfiltered();
3701  }
3702  }
3703  else
3704  {
3705  _c4dbgp("plain scalar doesn't need filtering");
3706  }
3707  return sc.scalar;
3708 }
3709 
3710 template<class EventHandler>
3711 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3712 {
3713  if(sc.needs_filter)
3714  {
3715  if(m_options.scalar_filtering())
3716  {
3717  return _filter_scalar_plain(sc.scalar, indentation);
3718  }
3719  else
3720  {
3721  _c4dbgp("plain scalar left unfiltered");
3722  m_evt_handler->mark_val_scalar_unfiltered();
3723  }
3724  }
3725  else
3726  {
3727  _c4dbgp("plain scalar doesn't need filtering");
3728  }
3729  return sc.scalar;
3730 }
3731 
3732 
3733 //-----------------------------------------------------------------------------
3734 
3735 template<class EventHandler>
3736 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3737 {
3738  if(sc.needs_filter)
3739  {
3740  if(m_options.scalar_filtering())
3741  {
3742  return _filter_scalar_squot(sc.scalar);
3743  }
3744  else
3745  {
3746  _c4dbgp("squo key scalar left unfiltered");
3747  m_evt_handler->mark_key_scalar_unfiltered();
3748  }
3749  }
3750  else
3751  {
3752  _c4dbgp("squo key scalar doesn't need filtering");
3753  }
3754  return sc.scalar;
3755 }
3756 
3757 template<class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3759 {
3760  if(sc.needs_filter)
3761  {
3762  if(m_options.scalar_filtering())
3763  {
3764  return _filter_scalar_squot(sc.scalar);
3765  }
3766  else
3767  {
3768  _c4dbgp("squo val scalar left unfiltered");
3769  m_evt_handler->mark_val_scalar_unfiltered();
3770  }
3771  }
3772  else
3773  {
3774  _c4dbgp("squo val scalar doesn't need filtering");
3775  }
3776  return sc.scalar;
3777 }
3778 
3779 
3780 //-----------------------------------------------------------------------------
3781 
3782 template<class EventHandler>
3783 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3784 {
3785  if(sc.needs_filter)
3786  {
3787  if(m_options.scalar_filtering())
3788  {
3789  return _filter_scalar_dquot(sc.scalar);
3790  }
3791  else
3792  {
3793  _c4dbgp("dquo scalar left unfiltered");
3794  m_evt_handler->mark_key_scalar_unfiltered();
3795  }
3796  }
3797  else
3798  {
3799  _c4dbgp("dquo scalar doesn't need filtering");
3800  }
3801  return sc.scalar;
3802 }
3803 
3804 template<class EventHandler>
3805 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3806 {
3807  if(sc.needs_filter)
3808  {
3809  if(m_options.scalar_filtering())
3810  {
3811  return _filter_scalar_dquot(sc.scalar);
3812  }
3813  else
3814  {
3815  _c4dbgp("dquo scalar left unfiltered");
3816  m_evt_handler->mark_val_scalar_unfiltered();
3817  }
3818  }
3819  else
3820  {
3821  _c4dbgp("dquo scalar doesn't need filtering");
3822  }
3823  return sc.scalar;
3824 }
3825 
3826 
3827 //-----------------------------------------------------------------------------
3828 
3829 template<class EventHandler>
3830 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3831 {
3832  if(m_options.scalar_filtering())
3833  {
3834  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3835  }
3836  else
3837  {
3838  _c4dbgp("literal scalar left unfiltered");
3839  m_evt_handler->mark_key_scalar_unfiltered();
3840  }
3841  return sb.scalar;
3842 }
3843 
3844 template<class EventHandler>
3845 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3846 {
3847  if(m_options.scalar_filtering())
3848  {
3849  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3850  }
3851  else
3852  {
3853  _c4dbgp("literal scalar left unfiltered");
3854  m_evt_handler->mark_val_scalar_unfiltered();
3855  }
3856  return sb.scalar;
3857 }
3858 
3859 
3860 //-----------------------------------------------------------------------------
3861 
3862 template<class EventHandler>
3863 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3864 {
3865  if(m_options.scalar_filtering())
3866  {
3867  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3868  }
3869  else
3870  {
3871  _c4dbgp("folded scalar left unfiltered");
3872  m_evt_handler->mark_key_scalar_unfiltered();
3873  }
3874  return sb.scalar;
3875 }
3876 
3877 template<class EventHandler>
3878 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3879 {
3880  if(m_options.scalar_filtering())
3881  {
3882  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3883  }
3884  else
3885  {
3886  _c4dbgp("folded scalar left unfiltered");
3887  m_evt_handler->mark_val_scalar_unfiltered();
3888  }
3889  return sb.scalar;
3890 }
3891 
3892 
3893 //-----------------------------------------------------------------------------
3894 //-----------------------------------------------------------------------------
3895 //-----------------------------------------------------------------------------
3896 
3897 #ifdef RYML_DBG // !!! <----------------------------------
3898 
3899 template<class EventHandler>
3900 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3901 {
3902  char buf1_[64], buf2_[64], buf3_[64];
3903  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3904  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3905  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3906  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3907  s->flags |= on;
3908 }
3909 
3910 template<class EventHandler>
3911 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3912 {
3913  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3914  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3915  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3916  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3917  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3918  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3919  s->flags |= on;
3920  s->flags &= ~off;
3921 }
3922 
3923 template<class EventHandler>
3924 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3925 {
3926  char buf1_[64], buf2_[64], buf3_[64];
3927  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3928  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3929  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3930  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3931  s->flags &= ~off;
3932 }
3933 
3934 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3935 {
3936  size_t pos = 0;
3937  bool gotone = false;
3938 
3939  #define _prflag(fl) \
3940  if((flags & fl) == (fl)) \
3941  { \
3942  if(gotone) \
3943  { \
3944  if(pos + 1 < buf.len) \
3945  buf[pos] = '|'; \
3946  ++pos; \
3947  } \
3948  csubstr fltxt = #fl; \
3949  if(pos + fltxt.len <= buf.len) \
3950  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3951  pos += fltxt.len; \
3952  gotone = true; \
3953  }
3954 
3955  _prflag(RTOP);
3956  _prflag(RUNK);
3957  _prflag(RMAP);
3958  _prflag(RSEQ);
3959  _prflag(RFLOW);
3960  _prflag(RBLCK);
3961  _prflag(QMRK);
3962  _prflag(RKEY);
3963  _prflag(RVAL);
3964  _prflag(RKCL);
3965  _prflag(RNXT);
3966  _prflag(SSCL);
3967  _prflag(QSCL);
3968  _prflag(RSET);
3969  _prflag(RDOC);
3970  _prflag(NDOC);
3971  _prflag(USTY);
3972  _prflag(RSEQIMAP);
3973 
3974  #undef _prflag
3975 
3976  if(pos == 0)
3977  if(buf.len > 0)
3978  buf[pos++] = '0';
3979 
3980  _RYML_CHECK_BASIC(pos <= buf.len);
3981 
3982  return buf.first(pos);
3983 }
3984 
3985 #endif // RYML_DBG !!! <----------------------------------
3986 
3987 
3988 //-----------------------------------------------------------------------------
3989 //-----------------------------------------------------------------------------
3990 //-----------------------------------------------------------------------------
3991 
3992 template<class EventHandler>
3993 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
3994 {
3995  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3996  return m_buf.sub(loc.offset);
3997 }
3998 
3999 template<class EventHandler>
4000 Location ParseEngine<EventHandler>::val_location(const char *val) const
4001 {
4002  if(C4_UNLIKELY(val == nullptr))
4003  return {m_file, 0, 0, 0};
4004  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4005  // NOTE: if any of these checks fails, the parser needs to be
4006  // instantiated with locations enabled.
4007  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
4008  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
4009  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4010  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4011  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4012  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4013  // NOTE: the pointer needs to belong to the buffer that was used to parse.
4014  csubstr src = m_buf;
4015  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4016  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4017  // ok. search the first stored newline after the given ptr
4018  using lineptr_type = size_t const* C4_RESTRICT;
4019  lineptr_type lineptr = nullptr;
4020  size_t offset = (size_t)(val - src.begin());
4021  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4022  {
4023  // just do a linear search if the size is small.
4024  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4025  {
4026  if(*curr > offset)
4027  {
4028  lineptr = curr;
4029  break;
4030  }
4031  }
4032  }
4033  else
4034  {
4035  // do a bisection search if the size is not small.
4036  //
4037  // We could use std::lower_bound but this is simple enough and
4038  // spares the costly include of <algorithm>.
4039  size_t count = m_newline_offsets_size;
4040  size_t step;
4041  lineptr_type it;
4042  lineptr = m_newline_offsets;
4043  while(count)
4044  {
4045  step = count >> 1;
4046  it = lineptr + step;
4047  if(*it < offset)
4048  {
4049  lineptr = ++it;
4050  count -= step + 1;
4051  }
4052  else
4053  {
4054  count = step;
4055  }
4056  }
4057  }
4058  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4059  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4060  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4061  Location loc;
4062  loc.name = m_file;
4063  loc.offset = offset;
4064  loc.line = (size_t)(lineptr - m_newline_offsets);
4065  if(lineptr > m_newline_offsets)
4066  loc.col = (offset - *(lineptr-1) - 1u);
4067  else
4068  loc.col = offset;
4069  return loc;
4070 }
4071 
4072 template<class EventHandler>
4073 void ParseEngine<EventHandler>::_prepare_locations()
4074 {
4075  m_newline_offsets_buf = m_buf;
4076  size_t numnewlines = 1u + m_buf.count('\n');
4077  _resize_locations(numnewlines);
4078  m_newline_offsets_size = 0;
4079  for(size_t i = 0; i < m_buf.len; i++)
4080  if(m_buf[i] == '\n')
4081  m_newline_offsets[m_newline_offsets_size++] = i;
4082  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4083  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4084 }
4085 
4086 template<class EventHandler>
4087 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4088 {
4089  if(numnewlines > m_newline_offsets_capacity)
4090  {
4091  if(m_newline_offsets)
4092  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4093  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4094  m_newline_offsets_capacity = numnewlines;
4095  }
4096 }
4097 
4098 template<class EventHandler>
4099 bool ParseEngine<EventHandler>::_locations_dirty() const
4100 {
4101  return !m_newline_offsets_size;
4102 }
4103 
4104 
4105 //-----------------------------------------------------------------------------
4106 //-----------------------------------------------------------------------------
4107 //-----------------------------------------------------------------------------
4108 
// Skip leading blanks and a trailing comment in the remainder of the
// current line: when the remainder starts with a space or tab, those
// characters are skipped with _skipchars(" \t"); when what is left then
// begins with '#', the rest of the line is consumed.
//
// The remainder is deliberately re-read through m_evt_handler->m_curr on
// every access instead of being cached in a local (see the note below).
4109 template<class EventHandler>
4110 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4111 {
4112  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4113  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4114  {
4115  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4116  {
4117  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4118  _skipchars(" \t");
4119  }
4120  // comments
   // (checked on the remainder as it is after the whitespace skip above)
4121  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4122  {
4123  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
   // consume everything up to the end of the line
4124  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4125  }
4126  }
4127 }
4128 
4129 
4130 //-----------------------------------------------------------------------------
4131 
4132 
4133 template<class EventHandler>
4134 void ParseEngine<EventHandler>::_handle_colon()
4135 {
4136  size_t curr = m_evt_handler->m_curr->pos.line;
4137  if(m_prev_colon != npos)
4138  {
4139  if(curr == m_prev_colon)
4140  _c4err("two colons on same line");
4141  }
4142  m_prev_colon = curr;
4143 }
4144 
4145 template<class EventHandler>
4146 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4147 {
4148  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4149  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
4150  _c4err("too many annotations");
4151  dst->annotations[dst->num_entries].str = str;
4152  dst->annotations[dst->num_entries].indentation = indentation;
4153  dst->annotations[dst->num_entries].line = line;
4154  ++dst->num_entries;
4155 }
4156 
4157 template<class EventHandler>
4158 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4159 {
4160  dst->num_entries = 0;
4161 }
4162 
4163 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4164 template<class EventHandler>
4165 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4166 {
4167  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4168  {
4169  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4170  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4171  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4172  size_t to_skip = m_evt_handler->m_curr->indref;
4173  if(m_pending_anchors.num_entries)
4174  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4175  if(m_pending_tags.num_entries)
4176  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4177  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4178  _maybe_skipchars_up_to(' ', to_skip);
4179  return true;
4180  }
4181  return false;
4182 }
4183 #endif
4184 
4185 template<class EventHandler>
4186 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4187 {
4188  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4189 }
4190 
4191 template<class EventHandler>
4192 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4193 {
4194  if(!tag.begins_with("!<"))
4195  {
4196  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4197  _c4err("tags must not contain any of '[]{},'");
4198  }
4199  else
4200  {
4201  if(C4_UNLIKELY(!tag.ends_with('>')))
4202  _c4err("malformed tag");
4203  }
4204 }
4205 
4206 template<class EventHandler>
4207 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4208 {
4209  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4210  if(m_pending_tags.num_entries)
4211  {
4212  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4213  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4214  {
4215  _check_tag(m_pending_tags.annotations[0].str);
4216  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4217  _clear_annotations(&m_pending_tags);
4218  }
4219  else
4220  {
4221  _c4err("too many tags");
4222  }
4223  }
4224  if(m_pending_anchors.num_entries)
4225  {
4226  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4227  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4228  {
4229  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4230  _clear_annotations(&m_pending_anchors);
4231  }
4232  else
4233  {
4234  _c4err("too many anchors");
4235  }
4236  }
4237 }
4238 
4239 template<class EventHandler>
4240 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4241 {
4242  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4243  if(m_pending_tags.num_entries)
4244  {
4245  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4246  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4247  {
4248  _check_tag(m_pending_tags.annotations[0].str);
4249  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4250  _clear_annotations(&m_pending_tags);
4251  }
4252  else
4253  {
4254  _c4err("too many tags");
4255  }
4256  }
4257  if(m_pending_anchors.num_entries)
4258  {
4259  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4260  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4261  {
4262  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4263  _clear_annotations(&m_pending_anchors);
4264  }
4265  else
4266  {
4267  _c4err("too many anchors");
4268  }
4269  }
4270 }
4271 
4272 template<class EventHandler>
4273 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4274 {
4275  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4276  if(m_pending_tags.num_entries == 2)
4277  {
4278  _c4dbgp("2 tags, setting entry 0");
4279  _check_tag(m_pending_tags.annotations[0].str);
4280  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4281  }
4282  else if(m_pending_tags.num_entries == 1)
4283  {
4284  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4285  if(m_pending_tags.annotations[0].line < current_line)
4286  {
4287  _c4dbgp("...tag is for the map. setting it.");
4288  _check_tag(m_pending_tags.annotations[0].str);
4289  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4290  _clear_annotations(&m_pending_tags);
4291  }
4292  }
4293  //
4294  if(m_pending_anchors.num_entries == 2)
4295  {
4296  _c4dbgp("2 anchors, setting entry 0");
4297  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4298  }
4299  else if(m_pending_anchors.num_entries == 1)
4300  {
4301  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4302  if(m_pending_anchors.annotations[0].line < current_line)
4303  {
4304  _c4dbgp("...anchor is for the map. setting it.");
4305  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4306  _clear_annotations(&m_pending_anchors);
4307  }
4308  }
4309 }
4310 
4311 template<class EventHandler>
4312 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4313 {
4314  _c4dbgp("annotations_before_start_mapblck_as_key");
4315  if(m_pending_tags.num_entries == 2)
4316  {
4317  _check_tag(m_pending_tags.annotations[0].str);
4318  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4319  }
4320  if(m_pending_anchors.num_entries == 2)
4321  {
4322  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4323  }
4324 }
4325 
4326 template<class EventHandler>
4327 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4328 {
4329  _c4dbgp("annotations_after_start_mapblck");
4330  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4331  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4332  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4333  {
4334  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4335  switch(m_pending_tags.num_entries)
4336  {
4337  case 1u:
4338  _check_tag(m_pending_tags.annotations[0].str);
4339  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4340  _clear_annotations(&m_pending_tags);
4341  break;
4342  case 2u:
4343  _check_tag(m_pending_tags.annotations[1].str);
4344  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4345  _clear_annotations(&m_pending_tags);
4346  break;
4347  }
4348  switch(m_pending_anchors.num_entries)
4349  {
4350  case 1u:
4351  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4352  _clear_annotations(&m_pending_anchors);
4353  break;
4354  case 2u:
4355  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4356  _clear_annotations(&m_pending_anchors);
4357  break;
4358  }
4359  }
4360  _set_indentation(key_indentation);
4361 }
4362 
4363 template<class EventHandler>
4364 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4365 {
4366  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4367  // select the left-most annotation on the max line
4368  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4369  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4370  {
4371  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4372  if(ann.line > curr->line)
4373  curr = &ann;
4374  else if(ann.indentation < curr->indentation)
4375  curr = &ann;
4376  }
4377  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4378  {
4379  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4380  if(ann.line > curr->line)
4381  curr = &ann;
4382  else if(ann.indentation < curr->indentation)
4383  curr = &ann;
4384  }
4385  return curr->line < val_line ? val_indentation : curr->indentation;
4386 }
4387 
4388 template<class EventHandler>
4389 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4390 {
4391  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4392  const size_t pos = rem.find('#');
4393  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4394  if(pos == npos) // no comments
4395  {
4396  m_evt_handler->add_directive(rem);
4397  _line_progressed(rem.len);
4398  }
4399  else
4400  {
4401  csubstr to_comment = rem.first(pos);
4402  csubstr trimmed = to_comment.trimr(" \t");
4403  m_evt_handler->add_directive(trimmed);
4404  _line_progressed(pos);
4405  _skip_comment();
4406  }
4407 }
4408 
4409 template<class EventHandler>
4410 bool ParseEngine<EventHandler>::_handle_bom()
4411 {
4412  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4413  if(rem.len)
4414  {
4415  const csubstr rest = rem.sub(1);
4416  // https://yaml.org/spec/1.2.2/#52-character-encodings
4417  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4418  if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4419  {
4420  _c4dbgp("byte order mark: UTF32BE");
4421  _handle_bom(UTF32BE);
4422  _line_progressed(4);
4423  m_bom_len = 4;
4424  return true;
4425  }
4426  else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4427  {
4428  _c4dbgp("byte order mark: UTF32LE");
4429  _handle_bom(UTF32LE);
4430  _line_progressed(4);
4431  m_bom_len = 4;
4432  return true;
4433  }
4434  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4435  {
4436  _c4dbgp("byte order mark: UTF16BE");
4437  _handle_bom(UTF16BE);
4438  _line_progressed(2);
4439  m_bom_len = 2;
4440  return true;
4441  }
4442  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4443  {
4444  _c4dbgp("byte order mark: UTF16LE");
4445  _handle_bom(UTF16LE);
4446  _line_progressed(2);
4447  m_bom_len = 2;
4448  return true;
4449  }
4450  else if(rem.begins_with("\xef\xbb\xbf"))
4451  {
4452  _c4dbgp("byte order mark: UTF8");
4453  _handle_bom(UTF8);
4454  _line_progressed(3);
4455  m_bom_len = 3;
4456  return true;
4457  }
4458  #undef _rymlisascii
4459  }
4460  return false;
4461 }
4462 
4463 template<class EventHandler>
4464 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4465 {
4466  if(m_encoding == NOBOM)
4467  {
4468  if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == m_buf.str))
4469  m_encoding = enc;
4470  else
4471  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4472  }
4473  else if(enc != m_encoding)
4474  {
4475  _c4err("byte order mark can only be set once");
4476  }
4477 }
4478 
4479 
4480 //-----------------------------------------------------------------------------
4481 
4482 template<class EventHandler>
4483 void ParseEngine<EventHandler>::_handle_seq_json()
4484 {
4485 seqjson_start:
4486  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4487 
4488  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4489  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4490  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
4491  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4492  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4493 
4494  _handle_flow_skip_whitespace();
4495  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4496  if(!rem.len)
4497  goto seqjson_again;
4498 
4499  if(has_any(RVAL))
4500  {
4501  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4502  const char first = rem.str[0];
4503  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4504  switch(first)
4505  {
4506  case '"':
4507  {
4508  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4509  ScannedScalar sc = _scan_scalar_dquot();
4510  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4511  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4512  addrem_flags(RNXT, RVAL);
4513  break;
4514  }
4515  case '[':
4516  {
4517  _c4dbgp("seqjson[RVAL]: start child seqjson");
4518  addrem_flags(RNXT, RVAL);
4519  m_evt_handler->begin_seq_val_flow();
4520  addrem_flags(RVAL, RNXT);
4521  _line_progressed(1);
4522  break;
4523  }
4524  case '{':
4525  {
4526  _c4dbgp("seqjson[RVAL]: start child mapjson");
4527  addrem_flags(RNXT, RVAL);
4528  m_evt_handler->begin_map_val_flow();
4529  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4530  _line_progressed(1);
4531  goto seqjson_finish;
4532  }
4533  case ']': // this happens on a trailing comma like ", ]"
4534  {
4535  _c4dbgp("seqjson[RVAL]: end!");
4536  rem_flags(RSEQ);
4537  _end_seq_flow();
4538  _line_progressed(1);
4539  if(!has_all(RSEQ|RFLOW))
4540  goto seqjson_finish;
4541  break;
4542  }
4543  default:
4544  {
4545  ScannedScalar sc;
4546  if(_scan_scalar_seq_json(&sc))
4547  {
4548  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4549  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4550  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4551  addrem_flags(RNXT, RVAL);
4552  }
4553  else
4554  {
4555  _c4err("parse error");
4556  }
4557  }
4558  }
4559  }
4560  else // RNXT
4561  {
4562  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4563  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4564  const char first = rem.str[0];
4565  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4566  switch(first)
4567  {
4568  case ',':
4569  {
4570  _c4dbgp("seqjson[RNXT]: expect next val");
4571  addrem_flags(RVAL, RNXT);
4572  m_evt_handler->add_sibling();
4573  _line_progressed(1);
4574  break;
4575  }
4576  case ']':
4577  {
4578  _c4dbgp("seqjson[RNXT]: end!");
4579  _end_seq_flow();
4580  _line_progressed(1);
4581  goto seqjson_finish;
4582  }
4583  default:
4584  _c4err("parse error");
4585  }
4586  }
4587 
4588  seqjson_again:
4589  _c4dbgt("seqjson: go again", 0);
4590  if(_finished_line())
4591  {
4592  if(C4_LIKELY(!_finished_file()))
4593  {
4594  _line_ended();
4595  _scan_line();
4596  _c4dbgnextline();
4597  }
4598  else
4599  {
4600  _c4err("missing terminating ]");
4601  }
4602  }
4603  goto seqjson_start;
4604 
4605  seqjson_finish:
4606  _c4dbgp("seqjson: finish");
4607 }
4608 
4609 
4610 //-----------------------------------------------------------------------------
4611 
// Parse the contents of a JSON flow map.
//
// This is a loop written with gotos, driven by four mutually exclusive
// state flags (exactly one must be set, see the assertions below):
//  - RKEY: a double-quoted key is expected, or '}' (trailing comma)
//  - RKCL: the ':' after the key is expected
//  - RVAL: a value is expected: a double-quoted scalar, a '[' or '{'
//    container, or a plain JSON scalar
//  - RNXT: a ',' (next key-value pair) or '}' (end of the map) is expected
//
// Whenever the current line is exhausted the next line is scanned;
// reaching the end of the file inside the map is an error.
4612 template<class EventHandler>
4613 void ParseEngine<EventHandler>::_handle_map_json()
4614 {
4615 mapjson_start:
4616  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4617 
4618  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4619  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
4620  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4621  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4622  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4623 
4624  _handle_flow_skip_whitespace();
4625  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
   // nothing left on this line: go fetch the next one
4626  if(!rem.len)
4627  goto mapjson_again;
4628 
4629  if(has_any(RKEY))
4630  {
4631  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4632  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4633  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4634  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4635  const char first = rem.str[0];
4636  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4637  switch(first)
4638  {
4639  case '"':
4640  {
4641  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4642  ScannedScalar sc = _scan_scalar_dquot();
4643  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4644  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
   // key done: the colon comes next
4645  addrem_flags(RKCL, RKEY);
4646  break;
4647  }
4648  case '}': // this happens on a trailing comma like ", }"
4649  {
4650  _c4dbgp("mapjson[RKEY]: end!");
4651  _end_map_flow();
4652  _line_progressed(1);
4653  goto mapjson_finish;
4654  }
   // only double-quoted keys are accepted here
4655  default:
4656  _c4err("parse error");
4657  }
4658  }
4659  else if(has_any(RVAL))
4660  {
4661  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4662  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4663  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4664  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4665  const char first = rem.str[0];
4666  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4667  switch(first)
4668  {
4669  case '"':
4670  {
4671  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4672  ScannedScalar sc = _scan_scalar_dquot();
4673  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4674  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4675  addrem_flags(RNXT, RVAL);
4676  break;
4677  }
4678  case '[':
4679  {
4680  _c4dbgp("mapjson[RVAL]: start val seqjson");
   // NOTE(review): the flags changed before begin_seq_val_flow()
   // presumably apply to this map's state and the ones after it to
   // the child's state -- confirm against the event handler
4681  addrem_flags(RNXT, RVAL);
4682  m_evt_handler->begin_seq_val_flow();
4683  _set_indentation(m_evt_handler->m_parent->indref);
4684  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4685  _line_progressed(1);
   // the flags now say RSEQ, which this function does not handle: leave
4686  goto mapjson_finish;
4687  }
4688  case '{':
4689  {
4690  _c4dbgp("mapjson[RVAL]: start val mapjson");
4691  addrem_flags(RNXT, RVAL);
4692  m_evt_handler->begin_map_val_flow();
4693  _set_indentation(m_evt_handler->m_parent->indref);
4694  addrem_flags(RKEY, RNXT);
4695  _line_progressed(1);
4696  // keep going in this function
4697  break;
4698  }
4699  default:
4700  {
4701  ScannedScalar sc;
4702  if(_scan_scalar_map_json(&sc))
4703  {
4704  _c4dbgp("mapjson[RVAL]: plain scalar.");
4705  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4706  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4707  addrem_flags(RNXT, RVAL);
4708  }
4709  else
4710  {
4711  _c4err("parse error");
4712  }
4713  break;
4714  }
4715  }
4716  }
4717  else if(has_any(RKCL)) // read the key colon
4718  {
4719  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4720  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4721  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4722  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4723  const char first = rem.str[0];
4724  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4725  if(first == ':')
4726  {
4727  _c4dbgp("mapjson[RKCL]: found the colon");
   // colon consumed: the value comes next
4728  addrem_flags(RVAL, RKCL);
4729  _line_progressed(1);
4730  }
4731  else
4732  {
4733  _c4err("parse error");
4734  }
4735  }
4736  else if(has_any(RNXT))
4737  {
4738  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4739  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4740  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4741  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4742  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4743  if(rem.begins_with(','))
4744  {
4745  _c4dbgp("mapjson[RNXT]: expect next keyval");
4746  m_evt_handler->add_sibling();
4747  addrem_flags(RKEY, RNXT);
4748  _line_progressed(1);
4749  }
4750  else if(rem.begins_with('}'))
4751  {
4752  _c4dbgp("mapjson[RNXT]: end!");
4753  _end_map_flow();
4754  _line_progressed(1);
4755  goto mapjson_finish;
4756  }
4757  else
4758  {
4759  _c4err("parse error");
4760  }
4761  }
4762 
   // one token was handled (or the line was empty): move to the next
   // line if this one is exhausted, then loop
4763  mapjson_again:
4764  _c4dbgt("mapjson: go again", 0);
4765  if(_finished_line())
4766  {
4767  if(C4_LIKELY(!_finished_file()))
4768  {
4769  _line_ended();
4770  _scan_line();
4771  _c4dbgnextline();
4772  }
4773  else
4774  {
   // the file ended while the map was still open
4775  _c4err("missing terminating }");
4776  }
4777  }
4778  goto mapjson_start;
4779 
4780  mapjson_finish:
4781  _c4dbgp("mapjson: finish");
4782 }
4783 
4784 
4785 //-----------------------------------------------------------------------------
4786 
/** Handle the contents of an implicit map found inside a flow sequence
 * (the RSEQIMAP state), eg the `a: b` in `[a: b]`.
 *
 * Exactly one of RVAL, RNXT, QMRK or RKCL must be set on entry (see the
 * assertions below); it selects what the next token is read as:
 *   - QMRK: the key
 *   - RKCL: the ':' after the key
 *   - RVAL: the val
 *   - RNXT: the ',' or ']' after the val
 *
 * The function loops through the seqimap_start/seqimap_again labels,
 * scanning further lines as needed, and leaves through seqimap_finish
 * either after _end_map_flow() was called or after a child flow
 * container was begun. Unexpected tokens, and reaching the end of the
 * file, are reported with _c4err().
 */
4787 template<class EventHandler>
4788 void ParseEngine<EventHandler>::_handle_seq_imap()
4789 {
4790 seqimap_start:
4791  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4792 
4793  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4794  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4795  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4796  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4797  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4798 
4799  _handle_flow_skip_whitespace();
4800  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
  // nothing left to read on this line: go fetch the next one
4801  if(!rem.len)
4802  goto seqimap_again;
4803 
4804  if(has_any(RVAL))
4805  {
4806  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4807  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4808  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4809  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4810  const char first = rem.str[0];
4811  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4812  ScannedScalar sc;
  // a scalar val completes the pair: the implicit map is ended
  // right after the val is set
4813  if(first == '\'')
4814  {
4815  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4816  sc = _scan_scalar_squot();
4817  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4818  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4819  _end_map_flow();
4820  goto seqimap_finish;
4821  }
4822  else if(first == '"')
4823  {
4824  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4825  sc = _scan_scalar_dquot();
4826  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4827  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4828  _end_map_flow();
4829  goto seqimap_finish;
4830  }
4831  // block scalars (ie | and >) cannot appear in flow containers
4832  else if(_scan_scalar_plain_map_flow(&sc))
4833  {
4834  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4835  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4836  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4837  _end_map_flow();
4838  goto seqimap_finish;
4839  }
4840  else if(first == '[')
4841  {
4842  _c4dbgp("seqimap[RVAL]: start child seqflow");
  // NOTE(review): presumably begin_seq_val_flow() pushes a new
  // state, so that the RNXT set here stays with this map while
  // the flags changed afterwards apply to the child seq --
  // confirm in the event handler.
4843  addrem_flags(RNXT, RVAL);
4844  m_evt_handler->begin_seq_val_flow();
4845  addrem_flags(RVAL, RNXT|RSEQIMAP);
4846  _set_indentation(m_evt_handler->m_parent->indref);
4847  _line_progressed(1);
4848  goto seqimap_finish;
4849  }
4850  else if(first == '{')
4851  {
4852  _c4dbgp("seqimap[RVAL]: start child mapflow");
  // same sequence as for '[' above, but the flags after
  // begin_map_val_flow() are set for reading a map key
4853  addrem_flags(RNXT, RVAL);
4854  m_evt_handler->begin_map_val_flow();
4855  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4856  _set_indentation(m_evt_handler->m_parent->indref);
4857  _line_progressed(1);
4858  goto seqimap_finish;
4859  }
4860  else if(first == ',' || first == ']')
4861  {
  // the val is missing. Note that the ',' or ']' is not consumed
  // here: there is no call to _line_progressed().
4862  _c4dbgp("seqimap[RVAL]: finish without val.");
4863  m_evt_handler->set_val_scalar_plain_empty();
4864  _end_map_flow();
4865  goto seqimap_finish;
4866  }
4867  else if(first == '&')
4868  {
  // the flags are not changed: the state remains RVAL, and the
  // loop goes on to read the val
4869  csubstr anchor = _scan_anchor();
4870  _c4dbgp("seqimap[RVAL]: anchor!");
4871  m_evt_handler->set_val_anchor(anchor);
4872  }
4873  else if(first == '*')
4874  {
  // the map is not ended here; that happens in the RNXT branch,
  // on the following ',' or ']'
4875  csubstr ref = _scan_ref_seq();
4876  _c4dbgp("seqimap[RVAL]: ref!");
4877  m_evt_handler->set_val_ref(ref);
4878  addrem_flags(RNXT, RVAL);
4879  }
4880  else
4881  {
4882  _c4err("parse error");
4883  }
4884  }
4885  else if(has_any(RNXT))
4886  {
4887  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4888  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4889  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4890  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4891  const char first = rem.str[0];
4892  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4893  if(first == ',' || first == ']')
4894  {
4895  // we may get here because a map or a seq started and we
4896  // return later
  // (as above, the ',' or ']' is not consumed here)
4897  _c4dbgp("seqimap: done");
4898  _end_map_flow();
4899  goto seqimap_finish;
4900  }
4901  else
4902  {
4903  _c4err("parse error");
4904  }
4905  }
4906  else if(has_any(QMRK))
4907  {
4908  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4909  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4910  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4911  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4912  const char first = rem.str[0];
4913  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4914  ScannedScalar sc;
  // a scalar key moves the state to RKCL, to read the ':' next
4915  if(first == '\'')
4916  {
4917  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4918  sc = _scan_scalar_squot();
4919  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4920  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4921  addrem_flags(RKCL, QMRK);
4922  goto seqimap_again;
4923  }
4924  else if(first == '"')
4925  {
4926  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4927  sc = _scan_scalar_dquot();
4928  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4929  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4930  addrem_flags(RKCL, QMRK);
4931  goto seqimap_again;
4932  }
4933  // block scalars (ie | and >) cannot appear in flow containers
4934  else if(_scan_scalar_plain_map_flow(&sc))
4935  {
4936  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4937  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4938  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4939  addrem_flags(RKCL, QMRK);
4940  goto seqimap_again;
4941  }
4942  else if(first == '[')
4943  {
4944  _c4dbgp("seqimap[QMRK]: start child seqflow");
  // container as key: RKCL is set before begin_seq_key_flow(),
  // then the flags are set for reading the vals of the child seq
4945  addrem_flags(RKCL, QMRK);
4946  m_evt_handler->begin_seq_key_flow();
4947  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4948  _set_indentation(m_evt_handler->m_parent->indref);
4949  _line_progressed(1);
4950  goto seqimap_finish;
4951  }
4952  else if(first == '{')
4953  {
4954  _c4dbgp("seqimap[QMRK]: start child mapflow");
4955  addrem_flags(RKCL, QMRK);
4956  m_evt_handler->begin_map_key_flow();
4957  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4958  _set_indentation(m_evt_handler->m_parent->indref);
4959  _line_progressed(1);
4960  goto seqimap_finish;
4961  }
4962  else if(first == ',' || first == ']')
4963  {
  // neither key nor val were given: set both to empty and end
  // the map, again without consuming the ',' or ']'
4964  _c4dbgp("seqimap[QMRK]: finish without key.");
4965  m_evt_handler->set_key_scalar_plain_empty();
4966  m_evt_handler->set_val_scalar_plain_empty();
4967  _end_map_flow();
4968  goto seqimap_finish;
4969  }
4970  else if(first == '&')
4971  {
  // the state remains QMRK: the key is read on the next pass
4972  csubstr anchor = _scan_anchor();
4973  _c4dbgp("seqimap[QMRK]: anchor!");
4974  m_evt_handler->set_key_anchor(anchor);
4975  }
4976  else if(first == '*')
4977  {
4978  csubstr ref = _scan_ref_seq();
4979  _c4dbgp("seqimap[QMRK]: ref!");
4980  m_evt_handler->set_key_ref(ref);
4981  addrem_flags(RKCL, QMRK);
4982  }
4983  else
4984  {
4985  _c4err("parse error");
4986  }
4987  }
4988  else if(has_any(RKCL))
4989  {
4990  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4991  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4992  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4993  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4994  const char first = rem.str[0];
4995  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4996  if(first == ':')
4997  {
4998  _c4dbgp("seqimap[RKCL]: found ':'");
4999  addrem_flags(RVAL, RKCL);
5000  _line_progressed(1);
5001  goto seqimap_again;
5002  }
5003  else if(first == ',' || first == ']')
5004  {
  // a key without ':' and without val
5005  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5006  m_evt_handler->set_val_scalar_plain_empty();
5007  _end_map_flow();
5008  goto seqimap_finish;
5009  }
5010  else
5011  {
5012  _c4err("parse error");
5013  }
5014  }
5015 
  // reached by every branch that did not jump to seqimap_finish:
  // scan the next line if the current one is exhausted, and go again
5016  seqimap_again:
5017  _c4dbgt("seqimap: go again", 0);
5018  if(_finished_line())
5019  {
5020  if(C4_LIKELY(!_finished_file()))
5021  {
5022  _line_ended();
5023  _scan_line();
5024  _c4dbgnextline();
5025  }
5026  else
5027  {
  // the file ended while the implicit map was still open
5028  _c4err("parse error");
5029  }
5030  }
5031  goto seqimap_start;
5032 
5033  seqimap_finish:
5034  _c4dbgp("seqimap: finish");
5035 }
5036 
5037 
5038 //-----------------------------------------------------------------------------
5039 
/** Handle the contents of a flow sequence, ie what comes after a `[`.
 *
 * RSEQ and RFLOW must be set on entry, together with exactly one of
 * (see the assertions below):
 *   - RVAL: a val is expected next
 *   - RNXT: a val was read; expect ',' or ']' -- or ':', which turns
 *     the val just read into the first key of an implicit map
 *
 * The function loops through the seqflow_start/seqflow_again labels,
 * scanning further lines as needed. It leaves through seqflow_finish
 * when the seq is closed by ']', or when a child flow map or an
 * implicit map (RSEQIMAP) is begun. A nested `[` is handled without
 * leaving the loop. Unexpected tokens are reported with _c4err(), as
 * is reaching the end of the file before the closing ']'.
 */
5040 template<class EventHandler>
5041 void ParseEngine<EventHandler>::_handle_seq_flow()
5042 {
5043 seqflow_start:
5044  _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5045 
5046  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5047  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5048  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
5049  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5050  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
5051  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
5052 
5053  _handle_flow_skip_whitespace();
5054  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
  // (this is why the remainder of the line is always read through
  // m_evt_handler->m_curr->line_contents.rem in this function)
5055  if(!m_evt_handler->m_curr->line_contents.rem.len)
5056  goto seqflow_again;
5057 
5058  if(has_any(RVAL))
5059  {
5060  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5061  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5062  ScannedScalar sc;
  // after a scalar val, the state moves to RNXT
5063  if(first == '\'')
5064  {
5065  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5066  sc = _scan_scalar_squot();
5067  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5068  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5069  addrem_flags(RNXT, RVAL);
5070  }
5071  else if(first == '"')
5072  {
5073  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5074  sc = _scan_scalar_dquot();
5075  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5076  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5077  addrem_flags(RNXT, RVAL);
5078  }
5079  // block scalars (ie | and >) cannot appear in flow containers
5080  else if(_scan_scalar_plain_seq_flow(&sc))
5081  {
5082  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5083  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5084  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5085  addrem_flags(RNXT, RVAL);
5086  }
5087  else if(first == '[')
5088  {
5089  _c4dbgp("seqflow[RVAL]: start child seqflow");
  // NOTE(review): presumably begin_seq_val_flow() pushes a new
  // state, so that the RNXT set here stays with this seq and the
  // RVAL set afterwards applies to the child seq -- confirm in
  // the event handler. There is no jump to seqflow_finish: the
  // child seq is read by this same loop.
5090  addrem_flags(RNXT, RVAL);
5091  m_evt_handler->begin_seq_val_flow();
5092  _set_indentation(m_evt_handler->m_parent->indref);
5093  addrem_flags(RVAL, RNXT);
5094  _line_progressed(1);
5095  }
5096  else if(first == '{')
5097  {
5098  _c4dbgp("seqflow[RVAL]: start child mapflow");
  // same sequence as for '[', but the flags are switched from
  // RSEQ to RMAP|RKEY, and this function is left
5099  addrem_flags(RNXT, RVAL);
5100  m_evt_handler->begin_map_val_flow();
5101  _set_indentation(m_evt_handler->m_parent->indref);
5102  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5103  _line_progressed(1);
5104  goto seqflow_finish;
5105  }
5106  else if(first == ']') // this happens on cases such as [] or [.., ]
5107  {
5108  _c4dbgp("seqflow[RVAL]: end!");
5109  _line_progressed(1);
5110  _end_seq_flow();
5111  goto seqflow_finish;
5112  }
5113  else if(first == '*')
5114  {
5115  csubstr ref = _scan_ref_seq();
5116  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5117  m_evt_handler->set_val_ref(ref);
5118  addrem_flags(RNXT, RVAL);
5119  }
5120  else if(first == '&')
5121  {
5122  csubstr anchor = _scan_anchor();
5123  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5124  m_evt_handler->set_val_anchor(anchor);
  // an anchor followed by a comma gets an empty val, and the
  // next val is prepared. In either case the state stays RVAL.
5125  if(_maybe_scan_following_comma())
5126  {
5127  _c4dbgp("seqflow[RVAL]: empty scalar!");
5128  m_evt_handler->set_val_scalar_plain_empty();
5129  m_evt_handler->add_sibling();
5130  }
5131  }
5132  else if(first == '!')
5133  {
5134  csubstr tag = _scan_tag();
5135  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5136  _check_tag(tag);
5137  m_evt_handler->set_val_tag(tag);
  // as with anchors: a tag followed by a comma gets an empty val
5138  if(_maybe_scan_following_comma())
5139  {
5140  _c4dbgp("seqflow[RVAL]: empty scalar!");
5141  m_evt_handler->set_val_scalar_plain_empty();
5142  m_evt_handler->add_sibling();
5143  }
5144  }
5145  else if(first == ':')
5146  {
5147  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
  // a ':' where a val was expected: begin a map with an empty
  // key, and switch to the RSEQIMAP state to read its val
5148  addrem_flags(RNXT, RVAL);
5149  m_evt_handler->begin_map_val_flow();
5150  _set_indentation(m_evt_handler->m_parent->indref);
5151  m_evt_handler->set_key_scalar_plain_empty();
5152  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5153  _line_progressed(1);
5154  goto seqflow_finish;
5155  }
5156  else if(first == '?')
5157  {
5158  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
  // explicit key: begin a map and switch to the RSEQIMAP state,
  // with QMRK set so that the key is read next
5159  addrem_flags(RNXT, RVAL);
5160  m_was_inside_qmrk = true;
5161  m_evt_handler->begin_map_val_flow();
5162  _set_indentation(m_evt_handler->m_parent->indref);
5163  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5164  _line_progressed(1);
5165  _maybe_skip_whitespace_tokens();
5166  goto seqflow_finish;
5167  }
5168  else
5169  {
5170  _c4err("parse error");
5171  }
5172  }
5173  else // RNXT
5174  {
5175  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5176  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5177  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5178  if(first == ',')
5179  {
5180  _c4dbgp("seqflow[RNXT]: expect next val");
5181  addrem_flags(RVAL, RNXT);
5182  m_evt_handler->add_sibling();
5183  _line_progressed(1);
5184  }
5185  else if(first == ']')
5186  {
5187  _c4dbgp("seqflow[RNXT]: end!");
5188  _line_progressed(1);
5189  _end_seq_flow();
5190  goto seqflow_finish;
5191  }
5192  else if(first == ':')
5193  {
5194  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
  // a ':' after a val: the handler is told that the val already
  // read is the first key of a new map, and the state is switched
  // to RSEQIMAP to read the val of that map
5195  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5196  _set_indentation(m_evt_handler->m_parent->indref);
5197  _line_progressed(1);
5198  addrem_flags(RSEQIMAP|RVAL, RNXT);
5199  goto seqflow_finish;
5200  }
5201  else
5202  {
5203  _c4err("parse error");
5204  }
5205  }
5206 
  // reached by every branch that did not jump to seqflow_finish:
  // scan the next line if the current one is exhausted, and go again
5207  seqflow_again:
5208  _c4dbgt("seqflow: go again", 0);
5209  if(_finished_line())
5210  {
5211  if(C4_LIKELY(!_finished_file()))
5212  {
5213  _line_ended();
5214  _scan_line();
5215  _c4dbgnextline();
5216  }
5217  else
5218  {
5219  _c4err("missing terminating ]");
5220  }
5221  }
5222  goto seqflow_start;
5223 
5224  seqflow_finish:
5225  _c4dbgp("seqflow: finish");
5226 }
5227 
5228 
5229 //-----------------------------------------------------------------------------
5230 
5231 template<class EventHandler>
5232 void ParseEngine<EventHandler>::_handle_map_flow()
5233 {
5234 mapflow_start:
5235  _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5236 
5237  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5238  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
5239  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5240  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5241 
5242  _handle_flow_skip_whitespace();
5243  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5244  if(!rem.len)
5245  goto mapflow_again;
5246 
5247  if(has_any(RKEY))
5248  {
5249  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5250  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5251  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5252  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5253  const char first = rem.str[0];
5254  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5255  ScannedScalar sc;
5256  if(first == '\'')
5257  {
5258  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5259  sc = _scan_scalar_squot();
5260  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5261  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5262  addrem_flags(RKCL, RKEY|QMRK);
5263  }
5264  else if(first == '"')
5265  {
5266  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5267  sc = _scan_scalar_dquot();
5268  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5269  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5270  addrem_flags(RKCL, RKEY|QMRK);
5271  }
5272  // block scalars (ie | and >) cannot appear in flow containers
5273  else if(_scan_scalar_plain_map_flow(&sc))
5274  {
5275  _c4dbgp("mapflow[RKEY]: plain scalar");
5276  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5277  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5278  addrem_flags(RKCL, RKEY|QMRK);
5279  }
5280  else if(first == '?')
5281  {
5282  _c4dbgp("mapflow[RKEY]: explicit key");
5283  _line_progressed(1);
5284  addrem_flags(QMRK, RKEY);
5285  _maybe_skip_whitespace_tokens();
5286  }
5287  else if(first == ':')
5288  {
5289  _c4dbgp("mapflow[RKEY]: setting empty key");
5290  m_evt_handler->set_key_scalar_plain_empty();
5291  addrem_flags(RVAL, RKEY|QMRK);
5292  _line_progressed(1);
5293  _maybe_skip_whitespace_tokens();
5294  }
5295  else if(first == ',')
5296  {
5297  _c4dbgp("mapflow[RKEY]: empty key+val!");
5298  m_evt_handler->set_key_scalar_plain_empty();
5299  m_evt_handler->set_val_scalar_plain_empty();
5300  addrem_flags(RNXT, RKEY|QMRK);
5301  // keep going in this function
5302  }
5303  else if(first == '}') // this happens on a trailing comma like ", }"
5304  {
5305  _c4dbgp("mapflow[RKEY]: end!");
5306  _line_progressed(1);
5307  _end_map_flow();
5308  goto mapflow_finish;
5309  }
5310  else if(first == '&')
5311  {
5312  csubstr anchor = _scan_anchor();
5313  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5314  m_evt_handler->set_key_anchor(anchor);
5315  }
5316  else if(first == '*')
5317  {
5318  csubstr ref = _scan_ref_map();
5319  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5320  m_evt_handler->set_key_ref(ref);
5321  addrem_flags(RKCL, RKEY);
5322  }
5323  else if(first == '[')
5324  {
5325  // RYML's tree cannot store container keys, but that's
5326  // handled inside the tree event handler. Other handler
5327  // types may be able to handle it.
5328  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5329  addrem_flags(RKCL, RKEY);
5330  m_evt_handler->begin_seq_key_flow();
5331  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5332  _set_indentation(m_evt_handler->m_parent->indref);
5333  _line_progressed(1);
5334  goto mapflow_finish;
5335  }
5336  else if(first == '{')
5337  {
5338  // RYML's tree cannot store container keys, but that's
5339  // handled inside the tree event handler. Other handler
5340  // types may be able to handle it.
5341  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5342  addrem_flags(RKCL, RKEY);
5343  m_evt_handler->begin_map_key_flow();
5344  addrem_flags(RKEY, RVAL|RKCL);
5345  _set_indentation(m_evt_handler->m_parent->indref);
5346  _line_progressed(1);
5347  // keep going in this function
5348  }
5349  else if(first == '!')
5350  {
5351  csubstr tag = _scan_tag();
5352  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5353  _check_tag(tag);
5354  m_evt_handler->set_key_tag(tag);
5355  }
5356  else
5357  {
5358  _c4err("parse error");
5359  }
5360  }
5361  else if(has_any(RKCL)) // read the key colon
5362  {
5363  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5364  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5365  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5366  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5367  const char first = rem.str[0];
5368  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5369  if(first == ':')
5370  {
5371  _c4dbgp("mapflow[RKCL]: found the colon");
5372  addrem_flags(RVAL, RKCL);
5373  _line_progressed(1);
5374  }
5375  else if(first == '}')
5376  {
5377  _c4dbgp("mapflow[RKCL]: end with missing val!");
5378  addrem_flags(RVAL, RKCL);
5379  m_evt_handler->set_val_scalar_plain_empty();
5380  _line_progressed(1);
5381  _end_map_flow();
5382  goto mapflow_finish;
5383  }
5384  else if(first == ',')
5385  {
5386  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5387  m_evt_handler->set_val_scalar_plain_empty();
5388  m_evt_handler->add_sibling();
5389  addrem_flags(RKEY, RKCL);
5390  _line_progressed(1);
5391  }
5392  else
5393  {
5394  _c4err("parse error");
5395  }
5396  }
5397  else if(has_any(RVAL))
5398  {
5399  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5400  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5401  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5402  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5403  const char first = rem.str[0];
5404  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5405  ScannedScalar sc;
5406  if(first == '\'')
5407  {
5408  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5409  sc = _scan_scalar_squot();
5410  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5411  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5412  addrem_flags(RNXT, RVAL);
5413  }
5414  else if(first == '"')
5415  {
5416  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5417  sc = _scan_scalar_dquot();
5418  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5419  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5420  addrem_flags(RNXT, RVAL);
5421  }
5422  // block scalars (ie | and >) cannot appear in flow containers
5423  else if(_scan_scalar_plain_map_flow(&sc))
5424  {
5425  _c4dbgp("mapflow[RVAL]: plain scalar.");
5426  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5427  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5428  addrem_flags(RNXT, RVAL);
5429  }
5430  else if(first == '[')
5431  {
5432  _c4dbgp("mapflow[RVAL]: start val seqflow");
5433  addrem_flags(RNXT, RVAL);
5434  m_evt_handler->begin_seq_val_flow();
5435  _set_indentation(m_evt_handler->m_parent->indref);
5436  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5437  _line_progressed(1);
5438  goto mapflow_finish;
5439  }
5440  else if(first == '{')
5441  {
5442  _c4dbgp("mapflow[RVAL]: start val mapflow");
5443  addrem_flags(RNXT, RVAL);
5444  m_evt_handler->begin_map_val_flow();
5445  _set_indentation(m_evt_handler->m_parent->indref);
5446  addrem_flags(RKEY, RNXT);
5447  _line_progressed(1);
5448  // keep going in this function
5449  }
5450  else if(first == '}')
5451  {
5452  _c4dbgp("mapflow[RVAL]: end!");
5453  m_evt_handler->set_val_scalar_plain_empty();
5454  _line_progressed(1);
5455  _end_map_flow();
5456  goto mapflow_finish;
5457  }
5458  else if(first == ',')
5459  {
5460  _c4dbgp("mapflow[RVAL]: empty val!");
5461  m_evt_handler->set_val_scalar_plain_empty();
5462  addrem_flags(RNXT, RVAL);
5463  // keep going in this function
5464  }
5465  else if(first == '*')
5466  {
5467  csubstr ref = _scan_ref_map();
5468  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5469  m_evt_handler->set_val_ref(ref);
5470  addrem_flags(RNXT, RVAL);
5471  }
5472  else if(first == '&')
5473  {
5474  csubstr anchor = _scan_anchor();
5475  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5476  m_evt_handler->set_val_anchor(anchor);
5477  }
5478  else if(first == '!')
5479  {
5480  csubstr tag = _scan_tag();
5481  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5482  _check_tag(tag);
5483  m_evt_handler->set_val_tag(tag);
5484  }
5485  else
5486  {
5487  _c4err("parse error");
5488  }
5489  }
5490  else if(has_any(RNXT))
5491  {
5492  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5493  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5494  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5495  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5496  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5497  if(rem.begins_with(','))
5498  {
5499  _c4dbgp("mapflow[RNXT]: expect next keyval");
5500  m_evt_handler->add_sibling();
5501  addrem_flags(RKEY, RNXT);
5502  _line_progressed(1);
5503  }
5504  else if(rem.begins_with('}'))
5505  {
5506  _c4dbgp("mapflow[RNXT]: end!");
5507  _line_progressed(1);
5508  _end_map_flow();
5509  goto mapflow_finish;
5510  }
5511  else
5512  {
5513  _c4err("parse error");
5514  }
5515  }
5516  else if(has_any(QMRK))
5517  {
5518  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5519  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5520  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5521  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5522  const char first = rem.str[0];
5523  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5524  ScannedScalar sc;
5525  if(first == '\'')
5526  {
5527  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5528  sc = _scan_scalar_squot();
5529  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5530  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5531  addrem_flags(RKCL, QMRK);
5532  }
5533  else if(first == '"')
5534  {
5535  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5536  sc = _scan_scalar_dquot();
5537  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5538  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5539  addrem_flags(RKCL, QMRK);
5540  }
5541  // block scalars (ie | and >) cannot appear in flow containers
5542  else if(_scan_scalar_plain_map_flow(&sc))
5543  {
5544  _c4dbgp("mapflow[QMRK]: plain scalar");
5545  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5546  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5547  addrem_flags(RKCL, QMRK);
5548  }
5549  else if(first == ':')
5550  {
5551  _c4dbgp("mapflow[QMRK]: setting empty key");
5552  m_evt_handler->set_key_scalar_plain_empty();
5553  addrem_flags(RVAL, QMRK);
5554  _line_progressed(1);
5555  _maybe_skip_whitespace_tokens();
5556  }
5557  else if(first == '}') // this happens on a trailing comma like ", }"
5558  {
5559  _c4dbgp("mapflow[QMRK]: end!");
5560  m_evt_handler->set_key_scalar_plain_empty();
5561  m_evt_handler->set_val_scalar_plain_empty();
5562  _end_map_flow();
5563  _line_progressed(1);
5564  goto mapflow_finish;
5565  }
5566  else if(first == ',')
5567  {
5568  _c4dbgp("mapflow[QMRK]: empty key+val!");
5569  m_evt_handler->set_key_scalar_plain_empty();
5570  m_evt_handler->set_val_scalar_plain_empty();
5571  addrem_flags(RNXT, QMRK);
5572  }
5573  else if(first == '&')
5574  {
5575  csubstr anchor = _scan_anchor();
5576  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5577  m_evt_handler->set_key_anchor(anchor);
5578  }
5579  else if(first == '*')
5580  {
5581  csubstr ref = _scan_ref_map();
5582  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5583  m_evt_handler->set_key_ref(ref);
5584  addrem_flags(RKCL, QMRK);
5585  }
5586  else if(first == '[')
5587  {
5588  // RYML's tree cannot store container keys, but that's
5589  // handled inside the tree sink. Other sink types may be
5590  // able to handle it.
5591  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5592  addrem_flags(RKCL, QMRK);
5593  m_evt_handler->begin_seq_key_flow();
5594  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5595  _set_indentation(m_evt_handler->m_parent->indref);
5596  _line_progressed(1);
5597  goto mapflow_finish;
5598  }
5599  else if(first == '{')
5600  {
5601  // RYML's tree cannot store container keys, but that's
5602  // handled inside the tree sink. Other sink types may be
5603  // able to handle it.
5604  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5605  addrem_flags(RKCL, QMRK);
5606  m_evt_handler->begin_map_key_flow();
5607  _set_indentation(m_evt_handler->m_parent->indref);
5608  addrem_flags(RKEY, RKCL);
5609  _line_progressed(1);
5610  // keep going in this function
5611  }
5612  else if(first == '!')
5613  {
5614  csubstr tag = _scan_tag();
5615  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5616  _check_tag(tag);
5617  m_evt_handler->set_key_tag(tag);
5618  }
5619  else
5620  {
5621  _c4err("parse error");
5622  }
5623  }
5624 
5625  mapflow_again:
5626  _c4dbgt("mapflow: go again", 0);
5627  if(_finished_line())
5628  {
5629  if(C4_LIKELY(!_finished_file()))
5630  {
5631  _line_ended();
5632  _scan_line();
5633  _c4dbgnextline();
5634  }
5635  else
5636  {
5637  _c4err("missing terminating }");
5638  }
5639  }
5640  goto mapflow_start;
5641 
5642  mapflow_finish:
5643  _c4dbgp("mapflow: finish");
5644 }
5645 
5646 
5647 //-----------------------------------------------------------------------------
5648 
/** Parse the contents of a block sequence.
 *
 * On every pass the current state must have RSEQ and RBLCK set, together
 * with exactly one of these sub-states (asserted at the top of the pass):
 *
 *   - RVAL: the value of the current element is expected
 *   - RNXT: the element already has its value; the next token is
 *           expected (eg the `-` of the following element)
 *
 * The function is written as a goto loop:
 *
 *   - seqblck_start handles what is left of the current line
 *   - seqblck_again scans the next line when the current one is
 *     exhausted, and jumps back to seqblck_start
 *   - seqblck_finish returns
 *
 * It returns when a child block map or a flow container is started,
 * when a smaller indentation is found while in RVAL, when a smaller
 * indentation pops to a state which is not a block sequence while in
 * RNXT, when an indentless sequence is ended, on a document token
 * (`---` or `...`), or when the file is finished. A nested block
 * sequence, on the other hand, is started and then continued inside
 * this same function.
 *
 * Input that fits none of the handled cases is reported through
 * _c4err(). */
5649 template<class EventHandler>
5650 void ParseEngine<EventHandler>::_handle_seq_block()
5651 {
5652 seqblck_start:
      // every pass of the loop re-enters here (see seqblck_again at the bottom)
5653  _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5654 
      // invariants: this is a block seq, in exactly one of RVAL or RNXT
5655  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5656  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK));
5657  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5658  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5659 
      // rem is the unread remainder of the current line; when it is
      // empty there is nothing to do here, so go on to the next line
5660  _maybe_skip_comment();
5661  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5662  if(!rem.len)
5663  goto seqblck_again;
5664 
5665  if(has_any(RVAL))
5666  {
5667  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5668  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
      //
      // handle indentation (only when at the beginning of the line)
      //
5669  if(m_evt_handler->m_curr->at_line_beginning())
5670  {
5671  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5672  if(m_evt_handler->m_curr->indentation_ge())
5673  {
5674  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5675  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5676  rem = m_evt_handler->m_curr->line_contents.rem;
5677  if(!rem.len)
5678  goto seqblck_again;
5679  }
5680  else if(m_evt_handler->m_curr->indentation_lt())
5681  {
      // note that in RVAL the function always returns after the
      // pop, whatever the resulting state is (compare with RNXT)
5682  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5683  _handle_indentation_pop_from_block_seq();
5684  goto seqblck_finish;
5685  }
5686  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5687  {
5688  _c4dbgp("seqblck[RVAL]: empty line!");
5689  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5690  goto seqblck_again;
5691  }
5692  }
      // the following branch is compiled only when
      // RYML_NO_COVERAGE__TO_BE_DELETED is defined
5693  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5694  else
5695  {
5696  // accommodate annotation on the previous line. eg:
5697  // - &elm
5698  // foo # <-- on this line
5699  // - &elm
5700  // &foo foo: bar # <-- on this line
5701  if(rem.str[0] == ' ')
5702  {
5703  if(_handle_indentation_from_annotations())
5704  {
5705  _c4dbgp("seqblck[RVAL]: annotations!");
5706  rem = m_evt_handler->m_curr->line_contents.rem;
5707  if(!rem.len)
5708  goto seqblck_again;
5709  }
5710  }
5711  }
5712  #endif
      //
      // now handle the tokens, dispatching on the first character
      //
5713  _RYML_ASSERT_BASIC_(callbacks(), rem.len);
5714  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5715  const char first = rem.str[0];
5716  const size_t startline = m_evt_handler->m_curr->pos.line;
5717  // warning: the gcc optimizer on x86 builds is brittle with
5718  // this function:
      // startindent is the column where the token starts, discounting
      // m_bom_len. It is compared with indref below to tell a
      // sibling/parent token from a child container.
      // NOTE(review): m_bom_len is presumably the length of a byte
      // order mark at the start of the buffer; it is zeroed at
      // seqblck_again once a line is finished -- TODO confirm
5719  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
5720  ScannedScalar sc;
      // For quoted and plain scalars, a colon following the scalar
      // decides what the scalar is: without the colon it is the value
      // of this element; with the colon a child block map is started
      // with the scalar as its first key, and the function returns.
5721  if(first == '\'')
5722  {
5723  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5724  sc = _scan_scalar_squot();
5725  if(!_maybe_scan_following_colon())
5726  {
5727  _c4dbgp("seqblck[RVAL]: set as val");
5728  _handle_annotations_before_blck_val_scalar();
5729  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5730  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5731  addrem_flags(RNXT, RVAL);
5732  }
5733  else
5734  {
5735  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
      // the seq state is left in RNXT before the map is begun; the
      // flags set after begin_map_val_block() are for the child map
5736  addrem_flags(RNXT, RVAL);
5737  _handle_annotations_before_start_mapblck(startline);
5738  _handle_colon();
5739  m_evt_handler->begin_map_val_block();
5740  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5741  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5742  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5743  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5744  _maybe_skip_whitespace_tokens();
5745  goto seqblck_finish;
5746  }
5747  }
5748  else if(first == '"')
5749  {
      // same structure as the single-quoted case above
5750  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5751  sc = _scan_scalar_dquot();
5752  if(!_maybe_scan_following_colon())
5753  {
5754  _c4dbgp("seqblck[RVAL]: set as val");
5755  _handle_annotations_before_blck_val_scalar();
5756  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5757  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5758  addrem_flags(RNXT, RVAL);
5759  }
5760  else
5761  {
5762  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5763  addrem_flags(RNXT, RVAL);
5764  _handle_annotations_before_start_mapblck(startline);
5765  _handle_colon();
5766  m_evt_handler->begin_map_val_block();
5767  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5768  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5769  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5770  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5771  _maybe_skip_whitespace_tokens();
5772  goto seqblck_finish;
5773  }
5774  }
5775  // block scalars can only appear as keys when in QMRK scope
5776  // (ie, after ? tokens), so no need to scan following colon in
5777  // here.
5778  else if(first == '|')
5779  {
5780  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5781  ScannedBlock sb;
5782  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5783  _handle_annotations_before_blck_val_scalar();
5784  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5785  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5786  addrem_flags(RNXT, RVAL);
5787  }
5788  else if(first == '>')
5789  {
5790  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5791  ScannedBlock sb;
5792  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5793  _handle_annotations_before_blck_val_scalar();
5794  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5795  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5796  addrem_flags(RNXT, RVAL);
5797  }
      // the plain scalar scan is attempted before the remaining
      // single-character tokens are tested, so those are reached
      // only when this scan returns false
5798  else if(_scan_scalar_plain_seq_blck(&sc))
5799  {
5800  _c4dbgp("seqblck[RVAL]: plain scalar.");
5801  if(!_maybe_scan_following_colon())
5802  {
5803  _c4dbgp("seqblck[RVAL]: set as val");
5804  _handle_annotations_before_blck_val_scalar();
5805  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5806  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5807  addrem_flags(RNXT, RVAL);
5808  }
5809  else
5810  {
      // plain scalar followed by colon: the scalar is a key. Which
      // map it belongs to depends on where the scalar started:
      //   - deeper than this seq: first key of a child block map
      //   - at the indentation of the parent state: this seq was
      //     indentless; it is ended (with an empty value for the
      //     current element) and the scalar becomes the next key
      //     of the parent
      //   - anything else is an error
5811  if(startindent > m_evt_handler->m_curr->indref)
5812  {
5813  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5814  addrem_flags(RNXT, RVAL);
5815  _handle_annotations_before_start_mapblck(startline);
5816  _handle_colon();
5817  m_evt_handler->begin_map_val_block();
5818  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5819  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5820  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5821  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5822  _maybe_skip_whitespace_tokens();
5823  goto seqblck_finish;
5824  }
      // NOTE(review): this tests has_any(RMAP|RBLCK) on the parent,
      // whereas the analogous end-of-indentless-seq test in the RNXT
      // branch below uses has_all(RMAP|RBLCK) -- confirm which one
      // is intended
5825  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|RBLCK, m_evt_handler->m_parent))
5826  {
5827  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5828  m_evt_handler->set_val_scalar_plain_empty();
5829  m_evt_handler->end_seq_block();
5830  m_evt_handler->add_sibling();
5831  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5832  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5833  addrem_flags(RVAL, RNXT|RKEY);
5834  _maybe_skip_whitespace_tokens();
5835  goto seqblck_finish;
5836  }
5837  else
5838  {
5839  _c4err("parse error");
5840  }
5841  }
5842  }
      // flow containers: the seq state is left in RNXT, the child
      // flow container is begun with its own flags, the opening
      // bracket is consumed, and the function returns
5843  else if(first == '[')
5844  {
5845  _c4dbgp("seqblck[RVAL]: start child seqflow");
5846  addrem_flags(RNXT, RVAL);
5847  _handle_annotations_before_blck_val_scalar();
5848  m_evt_handler->begin_seq_val_flow();
5849  addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
5850  _line_progressed(1);
5851  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5852  goto seqblck_finish;
5853  }
5854  else if(first == '{')
5855  {
5856  _c4dbgp("seqblck[RVAL]: start child mapflow");
5857  addrem_flags(RNXT, RVAL);
5858  _handle_annotations_before_blck_val_scalar();
5859  m_evt_handler->begin_map_val_flow();
5860  addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
5861  _line_progressed(1);
5862  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5863  goto seqblck_finish;
5864  }
      // a dash while a value is expected: at the indentation of this
      // seq it is the next element (so the current one is empty);
      // otherwise it must be deeper, and starts a nested block seq
5865  else if(first == '-')
5866  {
5867  if(startindent == m_evt_handler->m_curr->indref)
5868  {
5869  _c4dbgp("seqblck[RVAL]: prev val was empty");
5870  _handle_annotations_before_blck_val_scalar();
5871  m_evt_handler->set_val_scalar_plain_empty();
5872  // keep in RVAL, but for the next sibling
5873  m_evt_handler->add_sibling();
5874  }
5875  else
5876  {
5877  _c4dbgp("seqblck[RVAL]: start child seqblck");
5878  _RYML_ASSERT_BASIC_(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5879  addrem_flags(RNXT, RVAL);
5880  _handle_annotations_before_blck_val_scalar();
5881  m_evt_handler->begin_seq_val_block();
5882  addrem_flags(RVAL, RNXT);
5883  _set_indentation(startindent);
5884  // keep going on inside this function
5885  }
      // in both cases, consume the dash
5886  _line_progressed(1);
5887  _maybe_skip_whitespace_tokens();
5888  }
5889  else if(first == ':')
5890  {
5891  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5892  addrem_flags(RNXT, RVAL);
5893  _handle_annotations_before_start_mapblck(startline);
5894  _handle_colon();
5895  m_evt_handler->begin_map_val_block();
5896  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5897  m_evt_handler->set_key_scalar_plain_empty();
5898  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5899  _line_progressed(1);
5900  _maybe_skip_whitespace_tokens();
5901  goto seqblck_finish;
5902  }
5903  else if(first == '&')
5904  {
5905  const csubstr anchor = _scan_anchor();
5906  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5907  // we need to buffer the anchors, as there may be two
5908  // consecutive anchors in here
5909  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5910  }
5911  else if(first == '*')
5912  {
5913  csubstr ref = _scan_ref_seq();
5914  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5915  if(!_maybe_scan_following_colon())
5916  {
5917  _c4dbgp("seqblck[RVAL]: set ref as val!");
5918  _handle_annotations_before_blck_val_scalar();
5919  m_evt_handler->set_val_ref(ref);
5920  addrem_flags(RNXT, RVAL);
5921  }
5922  else
5923  {
      // unlike the scalar-as-key branches above, this one does not
      // call _handle_colon(), and it sets the indentation of the
      // child map explicitly
5924  _c4dbgp("seqblck[RVAL]: ref is key of map");
5925  addrem_flags(RNXT, RVAL);
5926  _handle_annotations_before_start_mapblck(startline);
5927  m_evt_handler->begin_map_val_block();
5928  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5929  m_evt_handler->set_key_ref(ref);
5930  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5931  _set_indentation(startindent);
5932  _maybe_skip_whitespace_tokens();
5933  goto seqblck_finish;
5934  }
5935  }
5936  else if(first == '!')
5937  {
5938  csubstr tag = _scan_tag();
5939  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5940  // we need to buffer the tags, as there may be two
5941  // consecutive tags in here
5942  _add_annotation(&m_pending_tags, tag, startindent, startline);
5943  }
5944  else if(first == '?')
5945  {
      // explicit key token: a child block map is begun in QMRK
      // state, the token is consumed, and the function returns
5946  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5947  addrem_flags(RNXT, RVAL);
5948  m_was_inside_qmrk = true;
5949  m_evt_handler->begin_map_val_block();
5950  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5951  _set_indentation(startindent);
5952  _line_progressed(1);
5953  _maybe_skip_whitespace_tokens();
5954  goto seqblck_finish;
5955  }
5956  else
5957  {
5958  _c4err("parse error");
5959  }
5960  }
5961  else // RNXT
5962  {
5963  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5964  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5965  //
5966  // handle indentation
5967  //
5968  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5969  if(C4_LIKELY(_at_line_begin()))
5970  {
5971  _c4dbgp("seqblck[RNXT]: at line begin");
5972  if(m_evt_handler->m_curr->indentation_ge())
5973  {
      // note that only indref characters are skipped here, not the
      // full indentation of the line; whitespace tokens left after
      // that are skipped by the call that follows
5974  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5975  _line_progressed(m_evt_handler->m_curr->indref);
5976  _maybe_skip_whitespace_tokens();
5977  rem = m_evt_handler->m_curr->line_contents.rem;
5978  if(!rem.len)
5979  goto seqblck_again;
5980  }
5981  else if(m_evt_handler->m_curr->indentation_lt())
5982  {
      // after the pop, the state may still be a block seq; in that
      // case the tokens are handled here. Otherwise return.
5983  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5984  _handle_indentation_pop_from_block_seq();
5985  if(has_all(RSEQ|RBLCK))
5986  {
5987  _c4dbgp("seqblck[RNXT]: still seqblck!");
5988  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5989  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5990  rem = m_evt_handler->m_curr->line_contents.rem;
5991  if(!rem.len)
5992  goto seqblck_again;
5993  }
5994  else
5995  {
5996  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5997  goto seqblck_finish;
5998  }
5999  }
6000  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6001  {
      // NOTE(review): the format string says len={} but the argument
      // is the rem substring itself, not rem.len -- this affects
      // only the debug output; confirm whether rem.len was intended
6002  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6003  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6004  rem = m_evt_handler->m_curr->line_contents.rem;
6005  if(!rem.len)
6006  goto seqblck_again;
6007  }
6008  }
6009  else
6010  {
      // in the middle of a line, only spaces or tabs are accepted
      // as the start of what remains
6011  _c4dbgp("seqblck[RNXT]: NOT at line begin");
6012  if(!rem.begins_with_any(" \t"))
6013  {
6014  _c4err("parse error");
6015  }
6016  else
6017  {
6018  _skipchars(" \t");
6019  rem = m_evt_handler->m_curr->line_contents.rem;
6020  if(!rem.len)
6021  {
6022  _c4dbgp("seqblck[RNXT]: again");
6023  goto seqblck_again;
6024  }
6025  }
6026  }
6027  //
6028  // now handle the tokens
6029  //
6030  const char first = rem.str[0];
6031  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
6032  if(first == '-')
6033  {
      // the dash starts the next element, unless the seq and the
      // line both have zero indentation and rem is a doc begin
      // token; in that case a new document is started instead
6034  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6035  {
6036  _c4dbgp("seqblck[RNXT]: expect next val");
6037  addrem_flags(RVAL, RNXT);
6038  m_evt_handler->add_sibling();
6039  _line_progressed(1);
6040  _maybe_skip_whitespace_tokens();
6041  }
6042  else
6043  {
6044  _c4dbgp("seqblck[RNXT]: start doc");
6045  _start_doc_suddenly();
6046  _line_progressed(3);
6047  _maybe_skip_whitespace_tokens();
6048  goto seqblck_finish;
6049  }
6050  }
6051  else if(first == ':')
6052  {
6053  // This happens for example in `- [a: b]: c` (after
6054  // terminating the seq, ie, after `]`). All other cases
6055  // (ie colon after scalars) are caught elsewhere (ie, in
6056  // RVAL state).
6057  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6058  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
6059  {
      // the colon itself is not consumed here; the seq is ended
      // and the function returns
6060  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6061  m_evt_handler->end_seq_block();
6062  goto seqblck_finish;
6063  }
6064  else
6065  {
6066  _c4err("parse error");
6067  }
6068  }
6069  else if(first == '.')
6070  {
      // accepted only as a `...` token: either exactly three dots
      // as the whole of rem, or three dots followed by a space
6071  _c4dbgp("seqblck[RNXT]: maybe doc?");
6072  csubstr rs = rem.sub(1);
6073  if(rs == ".." || rs.begins_with(".. "))
6074  {
6075  _c4dbgp("seqblck[RNXT]: end+start doc");
6076  _end_doc_suddenly();
6077  _line_progressed(3);
6078  _maybe_skip_whitespace_tokens();
6079  goto seqblck_finish;
6080  }
6081  else
6082  {
6083  _c4err("parse error");
6084  }
6085  }
6086  else
6087  {
6088  // may be an indentless sequence nested in a map...
6089  //if(m_evt_handler->m_stack.size() >= 2)
6090  #ifdef RYML_DBG
6091  char flagbuf_[128];
6092  for(auto const& s : m_evt_handler->m_stack)
6093  {
6094  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
6095  }
6096  #endif
      // the seq is indentless when its parent is a block map with
      // the same indref; the state is then popped to the parent,
      // which is put in RKEY for its next sibling. The token is
      // not consumed here.
6097  if(m_evt_handler->m_parent && has_all(RMAP|RBLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6098  {
6099  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6100  _RYML_ASSERT_BASIC_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6101  _handle_indentation_pop(m_evt_handler->m_parent);
6102  _RYML_ASSERT_BASIC_(this->callbacks(), has_all(RMAP|RBLCK));
6103  m_evt_handler->add_sibling();
6104  addrem_flags(RKEY, RNXT);
6105  goto seqblck_finish;
6106  }
6107  else //if(first != '*')
6108  {
6109  _c4err("parse error");
6110  }
6111  }
6112  }
6113 
      // Reached by falling through from the branches above, or by an
      // explicit jump. When the current line is exhausted the next
      // one is scanned; if that finds the end of the file, the seq is
      // ended and the function returns. Otherwise, loop.
6114  seqblck_again:
6115  _c4dbgt("seqblck: go again", 0);
6116  if(_finished_line())
6117  {
6118  m_bom_len = 0;
6119  _line_ended();
6120  _scan_line();
6121  if(_finished_file())
6122  {
6123  _c4dbgp("seqblck: finish!");
6124  _end_seq_blck();
6125  goto seqblck_finish;
6126  }
6127  _c4dbgnextline();
6128  }
6129  goto seqblck_start;
6130 
      // single exit point of the function
6131  seqblck_finish:
6132  _c4dbgp("seqblck: finish");
6133 }
6134 
6135 
6136 //-----------------------------------------------------------------------------
6137 
6138 template<class EventHandler>
6139 void ParseEngine<EventHandler>::_handle_map_block()
6140 {
6141 mapblck_start:
6142  _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6143 
6144  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
6145  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
6146  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK));
6147  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
6148  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
6149 
6150  _maybe_skip_comment();
6151  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6152  if(!rem.len)
6153  goto mapblck_again;
6154 
6155  if(has_any(RKEY))
6156  {
6157  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6158  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6159  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6160  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6161  //
6162  // handle indentation
6163  //
6164  if(m_evt_handler->m_curr->at_line_beginning())
6165  {
6166  if(m_evt_handler->m_curr->indentation_eq())
6167  {
6168  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6169  _line_progressed(m_evt_handler->m_curr->indref);
6170  rem = m_evt_handler->m_curr->line_contents.rem;
6171  if(!rem.len)
6172  goto mapblck_again;
6173  }
6174  else if(m_evt_handler->m_curr->indentation_lt())
6175  {
6176  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6177  _handle_indentation_pop_from_block_map();
6178  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6179  if(has_all(RMAP|RBLCK))
6180  {
6181  _c4dbgp("mapblck[RKEY]: still mapblck!");
6182  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6183  rem = m_evt_handler->m_curr->line_contents.rem;
6184  if(!rem.len)
6185  goto mapblck_again;
6186  }
6187  else
6188  {
6189  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6190  goto mapblck_finish;
6191  }
6192  }
6193  else
6194  {
6195  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6196  _c4err("invalid indentation");
6197  }
6198  }
6199  //
6200  // now handle the tokens
6201  //
6202  const char first = rem.str[0];
6203  const size_t startline = m_evt_handler->m_curr->pos.line;
6204  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6205  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6206  ScannedScalar sc;
6207  if(first == '\'')
6208  {
6209  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6210  sc = _scan_scalar_squot();
6211  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6212  _handle_annotations_before_blck_key_scalar();
6213  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6214  addrem_flags(RVAL, RKEY);
6215  if(!_maybe_scan_following_colon())
6216  _c4err("could not find ':' colon after key");
6217  _maybe_skip_whitespace_tokens();
6218  }
6219  else if(first == '"')
6220  {
6221  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6222  sc = _scan_scalar_dquot();
6223  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6224  _handle_annotations_before_blck_key_scalar();
6225  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6226  addrem_flags(RVAL, RKEY);
6227  if(!_maybe_scan_following_colon())
6228  _c4err("could not find ':' colon after key");
6229  _maybe_skip_whitespace_tokens();
6230  }
6231  // block scalars (| and >) can not be used as keys unless they
6232  // appear in an explicit QMRK scope (ie, after the ? token),
6233  else if(C4_UNLIKELY(first == '|'))
6234  {
6235  _c4err("block map: literal keys must be enclosed in '?'");
6236  }
6237  else if(C4_UNLIKELY(first == '>'))
6238  {
6239  _c4err("block map: folded keys must be enclosed in '?'");
6240  }
6241  else if(_scan_scalar_plain_map_blck(&sc))
6242  {
6243  _c4dbgp("mapblck[RKEY]: plain scalar");
6244  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6245  _handle_annotations_before_blck_key_scalar();
6246  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6247  addrem_flags(RVAL, RKEY);
6248  if(!_maybe_scan_following_colon())
6249  _c4err("could not find ':' colon after key");
6250  _maybe_skip_whitespace_tokens();
6251  }
6252  else if(first == '?')
6253  {
6254  _c4dbgp("mapblck[RKEY]: key token!");
6255  addrem_flags(QMRK, RKEY);
6256  _line_progressed(1);
6257  _maybe_skip_whitespace_tokens();
6258  m_was_inside_qmrk = true;
6259  goto mapblck_again;
6260  }
6261  else if(first == ':')
6262  {
6263  _c4dbgp("mapblck[RKEY]: setting empty key");
6264  _handle_annotations_before_blck_key_scalar();
6265  m_evt_handler->set_key_scalar_plain_empty();
6266  addrem_flags(RVAL, RKEY);
6267  _line_progressed(1);
6268  _maybe_skip_whitespace_tokens();
6269  }
6270  else if(first == '*')
6271  {
6272  csubstr ref = _scan_ref_map();
6273  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6274  _handle_annotations_before_blck_key_scalar();
6275  m_evt_handler->set_key_ref(ref);
6276  addrem_flags(RVAL, RKEY);
6277  if(!_maybe_scan_following_colon())
6278  _c4err("could not find ':' colon after key");
6279  _maybe_skip_whitespace_tokens();
6280  }
6281  else if(first == '&')
6282  {
6283  csubstr anchor = _scan_anchor();
6284  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6285  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6286  }
6287  else if(first == '!')
6288  {
6289  csubstr tag = _scan_tag();
6290  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6291  _add_annotation(&m_pending_tags, tag, startindent, startline);
6292  }
6293  else if(first == '[')
6294  {
6295  // RYML's tree cannot store container keys, but that's
6296  // handled inside the tree handler. Other handlers may be
6297  // able to handle it.
6298  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6299  addrem_flags(RKCL, RKEY);
6300  _handle_annotations_before_blck_key_scalar();
6301  m_evt_handler->begin_seq_key_flow();
6302  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKCL);
6303  _line_progressed(1);
6304  _set_indentation(startindent);
6305  goto mapblck_finish;
6306  }
6307  else if(first == '{')
6308  {
6309  // RYML's tree cannot store container keys, but that's
6310  // handled inside the tree handler. Other handlers may be
6311  // able to handle it.
6312  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6313  addrem_flags(RKCL, RKEY);
6314  _handle_annotations_before_blck_key_scalar();
6315  m_evt_handler->begin_map_key_flow();
6316  addrem_flags(RFLOW|RKEY, RBLCK|RKCL);
6317  _line_progressed(1);
6318  _set_indentation(startindent);
6319  goto mapblck_finish;
6320  }
6321  else if(first == '-')
6322  {
6323  _c4dbgp("mapblck[RKEY]: maybe doc?");
6324  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6325  {
6326  _c4dbgp("mapblck[RKEY]: end+start doc");
6327  _start_doc_suddenly();
6328  _line_progressed(3);
6329  _maybe_skip_whitespace_tokens();
6330  goto mapblck_finish;
6331  }
6332  else
6333  {
6334  _c4err("parse error");
6335  }
6336  }
6337  else if(first == '.')
6338  {
6339  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6340  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6341  {
6342  _c4dbgp("mapblck[RKEY]: end doc");
6343  _end_doc_suddenly();
6344  _line_progressed(3);
6345  _maybe_skip_whitespace_tokens();
6346  goto mapblck_finish;
6347  }
6348  else
6349  {
6350  _c4err("parse error");
6351  }
6352  }
6354  else if(first == '\t')
6355  {
6356  _c4dbgp("mapblck[RKEY]: skip tabs");
6357  _maybe_skipchars('\t');
6358  })
6359  else
6360  {
6361  _c4err("parse error");
6362  }
6363  }
6364  else if(has_any(RKCL)) // read the key colon
6365  {
6366  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6367  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6368  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6369  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6370  //
6371  // handle indentation
6372  //
6373  if(m_evt_handler->m_curr->at_line_beginning())
6374  {
6375  if(m_evt_handler->m_curr->indentation_eq())
6376  {
6377  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6378  _line_progressed(m_evt_handler->m_curr->indref);
6379  rem = m_evt_handler->m_curr->line_contents.rem;
6380  if(!rem.len)
6381  goto mapblck_again;
6382  }
6383  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6384  {
6385  _c4err("invalid indentation");
6386  }
6387  }
6388  const char first = rem.str[0];
6389  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6390  if(first == ':')
6391  {
6392  _c4dbgp("mapblck[RKCL]: found the colon");
6393  addrem_flags(RVAL, RKCL);
6394  _line_progressed(1);
6395  _maybe_skip_whitespace_tokens();
6396  }
6397  else if(first == '?')
6398  {
6399  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6400  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6401  m_evt_handler->set_val_scalar_plain_empty();
6402  m_evt_handler->add_sibling();
6403  addrem_flags(QMRK, RKCL);
6404  _line_progressed(1);
6405  _maybe_skip_whitespace_tokens();
6406  }
6407  else if(first == '-')
6408  {
6409  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6410  {
6411  _c4dbgp("mapblck[RKCL]: end+start doc");
6412  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6413  _start_doc_suddenly();
6414  _line_progressed(3);
6415  _maybe_skip_whitespace_tokens();
6416  goto mapblck_finish;
6417  }
6418  else
6419  {
6420  _c4err("parse error");
6421  }
6422  }
6423  else if(first == '.')
6424  {
6425  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6426  csubstr rs = rem.sub(1);
6427  if(rs == ".." || rs.begins_with(".. "))
6428  {
6429  _c4dbgp("mapblck[RKCL]: end+start doc");
6430  _end_doc_suddenly();
6431  _line_progressed(3);
6432  goto mapblck_finish;
6433  }
6434  else
6435  {
6436  _c4err("parse error");
6437  }
6438  }
6439  else if(m_was_inside_qmrk)
6440  {
6441  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6442  _c4dbgp("mapblck[RKCL]: missing :");
6443  m_evt_handler->set_val_scalar_plain_empty();
6444  m_evt_handler->add_sibling();
6445  m_was_inside_qmrk = false;
6446  addrem_flags(RKEY, RKCL);
6447  }
6448  else
6449  {
6450  _c4err("parse error");
6451  }
6452  }
6453  else if(has_any(RVAL))
6454  {
6455  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6456  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6457  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6458  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6459  //
6460  // handle indentation
6461  //
6462  if(m_evt_handler->m_curr->at_line_beginning())
6463  {
6464  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6465  m_evt_handler->m_curr->more_indented = false;
6466  if(m_evt_handler->m_curr->indref == npos)
6467  {
6468  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6469  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6470  _line_progressed(m_evt_handler->m_curr->indref);
6471  rem = m_evt_handler->m_curr->line_contents.rem;
6472  if(!rem.len)
6473  goto mapblck_again;
6474  }
6475  else if(m_evt_handler->m_curr->indentation_eq())
6476  {
6477  _c4dbgp("mapblck[RVAL]: skip indentation!");
6478  _line_progressed(m_evt_handler->m_curr->indref);
6479  rem = m_evt_handler->m_curr->line_contents.rem;
6480  if(!rem.len)
6481  goto mapblck_again;
6482  // TODO: this is valid:
6483  //
6484  // ```yaml
6485  // a:
6486  // b:
6487  // ---
6488  // a:
6489  // b
6490  // ---
6491  // a:
6492  // b: c
6493  // ```
6494  //
6495  // ... but this is not:
6496  //
6497  // ```yaml
6498  // a:
6499  // v
6500  // ---
6501  // a: b: c
6502  // ```
6503  //
6504  // here, we probably need to set a boolean on the state
6505  // to disambiguate between these cases.
6506  }
6507  else if(m_evt_handler->m_curr->indentation_gt())
6508  {
6509  _c4dbgp("mapblck[RVAL]: more indented!");
6510  m_evt_handler->m_curr->more_indented = true;
6511  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6512  rem = m_evt_handler->m_curr->line_contents.rem;
6513  if(!rem.len)
6514  goto mapblck_again;
6515  }
6516  else if(m_evt_handler->m_curr->indentation_lt())
6517  {
6518  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6519  _handle_indentation_pop_from_block_map();
6520  if(has_all(RMAP|RBLCK))
6521  {
6522  _c4dbgp("mapblck[RVAL]: still mapblck!");
6523  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6524  if(has_any(RNXT))
6525  {
6526  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6527  m_evt_handler->add_sibling();
6528  addrem_flags(RKEY, RNXT);
6529  }
6530  goto mapblck_again;
6531  }
6532  else
6533  {
6534  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6535  goto mapblck_finish;
6536  }
6537  }
6538  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6539  {
6540  _c4dbgp("mapblck[RVAL]: empty line!");
6541  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6542  goto mapblck_again;
6543  }
6544  }
6545  //
6546  // now handle the tokens
6547  //
6548  const char first = rem.str[0];
6549  const size_t startline = m_evt_handler->m_curr->pos.line;
6550  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6551  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6552  ScannedScalar sc;
6553  if(first == '\'')
6554  {
6555  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6556  sc = _scan_scalar_squot();
6557  if(!_maybe_scan_following_colon())
6558  {
6559  _c4dbgp("mapblck[RVAL]: set as val");
6560  _handle_annotations_before_blck_val_scalar();
6561  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6562  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6563  addrem_flags(RNXT, RVAL);
6564  }
6565  else
6566  {
6567  if(startindent != m_evt_handler->m_curr->indref)
6568  {
6569  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6570  _handle_annotations_before_start_mapblck(startline);
6571  addrem_flags(RNXT, RVAL);
6572  _handle_colon();
6573  m_evt_handler->begin_map_val_block();
6574  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6575  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6576  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6577  _maybe_skip_whitespace_tokens();
6578  // keep the child state on RVAL
6579  addrem_flags(RVAL, RNXT);
6580  }
6581  else
6582  {
6583  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6584  m_evt_handler->set_val_scalar_plain_empty();
6585  m_evt_handler->add_sibling();
6586  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6587  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6588  // keep going on RVAL
6589  _maybe_skip_whitespace_tokens();
6590  }
6591  }
6592  }
6593  else if(first == '"')
6594  {
6595  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6596  sc = _scan_scalar_dquot();
6597  if(!_maybe_scan_following_colon())
6598  {
6599  _c4dbgp("mapblck[RVAL]: set as val");
6600  _handle_annotations_before_blck_val_scalar();
6601  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6602  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6603  addrem_flags(RNXT, RVAL);
6604  }
6605  else
6606  {
6607  if(startindent != m_evt_handler->m_curr->indref)
6608  {
6609  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6610  _handle_annotations_before_start_mapblck(startline);
6611  addrem_flags(RNXT, RVAL);
6612  _handle_colon();
6613  m_evt_handler->begin_map_val_block();
6614  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6615  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6616  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6617  _maybe_skip_whitespace_tokens();
6618  // keep the child state on RVAL
6619  addrem_flags(RVAL, RNXT);
6620  }
6621  else
6622  {
6623  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6624  m_evt_handler->set_val_scalar_plain_empty();
6625  m_evt_handler->add_sibling();
6626  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6627  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6628  // keep going on RVAL
6629  _maybe_skip_whitespace_tokens();
6630  }
6631  }
6632  }
6633  // block scalars can only appear as keys when in QMRK scope
6634  // (ie, after ? tokens), so no need to scan following colon
6635  else if(first == '|')
6636  {
6637  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6638  ScannedBlock sb;
6639  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6640  _handle_annotations_before_blck_val_scalar();
6641  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6642  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6643  addrem_flags(RNXT, RVAL);
6644  }
6645  else if(first == '>')
6646  {
6647  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6648  ScannedBlock sb;
6649  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6650  _handle_annotations_before_blck_val_scalar();
6651  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6652  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6653  addrem_flags(RNXT, RVAL);
6654  }
6655  else if(_scan_scalar_plain_map_blck(&sc))
6656  {
6657  _c4dbgp("mapblck[RVAL]: plain scalar.");
6658  if(!_maybe_scan_following_colon())
6659  {
6660  _c4dbgp("mapblck[RVAL]: set as val");
6661  _handle_annotations_before_blck_val_scalar();
6662  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6663  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6664  addrem_flags(RNXT, RVAL);
6665  }
6666  else
6667  {
6668  if(startindent != m_evt_handler->m_curr->indref)
6669  {
6670  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6671  addrem_flags(RNXT, RVAL);
6672  _handle_annotations_before_start_mapblck(startline);
6673  _handle_colon();
6674  m_evt_handler->begin_map_val_block();
6675  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6676  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6677  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6678  _maybe_skip_whitespace_tokens();
6679  // keep the child state on RVAL
6680  addrem_flags(RVAL, RNXT);
6681  }
6682  else
6683  {
6684  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6685  _handle_annotations_before_blck_val_scalar();
6686  m_evt_handler->set_val_scalar_plain_empty();
6687  m_evt_handler->add_sibling();
6688  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6689  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6690  // keep going on RVAL
6691  _maybe_skip_whitespace_tokens();
6692  }
6693  }
6694  }
6695  else if(first == '-')
6696  {
6697  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6698  {
6699  _c4dbgp("mapblck[RVAL]: start val seqblck");
6700  addrem_flags(RNXT, RVAL);
6701  _handle_annotations_before_blck_val_scalar();
6702  m_evt_handler->begin_seq_val_block();
6703  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6704  _set_indentation(startindent);
6705  _line_progressed(1);
6706  _maybe_skip_whitespace_tokens();
6707  goto mapblck_finish;
6708  }
6709  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6710  {
6711  _c4dbgp("mapblck[RVAL]: end+start doc");
6712  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6713  _start_doc_suddenly();
6714  _line_progressed(3);
6715  _maybe_skip_whitespace_tokens();
6716  goto mapblck_finish;
6717  }
6718  else
6719  {
6720  _c4err("parse error");
6721  }
6722  }
6723  else if(first == '[')
6724  {
6725  _c4dbgp("mapblck[RVAL]: start val seqflow");
6726  addrem_flags(RNXT, RVAL);
6727  _handle_annotations_before_blck_val_scalar();
6728  m_evt_handler->begin_seq_val_flow();
6729  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
6730  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6731  _line_progressed(1);
6732  goto mapblck_finish;
6733  }
6734  else if(first == '{')
6735  {
6736  _c4dbgp("mapblck[RVAL]: start val mapflow");
6737  addrem_flags(RNXT, RVAL);
6738  _handle_annotations_before_blck_val_scalar();
6739  m_evt_handler->begin_map_val_flow();
6740  addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
6741  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6742  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6743  _line_progressed(1);
6744  goto mapblck_finish;
6745  }
6746  else if(first == '*')
6747  {
6748  csubstr ref = _scan_ref_map();
6749  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6750  if(startindent == m_evt_handler->m_curr->indref)
6751  {
6752  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6753  m_evt_handler->set_val_ref(ref);
6754  addrem_flags(RNXT, RVAL);
6755  }
6756  else
6757  {
6758  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6759  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6760  if(_maybe_scan_following_colon())
6761  {
6762  _c4dbgp("mapblck[RVAL]: start child map, block");
6763  addrem_flags(RNXT, RVAL);
6764  _handle_annotations_before_blck_val_scalar();
6765  m_evt_handler->begin_map_val_block();
6766  m_evt_handler->set_key_ref(ref);
6767  _set_indentation(startindent);
6768  // keep going in RVAL
6769  addrem_flags(RVAL, RNXT);
6770  }
6771  else
6772  {
6773  _c4dbgp("mapblck[RVAL]: was val ref");
6774  _handle_annotations_before_blck_val_scalar();
6775  m_evt_handler->set_val_ref(ref);
6776  addrem_flags(RNXT, RVAL);
6777  }
6778  }
6779  _maybe_skip_whitespace_tokens();
6780  }
6781  else if(first == '&')
6782  {
6783  csubstr anchor = _scan_anchor();
6784  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6785  if(startindent == m_evt_handler->m_curr->indref)
6786  {
6787  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6788  m_evt_handler->set_val_scalar_plain_empty();
6789  m_evt_handler->add_sibling();
6790  addrem_flags(RKEY, RVAL);
6791  }
6792  // we need to buffer the anchors, as there may be two
6793  // consecutive anchors in here
6794  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6795  }
6796  else if(first == '!')
6797  {
6798  csubstr tag = _scan_tag();
6799  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6800  if(startindent == m_evt_handler->m_curr->indref)
6801  {
6802  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6803  _handle_annotations_before_blck_val_scalar();
6804  m_evt_handler->set_val_scalar_plain_empty();
6805  m_evt_handler->add_sibling();
6806  addrem_flags(RKEY, RVAL);
6807  }
6808  // we need to buffer the tags, as there may be two
6809  // consecutive tags in here
6810  _add_annotation(&m_pending_tags, tag, startindent, startline);
6811  }
6812  else if(first == '?')
6813  {
6814  if(startindent == m_evt_handler->m_curr->indref)
6815  {
6816  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6817  _handle_annotations_before_blck_val_scalar();
6818  m_evt_handler->set_val_scalar_plain_empty();
6819  m_evt_handler->add_sibling();
6820  addrem_flags(QMRK, RVAL);
6821  }
6822  else if(startindent > m_evt_handler->m_curr->indref)
6823  {
6824  _c4dbgp("mapblck[RVAL]: start val mapblck");
6825  addrem_flags(RNXT, RVAL);
6826  _handle_annotations_before_blck_val_scalar();
6827  m_evt_handler->begin_map_val_block();
6828  addrem_flags(QMRK|RBLCK, RNXT);
6829  _set_indentation(startindent);
6830  }
6831  else
6832  {
6833  _c4err("parse error");
6834  }
6835  m_was_inside_qmrk = true;
6836  _line_progressed(1);
6837  _maybe_skip_whitespace_tokens();
6838  goto mapblck_again;
6839  }
6840  else if(first == ':')
6841  {
6842  if(startindent == m_evt_handler->m_curr->indref)
6843  {
6844  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6845  m_evt_handler->set_val_scalar_plain_empty();
6846  m_evt_handler->add_sibling();
6847  m_evt_handler->set_key_scalar_plain_empty();
6848  }
6849  else if(startindent > m_evt_handler->m_curr->indref)
6850  {
6851  _c4dbgp("mapblck[RVAL]: start val mapblck");
6852  addrem_flags(RNXT, RVAL);
6853  _handle_annotations_before_start_mapblck(startline);
6854  _handle_colon();
6855  m_evt_handler->begin_map_val_block();
6856  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6857  m_evt_handler->set_key_scalar_plain_empty();
6858  // keep the child state on RVAL
6859  addrem_flags(RVAL, RNXT);
6860  }
6861  else
6862  {
6863  _c4err("parse error");
6864  }
6865  _line_progressed(1);
6866  _maybe_skip_whitespace_tokens();
6867  goto mapblck_again;
6868  }
6869  else if(first == '.')
6870  {
6871  _c4dbgp("mapblck[RVAL]: maybe doc?");
6872  csubstr rs = rem.sub(1);
6873  if(rs == ".." || rs.begins_with(".. "))
6874  {
6875  _c4dbgp("seqblck[RVAL]: end doc expl");
6876  _end_doc_suddenly();
6877  _line_progressed(3);
6878  _maybe_skip_whitespace_tokens();
6879  goto mapblck_finish;
6880  }
6881  else
6882  {
6883  _c4err("parse error");
6884  }
6885  }
6887  else if(first == '\t')
6888  {
6889  _c4dbgp("mapblck[RVAL]: skip tabs");
6890  _maybe_skipchars('\t');
6891  })
6892  else
6893  {
6894  _c4err("parse error");
6895  }
6896  }
6897  else if(has_any(RNXT))
6898  {
6899  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6900  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6901  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6902  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6903  //
6904  // handle indentation
6905  //
6906  if(m_evt_handler->m_curr->at_line_beginning())
6907  {
6908  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6909  if(m_evt_handler->m_curr->indentation_eq())
6910  {
6911  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6912  _line_progressed(m_evt_handler->m_curr->indref);
6913  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6914  m_evt_handler->add_sibling();
6915  addrem_flags(RKEY, RNXT);
6916  goto mapblck_again;
6917  }
6918  else if(m_evt_handler->m_curr->indentation_lt())
6919  {
6920  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6921  _handle_indentation_pop_from_block_map();
6922  if(has_all(RMAP|RBLCK))
6923  {
6924  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6925  if(!has_any(RKCL))
6926  {
6927  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6928  m_evt_handler->add_sibling();
6929  addrem_flags(RKEY, RNXT);
6930  }
6931  goto mapblck_again;
6932  }
6933  else
6934  {
6935  goto mapblck_finish;
6936  }
6937  }
6938  }
6939  else
6940  {
6941  _c4dbgp("mapblck[RNXT]: NOT at line begin");
6942  if(!rem.begins_with_any(" \t"))
6943  {
6944  _c4err("parse error");
6945  }
6946  else
6947  {
6948  _skipchars(" \t");
6949  rem = m_evt_handler->m_curr->line_contents.rem;
6950  if(!rem.len)
6951  {
6952  _c4dbgp("seqblck[RNXT]: again");
6953  goto mapblck_again;
6954  }
6955  }
6956  }
6957  //
6958  // handle tokens
6959  //
6960  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6961  const char first = rem.str[0];
6962  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6963  if(first == ':')
6964  {
6965  if(m_evt_handler->m_curr->more_indented)
6966  {
6967  _c4dbgp("mapblck[RNXT]: start child block map");
6968  C4_NOT_IMPLEMENTED();
6969  //m_evt_handler->actually_as_block_map();
6970  _line_progressed(1);
6971  _set_indentation(m_evt_handler->m_curr->scalar_col);
6972  m_evt_handler->m_curr->more_indented = false;
6973  goto mapblck_again;
6974  }
6975  else
6976  {
6977  _c4err("parse error");
6978  }
6979  }
6980  else if(first == ' ')
6981  {
6982  _c4dbgp("mapblck[RNXT]: skip spaces");
6983  _maybe_skip_whitespace_tokens();
6984  }
6985  else
6986  {
6987  _c4err("parse error");
6988  }
6989  }
6990  else if(has_any(QMRK))
6991  {
6992  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6993  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6994  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6995  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6996  //
6997  // handle indentation
6998  //
6999  if(m_evt_handler->m_curr->at_line_beginning())
7000  {
7001  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
7002  if(m_evt_handler->m_curr->indentation_eq())
7003  {
7004  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
7005  _line_progressed(m_evt_handler->m_curr->indref);
7006  rem = m_evt_handler->m_curr->line_contents.rem;
7007  if(!rem.len)
7008  goto mapblck_again;
7009  }
7010  else if(m_evt_handler->m_curr->indentation_lt())
7011  {
7012  _c4dbgp("mapblck[QMRK]: smaller indentation!");
7013  _handle_indentation_pop_from_block_map();
7014  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7015  if(has_all(RMAP|RBLCK))
7016  {
7017  _c4dbgp("mapblck[QMRK]: still mapblck!");
7018  goto mapblck_again;
7019  }
7020  else
7021  {
7022  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7023  goto mapblck_finish;
7024  }
7025  }
7026  // indentation can be larger in QMRK state
7027  else
7028  {
7029  _c4dbgp("mapblck[QMRK]: larger indentation !");
7030  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7031  rem = m_evt_handler->m_curr->line_contents.rem;
7032  if(!rem.len)
7033  goto mapblck_again;
7034  }
7035  }
7036  //
7037  // now handle the tokens
7038  //
7039  const char first = rem.str[0];
7040  const size_t startline = m_evt_handler->m_curr->pos.line;
7041  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7042  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7043  ScannedScalar sc;
7044  if(first == '\'')
7045  {
7046  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7047  sc = _scan_scalar_squot();
7048  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7049  if(!_maybe_scan_following_colon())
7050  {
7051  _c4dbgp("mapblck[QMRK]: set as key");
7052  _handle_annotations_before_blck_key_scalar();
7053  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7054  addrem_flags(RKCL, QMRK);
7055  }
7056  else
7057  {
7058  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7059  addrem_flags(RKCL, QMRK);
7060  _handle_annotations_before_start_mapblck_as_key();
7061  m_evt_handler->begin_map_key_block();
7062  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7063  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7064  _maybe_skip_whitespace_tokens();
7065  _set_indentation(startindent);
7066  // keep the child state on RVAL
7067  addrem_flags(RVAL, RKCL|QMRK);
7068  }
7069  }
7070  else if(first == '"')
7071  {
7072  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7073  sc = _scan_scalar_dquot();
7074  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7075  if(!_maybe_scan_following_colon())
7076  {
7077  _c4dbgp("mapblck[QMRK]: set as key");
7078  _handle_annotations_before_blck_key_scalar();
7079  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7080  addrem_flags(RKCL, QMRK);
7081  }
7082  else
7083  {
7084  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7085  addrem_flags(RKCL, QMRK);
7086  _handle_annotations_before_start_mapblck_as_key();
7087  m_evt_handler->begin_map_key_block();
7088  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7089  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7090  _maybe_skip_whitespace_tokens();
7091  _set_indentation(startindent);
7092  // keep the child state on RVAL
7093  addrem_flags(RVAL, RKCL|QMRK);
7094  }
7095  }
7096  else if(first == '|')
7097  {
7098  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7099  ScannedBlock sb;
7100  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7101  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7102  _handle_annotations_before_blck_key_scalar();
7103  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7104  addrem_flags(RKCL, QMRK);
7105  }
7106  else if(first == '>')
7107  {
7108  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7109  ScannedBlock sb;
7110  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7111  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7112  _handle_annotations_before_blck_key_scalar();
7113  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7114  addrem_flags(RKCL, QMRK);
7115  }
7116  else if(_scan_scalar_plain_map_blck(&sc))
7117  {
7118  _c4dbgp("mapblck[QMRK]: plain scalar");
7119  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7120  if(!_maybe_scan_following_colon())
7121  {
7122  _c4dbgp("mapblck[QMRK]: set as key");
7123  _handle_annotations_before_blck_key_scalar();
7124  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7125  addrem_flags(RKCL, QMRK);
7126  }
7127  else
7128  {
7129  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7130  addrem_flags(RKCL, QMRK);
7131  _handle_annotations_before_start_mapblck_as_key();
7132  m_evt_handler->begin_map_key_block();
7133  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7134  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7135  _maybe_skip_whitespace_tokens();
7136  _set_indentation(startindent);
7137  // keep the child state on RVAL
7138  addrem_flags(RVAL, RKCL|QMRK);
7139  }
7140  }
7141  else if(first == ':')
7142  {
7143  if(startindent == m_evt_handler->m_curr->indref)
7144  {
7145  _c4dbgp("mapblck[QMRK]: empty key");
7146  addrem_flags(RVAL, QMRK);
7147  _handle_annotations_before_blck_key_scalar();
7148  m_evt_handler->set_key_scalar_plain_empty();
7149  _line_progressed(1);
7150  _maybe_skip_whitespace_tokens();
7151  }
7152  else
7153  {
7154  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7155  addrem_flags(RKCL, QMRK);
7156  _handle_annotations_before_start_mapblck_as_key();
7157  m_evt_handler->begin_map_key_block();
7158  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7159  m_evt_handler->set_key_scalar_plain_empty();
7160  _line_progressed(1);
7161  _maybe_skip_whitespace_tokens();
7162  _set_indentation(startindent);
7163  // keep the child state on RVAL
7164  addrem_flags(RVAL, RKCL|QMRK);
7165  }
7166  }
7167  else if(first == '*')
7168  {
7169  csubstr ref = _scan_ref_map();
7170  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7171  if(!_maybe_scan_following_colon())
7172  {
7173  _c4dbgp("mapblck[QMRK]: set ref as key");
7174  _handle_annotations_before_blck_key_scalar();
7175  m_evt_handler->set_key_ref(ref);
7176  addrem_flags(RKCL, QMRK);
7177  }
7178  else
7179  {
7180  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7181  addrem_flags(RKCL, QMRK);
7182  _handle_annotations_before_blck_key_scalar();
7183  m_evt_handler->begin_map_key_block();
7184  m_evt_handler->set_key_ref(ref);
7185  _set_indentation(startindent);
7186  // keep the child state on RVAL
7187  addrem_flags(RVAL, RKCL|QMRK);
7188  }
7189  _maybe_skip_whitespace_tokens();
7190  }
7191  else if(first == '&')
7192  {
7193  csubstr anchor = _scan_anchor();
7194  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7195  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7196  }
7197  else if(first == '!')
7198  {
7199  csubstr tag = _scan_tag();
7200  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7201  _add_annotation(&m_pending_tags, tag, startindent, startline);
7202  }
7203  else if(first == '-')
7204  {
7205  _c4dbgp("mapblck[QMRK]: maybe doc?");
7206  csubstr rs = rem.sub(1);
7207  if(rs == "--" || rs.begins_with("-- "))
7208  {
7209  _c4dbgp("mapblck[QMRK]: end+start doc");
7210  _start_doc_suddenly();
7211  _line_progressed(3);
7212  }
7213  else
7214  {
7215  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7216  addrem_flags(RKCL, RKEY|QMRK);
7217  _handle_annotations_before_blck_key_scalar();
7218  m_evt_handler->begin_seq_key_block();
7219  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7220  _set_indentation(startindent);
7221  _line_progressed(1);
7222  }
7223  _maybe_skip_whitespace_tokens();
7224  goto mapblck_finish;
7225  }
7226  else if(first == '[')
7227  {
7228  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7229  addrem_flags(RKCL, RKEY|QMRK);
7230  m_evt_handler->begin_seq_key_flow();
7231  addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|QMRK|RBLCK);
7232  _set_indentation(startindent);
7233  _line_progressed(1);
7234  goto mapblck_finish;
7235  }
7236  else if(first == '{')
7237  {
7238  _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7239  addrem_flags(RKCL, RKEY|QMRK);
7240  m_evt_handler->begin_map_key_flow();
7241  addrem_flags(RKEY|RFLOW, RVAL|RKCL|QMRK|RBLCK);
7242  _set_indentation(startindent);
7243  _line_progressed(1);
7244  goto mapblck_finish;
7245  }
7246  else if(first == '?')
7247  {
7248  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7249  if(m_evt_handler->m_curr->indentation_eq())
7250  {
7251  _c4dbgp("mapblck[QMRK]: ? indent eq - prev ? was for an empty keyval");
7252  m_evt_handler->set_key_scalar_plain_empty();
7253  m_evt_handler->set_val_scalar_plain_empty();
7254  m_evt_handler->add_sibling();
7255  }
7256  else
7257  {
7258  _RYML_ASSERT_BASIC_(callbacks(), m_evt_handler->m_curr->indentation_gt());
7259  _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7260  addrem_flags(RKCL, RKEY|QMRK);
7261  m_evt_handler->begin_map_key_block();
7262  addrem_flags(RBLCK|QMRK, RVAL|RKCL);
7263  _set_indentation(startindent);
7264  }
7265  // indentation_lt() should be handled elsewhere
7266  _line_progressed(1);
7267  _maybe_skip_whitespace_tokens();
7268  }
7269  else if(first == '.')
7270  {
7271  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7272  csubstr rs = rem.sub(1);
7273  if(rs == ".." || rs.begins_with(".. "))
7274  {
7275  _c4dbgp("mapblck[QMRK]: end+start doc");
7276  _end_doc_suddenly();
7277  _line_progressed(3);
7278  goto mapblck_finish;
7279  }
7280  else
7281  {
7282  _c4err("parse error");
7283  }
7284  }
7285  else
7286  {
7287  _c4err("parse error");
7288  }
7289  }
7290 
7291  mapblck_again:
7292  _c4dbgt("mapblck: again", 0);
7293  if(_finished_line())
7294  {
7295  _line_ended();
7296  _scan_line();
7297  if(_finished_file())
7298  {
7299  _c4dbgp("mapblck: file finished!");
7300  _end_map_blck();
7301  goto mapblck_finish;
7302  }
7303  _c4dbgnextline();
7304  }
7305  goto mapblck_start;
7306 
7307  mapblck_finish:
7308  _c4dbgp("mapblck: finish");
7309 }
7310 
7311 
7312 //-----------------------------------------------------------------------------
7313 
7314 template<class EventHandler>
7315 void ParseEngine<EventHandler>::_handle_unk_json()
7316 {
7317  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7318 
7319  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7320  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7321 
7322  _maybe_skip_comment();
7323  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7324  if(!rem.len)
7325  return;
7326 
7327  size_t pos = rem.first_not_of(" \t");
7328  if(pos)
7329  {
7330  pos = pos != npos ? pos : rem.len;
7331  _c4dbgpf("skipping indentation of {}", pos);
7332  _line_progressed(pos);
7333  rem = m_evt_handler->m_curr->line_contents.rem;
7334  if(!rem.len)
7335  return;
7336  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7337  }
7338 
7339  if(rem.begins_with('['))
7340  {
7341  _c4dbgp("it's a seq");
7342  m_evt_handler->check_trailing_doc_token();
7343  _maybe_begin_doc();
7344  m_evt_handler->begin_seq_val_flow();
7345  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7346  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7347  m_doc_empty = false;
7348  _line_progressed(1);
7349  }
7350  else if(rem.begins_with('{'))
7351  {
7352  _c4dbgp("it's a map");
7353  m_evt_handler->check_trailing_doc_token();
7354  _maybe_begin_doc();
7355  m_evt_handler->begin_map_val_flow();
7356  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7357  m_doc_empty = false;
7358  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7359  _line_progressed(1);
7360  }
7361  else if(_handle_bom())
7362  {
7363  _c4dbgp("byte order mark");
7364  }
7365  else
7366  {
7367  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7368  _maybe_skip_whitespace_tokens();
7369  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7370  if(!s.len)
7371  return;
7372  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7373  const char first = s.str[0];
7374  ScannedScalar sc;
7375  if(first == '"')
7376  {
7377  _c4dbgp("runk_json: scanning double-quoted scalar");
7378  m_evt_handler->check_trailing_doc_token();
7379  _maybe_begin_doc();
7380  add_flags(RDOC);
7381  m_doc_empty = false;
7382  sc = _scan_scalar_dquot();
7383  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7384  if(!_maybe_scan_following_colon())
7385  {
7386  _c4dbgp("runk_json: set as val");
7387  _handle_annotations_before_blck_val_scalar();
7388  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7389  }
7390  else
7391  {
7392  _c4err("parse error");
7393  }
7394  }
7395  else if(_scan_scalar_plain_unk(&sc))
7396  {
7397  _c4dbgp("runk_json: got a plain scalar");
7398  m_evt_handler->check_trailing_doc_token();
7399  _maybe_begin_doc();
7400  add_flags(RDOC);
7401  m_doc_empty = false;
7402  if(!_maybe_scan_following_colon())
7403  {
7404  _c4dbgp("runk_json: set as val");
7405  _handle_annotations_before_blck_val_scalar();
7406  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7407  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7408  }
7409  else
7410  {
7411  _c4err("parse error");
7412  }
7413  }
7414  else
7415  {
7416  _c4err("parse error");
7417  }
7418  }
7419 }
7420 
7421 
7422 //-----------------------------------------------------------------------------
7423 
7424 template<class EventHandler>
7425 void ParseEngine<EventHandler>::_handle_unk()
7426 {
7427  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7428 
7429  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7430  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7431 
7432  _maybe_skip_comment();
7433  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7434  if(!rem.len)
7435  return;
7436 
7437  size_t pos = rem.first_not_of(" \t");
7438  if(pos)
7439  {
7440  pos = pos != npos ? pos : rem.len;
7441  _c4dbgpf("skipping {} whitespace characters", pos);
7442  _line_progressed(pos);
7443  rem = m_evt_handler->m_curr->line_contents.rem;
7444  if(!rem.len)
7445  return;
7446  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7447  }
7448 
7449  if(m_evt_handler->m_curr->line_contents.indentation == 0u && (_at_line_begin() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7450  {
7451  _c4dbgpf("rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7452  _c4dbgp("check BOM");
7453  if(_handle_bom())
7454  {
7455  m_bom_line = m_evt_handler->m_curr->pos.line;
7456  _c4dbgpf("byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7457  return;
7458  }
7459  const char first = rem.str[0];
7460  if(first == '-')
7461  {
7462  _c4dbgp("rtop: suspecting doc");
7463  if(_is_doc_begin_token(rem))
7464  {
7465  _c4dbgp("rtop: begin doc");
7466  _maybe_end_doc();
7467  _begin2_doc_expl();
7468  _set_indentation(0);
7469  addrem_flags(RDOC|RUNK, NDOC);
7470  _line_progressed(3u);
7471  _maybe_skip_whitespace_tokens();
7472  return;
7473  }
7474  }
7475  else if(first == '.')
7476  {
7477  _c4dbgp("rtop: suspecting doc end");
7478  if(_is_doc_end_token(rem))
7479  {
7480  _c4dbgp("rtop: end doc");
7481  if(has_any(RDOC))
7482  {
7483  _end2_doc_expl();
7484  }
7485  else
7486  {
7487  _c4dbgp("rtop: ignore end doc");
7488  }
7489  addrem_flags(NDOC|RUNK, RDOC);
7490  _line_progressed(3u);
7491  _maybe_skip_whitespace_tokens();
7492  return;
7493  }
7494  }
7495  else if(first == '%')
7496  {
7497  _c4dbgpf("directive: {}", rem);
7498  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7499  _c4err("need document footer before directives");
7500  _handle_directive(rem);
7501  return;
7502  }
7503  }
7504 
7505  /* no else-if! */
7506  char first = rem.str[0];
7507 
7508  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7509  size_t remindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7510  if(m_bom_len)
7511  {
7512  _c4dbgpf("prev BOMlen={}", m_bom_len);
7513  if(m_evt_handler->m_curr->pos.line == m_bom_line)
7514  {
7515  _c4dbgpf("BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7516  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len);
7517  remindent -= m_bom_len;
7518  }
7519  else
7520  {
7521  m_bom_len = 0;
7522  }
7523  }
7524 
7525  if(first == '[')
7526  {
7527  m_evt_handler->check_trailing_doc_token();
7528  _maybe_begin_doc();
7529  m_doc_empty = false;
7530  if(C4_LIKELY( ! _annotations_require_key_container()))
7531  {
7532  _c4dbgp("it's a seq, flow");
7533  _handle_annotations_before_blck_val_scalar();
7534  m_evt_handler->begin_seq_val_flow();
7535  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7536  _set_indentation(remindent);
7537  }
7538  else
7539  {
7540  _c4dbgp("start new block map, set flow seq as key (!)");
7541  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7542  m_evt_handler->begin_map_val_block();
7543  addrem_flags(RMAP|RBLCK|RKCL, RUNK|RTOP|RDOC);
7544  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7545  m_evt_handler->begin_seq_key_flow();
7546  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKCL);
7547  _set_indentation(remindent);
7548  }
7549  _line_progressed(1);
7550  }
7551  else if(first == '{')
7552  {
7553  m_evt_handler->check_trailing_doc_token();
7554  _maybe_begin_doc();
7555  m_doc_empty = false;
7556  if(C4_LIKELY( ! _annotations_require_key_container()))
7557  {
7558  _c4dbgp("it's a map, flow");
7559  _handle_annotations_before_blck_val_scalar();
7560  m_evt_handler->begin_map_val_flow();
7561  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7562  _set_indentation(remindent);
7563  }
7564  else
7565  {
7566  _c4dbgp("start new block map, set flow map as key (!)");
7567  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7568  m_evt_handler->begin_map_val_block();
7569  addrem_flags(RMAP|RBLCK|RKCL, RUNK|RTOP|RDOC);
7570  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7571  m_evt_handler->begin_map_key_flow();
7572  addrem_flags(RMAP|RFLOW|RKEY, RBLCK|RKCL);
7573  _set_indentation(remindent);
7574  }
7575  _line_progressed(1);
7576  }
7577  else if(first == '-' && _is_blck_token(rem))
7578  {
7579  _c4dbgp("it's a seq, block");
7580  m_evt_handler->check_trailing_doc_token();
7581  _maybe_begin_doc();
7582  _handle_annotations_before_blck_val_scalar();
7583  m_evt_handler->begin_seq_val_block();
7584  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7585  m_doc_empty = false;
7586  _set_indentation(remindent);
7587  _line_progressed(1);
7588  _maybe_skip_whitespace_tokens();
7589  }
7590  else if(first == '?' && _is_blck_token(rem))
7591  {
7592  _c4dbgp("it's a map + this key is complex");
7593  m_evt_handler->check_trailing_doc_token();
7594  _maybe_begin_doc();
7595  _handle_annotations_before_blck_val_scalar();
7596  m_evt_handler->begin_map_val_block();
7597  addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7598  m_doc_empty = false;
7599  m_was_inside_qmrk = true;
7600  _set_indentation(remindent); //_save_indentation();
7601  _line_progressed(1);
7602  _maybe_skip_whitespace_tokens();
7603  }
7604  else if(first == ':' && _is_blck_token(rem))
7605  {
7606  if(m_doc_empty)
7607  {
7608  _c4dbgp("it's a map with an empty key");
7609  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7610  m_evt_handler->check_trailing_doc_token();
7611  _maybe_begin_doc();
7612  _handle_annotations_before_start_mapblck(startline);
7613  _handle_colon();
7614  m_evt_handler->begin_map_val_block();
7615  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7616  m_evt_handler->set_key_scalar_plain_empty();
7617  m_doc_empty = false;
7618  _set_indentation(startindent);
7619  }
7620  else
7621  {
7622  _c4dbgp("actually prev val is a key!");
7623  size_t prev_indentation = m_evt_handler->m_curr->indref;
7624  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7625  _set_indentation(prev_indentation);
7626  }
7627  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7628  _line_progressed(1);
7629  _maybe_skip_whitespace_tokens();
7630  }
7631  else if(first == '&')
7632  {
7633  csubstr anchor = _scan_anchor();
7634  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7635  m_evt_handler->check_trailing_doc_token();
7636  _maybe_begin_doc();
7637  const size_t line = m_evt_handler->m_curr->pos.line;
7638  _add_annotation(&m_pending_anchors, anchor, remindent, line);
7639  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7640  m_doc_empty = false;
7641  }
7642  else if(first == '*')
7643  {
7644  csubstr ref = _scan_ref_map();
7645  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7646  m_evt_handler->check_trailing_doc_token();
7647  _maybe_begin_doc();
7648  m_doc_empty = false;
7649  if(!_maybe_scan_following_colon())
7650  {
7651  _c4dbgp("runk: set val ref");
7652  _handle_annotations_before_blck_val_scalar();
7653  m_evt_handler->set_val_ref(ref);
7654  }
7655  else
7656  {
7657  _c4dbgp("runk: start new block map, set ref as key");
7658  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7659  _handle_annotations_before_start_mapblck(startline);
7660  m_evt_handler->begin_map_val_block();
7661  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7662  m_evt_handler->set_key_ref(ref);
7663  _maybe_skip_whitespace_tokens();
7664  _set_indentation(startindent);
7665  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7666  }
7667  }
7668  else if(first == '!')
7669  {
7670  csubstr tag = _scan_tag();
7671  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7672  // we need to buffer the tags, as there may be two
7673  // consecutive tags in here
7674  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7675  const size_t line = m_evt_handler->m_curr->pos.line;
7676  _add_annotation(&m_pending_tags, tag, indentation, line);
7677  }
7678  else
7679  {
7680  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7681  _maybe_skip_whitespace_tokens();
7682  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7683  if(!s.len)
7684  return;
7685  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7686  first = s.str[0];
7687  ScannedScalar sc;
7688  if(first == '\'')
7689  {
7690  _c4dbgp("runk: scanning single-quoted scalar");
7691  m_evt_handler->check_trailing_doc_token();
7692  _maybe_begin_doc();
7693  add_flags(RDOC);
7694  m_doc_empty = false;
7695  sc = _scan_scalar_squot();
7696  if(!_maybe_scan_following_colon())
7697  {
7698  _c4dbgp("runk: set as val");
7699  _handle_annotations_before_blck_val_scalar();
7700  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7701  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7702  }
7703  else
7704  {
7705  _c4dbgp("runk: start new block map, set scalar as key");
7706  _handle_annotations_before_start_mapblck(startline);
7707  _handle_colon();
7708  m_evt_handler->begin_map_val_block();
7709  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7710  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7711  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7712  _maybe_skip_whitespace_tokens();
7713  _set_indentation(startindent);
7714  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7715  }
7716  }
7717  else if(first == '"')
7718  {
7719  _c4dbgp("runk: scanning double-quoted scalar");
7720  m_evt_handler->check_trailing_doc_token();
7721  _maybe_begin_doc();
7722  add_flags(RDOC);
7723  m_doc_empty = false;
7724  sc = _scan_scalar_dquot();
7725  if(!_maybe_scan_following_colon())
7726  {
7727  _c4dbgp("runk: set as val");
7728  _handle_annotations_before_blck_val_scalar();
7729  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7730  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7731  }
7732  else
7733  {
7734  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7735  _handle_annotations_before_start_mapblck(startline);
7736  m_evt_handler->begin_map_val_block();
7737  _handle_colon();
7738  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7739  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7740  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7741  _maybe_skip_whitespace_tokens();
7742  _set_indentation(startindent);
7743  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7744  }
7745  }
7746  else if(first == '|')
7747  {
7748  _c4dbgp("runk: scanning block-literal scalar");
7749  m_evt_handler->check_trailing_doc_token();
7750  _maybe_begin_doc();
7751  add_flags(RDOC);
7752  m_doc_empty = false;
7753  ScannedBlock sb;
7754  _scan_block(&sb, startindent);
7755  if(C4_LIKELY(!_maybe_scan_following_colon()))
7756  {
7757  _c4dbgp("runk: set as val");
7758  _handle_annotations_before_blck_val_scalar();
7759  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7760  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7761  }
7762  else
7763  {
7764  _c4err("block literal keys must be enclosed in '?'");
7765  }
7766  }
7767  else if(first == '>')
7768  {
7769  _c4dbgp("runk: scanning block-folded scalar");
7770  m_evt_handler->check_trailing_doc_token();
7771  _maybe_begin_doc();
7772  add_flags(RDOC);
7773  m_doc_empty = false;
7774  ScannedBlock sb;
7775  _scan_block(&sb, startindent);
7776  if(C4_LIKELY(!_maybe_scan_following_colon()))
7777  {
7778  _c4dbgp("runk: set as val");
7779  _handle_annotations_before_blck_val_scalar();
7780  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7781  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7782  }
7783  else
7784  {
7785  _c4err("block folded keys must be enclosed in '?'");
7786  }
7787  }
7788  else if(_scan_scalar_plain_unk(&sc))
7789  {
7790  _c4dbgp("runk: got a plain scalar");
7791  m_evt_handler->check_trailing_doc_token();
7792  _maybe_begin_doc();
7793  add_flags(RDOC);
7794  m_doc_empty = false;
7795  if(!_maybe_scan_following_colon())
7796  {
7797  _c4dbgp("runk: set as val");
7798  _handle_annotations_before_blck_val_scalar();
7799  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7800  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7801  }
7802  else
7803  {
7804  _c4dbgp("runk: start new block map, set scalar as key");
7805  _handle_annotations_before_start_mapblck(startline);
7806  _handle_colon();
7807  m_evt_handler->begin_map_val_block();
7808  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7809  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7810  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7811  _maybe_skip_whitespace_tokens();
7812  _set_indentation(startindent);
7813  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7814  }
7815  }
7816  }
7817 }
7818 
7819 
7820 //-----------------------------------------------------------------------------
7821 
7822 template<class EventHandler>
7823 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7824 {
7825  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7826 
7827  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW));
7828 
7829  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7830  if(has_any(RNXT))
7831  {
7832  _c4dbgp("usty[RNXT]: finishing!");
7833  _end_stream();
7834  }
7835  #endif
7836 
7837  _maybe_skip_comment();
7838  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7839  if(!rem.len)
7840  return;
7841 
7842  size_t pos = rem.first_not_of(" \t");
7843  if(pos)
7844  {
7845  pos = pos != npos ? pos : rem.len;
7846  _c4dbgpf("skipping indentation of {}", pos);
7847  _line_progressed(pos);
7848  rem = m_evt_handler->m_curr->line_contents.rem;
7849  if(!rem.len)
7850  return;
7851  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7852  }
7853 
7854  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7855  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7856  char first = rem.str[0];
7857  if(has_any(RSEQ)) // destination is a sequence
7858  {
7859  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7860  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7861  if(first == '[')
7862  {
7863  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7864  add_flags(RNXT);
7865  m_evt_handler->_push();
7866  addrem_flags(RFLOW|RVAL, RNXT|USTY);
7867  _set_indentation(startindent);
7868  _line_progressed(1);
7869  _maybe_skip_whitespace_tokens();
7870  }
7871  else if(first == '-' && _is_blck_token(rem))
7872  {
7873  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7874  add_flags(RNXT);
7875  m_evt_handler->_push();
7876  addrem_flags(RBLCK|RVAL, RNXT|USTY);
7877  _set_indentation(startindent);
7878  _line_progressed(1);
7879  _maybe_skip_whitespace_tokens();
7880  }
7881  else
7882  {
7883  _c4err("can only parse a seq into an existing seq");
7884  }
7885  }
7886  else if(has_any(RMAP)) // destination is a map
7887  {
7888  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7889  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7890  if(first == '{')
7891  {
7892  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7893  add_flags(RNXT);
7894  _handle_annotations_before_blck_val_scalar();
7895  m_evt_handler->_push();
7896  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
7897  _set_indentation(startindent);
7898  _line_progressed(1);
7899  _maybe_skip_whitespace_tokens();
7900  }
7901  else if(first == '?' && _is_blck_token(rem))
7902  {
7903  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7904  add_flags(RNXT);
7905  _handle_annotations_before_blck_val_scalar();
7906  m_evt_handler->_push();
7907  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
7908  m_was_inside_qmrk = true;
7909  _save_indentation();
7910  _line_progressed(1);
7911  _maybe_skip_whitespace_tokens();
7912  }
7913  else if(first == ':' && _is_blck_token(rem))
7914  {
7915  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7916  add_flags(RNXT);
7917  _handle_annotations_before_blck_val_scalar();
7918  m_evt_handler->_push();
7919  m_evt_handler->set_key_scalar_plain_empty();
7920  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7921  _save_indentation();
7922  _line_progressed(1);
7923  _maybe_skip_whitespace_tokens();
7924  }
7925  else if(rem.begins_with('&'))
7926  {
7927  csubstr anchor = _scan_anchor();
7928  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7929  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7930  const size_t line = m_evt_handler->m_curr->pos.line;
7931  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7932  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7933  }
7934  else if(first == '*')
7935  {
7936  csubstr ref = _scan_ref_map();
7937  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7938  if(!_maybe_scan_following_colon())
7939  {
7940  _c4err("cannot read a VAL to a map");
7941  }
7942  else
7943  {
7944  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7945  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7946  add_flags(RNXT);
7947  _handle_annotations_before_start_mapblck(startline);
7948  m_evt_handler->_push();
7949  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7950  m_evt_handler->set_key_ref(ref);
7951  _maybe_skip_whitespace_tokens();
7952  _set_indentation(startindent);
7953  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7954  }
7955  }
7956  else if(first == '!')
7957  {
7958  csubstr tag = _scan_tag();
7959  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7960  // we need to buffer the tags, as there may be two
7961  // consecutive tags in here
7962  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7963  const size_t line = m_evt_handler->m_curr->pos.line;
7964  _add_annotation(&m_pending_tags, tag, indentation, line);
7965  }
7966  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7967  {
7968  _c4err("cannot parse a seq into an existing map");
7969  }
7970  else
7971  {
7972  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7973  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7974  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7975  ScannedScalar sc;
7976  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7977  if(first == '\'')
7978  {
7979  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7980  sc = _scan_scalar_squot();
7981  if(!_maybe_scan_following_colon())
7982  {
7983  _c4err("cannot read a VAL to a map");
7984  }
7985  else
7986  {
7987  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7988  add_flags(RNXT);
7989  _handle_annotations_before_start_mapblck(startline);
7990  m_evt_handler->_push();
7991  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7992  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7993  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7994  _set_indentation(startindent);
7995  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7996  _maybe_skip_whitespace_tokens();
7997  }
7998  }
7999  else if(first == '"')
8000  {
8001  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8002  sc = _scan_scalar_dquot();
8003  if(!_maybe_scan_following_colon())
8004  {
8005  _c4err("cannot read a VAL to a map");
8006  }
8007  else
8008  {
8009  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8010  add_flags(RNXT);
8011  _handle_annotations_before_start_mapblck(startline);
8012  m_evt_handler->_push();
8013  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8014  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8015  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8016  _set_indentation(startindent);
8017  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8018  _maybe_skip_whitespace_tokens();
8019  }
8020  }
8021  else if(first == '|')
8022  {
8023  _c4err("block literal keys must be enclosed in '?'");
8024  }
8025  else if(first == '>')
8026  {
8027  _c4err("block literal keys must be enclosed in '?'");
8028  }
8029  else if(_scan_scalar_plain_unk(&sc))
8030  {
8031  _c4dbgp("usty[RMAP]: got a plain scalar");
8032  if(!_maybe_scan_following_colon())
8033  {
8034  _c4err("cannot read a VAL to a map");
8035  }
8036  else
8037  {
8038  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8039  add_flags(RNXT);
8040  _handle_annotations_before_start_mapblck(startline);
8041  m_evt_handler->_push();
8042  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8043  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8044  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8045  _set_indentation(startindent);
8046  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8047  _maybe_skip_whitespace_tokens();
8048  }
8049  }
8050  else
8051  {
8052  _c4err("parse error");
8053  }
8054  }
8055  }
8056  else // destination is unknown
8057  {
8058  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
8059  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8060  if(first == '[')
8061  {
8062  _c4dbgp("usty[UNK]: it's a flow seq");
8063  add_flags(RNXT);
8064  _handle_annotations_before_blck_val_scalar();
8065  m_evt_handler->begin_seq_val_flow();
8066  addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8067  _set_indentation(startindent);
8068  _line_progressed(1);
8069  _maybe_skip_whitespace_tokens();
8070  }
8071  else if(first == '-' && _is_blck_token(rem))
8072  {
8073  _c4dbgp("usty[UNK]: it's a block seq");
8074  add_flags(RNXT);
8075  _handle_annotations_before_blck_val_scalar();
8076  m_evt_handler->begin_seq_val_block();
8077  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8078  _set_indentation(startindent);
8079  _line_progressed(1);
8080  _maybe_skip_whitespace_tokens();
8081  }
8082  else if(first == '{')
8083  {
8084  _c4dbgp("usty[UNK]: it's a flow map");
8085  add_flags(RNXT);
8086  _handle_annotations_before_blck_val_scalar();
8087  m_evt_handler->begin_map_val_flow();
8088  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8089  _set_indentation(startindent);
8090  _line_progressed(1);
8091  _maybe_skip_whitespace_tokens();
8092  }
8093  else if(first == '?' && _is_blck_token(rem))
8094  {
8095  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8096  add_flags(RNXT);
8097  _handle_annotations_before_blck_val_scalar();
8098  m_evt_handler->begin_map_val_block();
8099  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8100  m_was_inside_qmrk = true;
8101  _save_indentation();
8102  _line_progressed(1);
8103  _maybe_skip_whitespace_tokens();
8104  }
8105  else if(first == ':' && _is_blck_token(rem))
8106  {
8107  _c4dbgp("usty[UNK]: it's a map with an empty key");
8108  add_flags(RNXT);
8109  _handle_annotations_before_blck_val_scalar();
8110  m_evt_handler->begin_map_val_block();
8111  m_evt_handler->set_key_scalar_plain_empty();
8112  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8113  _save_indentation();
8114  _line_progressed(1);
8115  _maybe_skip_whitespace_tokens();
8116  }
8117  else if(first == '&')
8118  {
8119  csubstr anchor = _scan_anchor();
8120  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8121  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8122  const size_t line = m_evt_handler->m_curr->pos.line;
8123  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8124  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8125  }
8126  else if(first == '*')
8127  {
8128  csubstr ref = _scan_ref_map();
8129  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8130  if(!_maybe_scan_following_colon())
8131  {
8132  _c4dbgp("usty[UNK]: set val ref");
8133  _handle_annotations_before_blck_val_scalar();
8134  m_evt_handler->set_val_ref(ref);
8135  }
8136  else
8137  {
8138  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8139  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8140  add_flags(RNXT);
8141  _handle_annotations_before_start_mapblck(startline);
8142  m_evt_handler->begin_map_val_block();
8143  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8144  m_evt_handler->set_key_ref(ref);
8145  _maybe_skip_whitespace_tokens();
8146  _set_indentation(startindent);
8147  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8148  }
8149  }
8150  else if(first == '!')
8151  {
8152  csubstr tag = _scan_tag();
8153  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8154  // we need to buffer the tags, as there may be two
8155  // consecutive tags in here
8156  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8157  const size_t line = m_evt_handler->m_curr->pos.line;
8158  _add_annotation(&m_pending_tags, tag, indentation, line);
8159  }
8160  else
8161  {
8162  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
8163  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8164  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8165  first = rem.str[0];
8166  ScannedScalar sc;
8167  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8168  if(first == '\'')
8169  {
8170  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8171  sc = _scan_scalar_squot();
8172  if(!_maybe_scan_following_colon())
8173  {
8174  _c4dbgp("usty[UNK]: set as val");
8175  _handle_annotations_before_blck_val_scalar();
8176  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8177  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8178  _end_stream();
8179  }
8180  else
8181  {
8182  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8183  add_flags(RNXT);
8184  _handle_annotations_before_start_mapblck(startline);
8185  m_evt_handler->begin_map_val_block();
8186  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8187  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8188  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8189  _set_indentation(startindent);
8190  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8191  _maybe_skip_whitespace_tokens();
8192  }
8193  }
8194  else if(first == '"')
8195  {
8196  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8197  sc = _scan_scalar_dquot();
8198  if(!_maybe_scan_following_colon())
8199  {
8200  _c4dbgp("usty[UNK]: set as val");
8201  _handle_annotations_before_blck_val_scalar();
8202  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8203  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8204  _end_stream();
8205  }
8206  else
8207  {
8208  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8209  add_flags(RNXT);
8210  _handle_annotations_before_start_mapblck(startline);
8211  m_evt_handler->begin_map_val_block();
8212  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8213  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8214  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8215  _set_indentation(startindent);
8216  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8217  _maybe_skip_whitespace_tokens();
8218  }
8219  }
8220  else if(first == '|')
8221  {
8222  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8223  ScannedBlock sb;
8224  _scan_block(&sb, startindent);
8225  _c4dbgp("usty[UNK]: set as val");
8226  _handle_annotations_before_blck_val_scalar();
8227  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8228  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8229  _end_stream();
8230  }
8231  else if(first == '>')
8232  {
8233  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8234  ScannedBlock sb;
8235  _scan_block(&sb, startindent);
8236  _c4dbgp("usty[UNK]: set as val");
8237  _handle_annotations_before_blck_val_scalar();
8238  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8239  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8240  _end_stream();
8241  }
8242  else if(_scan_scalar_plain_unk(&sc))
8243  {
8244  _c4dbgp("usty[UNK]: got a plain scalar");
8245  if(!_maybe_scan_following_colon())
8246  {
8247  _c4dbgp("usty[UNK]: set as val");
8248  _handle_annotations_before_blck_val_scalar();
8249  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8250  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8251  _end_stream();
8252  }
8253  else
8254  {
8255  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8256  add_flags(RNXT);
8257  _handle_annotations_before_start_mapblck(startline);
8258  m_evt_handler->begin_map_val_block();
8259  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8260  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8261  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8262  _set_indentation(startindent);
8263  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8264  _maybe_skip_whitespace_tokens();
8265  }
8266  }
8267  else
8268  {
8269  _c4err("parse error");
8270  }
8271  }
8272  }
8273 }
8274 
8275 
8276 //-----------------------------------------------------------------------------
8277 
8278 template<class EventHandler>
8279 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8280 {
8281  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8282  m_file = filename;
8283  m_buf = src;
8284  _reset();
8285  m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena, this);
8286  m_evt_handler->begin_stream();
8287  while( ! _finished_file())
8288  {
8289  _scan_line();
8290  while( ! _finished_line())
8291  {
8292  _c4dbgnextline();
8293  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8294  if(has_any(RSEQ))
8295  {
8296  _handle_seq_json();
8297  }
8298  else if(has_any(RMAP))
8299  {
8300  _handle_map_json();
8301  }
8302  else if(has_any(RUNK))
8303  {
8304  _handle_unk_json();
8305  }
8306  else
8307  {
8308  _c4err("internal error");
8309  }
8310  }
8311  if(_finished_file())
8312  break; // it may have finished because of multiline blocks
8313  _line_ended();
8314  }
8315  _end_stream();
8316  m_evt_handler->finish_parse();
8317 }
8318 
8319 
8320 //-----------------------------------------------------------------------------
8321 
8322 template<class EventHandler>
8323 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8324 {
8325  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8326  m_file = filename;
8327  m_buf = src;
8328  _reset();
8329  m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena, this);
8330  m_evt_handler->begin_stream();
8331  while( ! _finished_file())
8332  {
8333  _scan_line();
8334  while( ! _finished_line())
8335  {
8336  _c4dbgnextline();
8337  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8338  if(has_any(RFLOW))
8339  {
8340  if(has_none(RSEQIMAP))
8341  {
8342  if(has_any(RSEQ))
8343  {
8344  _handle_seq_flow();
8345  }
8346  else
8347  {
8348  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8349  _handle_map_flow();
8350  }
8351  }
8352  else
8353  {
8354  _handle_seq_imap();
8355  }
8356  }
8357  else if(has_any(RBLCK))
8358  {
8359  if(has_any(RSEQ))
8360  {
8361  _handle_seq_block();
8362  }
8363  else
8364  {
8365  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8366  _handle_map_block();
8367  }
8368  }
8369  else if(has_any(RUNK))
8370  {
8371  _handle_unk();
8372  }
8373  else if(has_any(USTY))
8374  {
8375  _handle_usty();
8376  }
8377  else
8378  {
8379  _c4err("internal error");
8380  }
8381  }
8382  if(_finished_file())
8383  break; // it may have finished because of multiline blocks
8384  _line_ended();
8385  }
8386  _end_stream();
8387  m_evt_handler->finish_parse();
8388 }
8389 /** @endcond */
8390 
8391 } // namespace yml
8392 } // namespace c4
8393 
8394 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
8395 
8396 #undef _c4dbgnextline
8397 
8398 #if defined(_MSC_VER)
8399 # pragma warning(pop)
8400 #elif defined(__clang__)
8401 # pragma clang diagnostic pop
8402 #elif defined(__GNUC__)
8403 # pragma GCC diagnostic pop
8404 #endif
8405 
8406 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:28
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1546
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition: common.cpp:210
enum c4::yml::BlockChomp_ BlockChomp_e
@ CHOMP_CLIP
single newline at end (default)
@ CHOMP_KEEP
all newlines from end (+)
@ CHOMP_STRIP
no newline at end (-)
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
Definition: common.hpp:258
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with x
@ UTF16BE
UTF16, Big-Endian.
Definition: common.hpp:266
@ UTF8
UTF8.
Definition: common.hpp:264
@ UTF16LE
UTF16, Little-Endian.
Definition: common.hpp:265
@ NOBOM
No Byte Order Mark was found.
Definition: common.hpp:263
@ UTF32BE
UTF32, Big-Endian.
Definition: common.hpp:268
@ UTF32LE
UTF32, Little-Endian.
Definition: common.hpp:267
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
Definition: common.hpp:347
utilities for UTF and Byte Order Mark