rapidyaml  0.10.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 
9 #include <ctype.h>
10 
11 #include "c4/yml/detail/dbgprint.hpp"
// Error-reporting helper macros. Both forward to this->_err(), so they can
// only be used inside member functions of a type providing _err().
// When RYML_DBG is defined they first invoke RYML_DEBUG_BREAK() and prefix the
// message with the __FILE__ and __LINE__ of the call site; otherwise they only
// prefix the message with "ERROR: ".
13 #ifdef RYML_DBG
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
// _c4err_ takes a format string plus at least one argument; _c4err takes a
// message without arguments. In both, fmt is pasted into the format string by
// string-literal concatenation, so it must be a string literal.
16 #define _c4err_(fmt, ...) do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, __VA_ARGS__); } while(0)
17 #define _c4err(fmt) do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__); } while(0)
18 #else
// NOTE: unlike the debug variant, the non-debug _c4err passes its argument as
// the value for a "{}" placeholder instead of splicing it into the format.
19 #define _c4err_(fmt, ...) this->_err("ERROR: " fmt, __VA_ARGS__)
20 #define _c4err(fmt) this->_err("ERROR: {}", fmt)
21 #endif
22 
23 
// Compile-time selection of code depending on whether RYML_WITH_TAB_TOKENS is
// defined:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments only when it is defined
//  - _RYML_WITHOUT_TAB_TOKENS(...) expands to its arguments only when it is not
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to the first
//    argument when it is defined and to the second otherwise
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
28 #else
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
32 #endif
33 
34 
35 // scaffold:
// Debug helper: emits a separator through _c4dbgq() and then, through
// _c4dbgt(), the line number and byte offset stored in the current state of
// the event handler (m_evt_handler->m_curr->pos). It requires m_evt_handler to
// be in scope at the point of use.
36 #define _c4dbgnextline() \
37  do { \
38  _c4dbgq("\n-----------"); \
39  _c4dbgt("handling line={}, offset={}B", \
40  m_evt_handler->m_curr->pos.line, \
41  m_evt_handler->m_curr->pos.offset); \
42  } while(0)
43 
44 
45 #if defined(_MSC_VER)
46 # pragma warning(push)
47 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
48 # pragma warning(disable: 4702/*unreachable code*/)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
59 # if __GNUC__ >= 7
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
61 # endif
62 #endif
63 
64 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
65 
66 namespace c4 {
67 namespace yml {
68 
69 namespace { // NOLINT
70 
71 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
72 {
73  RYML_ASSERT(s.len > 0);
74  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
75  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
76 }
77 
78 inline bool _is_doc_begin_token(csubstr s)
79 {
80  RYML_ASSERT(s.begins_with('-'));
81  RYML_ASSERT(!s.ends_with("\n"));
82  RYML_ASSERT(!s.ends_with("\r"));
83  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
84  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
85 }
86 
87 inline bool _is_doc_end_token(csubstr s)
88 {
89  RYML_ASSERT(s.begins_with('.'));
90  RYML_ASSERT(!s.ends_with("\n"));
91  RYML_ASSERT(!s.ends_with("\r"));
92  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
93  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
94 }
95 
// Tell whether s starts with a document marker, ie "---" or "...", which
// must either end the string or be followed by a space (or by a tab when tab
// tokens are enabled). Strings shorter than 3 characters are never a marker.
// The checks are spelled out inline rather than delegated to
// _is_doc_begin_token()/_is_doc_end_token(), for the reasons given below.
96 inline bool _is_doc_token(csubstr s) noexcept
97 {
98  //
99  // NOTE: this function was failing under some scenarios when
100  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
101  // related to optimizer assumptions on the input string and
102  // possibly caused from UB around assignment to that string (the
103  // call site was in _scan_block()). For more details see:
104  //
105  // https://github.com/biojppm/rapidyaml/issues/440
106  //
107  // The current version does not suffer this problem, but it may
108  // appear again.
109  //
110  //
111  // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
112  // (but not any other optimization level, nor any other compiler
113  // or version), because the assignment to s is being hoisted out
114  // of the loop which calls this function. Then the length doesn't
115  // enter the s.len >= 3 when it should. Adding a
116  // C4_DONT_OPTIMIZE(var) makes the problem go away.
117  //
118  if(s.len >= 3)
119  {
120  switch(s.str[0])
121  {
122  case '-':
123  //return _is_doc_begin_token(s); // this was failing with gcc -O2
124  return (s.str[1] == '-' && s.str[2] == '-')
125  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
126  case '.':
127  //return _is_doc_end_token(s); // this was failing with gcc -O2
128  return (s.str[1] == '.' && s.str[2] == '.')
129  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
130  }
131  }
// any other leading character, or a string that is too short
132  return false;
133 }
134 
135 inline size_t _is_special_json_scalar(csubstr s)
136 {
137  RYML_ASSERT(s.len);
138  switch(s.str[0])
139  {
140  case 'f':
141  if(s.len >= 5 && s.begins_with("false"))
142  return 5u;
143  break;
144  case 't':
145  if(s.len >= 4 && s.begins_with("true"))
146  return 4u;
147  break;
148  case 'n':
149  if(s.len >= 4 && s.begins_with("null"))
150  return 4u;
151  break;
152  }
153  return 0u;
154 }
155 
156 
157 //-----------------------------------------------------------------------------
158 
159 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
160 {
161  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
162 }
163 
164 //! look for the next newline chars, and jump to the right of those
165 inline substr from_next_line(substr rem)
166 {
167  size_t nlpos = rem.first_of("\r\n");
168  if(nlpos == csubstr::npos)
169  return {};
170  const char nl = rem[nlpos];
171  rem = rem.right_of(nlpos);
172  if(rem.empty())
173  return {};
174  if(_extend_from_combined_newline(nl, rem.front()))
175  rem = rem.sub(1);
176  return rem;
177 }
178 
179 
180 //-----------------------------------------------------------------------------
181 
182 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
183 {
184  RYML_ASSERT(r[*i] == '\n');
185  size_t numnl_following = 0;
186  ++(*i);
187  for( ; *i < r.len; ++(*i))
188  {
189  if(r.str[*i] == '\n')
190  ++numnl_following;
191  // skip leading whitespace
192  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
193  ;
194  else
195  break;
196  }
197  return numnl_following;
198 }
199 
200 /** @p i is set to the first non whitespace character after the line
201  * @return the number of empty lines after the initial position */
202 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
203 {
204  RYML_ASSERT(r[*i] == '\n');
205  size_t numnl_following = 0;
206  ++(*i);
207  if(indentation == 0)
208  {
209  for( ; *i < r.len; ++(*i))
210  {
211  if(r.str[*i] == '\n')
212  ++numnl_following;
213  // skip leading whitespace
214  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
215  ;
216  else
217  break;
218  }
219  }
220  else
221  {
222  for( ; *i < r.len; ++(*i))
223  {
224  if(r.str[*i] == '\n')
225  {
226  ++numnl_following;
227  // skip the indentation after the newline
228  size_t stop = *i + indentation;
229  for( ; *i < r.len; ++(*i))
230  {
231  if(r.str[*i] != ' ' && r.str[*i] != '\r')
232  break;
233  RYML_ASSERT(*i < stop);
234  }
235  C4_UNUSED(stop);
236  }
237  // skip leading whitespace
238  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
239  ;
240  else
241  break;
242  }
243  }
244  return numnl_following;
245 }
246 
247 } // anon namespace
248 
249 
250 //-----------------------------------------------------------------------------
251 //-----------------------------------------------------------------------------
252 //-----------------------------------------------------------------------------
253 
// NOTE(review): the declarator line of this definition (listing line 255) is
// missing from this extract. The body calls _free() and then _clr(), so this
// is presumably the destructor -- confirm against the class declaration.
254 template<class EventHandler>
256 {
257  _free();
258  _clr();
259 }
260 
// NOTE(review): the declarator line of this definition (listing line 262) is
// missing from this extract. Judging from the initializer list, this is a
// constructor receiving `evt_handler` and `opts` -- confirm against the class
// declaration.
// It stores the options and the event handler, leaves every other member
// empty/zero, and checks that evt_handler is not null.
261 template<class EventHandler>
263  : m_options(opts)
264  , m_file()
265  , m_buf()
266  , m_evt_handler(evt_handler)
267  , m_pending_anchors()
268  , m_pending_tags()
269  , m_was_inside_qmrk(false)
270  , m_doc_empty(false)
271  , m_prev_colon(npos)
272  , m_encoding(NOBOM)
273  , m_newline_offsets()
274  , m_newline_offsets_size(0)
275  , m_newline_offsets_capacity(0)
276  , m_newline_offsets_buf()
277 {
278  RYML_CHECK(evt_handler);
279 }
280 
// NOTE(review): the declarator line of this definition (listing line 282) is
// missing from this extract. The members are taken over from `that`, which is
// then cleared with _clr(), so this is presumably the move constructor --
// confirm against the class declaration.
// The newline-offsets pointer is taken over as-is (no allocation here).
// The members m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding are
// not taken from `that`; they are set to fixed initial values.
281 template<class EventHandler>
283  : m_options(that.m_options)
284  , m_file(that.m_file)
285  , m_buf(that.m_buf)
286  , m_evt_handler(that.m_evt_handler)
287  , m_pending_anchors(that.m_pending_anchors)
288  , m_pending_tags(that.m_pending_tags)
289  , m_was_inside_qmrk(false)
290  , m_doc_empty(false)
291  , m_prev_colon(npos)
292  , m_encoding(NOBOM)
293  , m_newline_offsets(that.m_newline_offsets)
294  , m_newline_offsets_size(that.m_newline_offsets_size)
295  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296  , m_newline_offsets_buf(that.m_newline_offsets_buf)
297 {
298  that._clr();
299 }
300 
// NOTE(review): the declarator line of this definition (listing line 302) is
// missing from this extract. The members are copied from `that`, which is left
// untouched, so this is presumably the copy constructor -- confirm against the
// class declaration.
// The members m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding are
// not taken from `that`; they are set to fixed initial values.
// NOTE(review): m_newline_offsets_buf is left empty here, whereas the
// assignment taking `that` without clearing it copies that member -- confirm
// which of the two is intended.
301 template<class EventHandler>
303  : m_options(that.m_options)
304  , m_file(that.m_file)
305  , m_buf(that.m_buf)
306  , m_evt_handler(that.m_evt_handler)
307  , m_pending_anchors(that.m_pending_anchors)
308  , m_pending_tags(that.m_pending_tags)
309  , m_was_inside_qmrk(false)
310  , m_doc_empty(false)
311  , m_prev_colon(npos)
312  , m_encoding(NOBOM)
313  , m_newline_offsets()
314  , m_newline_offsets_size()
315  , m_newline_offsets_capacity()
316  , m_newline_offsets_buf()
317 {
// the newline offsets are not shared with `that`: when it has any capacity,
// this object gets its own array of the same capacity and the used entries
// are copied over
318  if(that.m_newline_offsets_capacity)
319  {
320  _resize_locations(that.m_newline_offsets_capacity);
321  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
322  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
323  m_newline_offsets_size = that.m_newline_offsets_size;
324  }
325 }
326 
// NOTE(review): the declarator line of this definition (listing line 328) is
// missing from this extract. All members are taken over from `that`, which is
// then cleared with _clr(), and *this is returned, so this is presumably the
// move assignment operator -- confirm against the class declaration.
// NOTE(review): there is no check for `&that == this`; with self-assignment,
// _free() and that._clr() would both act on this object -- confirm whether
// that case needs to be supported.
327 template<class EventHandler>
329 {
// release the newline offsets currently owned, before taking over the
// pointer held by `that`
330  _free();
331  m_options = (that.m_options);
332  m_file = (that.m_file);
333  m_buf = (that.m_buf);
334  m_evt_handler = that.m_evt_handler;
335  m_pending_anchors = that.m_pending_anchors;
336  m_pending_tags = that.m_pending_tags;
337  m_was_inside_qmrk = that.m_was_inside_qmrk;
338  m_doc_empty = that.m_doc_empty;
339  m_prev_colon = that.m_prev_colon;
340  m_encoding = that.m_encoding;
341  m_newline_offsets = (that.m_newline_offsets);
342  m_newline_offsets_size = (that.m_newline_offsets_size);
343  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344  m_newline_offsets_buf = (that.m_newline_offsets_buf);
// `that` must no longer refer to the array now held by this object
345  that._clr();
346  return *this;
347 }
348 
// NOTE(review): the declarator line of this definition (listing line 350) is
// missing from this extract. All members are copied from `that`, which is left
// untouched, and *this is returned, so this is presumably the copy assignment
// operator -- confirm against the class declaration.
349 template<class EventHandler>
351 {
// self-assignment is a no-op
352  if(&that != this)
353  {
// note that _free() uses the callbacks of the event handler held before
// the assignment below
354  _free();
355  m_options = (that.m_options);
356  m_file = (that.m_file);
357  m_buf = (that.m_buf);
358  m_evt_handler = that.m_evt_handler;
359  m_pending_anchors = that.m_pending_anchors;
360  m_pending_tags = that.m_pending_tags;
361  m_was_inside_qmrk = that.m_was_inside_qmrk;
362  m_doc_empty = that.m_doc_empty;
363  m_prev_colon = that.m_prev_colon;
364  m_encoding = that.m_encoding;
// get an own array for the newline offsets, and copy the used entries
365  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366  _resize_locations(that.m_newline_offsets_capacity);
367  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): when `that` has no newline offsets, both pointers may be
// null here while the size is 0 -- confirm that this memcpy call is
// acceptable in that case.
369  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
370  m_newline_offsets_size = that.m_newline_offsets_size;
371  m_newline_offsets_buf = that.m_newline_offsets_buf;
372  }
373  return *this;
374 }
375 
// NOTE(review): the declarator line of this definition (listing line 377) is
// missing from this extract; presumably this is _clr() -- confirm against the
// class declaration.
// Resets every member to an empty/initial value. Nothing is released here:
// the newline-offsets pointer is merely overwritten, so any array it refers to
// must have been freed or handed over beforehand.
376 template<class EventHandler>
378 {
379  m_options = {};
380  m_file = {};
381  m_buf = {};
382  m_evt_handler = {};
383  m_pending_anchors = {};
384  m_pending_tags = {};
385  m_was_inside_qmrk = false;
386  m_doc_empty = true;
387  m_prev_colon = npos;
388  m_encoding = NOBOM;
389  m_newline_offsets = {};
390  m_newline_offsets_size = {};
391  m_newline_offsets_capacity = {};
392  m_newline_offsets_buf = {};
393 }
394 
395 template<class EventHandler>
396 void ParseEngine<EventHandler>::_free()
397 {
398  if(m_newline_offsets)
399  {
400  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
401  m_newline_offsets = nullptr;
402  m_newline_offsets_size = 0u;
403  m_newline_offsets_capacity = 0u;
404  m_newline_offsets_buf = nullptr;
405  }
406 }
407 
408 
409 //-----------------------------------------------------------------------------
410 
411 template<class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
413 {
414  m_pending_anchors = {};
415  m_pending_tags = {};
416  m_doc_empty = true;
417  m_was_inside_qmrk = false;
418  m_prev_colon = npos;
419  m_encoding = NOBOM;
420  if(m_options.locations())
421  {
422  _prepare_locations();
423  }
424 }
425 
426 
427 //-----------------------------------------------------------------------------
428 
// Rebase the string views held by the parser after the arena was moved from
// prev_arena to next_arena. The views handled are the source buffer, the
// buffer used for the newline offsets, and the pending tags and anchors.
429 template<class EventHandler>
430 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
431 {
// rebase s only when it lies inside prev_arena; its offset into the arena
// is kept, and its length is not touched
432  #define _ryml_relocate(s) \
433  if((s).is_sub(prev_arena)) \
434  { \
435  (s).str = next_arena.str + ((s).str - prev_arena.str); \
436  }
437  _ryml_relocate(m_buf);
438  _ryml_relocate(m_newline_offsets_buf);
439  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
440  _ryml_relocate(m_pending_tags.annotations[i].str);
441  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
442  _ryml_relocate(m_pending_anchors.annotations[i].str);
443  #undef _ryml_relocate
444 }
445 
446 template<class EventHandler>
447 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
448 {
449  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
450 }
451 
452 
453 //-----------------------------------------------------------------------------
454 
// Write a description of the current parse position through dumpfn, which is
// called with pieces of text to be emitted one after the other.
// When the current line has (stripped) contents, two lines are written:
//   [file:]line:col: <contents, at most 80 characters> (size=N)
//   a marker line with '^' and '~' placed under the remaining part of the line
// Otherwise only a newline is written. With RYML_DBG defined, the flags of
// the current state are written as well.
455 template<class EventHandler>
456 template<class DumpFn>
457 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
458 {
459  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
460  auto const& lc = st->line_contents;
461  csubstr contents = lc.stripped;
462  if(contents.len)
463  {
464  // print the yaml src line
// offs is the width of the prefix written before the contents, needed to
// align the marker line below: 3 for the ':', ':' and ' ' of "{}:{}: ", plus
// the widths of the line and column numbers.
// (presumably to_chars() with an empty buffer returns the number of
// characters needed -- confirm)
465  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
466  if(m_file.len)
467  {
468  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
// the file name and its ':' are also part of the prefix
469  offs += m_file.len + 1;
470  }
471  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
// contents of 80 or more characters are cut to 80, and marked with "..."
472  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
473  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
474  detail::_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
475  // highlight the remaining portion of the previous line
// firstcol is the offset of the remaining portion within the full line
476  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
477  size_t lastcol = firstcol + lc.rem.len;
// pad up to the start of the remaining portion, then mark it with a '^'
// followed by '~', using no more than 80 marker characters
478  for(size_t i = 0; i < offs + firstcol; ++i)
479  std::forward<DumpFn>(dumpfn)(" ");
480  std::forward<DumpFn>(dumpfn)("^");
481  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
482  std::forward<DumpFn>(dumpfn)("~");
// the columns are reported 1-based; note that the ellipsis shown here is the
// one computed above from the length of the contents
483  detail::_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
484  }
485  else
486  {
487  std::forward<DumpFn>(dumpfn)("\n");
488  }
489
490 #ifdef RYML_DBG
491  // next line: print the state flags
492  {
493  char flagbuf_[128];
494  detail::_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
495  }
496 #endif
497 }
498 
499 
500 //-----------------------------------------------------------------------------
501 
502 template<class EventHandler>
503 template<class ...Args>
504 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
505 {
506  char errmsg[RYML_ERRMSG_SIZE];
507  detail::_SubstrWriter writer(errmsg);
508  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
509  detail::_dump(dumpfn, fmt, args...);
510  writer.append('\n');
511  _fmt_msg(dumpfn);
512  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
513  m_evt_handler->cancel_parse();
514  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
515 }
516 
517 
518 //-----------------------------------------------------------------------------
519 #ifdef RYML_DBG
520 template<class EventHandler>
521 template<class ...Args>
522 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
523 {
524  if(_dbg_enabled())
525  {
526  auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
527  detail::_dump(dumpfn, fmt, args...);
528  dumpfn("\n");
529  _fmt_msg(dumpfn);
530  }
531 }
532 #endif
533 
534 
535 //-----------------------------------------------------------------------------
536 template<class EventHandler>
537 bool ParseEngine<EventHandler>::_finished_file() const
538 {
539  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
540  if(ret)
541  {
542  _c4dbgp("finished file!!!");
543  }
544  return ret;
545 }
546 
547 template<class EventHandler>
548 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
549 {
550  return m_evt_handler->m_curr->line_contents.rem.empty();
551 }
552 
553 
554 //-----------------------------------------------------------------------------
555 
556 template<class EventHandler>
557 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
558 {
559  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
560  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
561  {
562  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
563  if(pos == npos)
564  pos = rem.len; // maybe the line is just all whitespace
565  _c4dbgpf("skip {} whitespace characters", pos);
566  _line_progressed(pos);
567  }
568 }
569 
570 template<class EventHandler>
571 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
572 {
573  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
574  if(rem.len && rem.str[0] == c)
575  {
576  size_t pos = rem.first_not_of(c);
577  if(pos == npos)
578  pos = rem.len; // maybe the line is just all c
579  _c4dbgpf("skip {}x'{}'", pos, c);
580  _line_progressed(pos);
581  }
582 }
583 
584 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
585 template<class EventHandler>
586 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
587 {
588  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
589  if(rem.len && rem.str[0] == c)
590  {
591  size_t pos = rem.first_not_of(c);
592  if(pos == npos)
593  pos = rem.len; // maybe the line is just all c
594  if(pos > max_to_skip)
595  pos = max_to_skip;
596  _c4dbgpf("skip {}x'{}'", pos, c);
597  _line_progressed(pos);
598  }
599 }
600 #endif
601 
602 template<class EventHandler>
603 template<size_t N>
604 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
605 {
606  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
607  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
608  if(pos == npos)
609  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
610  _c4dbgpf("skip {} characters", pos);
611  _line_progressed(pos);
612 }
613 
614 template<class EventHandler>
615 void ParseEngine<EventHandler>::_skip_comment()
616 {
617  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
618  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
619  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
620  csubstr full = m_evt_handler->m_curr->line_contents.full;
621  // raise an error if the comment is not preceded by whitespace
622  if(!full.begins_with('#'))
623  {
624  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
625  const char c = full[(size_t)(rem.str - full.str - 1)];
626  if(C4_UNLIKELY(c != ' ' && c != '\t'))
627  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
628  }
629  else
630  {
631  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
632  }
633  _c4dbgpf("comment was '{}'", rem);
634  _line_progressed(rem.len);
635 }
636 
637 template<class EventHandler>
638 void ParseEngine<EventHandler>::_maybe_skip_comment()
639 {
640  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
641  if(s.begins_with('#'))
642  {
643  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
644  _skip_comment();
645  }
646 }
647 
648 template<class EventHandler>
649 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
650 {
651  if(m_evt_handler->m_curr->line_contents.rem.len)
652  {
653  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
654  {
655  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
656  if(pos == npos)
657  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
658  _c4dbgpf("skip {}x'{}'", pos, ' ');
659  _line_progressed(pos);
660  }
661  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
662  {
663  _c4dbgp("found ':' colon next");
664  _line_progressed(1);
665  return true;
666  }
667  }
668  return false;
669 }
670 
671 template<class EventHandler>
672 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
673 {
674  if(m_evt_handler->m_curr->line_contents.rem.len)
675  {
676  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
677  {
678  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
679  if(pos == npos)
680  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
681  _c4dbgpf("skip {}x'{}'", pos, ' ');
682  _line_progressed(pos);
683  }
684  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
685  {
686  _c4dbgp("found ',' comma next");
687  _line_progressed(1);
688  return true;
689  }
690  }
691  return false;
692 }
693 
694 
695 //-----------------------------------------------------------------------------
696 
697 template<class EventHandler>
698 csubstr ParseEngine<EventHandler>::_scan_anchor()
699 {
700  csubstr s = m_evt_handler->m_curr->line_contents.rem;
701  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
702  csubstr anchor = s.range(1, s.first_of(' '));
703  _line_progressed(1u + anchor.len);
704  _maybe_skipchars(' ');
705  return anchor;
706 }
707 
708 template<class EventHandler>
709 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
710 {
711  csubstr s = m_evt_handler->m_curr->line_contents.rem;
712  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
713  csubstr ref = s.first(s.first_of(",] :"));
714  _line_progressed(ref.len);
715  return ref;
716 }
717 
718 template<class EventHandler>
719 csubstr ParseEngine<EventHandler>::_scan_ref_map()
720 {
721  csubstr s = m_evt_handler->m_curr->line_contents.rem;
722  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
723  csubstr ref = s.first(s.first_of(",} "));
724  _line_progressed(ref.len);
725  return ref;
726 }
727 
728 template<class EventHandler>
729 csubstr ParseEngine<EventHandler>::_scan_tag()
730 {
731  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
732  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
733  csubstr t;
734  if(rem.begins_with("!!"))
735  {
736  _c4dbgp("begins with '!!'");
737  if(has_any(FLOW))
738  t = rem.left_of(rem.first_of(" ,"));
739  else
740  t = rem.left_of(rem.first_of(' '));
741  }
742  else if(rem.begins_with("!<"))
743  {
744  _c4dbgp("begins with '!<'");
745  t = rem.left_of(rem.first_of('>'), true);
746  }
747  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
748  else if(rem.begins_with("!h!"))
749  {
750  _c4dbgp("begins with '!h!'");
751  t = rem.left_of(rem.first_of(' '));
752  }
753  #endif
754  else
755  {
756  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
757  _c4dbgp("begins with '!'");
758  if(has_any(FLOW))
759  t = rem.left_of(rem.first_of(" ,"));
760  else
761  t = rem.left_of(rem.first_of(' '));
762  }
763  _line_progressed(t.len);
764  _maybe_skip_whitespace_tokens();
765  return t;
766 }
767 
768 
769 //-----------------------------------------------------------------------------
770 
771 template<class EventHandler>
772 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
773 {
774  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
775 
776  // it's not a scalar if it starts with any of these characters:
777  switch(s.str[0])
778  {
779  // these are all legal tokens which mean no scalar is starting:
780  case '[':
781  case ']':
782  case '{':
783  case '}':
784  case '!':
785  case '&':
786  case '*':
787  case '|':
788  case '>':
789  case '#':
790  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
791  return false;
792  // '-' and ':' are illegal at the beginning if not followed by a scalar character
793  case '-':
794  case ':':
795  if(s.len > 1)
796  {
797  switch(s.str[1])
798  {
799  case '\n':
800  case '\r':
801  case '{':
802  case '[':
803  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
804  _c4err_("invalid token \":{}\"", _c4prc(s.str[1]));
805  break;
806  case ' ':
807  case '}':
808  case ']':
809  if(s.str[0] == ':')
810  {
811  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
812  return false;
813  }
814  break;
815  default:
816  break;
817  }
818  }
819  else
820  {
821  return false;
822  }
823  break;
824  case '?':
825  if(s.len > 1)
826  {
827  switch(s.str[1])
828  {
829  case ' ':
830  case '\n':
831  case '\r':
832  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
833  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
834  return false;
835  case '{':
836  case '}':
837  case '[':
838  case ']':
839  _c4err_("invalid token \"?{}\"", _c4prc(s.str[1]));
840  break;
841  default:
842  break;
843  }
844  }
845  else
846  {
847  return false;
848  }
849  break;
850  // everything else is a legal starting character
851  default:
852  break;
853  }
854 
855  return true;
856 }
857 
// Scan a plain (unquoted) scalar while in a flow sequence (see the state
// flags asserted below). The scan starts at the remainder of the current line
// and goes on over the following lines, until a terminating character or the
// end of the file is found.
// @param sc receives the scanned scalar, and whether it needs filtering
//        (needs_filter is set once the scan moves on to another line)
// @return false when there is no scalar at the current position; true
//         otherwise, in which case *sc was filled in
858 template<class EventHandler>
859 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
860 {
861  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
862  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
863  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
864  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
865  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
866
867  substr s = m_evt_handler->m_curr->line_contents.rem;
868  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
869  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
870
871  if(!s.len)
872  return false;
873
874  if(!_is_valid_start_scalar_plain_flow(s))
875  return false;
876
877  _c4dbgp("scanning seqflow scalar...");
878
// the scalar is taken from the source buffer, starting at this offset
879  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
880  bool needs_filter = false;
// each iteration of this loop looks at one line
881  while(true)
882  {
883  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
884  for(size_t i = 0; i < s.len; ++i)
885  {
886  const char c = s.str[i];
887  switch(c)
888  {
889  case ',':
890  _c4dbgpf("found terminating character at {}: '{}'", i, c);
891  _line_progressed(i);
// NOTE(review): this comparison is made after _line_progressed(i), unlike in
// the ':' case below where it is made before. Presumably the offset already
// includes i at this point -- confirm that adding i again is intended.
892  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
893  {
894  goto ended_scalar;
895  }
896  else
897  {
898  _c4dbgp("at the beginning. no scalar here.");
899  return false;
900  }
901  break;
902  case ']':
903  _c4dbgpf("found terminating character at {}: '{}'", i, c);
904  _line_progressed(i);
905  goto ended_scalar;
906  break;
907  case '#':
908  _c4dbgp("found suspicious '#'");
// '#' ends the scalar only at the start of the line, or when preceded by a
// space (or by a tab when tab tokens are enabled)
909  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
910  {
911  _c4dbgpf("found terminating character at {}: '{}'", i, c);
912  _line_progressed(i);
913  goto ended_scalar;
914  }
915  break;
916  case ':':
917  _c4dbgp("found suspicious ':'");
// ':' ends the scalar only when followed by a space or ',' (or by a tab
// when tab tokens are enabled), or when it is the last character of the line
918  if(s.len > i+1)
919  {
920  const char next = s.str[i+1];
921  _c4dbgpf("next char is '{}'", _c4prc(next));
922  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
923  {
924  _c4dbgp("map starting!");
925  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
926  {
927  _c4dbgp("scalar finished!");
928  _line_progressed(i);
929  goto ended_scalar;
930  }
931  else
932  {
933  _c4dbgp("at the beginning. no scalar here.");
934  return false;
935  }
936  }
937  else
938  {
939  _c4dbgp("it's a scalar indeed.");
940  ++i; // skip the next char
941  }
942  }
943  else if(s.len == i+1)
944  {
945  _c4dbgp("':' at line end. map starting!");
946  return false;
947  }
948  break;
949  case '[':
950  case '{':
951  case '}':
952  _line_progressed(i);
// there is no break after the error: were it to return, execution would
// fall into the default label
953  _c4err_("invalid character: '{}'", c); // noreturn
954  default:
955  ;
956  }
957  }
// no terminating character in this line: consume all of it, and go on to
// the next line if there is one
958  _line_progressed(s.len);
959  if(!_finished_file())
960  {
961  _c4dbgp("next line!");
962  _line_ended();
963  _scan_line();
964  }
965  else
966  {
967  _c4dbgp("file finished!");
968  goto ended_scalar;
969  }
970  s = m_evt_handler->m_curr->line_contents.rem;
971  needs_filter = true;
972  }
973
974 ended_scalar:
975
// the scalar is the range of the source buffer from the starting offset to
// the current offset, without trailing spaces (or tabs, when tab tokens are
// enabled)
976  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
977  sc->needs_filter = needs_filter;
978
979  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
980
981  return true;
982 }
983 
// Scan a plain (unquoted) scalar while in a flow map, or in a map implied
// inside a flow sequence (see the state flags asserted below). The scan starts
// at the remainder of the current line and goes on over the following lines,
// until a terminating character or the end of the file is found.
// @param sc receives the scanned scalar, and whether it needs filtering
//        (needs_filter is set once the scan moves on to another line)
// @return false when there is no scalar at the current position, or when the
//         scanned scalar is empty after trimming; true otherwise.
//         Note that *sc is written to whenever the scan reaches the
//         ended_scalar label, even if false is then returned.
984 template<class EventHandler>
985 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
986 {
987  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
988  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
990  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
991  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
992
993  substr s = m_evt_handler->m_curr->line_contents.rem;
994  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
995
996  if(!s.len)
997  return false;
998
999  if(!_is_valid_start_scalar_plain_flow(s))
1000  return false;
1001
1002  _c4dbgp("scanning scalar...");
1003
// the scalar is taken from the source buffer, starting at this offset
1004  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1005  bool needs_filter = false;
// each iteration of this loop looks at one line
1006  while(true)
1007  {
1008  for(size_t i = 0; i < s.len; ++i)
1009  {
1010  const char c = s.str[i];
1011  switch(c)
1012  {
1013  case ',':
1014  case '}':
1015  _line_progressed(i);
1016  _c4dbgpf("found terminating character: '{}'", c);
1017  goto ended_scalar;
1018  case ':':
// ':' ends the scalar only when it is the last character of the line, or
// when followed by a space, ',' or '}' (or by a tab when tab tokens are
// enabled)
1019  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
1020  {
1021  _line_progressed(i);
1022  _c4dbgpf("found terminating character: '{}'", c);
1023  goto ended_scalar;
1024  }
1025  break;
1026  case '{':
1027  case '[':
1028  _line_progressed(i);
1029  _c4err_("invalid character: '{}'", c); // noreturn
1030  break;
1031  case ']':
1032  _line_progressed(i);
// ']' ends the scalar only in a map implied inside a flow sequence;
// otherwise it is an error
1033  if(has_any(RSEQIMAP))
1034  goto ended_scalar;
1035  else
1036  _c4err_("invalid character: '{}'", c); // noreturn
1037  break;
1038  case '#':
// '#' ends the scalar only at the start of the line, or when preceded by a
// space (or by a tab when tab tokens are enabled)
1039  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
1040  {
1041  _line_progressed(i);
1042  _c4dbgpf("found terminating character: '{}'", c);
1043  goto ended_scalar;
1044  }
1045  break;
1046  default:
1047  ;
1048  }
1049  }
// no terminating character in this line: consume all of it, and go on to
// the next line if there is one
1050  _c4dbgp("next line!");
1051  _line_progressed(s.len);
1052  if(!_finished_file())
1053  {
1054  _c4dbgp("next line!");
1055  _line_ended();
1056  _scan_line();
1057  }
1058  else
1059  {
1060  _c4dbgp("file finished!");
1061  goto ended_scalar;
1062  }
1063  s = m_evt_handler->m_curr->line_contents.rem;
1064  needs_filter = true;
1065  }
1066
1067 ended_scalar:
1068
// the scalar is the range of the source buffer from the starting offset to
// the current offset, without trailing spaces and newline characters (or
// tabs, when tab tokens are enabled)
1069  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1070  sc->needs_filter = needs_filter;
1071
1072  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1073
1074  return sc->scalar.len > 0u;
1075 }
1076 
1077 template<class EventHandler>
1078 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1079 {
1080  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1081  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1082  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1083  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1084 
1085  substr s = m_evt_handler->m_curr->line_contents.rem;
1086  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1087 
1088  if(!s.len)
1089  return false;
1090 
1091  _c4dbgp("scanning scalar...");
1092 
1093  switch(s.str[0])
1094  {
1095  case ']':
1096  case '{':
1097  case ',':
1098  _c4dbgp("not a scalar.");
1099  return false;
1100  }
1101 
1102  {
1103  const size_t len = _is_special_json_scalar(s);
1104  if(len)
1105  {
1106  sc->scalar = s.first(len);
1107  sc->needs_filter = false;
1108  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1109  _line_progressed(len);
1110  return true;
1111  }
1112  }
1113 
1114  // must be a number
1115  size_t i = 0;
1116  for( ; i < s.len; ++i)
1117  {
1118  const char c = s.str[i];
1119  switch(c)
1120  {
1121  case ',':
1122  case ']':
1123  case ' ':
1124  case '\t':
1125  _c4dbgpf("found terminating character: '{}'", c);
1126  goto ended_scalar;
1127  case '#':
1128  if(!i || s.str[i-1] == ' ')
1129  {
1130  _c4dbgpf("found terminating character: '{}'", c);
1131  goto ended_scalar;
1132  }
1133  break;
1134  default:
1135  ;
1136  }
1137  }
1138 
1139 ended_scalar:
1140 
1141  if(C4_LIKELY(i > 0))
1142  {
1143  _line_progressed(i);
1144  sc->scalar = s.first(i);
1145  sc->needs_filter = false;
1146  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1147  return true;
1148  }
1149 
1150  return false;
1151 }
1152 
1153 template<class EventHandler>
1154 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1155 {
1156  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1157  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1158  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1159  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1160  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1161 
1162  substr s = m_evt_handler->m_curr->line_contents.rem;
1163  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1164 
1165  if(!s.len)
1166  return false;
1167 
1168  _c4dbgp("scanning scalar...");
1169 
1170  {
1171  const size_t len = _is_special_json_scalar(s);
1172  if(len)
1173  {
1174  sc->scalar = s.first(len);
1175  sc->needs_filter = false;
1176  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1177  _line_progressed(len);
1178  return true;
1179  }
1180  }
1181 
1182  // must be a number
1183  size_t i = 0;
1184  for( ; i < s.len; ++i)
1185  {
1186  const char c = s.str[i];
1187  switch(c)
1188  {
1189  case ',':
1190  case '}':
1191  case ' ':
1192  case '\t':
1193  _c4dbgpf("found terminating character: '{}'", c);
1194  goto ended_scalar;
1195  case '#':
1196  if(!i || s.str[i-1] == ' ')
1197  {
1198  _c4dbgpf("found terminating character: '{}'", c);
1199  goto ended_scalar;
1200  }
1201  break;
1202  default:
1203  ;
1204  }
1205  }
1206 
1207 ended_scalar:
1208 
1209  if(C4_LIKELY(i > 0))
1210  {
1211  _line_progressed(i);
1212  sc->scalar = s.first(i);
1213  sc->needs_filter = false;
1214  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1215  return true;
1216  }
1217 
1218  return false;
1219 }
1220 
1221 template<class EventHandler>
1222 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1223 {
1224  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1225  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1226 }
1227 
1228 template<class EventHandler>
1229 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1230 {
1231  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1232  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1233 }
1234 
// Scan a plain (unquoted) scalar outside of flow context (the state must
// have one of BLCK, RUNK or USTY). The scalar may span several lines.
//
// sc:          receives the scanned span (right-trimmed of spaces, tabs
//              and newline chars) and the needs_filter flag, which is set
//              as soon as a second line is loaded.
// indentation: a following line whose count of leading spaces is smaller
//              than this ends the scalar.
//
// Returns false, before any call to _line_progressed(), when the line
// remainder is empty or begins with something that cannot start a plain
// scalar here (see the first switch). Returns true otherwise.
1235 template<class EventHandler>
1236 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1237 {
1238  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1239  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1240  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
1241 
1242  substr s = m_evt_handler->m_curr->line_contents.rem;
1243  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1244 
1245  if(!s.len)
1246  return false;
1247 
// Look at the first character: refuse block tokens, document markers and
// the characters '[', '{', '&', '*', '!' (plus tab, with tab tokens).
1248  switch(s.str[0])
1249  {
1250  case '-':
1251  if(_is_blck_token(s))
1252  {
1253  return false;
1254  }
1255  else if(_is_doc_begin(s))
1256  {
1257  _c4dbgp("token is doc start");
1258  return false;
1259  }
1260  break;
1261  case ':':
1262  case '?':
1263  if(_is_blck_token(s))
1264  return false;
1265  break;
1266  case '[':
1267  case '{':
1268  case '&':
1269  case '*':
1270  case '!':
1271  _RYML_WITH_TAB_TOKENS(case '\t':)
1272  return false;
1273  case '.':
1274  if(_is_doc_end(s))
1275  {
1276  _c4dbgp("token is doc end");
1277  return false;
1278  }
1279  break;
1280  }
1281 
1282  _c4dbgpf("plain scalar! indentation={}", indentation);
1283 
// Remember where the scalar starts. The starting line number is needed
// below because a terminating ':' is only accepted on that line.
1284  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1285  const size_t start_line = m_evt_handler->m_curr->pos.line;
1286 
1287  bool needs_filter = false;
// Each iteration of this loop handles one line of the scalar.
1288  while(true)
1289  {
1290  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1291  for(size_t i = 0; i < s.len; ++i)
1292  {
1293  const char curr = s.str[i];
1294  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1295  switch(curr)
1296  {
1297  case ':':
1298  _c4dbgpf("[{}]: got suspicious ':'", i);
1299  // are there more characters?
// The ':' terminates only when it is the last character of the line
// remainder or is followed by a space (or a tab, with tab tokens).
1300  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1301  {
1302  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1303  _line_progressed(i);
1304  // ': ' is accepted only on the first line
1305  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1306  {
1307  _c4dbgp("start line. scalar ends here");
1308  goto ended_scalar;
1309  }
1310  else
1311  {
1312  _c4err("parse error");
1313  }
1314  }
1315  else
1316  {
// Skip a run of consecutive colons: j ends on the last colon of the run,
// and i is set so that the loop's ++i lands on that last colon, which is
// then examined by the next iteration.
1317  size_t j = i;
1318  while(j + 1 < s.len && s.str[j+1] == ':')
1319  {
1320  _c4dbgp("skip colon");
1321  ++j;
1322  }
1323  i = j > i ? j-1 : i;
1324  _c4dbgp("nothing to see here");
1325  }
1326  break;
1327  case '#':
1328  _c4dbgp("got suspicious '#'");
// A '#' starts a comment only when it opens the line remainder or
// follows a space or a tab.
1329  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1330  {
1331  _c4dbgp("comment! scalar ends here");
1332  _line_progressed(i);
1333  goto ended_scalar;
1334  }
1335  else
1336  {
1337  _c4dbgp("nothing to see here");
1338  }
1339  break;
1340  }
1341  }
// The whole remainder belongs to the scalar. Consume it, then peek at the
// next line (without loading it) to decide whether the scalar continues.
1342  _line_progressed(s.len);
1343  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1344  next_peeked = next_peeked.trimr("\n\r");
// NOTE(review): first_not_of() presumably yields npos for an empty or
// all-space line, which never compares smaller than indentation, so such
// lines do not end the scalar here -- confirm against the substr API.
1345  const size_t next_indentation = next_peeked.first_not_of(' ');
1346  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1347  if(next_indentation < indentation)
1348  {
1349  _c4dbgp("smaller indentation! scalar ended");
1350  goto ended_scalar;
1351  }
// An unindented next line may be a document marker, which ends the scalar.
1352  else if(next_indentation == 0 && next_peeked.len > 0)
1353  {
1354  const char first = next_peeked.str[0];
1355  switch(first)
1356  {
1357  case '-':
1358  next_peeked = next_peeked.trimr("\n\r");
1359  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1360  if(_is_doc_begin_token(next_peeked))
1361  {
1362  _c4dbgp("doc begin! scalar ended");
1363  goto ended_scalar;
1364  }
1365  break;
1366  case '.':
1367  next_peeked = next_peeked.trimr("\n\r");
1368  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1369  if(_is_doc_end_token(next_peeked))
1370  {
1371  _c4dbgp("doc end! scalar ended");
1372  goto ended_scalar;
1373  }
1374  break;
1375  }
1376  }
1377  // load with next line
1378  _c4dbgp("next line!");
1379  if(!_finished_file())
1380  {
1381  _c4dbgp("next line!");
1382  _line_ended();
1383  _scan_line();
1384  }
1385  else
1386  {
1387  _c4dbgp("file finished!");
1388  goto ended_scalar;
1389  }
1390  s = m_evt_handler->m_curr->line_contents.rem;
// The scalar now covers more than one line, so it is flagged for filtering.
1391  needs_filter = true;
1392  }
1393 
1394 ended_scalar:
1395 
// The scalar runs from the starting offset to the current offset; trailing
// spaces, tabs and newline chars are trimmed away.
1396  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1397  sc->needs_filter = needs_filter;
1398 
1399  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1400 
1401  return true;
1402 }
1403 
1404 template<class EventHandler>
1405 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1406 {
1407  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1408  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1409  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1410  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1411  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1412  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1413  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1414 }
1415 
1416 template<class EventHandler>
1417 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1418 {
1419  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1420  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1421  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1422  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1423  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1424  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1425 }
1426 
1427 template<class EventHandler>
1428 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1429 {
1430  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1431  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1432 }
1433 
1434 
1435 //-----------------------------------------------------------------------------
1436 
1437 template<class EventHandler>
1438 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1439 {
1440  substr rem{}; // declare here because of the goto
1441  size_t nlpos{}; // declare here because of the goto
1442  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1443  if(pos >= m_buf.len)
1444  goto next_is_empty;
1445 
1446  // look for the next newline chars, and jump to the right of those
1447  rem = from_next_line(m_buf.sub(pos));
1448  if(rem.empty())
1449  goto next_is_empty;
1450 
1451  // now get everything up to and including the following newline chars
1452  nlpos = rem.first_of("\r\n");
1453  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1454  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1455  rem = rem.left_of(nlpos, /*include_pos*/true);
1456 
1457  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1458  return rem;
1459 
1460 next_is_empty:
1461  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1462  return {};
1463 }
1464 
1465 //-----------------------------------------------------------------------------
1466 
1467 template<class EventHandler>
1468 void ParseEngine<EventHandler>::_scan_line()
1469 {
1470  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1471  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1472  else
1473  m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
1474 }
1475 
1476 template<class EventHandler>
1477 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1478 {
1479  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1480  m_evt_handler->m_curr->pos.offset += ahead;
1481  m_evt_handler->m_curr->pos.col += ahead;
1482  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1483  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1484 }
1485 
1486 template<class EventHandler>
1487 void ParseEngine<EventHandler>::_line_ended()
1488 {
1489  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1490  m_evt_handler->m_curr->pos.line,
1491  m_evt_handler->m_curr->line_contents.full.len,
1492  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1493  m_evt_handler->m_curr->pos.col, 1);
1494  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1495  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1496  ++m_evt_handler->m_curr->pos.line;
1497  m_evt_handler->m_curr->pos.col = 1;
1498 }
1499 
1500 template<class EventHandler>
1501 void ParseEngine<EventHandler>::_line_ended_undo()
1502 {
1503  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1504  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1505  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1506  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1507  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1508  m_evt_handler->m_curr->pos.offset -= delta;
1509  --m_evt_handler->m_curr->pos.line;
1510  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1511  // don't forget to undo also the changes to the remainder of the line
1512  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1513  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1514 }
1515 
1516 
1517 //-----------------------------------------------------------------------------
1518 template<class EventHandler>
1519 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1520 {
1521  m_evt_handler->m_curr->indref = indentation;
1522  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1523 }
1524 
1525 template<class EventHandler>
1526 void ParseEngine<EventHandler>::_save_indentation()
1527 {
1528  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1529  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1530  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1531 }
1532 
1533 
1534 //-----------------------------------------------------------------------------
1535 
1536 template<class EventHandler>
1537 void ParseEngine<EventHandler>::_end_map_blck()
1538 {
1539  _c4dbgp("mapblck: end");
1540  if(has_any(RKCL|RVAL))
1541  {
1542  _c4dbgp("mapblck: set missing val");
1543  _handle_annotations_before_blck_val_scalar();
1544  m_evt_handler->set_val_scalar_plain_empty();
1545  }
1546  else if(has_any(QMRK))
1547  {
1548  _c4dbgp("mapblck: set missing keyval");
1549  _handle_annotations_before_blck_key_scalar();
1550  m_evt_handler->set_key_scalar_plain_empty();
1551  _handle_annotations_before_blck_val_scalar();
1552  m_evt_handler->set_val_scalar_plain_empty();
1553  }
1554  m_evt_handler->end_map();
1555 }
1556 
1557 template<class EventHandler>
1558 void ParseEngine<EventHandler>::_end_seq_blck()
1559 {
1560  if(has_any(RVAL))
1561  {
1562  _c4dbgp("seqblck: set missing val");
1563  _handle_annotations_before_blck_val_scalar();
1564  m_evt_handler->set_val_scalar_plain_empty();
1565  }
1566  m_evt_handler->end_seq();
1567 }
1568 
1569 template<class EventHandler>
1570 void ParseEngine<EventHandler>::_end2_map()
1571 {
1572  _c4dbgp("map: end");
1573  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1574  if(has_any(BLCK))
1575  {
1576  _end_map_blck();
1577  }
1578  else
1579  {
1580  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1581  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1582  m_evt_handler->_pop();
1583  }
1584 }
1585 
1586 template<class EventHandler>
1587 void ParseEngine<EventHandler>::_end2_seq()
1588 {
1589  _c4dbgp("seq: end");
1590  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1591  if(has_any(BLCK))
1592  {
1593  _end_seq_blck();
1594  }
1595  else
1596  {
1597  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1598  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1599  m_evt_handler->_pop();
1600  }
1601 }
1602 
1603 template<class EventHandler>
1604 void ParseEngine<EventHandler>::_begin2_doc()
1605 {
1606  m_doc_empty = true;
1607  add_flags(RDOC);
1608  m_evt_handler->begin_doc();
1609  m_evt_handler->m_curr->indref = 0; // ?
1610 }
1611 
1612 template<class EventHandler>
1613 void ParseEngine<EventHandler>::_begin2_doc_expl()
1614 {
1615  m_doc_empty = true;
1616  add_flags(RDOC);
1617  m_evt_handler->begin_doc_expl();
1618  m_evt_handler->m_curr->indref = 0; // ?
1619 }
1620 
1621 template<class EventHandler>
1622 void ParseEngine<EventHandler>::_end2_doc()
1623 {
1624  _c4dbgp("doc: end");
1625  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1626  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1627  {
1628  _c4dbgp("doc was empty; add empty val");
1629  _handle_annotations_before_blck_val_scalar();
1630  m_evt_handler->set_val_scalar_plain_empty();
1631  }
1632  m_evt_handler->end_doc();
1633 }
1634 
1635 template<class EventHandler>
1636 void ParseEngine<EventHandler>::_end2_doc_expl()
1637 {
1638  _c4dbgp("doc: end");
1639  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1640  {
1641  _c4dbgp("doc: no children; add empty val");
1642  _handle_annotations_before_blck_val_scalar();
1643  m_evt_handler->set_val_scalar_plain_empty();
1644  }
1645  m_evt_handler->end_doc_expl();
1646 }
1647 
1648 template<class EventHandler>
1649 void ParseEngine<EventHandler>::_maybe_begin_doc()
1650 {
1651  if(has_none(RDOC))
1652  {
1653  _c4dbgp("doc must be started");
1654  _begin2_doc();
1655  }
1656 }
1657 template<class EventHandler>
1658 void ParseEngine<EventHandler>::_maybe_end_doc()
1659 {
1660  if(has_any(RDOC))
1661  {
1662  _c4dbgp("doc must be finished");
1663  _end2_doc();
1664  }
1665  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1666  {
1667  _c4dbgp("no doc to finish, but pending annotations");
1668  m_evt_handler->begin_doc();
1669  _handle_annotations_before_blck_val_scalar();
1670  m_evt_handler->set_val_scalar_plain_empty();
1671  m_evt_handler->end_doc();
1672  }
1673 }
1674 
1675 template<class EventHandler>
1676 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1677 {
1678  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1679  if(m_evt_handler->m_stack[0].flags & RDOC)
1680  {
1681  _c4dbgp("root is RDOC");
1682  if(m_evt_handler->m_curr->level != 0)
1683  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1684  }
1685  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1686  {
1687  _c4dbgp("root is STREAM");
1688  if(m_evt_handler->m_curr->level != 1)
1689  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1690  }
1691  else
1692  {
1693  _c4err("internal error");
1694  }
1695  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1696 }
1697 
1698 template<class EventHandler>
1699 void ParseEngine<EventHandler>::_end_doc_suddenly()
1700 {
1701  _c4dbgp("end doc suddenly");
1702  _end_doc_suddenly__pop();
1703  _end2_doc_expl();
1704  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1705 }
1706 
1707 template<class EventHandler>
1708 void ParseEngine<EventHandler>::_start_doc_suddenly()
1709 {
1710  _c4dbgp("start doc suddenly");
1711  _end_doc_suddenly__pop();
1712  _end2_doc();
1713  _begin2_doc_expl();
1714 }
1715 
1716 template<class EventHandler>
1717 void ParseEngine<EventHandler>::_end_stream()
1718 {
1719  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1720  if(has_all(RSEQ|FLOW))
1721  _c4err("missing terminating ]");
1722  else if(has_all(RMAP|FLOW))
1723  _c4err("missing terminating }");
1724  if(m_evt_handler->m_stack.size() > 1)
1725  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1726  if(has_all(RDOC))
1727  {
1728  _end2_doc();
1729  }
1730  else if(has_all(RTOP|RUNK))
1731  {
1732  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1733  {
1734  if(m_doc_empty)
1735  {
1736  m_evt_handler->begin_doc();
1737  _handle_annotations_before_blck_val_scalar();
1738  m_evt_handler->set_val_scalar_plain_empty();
1739  m_evt_handler->end_doc();
1740  }
1741  }
1742  }
1743  m_evt_handler->end_stream();
1744 }
1745 
1746 
1747 template<class EventHandler>
1748 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1749 {
1750  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1751  while(m_evt_handler->m_curr != popto)
1752  {
1753  if(has_any(RSEQ))
1754  {
1755  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1756  _end2_seq();
1757  }
1758  else if(has_any(RMAP))
1759  {
1760  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1761  _end2_map();
1762  }
1763  else
1764  {
1765  break;
1766  }
1767  }
1768  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1769 }
1770 
1771 template<class EventHandler>
1772 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1773 {
1774  // search the stack frame to jump to based on its indentation
1775  using state_type = typename EventHandler::state;
1776  state_type const* popto = nullptr;
1777  auto &stack = m_evt_handler->m_stack;
1778  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1779  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1780  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1781  #ifdef RYML_DBG
1782  if(_dbg_enabled())
1783  {
1784  char flagbuf_[128];
1785  for(state_type const& s : stack)
1786  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1787  }
1788  #endif
1789  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1790  {
1791  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1792  if(s->indref == ind)
1793  {
1794  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1795  popto = s;
1796  break;
1797  }
1798  }
1799  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1800  {
1801  _c4err("parse error: incorrect indentation?");
1802  }
1803  _handle_indentation_pop(popto);
1804 }
1805 
1806 template<class EventHandler>
1807 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1808 {
1809  // search the stack frame to jump to based on its indentation
1810  using state_type = typename EventHandler::state;
1811  auto &stack = m_evt_handler->m_stack;
1812  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1813  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1814  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1815  state_type const* popto = nullptr;
1816  #ifdef RYML_DBG
1817  char flagbuf_[128];
1818  if(_dbg_enabled())
1819  {
1820  for(state_type const& s : stack)
1821  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1822  }
1823  #endif
1824  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1825  {
1826  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1827  if(s->indref < ind)
1828  {
1829  break;
1830  }
1831  else if(s->indref == ind)
1832  {
1833  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
1834  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1835  {
1836  break;
1837  }
1838  popto = s;
1839  if(has_all(RSEQ|BLCK, s))
1840  {
1841  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1842  const size_t first = rem.first_not_of(' ');
1843  _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
1844  rem = rem.right_of(first, true);
1845  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1846  if(rem.begins_with('-') && _is_blck_token(rem))
1847  {
1848  _c4dbgp("parent was indentless seq");
1849  break;
1850  }
1851  }
1852  }
1853  }
1854  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1855  {
1856  _c4err("parse error: incorrect indentation?");
1857  }
1858  _handle_indentation_pop(popto);
1859 }
1860 
1861 
1862 //-----------------------------------------------------------------------------
1863 template<class EventHandler>
1864 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1865 {
1866  // quoted scalars can spread over multiple lines!
1867  // nice explanation here: http://yaml-multiline.info/
1868 
1869  // a span to the end of the file
1870  size_t b = m_evt_handler->m_curr->pos.offset;
1871  substr s = m_buf.sub(b);
1872  if(s.begins_with(' '))
1873  {
1874  s = s.triml(' ');
1875  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1876  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1877  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1878  }
1879  b = m_evt_handler->m_curr->pos.offset; // take this into account
1880  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1881 
1882  // skip the opening quote
1883  _line_progressed(1);
1884  s = s.sub(1);
1885 
1886  bool needs_filter = false;
1887 
1888  size_t numlines = 1; // we already have one line
1889  size_t pos = npos; // find the pos of the matching quote
1890  while( ! _finished_file())
1891  {
1892  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1893  bool line_is_blank = true;
1894  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1895  for(size_t i = 0; i < line.len; ++i)
1896  {
1897  const char curr = line.str[i];
1898  if(curr == '\'') // single quotes are escaped with two single quotes
1899  {
1900  const char next = i+1 < line.len ? line.str[i+1] : '~';
1901  if(next != '\'') // so just look for the first quote
1902  { // without another after it
1903  pos = i;
1904  break;
1905  }
1906  else
1907  {
1908  needs_filter = true; // needs filter to remove escaped quotes
1909  ++i; // skip the escaped quote
1910  }
1911  }
1912  else if(curr != ' ')
1913  {
1914  line_is_blank = false;
1915  }
1916  }
1917 
1918  // leading whitespace also needs filtering
1919  needs_filter = needs_filter
1920  || (numlines > 1)
1921  || line_is_blank
1922  || (_at_line_begin() && line.begins_with(' '));
1923 
1924  if(pos == npos)
1925  {
1926  _line_progressed(line.len);
1927  ++numlines;
1928  }
1929  else
1930  {
1931  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1932  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1933  _line_progressed(pos + 1); // progress beyond the quote
1934  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1935  break;
1936  }
1937 
1938  _line_ended();
1939  _scan_line();
1940  }
1941 
1942  if(pos == npos)
1943  {
1944  _c4err("reached end of file while looking for closing quote");
1945  }
1946  else
1947  {
1948  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1950  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
1951  s = s.sub(0, pos-1);
1952  }
1953 
1954  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1955 
1956  return ScannedScalar { s, needs_filter };
1957 }
1958 
1959 
1960 //-----------------------------------------------------------------------------
1961 template<class EventHandler>
1962 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1963 {
1964  // quoted scalars can spread over multiple lines!
1965  // nice explanation here: http://yaml-multiline.info/
1966 
1967  // a span to the end of the file
1968  size_t b = m_evt_handler->m_curr->pos.offset;
1969  substr s = m_buf.sub(b);
1970  if(s.begins_with(' '))
1971  {
1972  s = s.triml(' ');
1973  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1974  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1975  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1976  }
1977  b = m_evt_handler->m_curr->pos.offset; // take this into account
1978  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1979 
1980  // skip the opening quote
1981  _line_progressed(1);
1982  s = s.sub(1);
1983 
1984  bool needs_filter = false;
1985 
1986  size_t numlines = 1; // we already have one line
1987  size_t pos = npos; // find the pos of the matching quote
1988  while( ! _finished_file())
1989  {
1990  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1991  #if defined(__GNUC__) && __GNUC__ == 11
1992  C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
1993  #endif
1994  bool line_is_blank = true;
1995  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1996  for(size_t i = 0; i < line.len; ++i)
1997  {
1998  const char curr = line.str[i];
1999  if(curr != ' ')
2000  line_is_blank = false;
2001  // every \ is an escape
2002  if(curr == '\\')
2003  {
2004  const char next = i+1 < line.len ? line.str[i+1] : '~';
2005  needs_filter = true;
2006  if(next == '"' || next == '\\')
2007  ++i;
2008  }
2009  else if(curr == '"')
2010  {
2011  pos = i;
2012  break;
2013  }
2014  }
2015 
2016  // leading whitespace also needs filtering
2017  needs_filter = needs_filter
2018  || (numlines > 1)
2019  || line_is_blank
2020  || (_at_line_begin() && line.begins_with(' '));
2021 
2022  if(pos == npos)
2023  {
2024  _line_progressed(line.len);
2025  ++numlines;
2026  }
2027  else
2028  {
2029  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2030  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
2031  _line_progressed(pos + 1); // progress beyond the quote
2032  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
2033  break;
2034  }
2035 
2036  _line_ended();
2037  _scan_line();
2038  }
2039 
2040  if(pos == npos)
2041  {
2042  _c4err("reached end of file looking for closing quote");
2043  }
2044  else
2045  {
2046  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
2047  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
2048  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2049  s = s.sub(0, pos-1);
2050  }
2051 
2052  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2053 
2054  return ScannedScalar { s, needs_filter };
2055 }
2056 
2057 
2058 //-----------------------------------------------------------------------------
2059 template<class EventHandler>
2060 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2061 {
2062  _c4dbgpf("blck: indref={}", indref);
2063  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
2064 
2065  // nice explanation here: http://yaml-multiline.info/
2066  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2067  csubstr trimmed = s.triml(' ');
2068  if(trimmed.str > s.str)
2069  {
2070  _c4dbgp("skipping whitespace");
2071  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2072  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2073  s = trimmed;
2074  }
2075  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2076 
2077  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2078 
2079  // parse the spec
2080  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2081  size_t indentation = npos; // have to find out if no spec is given
2082  csubstr digits;
2083  if(s.len > 1)
2084  {
2085  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2086  csubstr t = s.sub(1);
2087  _c4dbgpf("blck: spec is multichar: '{}'", t);
2088  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2089  size_t pos = t.first_of("-+");
2090  _c4dbgpf("blck: spec chomp char at {}", pos);
2091  if(pos != npos)
2092  {
2093  if(t[pos] == '-')
2094  chomp = CHOMP_STRIP;
2095  else if(t[pos] == '+')
2096  chomp = CHOMP_KEEP;
2097  if(pos == 0)
2098  t = t.sub(1);
2099  else
2100  t = t.first(pos);
2101  }
2102  // from here to the end, only digits are considered
2103  digits = t.left_of(t.first_not_of("0123456789"));
2104  if( ! digits.empty())
2105  {
2106  if(C4_UNLIKELY(digits.len > 1))
2107  _c4err("parse error: invalid indentation");
2108  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2109  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2110  _c4err("parse error: could not read indentation as decimal");
2111  if(C4_UNLIKELY( ! indentation))
2112  _c4err("parse error: null indentation");
2113  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2114  indentation += m_evt_handler->m_curr->indref;
2115  }
2116  }
2117 
2118  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2119 
2120  // finish the current line
2121  _line_progressed(s.len);
2122  _line_ended();
2123  _scan_line();
2124 
2125  // start with a zero-length block, already pointing at the right place
2126  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2127  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2128 
2129  // read every full line into a raw block,
2130  // from which newlines are to be stripped as needed.
2131  //
2132  // If no explicit indentation was given, pick it from the first
2133  // non-empty line. See
2134  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2135  size_t num_lines = 0;
2136  size_t first = m_evt_handler->m_curr->pos.line;
2137  size_t provisional_indentation = npos;
2138  LineContents lc;
2139  while(( ! _finished_file()))
2140  {
2141  // peek next line, but do not advance immediately
2142  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2143  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2144  C4_DONT_OPTIMIZE(lc.rem);
2145  #endif
2146  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2147  // evaluate termination conditions
2148  if(indentation != npos)
2149  {
2150  _c4dbgpf("blck: indentation={}", indentation);
2151  // stop when the line is deindented and not empty
2152  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2153  {
2154  if(raw_block.len)
2155  {
2156  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2157  }
2158  else
2159  {
2160  _c4err("indentation decreased without any scalar");
2161  }
2162  break;
2163  }
2164  else if(indentation == 0)
2165  {
2166  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2167  if(_is_doc_token(lc.rem))
2168  {
2169  _c4dbgp("blck: stop. indentation=0 and doc ended");
2170  break;
2171  }
2172  }
2173  }
2174  else
2175  {
2176  const size_t fns = lc.stripped.first_not_of(' ');
2177  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2178  if(fns != npos) // non-empty line
2179  {
2181  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2182  _c4err("parse error");
2183  )
2184  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2185  if(provisional_indentation == npos)
2186  {
2187  if(lc.indentation < indref)
2188  {
2189  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2190  if(raw_block.len == 0)
2191  {
2192  _c4dbgp("blck: was empty, undo next line");
2193  _line_ended_undo();
2194  }
2195  break;
2196  }
2197  else if(lc.indentation == m_evt_handler->m_curr->indref)
2198  {
2199  if(has_any(RSEQ|RMAP))
2200  {
2201  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2202  break;
2203  }
2204  }
2205  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2206  indentation = lc.indentation;
2207  }
2208  else
2209  {
2210  if(lc.indentation >= provisional_indentation)
2211  {
2212  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2213  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2214  indentation = lc.indentation;
2215  }
2216  else
2217  {
2218  break;
2219  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2220  }
2221  }
2222  }
2223  else // empty line
2224  {
2225  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2226  if(provisional_indentation != npos)
2227  {
2228  if(lc.stripped.len >= provisional_indentation)
2229  {
2230  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2231  provisional_indentation = lc.stripped.len;
2232  }
2233  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2234  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2235  {
2236  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2237  provisional_indentation = lc.indentation;
2238  }
2239  #endif
2240  }
2241  else
2242  {
2243  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2244  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2245  if(provisional_indentation == npos)
2246  {
2247  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2248  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2249  }
2250  if(provisional_indentation < indref)
2251  {
2252  provisional_indentation = indref;
2253  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2254  }
2255  }
2256  }
2257  }
2258  // advance now that we know the folded scalar continues
2259  m_evt_handler->m_curr->line_contents = lc;
2260  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2261  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2262  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2263  _line_ended();
2264  ++num_lines;
2265  }
2266  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2267  C4_UNUSED(num_lines);
2268  C4_UNUSED(first);
2269 
2270  if(indentation == npos)
2271  {
2272  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2273  indentation = provisional_indentation;
2274  }
2275 
2276  if(num_lines)
2277  _line_ended_undo();
2278 
2279  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2280 
2281  sb->scalar = raw_block;
2282  sb->indentation = indentation;
2283  sb->chomp = chomp;
2284 }
2285 
2286 
2287 //-----------------------------------------------------------------------------
2288 //-----------------------------------------------------------------------------
2289 //-----------------------------------------------------------------------------
2290 /** @cond dev */
2291 
2292 // a debugging scaffold:
2293 #if 0
2294 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2295 #else
2296 #define _c4dbgfws(...)
2297 #endif
2298 
2299 template<class EventHandler>
2300 template<class FilterProcessor>
2301 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2302 {
2303  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2304  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2305 
2306  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2307  if(first_pos != npos)
2308  {
2309  const char first_char = proc.src[first_pos];
2310  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2311  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2312  {
2313  _c4dbgfws("whitespace is trailing on line", "");
2314  proc.skip(first_pos - proc.rpos);
2315  }
2316  else // a legit whitespace
2317  {
2318  proc.copy();
2319  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2320  }
2321  return true;
2322  }
2323  _c4dbgfws("whitespace is trailing on line", "");
2324  return false;
2325 }
2326 
2327 template<class EventHandler>
2328 template<class FilterProcessor>
2329 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2330 {
2331  if(!_filter_ws_handle_to_first_non_space(proc))
2332  {
2333  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2334  proc.copy(proc.src.len - proc.rpos);
2335  }
2336 }
2337 
2338 template<class EventHandler>
2339 template<class FilterProcessor>
2340 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2341 {
2342  if(!_filter_ws_handle_to_first_non_space(proc))
2343  {
2344  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2345  proc.skip(proc.src.len - proc.rpos);
2346  }
2347 }
2348 
2349 #undef _c4dbgfws
2350 
2351 
2352 //-----------------------------------------------------------------------------
2353 //-----------------------------------------------------------------------------
2354 //-----------------------------------------------------------------------------
2355 /* plain scalars */
2356 
2357 // a debugging scaffold:
2358 #if 0
2359 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2360 #else
2361 #define _c4dbgfps(fmt, ...)
2362 #endif
2363 
2364 template<class EventHandler>
2365 template<class FilterProcessor>
2366 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2367 {
2368  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2369 
2370  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2371  size_t ii = proc.rpos;
2372  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2373  if(numnl_following)
2374  {
2375  proc.set('\n', numnl_following);
2376  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2377  }
2378  else
2379  {
2380  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2381  if(ret != npos)
2382  {
2383  proc.set(' ');
2384  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2385  }
2386  else
2387  {
2388  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2389  ii = proc.src.len;
2390  }
2391  }
2392  proc.rpos = ii;
2393 }
2394 
2395 template<class EventHandler>
2396 template<class FilterProcessor>
2397 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2398 {
2399  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2400  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2401 
2402  while(proc.has_more_chars())
2403  {
2404  const char curr = proc.curr();
2405  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2406  switch(curr)
2407  {
2408  case ' ':
2409  _RYML_WITH_TAB_TOKENS(case '\t':)
2410  _c4dbgfps("whitespace", curr);
2411  _filter_ws_skip_trailing(proc);
2412  break;
2413  case '\n':
2414  _c4dbgfps("newline", curr);
2415  _filter_nl_plain(proc, /*indentation*/indentation);
2416  break;
2417  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2418  _c4dbgfps("carriage return, ignore", curr);
2419  proc.skip();
2420  break;
2421  default:
2422  proc.copy();
2423  break;
2424  }
2425  }
2426 
2427  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2428 
2429  return proc.result();
2430 }
2431 
2432 #undef _c4dbgfps
2433 
2434 
2435 template<class EventHandler>
2436 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2437 {
2438  FilterProcessorSrcDst proc(scalar, dst);
2439  return _filter_plain(proc, indentation);
2440 }
2441 
2442 template<class EventHandler>
2443 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2444 {
2445  FilterProcessorInplaceEndExtending proc(dst, cap);
2446  return _filter_plain(proc, indentation);
2447 }
2448 
2449 
2450 //-----------------------------------------------------------------------------
2451 //-----------------------------------------------------------------------------
2452 //-----------------------------------------------------------------------------
2453 /* single quoted */
2454 
2455 // a debugging scaffold:
2456 #if 0
2457 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2458 #else
2459 #define _c4dbgfsq(fmt, ...)
2460 #endif
2461 
2462 template<class EventHandler>
2463 template<class FilterProcessor>
2464 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2465 {
2466  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2467 
2468  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2469  size_t ii = proc.rpos;
2470  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2471  if(numnl_following)
2472  {
2473  proc.set('\n', numnl_following);
2474  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2475  }
2476  else
2477  {
2478  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2479  if(ret != npos)
2480  {
2481  proc.set(' ');
2482  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2483  }
2484  else
2485  {
2486  proc.set(' ');
2487  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2488  }
2489  }
2490  proc.rpos = ii;
2491 }
2492 
2493 template<class EventHandler>
2494 template<class FilterProcessor>
2495 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2496 {
2497  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2498 
2499  // from the YAML spec for double-quoted scalars:
2500  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2501  while(proc.has_more_chars())
2502  {
2503  const char curr = proc.curr();
2504  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2505  switch(curr)
2506  {
2507  case ' ':
2508  case '\t':
2509  _c4dbgfsq("whitespace", curr);
2510  _filter_ws_copy_trailing(proc);
2511  break;
2512  case '\n':
2513  _c4dbgfsq("newline", curr);
2514  _filter_nl_squoted(proc);
2515  break;
2516  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2517  _c4dbgfsq("skip cr", curr);
2518  proc.skip();
2519  break;
2520  case '\'':
2521  _c4dbgfsq("squote", curr);
2522  if(proc.next() == '\'')
2523  {
2524  _c4dbgfsq("two consecutive squotes", curr);
2525  proc.skip();
2526  proc.copy();
2527  }
2528  else
2529  {
2530  _c4err("filter error");
2531  }
2532  break;
2533  default:
2534  proc.copy();
2535  break;
2536  }
2537  }
2538 
2539  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2540 
2541  return proc.result();
2542 }
2543 
2544 #undef _c4dbgfsq
2545 
2546 template<class EventHandler>
2547 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2548 {
2549  FilterProcessorSrcDst proc(scalar, dst);
2550  return _filter_squoted(proc);
2551 }
2552 
2553 template<class EventHandler>
2554 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2555 {
2556  FilterProcessorInplaceEndExtending proc(dst, cap);
2557  return _filter_squoted(proc);
2558 }
2559 
2560 
2561 //-----------------------------------------------------------------------------
2562 //-----------------------------------------------------------------------------
2563 //-----------------------------------------------------------------------------
2564 /* double quoted */
2565 
2566 // a debugging scaffold:
2567 #if 0
2568 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2569 #else
2570 #define _c4dbgfdq(...)
2571 #endif
2572 
2573 template<class EventHandler>
2574 template<class FilterProcessor>
2575 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2576 {
2577  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2578 
2579  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2580  size_t ii = proc.rpos;
2581  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2582  if(numnl_following)
2583  {
2584  proc.set('\n', numnl_following);
2585  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2586  }
2587  else
2588  {
2589  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2590  if(ret != npos)
2591  {
2592  proc.set(' ');
2593  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2594  }
2595  else
2596  {
2597  proc.set(' ');
2598  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2599  }
2600  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2601  {
2602  _c4dbgfdq("backslash at [{}]", ii);
2603  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2604  if(next == ' ' || next == '\t')
2605  {
2606  _c4dbgfdq("extend skip to backslash", "");
2607  ++ii;
2608  }
2609  }
2610  }
2611  proc.rpos = ii;
2612 }
2613 
2614 template<class EventHandler>
2615 template<class FilterProcessor>
2616 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2617 {
2618  char next = proc.next();
2619  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2620  if(next == '\r')
2621  {
2622  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2623  {
2624  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2625  next = '\n';
2626  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2627  }
2628  }
2629 
2630  if(next == '\n')
2631  {
2632  size_t ii = proc.rpos + 2;
2633  for( ; ii < proc.src.len; ++ii)
2634  {
2635  // skip leading whitespace
2636  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2637  ;
2638  else
2639  break;
2640  }
2641  proc.skip(ii - proc.rpos);
2642  }
2643  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2644  {
2645  // escapes for json compatibility
2646  proc.translate_esc(next);
2647  _c4dbgfdq("here, used '{}'", _c4prc(next));
2648  }
2649  else if(next == '\r')
2650  {
2651  proc.skip();
2652  }
2653  else if(next == 'n')
2654  {
2655  proc.translate_esc('\n');
2656  }
2657  else if(next == 'r')
2658  {
2659  proc.translate_esc('\r');
2660  }
2661  else if(next == 't')
2662  {
2663  proc.translate_esc('\t');
2664  }
2665  else if(next == '\\')
2666  {
2667  proc.translate_esc('\\');
2668  }
2669  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2670  {
2671  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2672  _c4err_("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2673  char readbuf[8];
2674  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2675  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2676  uint32_t codepoint_val = {};
2677  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2678  _c4err_("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2679  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2680  if(C4_UNLIKELY(numbytes == 0))
2681  _c4err_("failed to decode code point={}", proc.rpos);
2682  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2683  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/3u);
2684  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2685  }
2686  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2687  {
2688  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2689  _c4err_("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2690  char readbuf[8];
2691  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2692  uint32_t codepoint_val = {};
2693  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2694  _c4err_("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2695  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2696  if(C4_UNLIKELY(numbytes == 0))
2697  _c4err_("failed to decode code point={}", proc.rpos);
2698  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2699  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2700  }
2701  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2702  {
2703  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2704  _c4err_("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2705  char readbuf[8];
2706  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2707  uint32_t codepoint_val = {};
2708  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2709  _c4err_("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2710  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2711  if(C4_UNLIKELY(numbytes == 0))
2712  _c4err_("failed to decode code point={}", proc.rpos);
2713  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2714  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2715  }
2716  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2717  else if(next == '0')
2718  {
2719  proc.translate_esc('\0');
2720  }
2721  else if(next == 'b') // backspace
2722  {
2723  proc.translate_esc('\b');
2724  }
2725  else if(next == 'f') // form feed
2726  {
2727  proc.translate_esc('\f');
2728  }
2729  else if(next == 'a') // bell character
2730  {
2731  proc.translate_esc('\a');
2732  }
2733  else if(next == 'v') // vertical tab
2734  {
2735  proc.translate_esc('\v');
2736  }
2737  else if(next == 'e') // escape character
2738  {
2739  proc.translate_esc('\x1b');
2740  }
2741  else if(next == '_') // unicode non breaking space \u00a0
2742  {
2743  // https://www.compart.com/en/unicode/U+00a0
2744  const char payload[] = {
2745  _RYML_CHCONST(-0x3e, 0xc2),
2746  _RYML_CHCONST(-0x60, 0xa0),
2747  };
2748  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2749  }
2750  else if(next == 'N') // unicode next line \u0085
2751  {
2752  // https://www.compart.com/en/unicode/U+0085
2753  const char payload[] = {
2754  _RYML_CHCONST(-0x3e, 0xc2),
2755  _RYML_CHCONST(-0x7b, 0x85),
2756  };
2757  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2758  }
2759  else if(next == 'L') // unicode line separator \u2028
2760  {
2761  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2762  const char payload[] = {
2763  _RYML_CHCONST(-0x1e, 0xe2),
2764  _RYML_CHCONST(-0x80, 0x80),
2765  _RYML_CHCONST(-0x58, 0xa8),
2766  };
2767  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2768  }
2769  else if(next == 'P') // unicode paragraph separator \u2029
2770  {
2771  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2772  const char payload[] = {
2773  _RYML_CHCONST(-0x1e, 0xe2),
2774  _RYML_CHCONST(-0x80, 0x80),
2775  _RYML_CHCONST(-0x57, 0xa9),
2776  };
2777  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2778  }
2779  else if(next == '\0')
2780  {
2781  proc.skip();
2782  }
2783  else
2784  {
2785  _c4err_("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2786  }
2787  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2788 }
2789 
2790 
2791 template<class EventHandler>
2792 template<class FilterProcessor>
2793 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2794 {
2795  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2796  // from the YAML spec for double-quoted scalars:
2797  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2798  while(proc.has_more_chars())
2799  {
2800  const char curr = proc.curr();
2801  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2802  switch(curr)
2803  {
2804  case ' ':
2805  case '\t':
2806  {
2807  _c4dbgfdq("whitespace", curr);
2808  _filter_ws_copy_trailing(proc);
2809  break;
2810  }
2811  case '\n':
2812  {
2813  _c4dbgfdq("newline", curr);
2814  _filter_nl_dquoted(proc);
2815  break;
2816  }
2817  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2818  {
2819  _c4dbgfdq("carriage return, ignore", curr);
2820  proc.skip();
2821  break;
2822  }
2823  case '\\':
2824  {
2825  _filter_dquoted_backslash(proc);
2826  break;
2827  }
2828  default:
2829  {
2830  proc.copy();
2831  break;
2832  }
2833  }
2834  }
2835  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2836  return proc.result();
2837 }
2838 
2839 #undef _c4dbgfdq
2840 
2841 
2842 template<class EventHandler>
2843 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2844 {
2845  FilterProcessorSrcDst proc(scalar, dst);
2846  return _filter_dquoted(proc);
2847 }
2848 
2849 template<class EventHandler>
2850 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2851 {
2852  FilterProcessorInplaceMidExtending proc(dst, cap);
2853  return _filter_dquoted(proc);
2854 }
2855 
2856 
2857 //-----------------------------------------------------------------------------
2858 //-----------------------------------------------------------------------------
2859 //-----------------------------------------------------------------------------
2860 // block filtering helpers
2861 
2862 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
2863 {
2864  if(indentation + 1 > s.len)
2865  return npos;
2866  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
2867  {
2868  if(s.str[i] == '\n')
2869  {
2870  csubstr rem = s.sub(i + 1);
2871  size_t first = rem.first_not_of(' ');
2872  first = (first != npos) ? first : rem.len;
2873  if(first > indentation)
2874  return i;
2875  }
2876  }
2877  return npos;
2878 }
2879 
2880 template<class EventHandler>
2881 template<class FilterProcessor>
2882 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2883 {
2884  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2885  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2886 
2887  // a debugging scaffold:
2888  #if 0
2889  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2890  #else
2891  #define _c4dbgchomp(...)
2892  #endif
2893 
2894  // advance to the last line having spaces beyond the indentation
2895  {
2896  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2897  if(last != npos)
2898  {
2899  _c4dbgchomp("found newline and larger indentation. last={}", last);
2900  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2901  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2902  // remove indentation spaces, copy the rest
2903  while((proc.rpos < last) && proc.has_more_chars())
2904  {
2905  const char curr = proc.curr();
2906  _c4dbgchomp("curr='{}'", _c4prc(curr));
2907  switch(curr)
2908  {
2909  case '\n':
2910  {
2911  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2912  proc.copy();
2913  // are there spaces after the newline?
2914  csubstr at_next_line = proc.rem();
2915  if(at_next_line.begins_with(' '))
2916  {
2917  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2918  // there are spaces.
2919  size_t first_non_space = at_next_line.first_not_of(' ');
2920  _c4dbgchomp("first_non_space={}", first_non_space);
2921  if(first_non_space == npos)
2922  {
2923  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2924  first_non_space = at_next_line.len;
2925  }
2926  if(first_non_space <= indentation)
2927  {
2928  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2929  proc.skip(first_non_space);
2930  }
2931  else
2932  {
2933  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2934  proc.skip(indentation);
2935  // copy the spaces after the indentation
2936  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2937  proc.copy(first_non_space - indentation);
2938  }
2939  }
2940  break;
2941  }
2942  case '\r':
2943  proc.skip();
2944  break;
2945  default:
2946  _c4err("parse error");
2947  break;
2948  }
2949  }
2950  }
2951  }
2952 
2953  // from now on, we only have line ends (or indentation spaces)
2954  switch(chomp)
2955  {
2956  case CHOMP_CLIP:
2957  {
2958  bool had_one = false;
2959  while(proc.has_more_chars())
2960  {
2961  const char curr = proc.curr();
2962  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2963  switch(curr)
2964  {
2965  case '\n':
2966  {
2967  _c4dbgchomp("copy newline!", curr);
2968  proc.copy();
2969  proc.set_at_end();
2970  had_one = true;
2971  break;
2972  }
2973  case ' ':
2974  case '\r':
2975  _c4dbgchomp("skip!", curr);
2976  proc.skip();
2977  break;
2978  }
2979  }
2980  if(!had_one) // there were no newline characters. add one.
2981  {
2982  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2983  proc.set('\n');
2984  }
2985  break;
2986  }
2987  case CHOMP_KEEP:
2988  {
2989  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2990  while(proc.has_more_chars())
2991  {
2992  const char curr = proc.curr();
2993  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2994  switch(curr)
2995  {
2996  case '\n':
2997  _c4dbgchomp("copy newline!", curr);
2998  proc.copy();
2999  break;
3000  case ' ':
3001  case '\r':
3002  _c4dbgchomp("skip!", curr);
3003  proc.skip();
3004  break;
3005  }
3006  }
3007  break;
3008  }
3009  case CHOMP_STRIP:
3010  {
3011  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3012  // nothing to do!
3013  break;
3014  }
3015  }
3016 
3017  #undef _c4dbgchomp
3018 }
3019 
3020 
3021 // a debugging scaffold:
3022 #if 0
3023 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3024 #else
3025 #define _c4dbgfb(...)
3026 #endif
3027 
3028 template<class EventHandler>
3029 template<class FilterProcessor>
3030 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3031 {
3032  csubstr rem = proc.rem(); // remaining
3033  if(rem.len)
3034  {
3035  size_t first = rem.first_not_of(' ');
3036  if(first != npos)
3037  {
3038  _c4dbgfb("{} spaces follow before next nonws character", first);
3039  if(first < indentation)
3040  {
3041  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3042  proc.skip(first);
3043  }
3044  else
3045  {
3046  _c4dbgfb("skip {} spaces from indentation", indentation);
3047  proc.skip(indentation);
3048  }
3049  }
3050  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3051  else
3052  {
3053  _c4dbgfb("all spaces to the end: {} spaces", first);
3054  first = rem.len;
3055  if(first)
3056  {
3057  if(first < indentation)
3058  {
3059  _c4dbgfb("skip everything", first);
3060  proc.skip(proc.src.len - proc.rpos);
3061  }
3062  else
3063  {
3064  _c4dbgfb("skip {} spaces from indentation", indentation);
3065  proc.skip(indentation);
3066  }
3067  }
3068  }
3069  #endif
3070  }
3071 }
3072 
3073 template<class EventHandler>
3074 template<class FilterProcessor>
3075 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3076 {
3077  csubstr contents = proc.src.trimr(" \n\r");
3078  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3079  if(!contents.len)
3080  {
3081  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3082  if(chomp == CHOMP_KEEP && proc.src.len)
3083  {
3084  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3085  while(proc.has_more_chars())
3086  {
3087  const char curr = proc.curr();
3088  if(curr == '\n')
3089  proc.copy();
3090  else
3091  proc.skip();
3092  }
3093  if(!proc.wpos)
3094  {
3095  proc.set('\n');
3096  }
3097  }
3098  }
3099  return contents.len;
3100 }
3101 
3102 template<class EventHandler>
3103 template<class FilterProcessor>
3104 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3105 {
3106  _c4dbgfb("contents_len={}", contents_len);
3107 
3108  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3109 
3110  // extend contents to just before the first newline at the end,
3111  // in case it is preceded by spaces
3112  size_t firstnewl = proc.src.first_of('\n', contents_len);
3113  if(firstnewl != npos)
3114  {
3115  contents_len = firstnewl;
3116  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3117  }
3118  else
3119  {
3120  contents_len = proc.src.len;
3121  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3122  }
3123 
3124  return contents_len;
3125 }
3126 
3127 #undef _c4dbgfb
3128 
3129 
3130 //-----------------------------------------------------------------------------
3131 //-----------------------------------------------------------------------------
3132 //-----------------------------------------------------------------------------
3133 
3134 // a debugging scaffold:
3135 #if 0
3136 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3137 #else
3138 #define _c4dbgfbl(...)
3139 #endif
3140 
3141 template<class EventHandler>
3142 template<class FilterProcessor>
3143 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3144 {
3145  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3146 
3147  size_t contents_len = _handle_all_whitespace(proc, chomp);
3148  if(!contents_len)
3149  return proc.result();
3150 
3151  contents_len = _extend_to_chomp(proc, contents_len);
3152 
3153  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3154 
3155  _filter_block_indentation(proc, indentation);
3156 
3157  // now filter the bulk
3158  while(proc.has_more_chars(/*maxpos*/contents_len))
3159  {
3160  const char curr = proc.curr();
3161  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3162  switch(curr)
3163  {
3164  case '\n':
3165  {
3166  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3167  proc.copy(); // copy the newline
3168  _filter_block_indentation(proc, indentation);
3169  break;
3170  }
3171  case '\r':
3172  proc.skip();
3173  break;
3174  default:
3175  proc.copy();
3176  break;
3177  }
3178  }
3179 
3180  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3181 
3182  _filter_chomp(proc, chomp, indentation);
3183 
3184  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3185 
3186  return proc.result();
3187 }
3188 
3189 #undef _c4dbgfbl
3190 
3191 template<class EventHandler>
3192 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3193 {
3194  FilterProcessorSrcDst proc(scalar, dst);
3195  return _filter_block_literal(proc, indentation, chomp);
3196 }
3197 
3198 template<class EventHandler>
3199 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3200 {
3201  FilterProcessorInplaceEndExtending proc(scalar, cap);
3202  return _filter_block_literal(proc, indentation, chomp);
3203 }
3204 
3205 
3206 //-----------------------------------------------------------------------------
3207 //-----------------------------------------------------------------------------
3208 //-----------------------------------------------------------------------------
3209 
3210 // a debugging scaffold:
3211 #if 0
3212 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3213 #else
3214 #define _c4dbgfbf(...)
3215 #endif
3216 
3217 
3218 template<class EventHandler>
3219 template<class FilterProcessor>
3220 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3221 {
3222  _filter_block_indentation(proc, indentation);
3223  while(proc.has_more_chars(len))
3224  {
3225  const char curr = proc.curr();
3226  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3227  switch(curr)
3228  {
3229  case '\n':
3230  _c4dbgfbf("newline.", curr);
3231  proc.copy();
3232  _filter_block_indentation(proc, indentation);
3233  break;
3234  case '\r':
3235  proc.skip();
3236  break;
3237  case ' ':
3238  case '\t':
3239  {
3240  size_t first = proc.rem().first_not_of(" \t");
3241  _c4dbgfbf("space. first={}", first);
3242  if(first == npos)
3243  first = proc.rem().len;
3244  _c4dbgfbf("... indentation increased to {}", first);
3245  _filter_block_folded_indented_block(proc, indentation, len, first);
3246  break;
3247  }
3248  default:
3249  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3250  return;
3251  }
3252  }
3253 }
3254 
/** Handle one newline of a run of consecutive newlines in a folded
 * block scalar.
 *
 * @param proc the filter processor, positioned at the newline
 * @param num_newl the 1-based count of this newline within the run
 * @param wpos_at_first_newl the write position recorded when the
 *        first newline of the run was handled (ignored and
 *        overwritten when num_newl is 1)
 * @return the write position of the first newline of the run: the
 *         current proc.wpos when num_newl is 1, otherwise the value
 *         passed in
 *
 * @note the caller documents that this code lives in its own function
 * to work around wrong results seen in vs2022 32-bit release builds;
 * keep the shape of this function unless that is re-verified. */
3255 template<class EventHandler>
3256 template<class FilterProcessor>
3257 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3258 {
3259  switch(num_newl)
3260  {
// first newline of the run: drop it from the input, write a space
// in its place, and remember where that space was written
3261  case 1u:
3262  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3263  wpos_at_first_newl = proc.wpos;
3264  proc.skip();
3265  proc.set(' ');
3266  break;
// second newline of the run: drop it from the input and turn the
// space written for the first newline back into a newline
3267  case 2u:
3268  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3269  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3270  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3271  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3272  proc.skip();
3273  proc.set_at(wpos_at_first_newl, '\n');
3274  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3275  break;
// third and later newlines of the run are copied as they are
3276  default:
3277  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3278  proc.copy();
3279  break;
3280  }
3281  return wpos_at_first_newl;
3282 }
3283 
/** Process a run of newlines (and what follows it) in a folded block
 * scalar. Must be called with the processor positioned at a newline
 * (asserted).
 *
 * Loops while there is input before @p len:
 *  - a newline is passed to _filter_block_folded_newlines_compress()
 *    together with the running count of newlines, and the
 *    indentation of the following line is skipped
 *  - a space or tab starts a more-indented block: the space written
 *    for the first newline (if any) is turned into a newline, one
 *    more newline is set when more than one newline was seen, and
 *    the block is passed to _filter_block_folded_indented_block();
 *    the newline count is then reset
 *  - '\r' is skipped
 *  - any other character ends the function */
3284 template<class EventHandler>
3285 template<class FilterProcessor>
3286 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3287 {
3288  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
// number of newlines seen in the current run, and the write
// position at which the first of them was written as a space
3289  size_t num_newl = 0;
3290  size_t wpos_at_first_newl = npos;
3291  while(proc.has_more_chars(len))
3292  {
3293  const char curr = proc.curr();
3294  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3295  switch(curr)
3296  {
3297  case '\n':
3298  {
3299  _c4dbgfbf("newline. sofar={}", num_newl);
3300  // NOTE: vs2022-32bit-release builds were giving wrong
3301  // results in this block, if it was written either
3302  // as a switch(num_newl) or as its equivalent if-form.
3303  //
3304  // For this reason, we're using a dedicated function
3305  // (**_compress), which seems to work around the issue.
3306  //
3307  // The manifested problem was that somewhere between the
3308  // assignment to curr and this point, proc.wpos (the
3309  // write-position of the processor) jumped to npos, which
3310  // made the write wrap-around! To make things worse,
3311  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3312  // problem go away!
3313  //
3314  // The only way to make the problem appear with prints
3315  // enabled was by disabling all prints in this function
3316  // (including in the block which was moved to the compress
3317  // function) and then selectively enabling only some of
3318  // those prints.
3319  //
3320  // This may be due to some bug in the cl-x86 optimizer; or
3321  // it may be triggered by some UB which may be
3322  // inadvertently present in this function or in the filter
3323  // processor. This is despite our best efforts to weed out
3324  // any such UB problem: neither clang-tidy, nor any of the
3325  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3326  // in this code.
3327  //
3328  // In the end, moving this block to a separate function
3329  // was the only way to bury the problem. But it may
3330  // resurface again, as The Undead, rising from the
3331  // grave to haunt us with his terrible presence.
3332  //
3333  // We may have to revisit this. With a stake, and lots of
3334  // garlic.
3335  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3336  _filter_block_indentation(proc, indentation);
3337  break;
3338  }
3339  case ' ':
3340  case '\t':
3341  {
// length of the run of spaces/tabs at the read position; when
// the run reaches the end of the input, use the remaining length
3342  size_t first = proc.rem().first_not_of(" \t");
3343  _c4dbgfbf("space. first={}", first);
3344  if(first == npos)
3345  first = proc.rem().len;
3346  _c4dbgfbf("... indentation increased to {}", first);
3347  if(num_newl)
3348  {
3349  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3350  proc.set_at(wpos_at_first_newl, '\n');
3351  }
3352  if(num_newl > 1u)
3353  {
3354  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3355  proc.set('\n');
3356  }
3357  _filter_block_folded_indented_block(proc, indentation, len, first);
// the indented block was consumed; start counting a new run
3358  num_newl = 0;
3359  wpos_at_first_newl = npos;
3360  break;
3361  }
3362  case '\r':
3363  proc.skip();
3364  break;
3365  default:
3366  _c4dbgfbf("not space, not newline. stop.", 0);
3367  return;
3368  }
3369  }
3370 }
3371 
3372 
3373 template<class EventHandler>
3374 template<class FilterProcessor>
3375 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3376 {
3377  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3378  if(curr_indentation)
3379  proc.copy(curr_indentation);
3380  while(proc.has_more_chars(len))
3381  {
3382  const char curr = proc.curr();
3383  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3384  switch(curr)
3385  {
3386  case '\n':
3387  {
3388  proc.copy();
3389  _filter_block_indentation(proc, indentation);
3390  csubstr rem = proc.rem();
3391  const size_t first = rem.first_not_of(' ');
3392  _c4dbgfbf("newline. firstns={}", first);
3393  if(first == 0)
3394  {
3395  const char c = rem[first];
3396  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3397  if(c == '\n' || c == '\r')
3398  {
3399  ;
3400  }
3401  else
3402  {
3403  _c4dbgfbf("done with indented block", first);
3404  goto endloop;
3405  }
3406  }
3407  else if(first != npos)
3408  {
3409  proc.copy(first);
3410  _c4dbgfbf("copy all {} spaces", first);
3411  }
3412  break;
3413  }
3414  break;
3415  case '\r':
3416  proc.skip();
3417  break;
3418  default:
3419  proc.copy();
3420  break;
3421  }
3422  }
3423  endloop:
3424  return;
3425 }
3426 
3427 
3428 template<class EventHandler>
3429 template<class FilterProcessor>
3430 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3431 {
3432  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3433 
3434  size_t contents_len = _handle_all_whitespace(proc, chomp);
3435  if(!contents_len)
3436  return proc.result();
3437 
3438  contents_len = _extend_to_chomp(proc, contents_len);
3439 
3440  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3441 
3442  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3443 
3444  // now filter the bulk
3445  while(proc.has_more_chars(/*maxpos*/contents_len))
3446  {
3447  const char curr = proc.curr();
3448  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3449  switch(curr)
3450  {
3451  case '\n':
3452  {
3453  _c4dbgfbf("found newline", curr);
3454  _filter_block_folded_newlines(proc, indentation, contents_len);
3455  break;
3456  }
3457  case '\r':
3458  proc.skip();
3459  break;
3460  default:
3461  proc.copy();
3462  break;
3463  }
3464  }
3465 
3466  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3467 
3468  _filter_chomp(proc, chomp, indentation);
3469 
3470  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3471 
3472  return proc.result();
3473 }
3474 
3475 #undef _c4dbgfbf
3476 
3477 template<class EventHandler>
3478 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3479 {
3480  FilterProcessorSrcDst proc(scalar, dst);
3481  return _filter_block_folded(proc, indentation, chomp);
3482 }
3483 
3484 template<class EventHandler>
3485 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3486 {
3487  FilterProcessorInplaceEndExtending proc(scalar, cap);
3488  return _filter_block_folded(proc, indentation, chomp);
3489 }
3490 
3491 
3492 //-----------------------------------------------------------------------------
3493 //-----------------------------------------------------------------------------
3494 //-----------------------------------------------------------------------------
3495 
3496 template<class EventHandler>
3497 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3498 {
3499  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3500  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3501  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3502  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3503  return r.get();
3504 }
3505 
3506 //-----------------------------------------------------------------------------
3507 
3508 template<class EventHandler>
3509 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3510 {
3511  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3512  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3513  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3514  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3515  return r.get();
3516 }
3517 
3518 
3519 //-----------------------------------------------------------------------------
3520 
3521 template<class EventHandler>
3522 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3523 {
3524  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3525  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3526  if(C4_LIKELY(r.valid()))
3527  {
3528  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3529  return r.get();
3530  }
3531  else
3532  {
3533  const size_t len = r.required_len();
3534  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3535  substr dst = m_evt_handler->alloc_arena(len, &s);
3536  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3537  if(dst.str)
3538  {
3539  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3540  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3541  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3542  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3543  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3544  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3545  return rsd.get();
3546  }
3547  return dst;
3548  }
3549 }
3550 
3551 
3552 //-----------------------------------------------------------------------------
3553 
3554 template<class EventHandler>
3555 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3556 {
3557  if(s.is_sub(m_buf))
3558  {
3559  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3560  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
3561  if(s.len)
3562  memmove(s.str - 1, s.str, s.len);
3563  --s.str;
3564  s.str[s.len] = '\n';
3565  ++s.len;
3566  return s;
3567  }
3568  else
3569  {
3570  substr dst = m_evt_handler->alloc_arena(s.len + 1);
3571  if(s.len)
3572  memcpy(dst.str, s.str, s.len);
3573  dst[s.len] = '\n';
3574  return dst;
3575  }
3576 }
3577 
3578 template<class EventHandler>
3579 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3580 {
3581  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3582  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3583  csubstr result;
3584  if(C4_LIKELY(r.valid()))
3585  {
3586  result = r.get();
3587  }
3588  else
3589  {
3590  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3591  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3592  // this can only happen when adding a single newline in clip mode.
3593  // so we shift left the scalar by one place
3594  result = _move_scalar_left_and_add_newline(s);
3595  }
3596  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
3597  return result;
3598 }
3599 
3600 
3601 //-----------------------------------------------------------------------------
3602 template<class EventHandler>
3603 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3604 {
3605  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3606  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3607  csubstr result;
3608  if(C4_LIKELY(r.valid()))
3609  {
3610  result = r.get();
3611  }
3612  else
3613  {
3614  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3615  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3616  // this can only happen when adding a single newline in clip mode.
3617  // so we shift left the scalar by one place
3618  result = _move_scalar_left_and_add_newline(s);
3619  }
3620  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
3621  return result;
3622 }
3623 
3624 
3625 //-----------------------------------------------------------------------------
3626 
3627 template<class EventHandler>
3628 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3629 {
3630  if(sc.needs_filter)
3631  {
3632  if(m_options.scalar_filtering())
3633  {
3634  return _filter_scalar_plain(sc.scalar, indentation);
3635  }
3636  else
3637  {
3638  _c4dbgp("plain scalar left unfiltered");
3639  m_evt_handler->mark_key_scalar_unfiltered();
3640  }
3641  }
3642  else
3643  {
3644  _c4dbgp("plain scalar doesn't need filtering");
3645  }
3646  return sc.scalar;
3647 }
3648 
3649 template<class EventHandler>
3650 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3651 {
3652  if(sc.needs_filter)
3653  {
3654  if(m_options.scalar_filtering())
3655  {
3656  return _filter_scalar_plain(sc.scalar, indentation);
3657  }
3658  else
3659  {
3660  _c4dbgp("plain scalar left unfiltered");
3661  m_evt_handler->mark_val_scalar_unfiltered();
3662  }
3663  }
3664  else
3665  {
3666  _c4dbgp("plain scalar doesn't need filtering");
3667  }
3668  return sc.scalar;
3669 }
3670 
3671 
3672 //-----------------------------------------------------------------------------
3673 
3674 template<class EventHandler>
3675 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3676 {
3677  if(sc.needs_filter)
3678  {
3679  if(m_options.scalar_filtering())
3680  {
3681  return _filter_scalar_squot(sc.scalar);
3682  }
3683  else
3684  {
3685  _c4dbgp("squo key scalar left unfiltered");
3686  m_evt_handler->mark_key_scalar_unfiltered();
3687  }
3688  }
3689  else
3690  {
3691  _c4dbgp("squo key scalar doesn't need filtering");
3692  }
3693  return sc.scalar;
3694 }
3695 
3696 template<class EventHandler>
3697 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3698 {
3699  if(sc.needs_filter)
3700  {
3701  if(m_options.scalar_filtering())
3702  {
3703  return _filter_scalar_squot(sc.scalar);
3704  }
3705  else
3706  {
3707  _c4dbgp("squo val scalar left unfiltered");
3708  m_evt_handler->mark_val_scalar_unfiltered();
3709  }
3710  }
3711  else
3712  {
3713  _c4dbgp("squo val scalar doesn't need filtering");
3714  }
3715  return sc.scalar;
3716 }
3717 
3718 
3719 //-----------------------------------------------------------------------------
3720 
3721 template<class EventHandler>
3722 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3723 {
3724  if(sc.needs_filter)
3725  {
3726  if(m_options.scalar_filtering())
3727  {
3728  return _filter_scalar_dquot(sc.scalar);
3729  }
3730  else
3731  {
3732  _c4dbgp("dquo scalar left unfiltered");
3733  m_evt_handler->mark_key_scalar_unfiltered();
3734  }
3735  }
3736  else
3737  {
3738  _c4dbgp("dquo scalar doesn't need filtering");
3739  }
3740  return sc.scalar;
3741 }
3742 
3743 template<class EventHandler>
3744 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3745 {
3746  if(sc.needs_filter)
3747  {
3748  if(m_options.scalar_filtering())
3749  {
3750  return _filter_scalar_dquot(sc.scalar);
3751  }
3752  else
3753  {
3754  _c4dbgp("dquo scalar left unfiltered");
3755  m_evt_handler->mark_val_scalar_unfiltered();
3756  }
3757  }
3758  else
3759  {
3760  _c4dbgp("dquo scalar doesn't need filtering");
3761  }
3762  return sc.scalar;
3763 }
3764 
3765 
3766 //-----------------------------------------------------------------------------
3767 
3768 template<class EventHandler>
3769 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3770 {
3771  if(m_options.scalar_filtering())
3772  {
3773  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3774  }
3775  else
3776  {
3777  _c4dbgp("literal scalar left unfiltered");
3778  m_evt_handler->mark_key_scalar_unfiltered();
3779  }
3780  return sb.scalar;
3781 }
3782 
3783 template<class EventHandler>
3784 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3785 {
3786  if(m_options.scalar_filtering())
3787  {
3788  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3789  }
3790  else
3791  {
3792  _c4dbgp("literal scalar left unfiltered");
3793  m_evt_handler->mark_val_scalar_unfiltered();
3794  }
3795  return sb.scalar;
3796 }
3797 
3798 
3799 //-----------------------------------------------------------------------------
3800 
3801 template<class EventHandler>
3802 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3803 {
3804  if(m_options.scalar_filtering())
3805  {
3806  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3807  }
3808  else
3809  {
3810  _c4dbgp("folded scalar left unfiltered");
3811  m_evt_handler->mark_key_scalar_unfiltered();
3812  }
3813  return sb.scalar;
3814 }
3815 
3816 template<class EventHandler>
3817 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3818 {
3819  if(m_options.scalar_filtering())
3820  {
3821  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3822  }
3823  else
3824  {
3825  _c4dbgp("folded scalar left unfiltered");
3826  m_evt_handler->mark_val_scalar_unfiltered();
3827  }
3828  return sb.scalar;
3829 }
3830 
3831 
3832 //-----------------------------------------------------------------------------
3833 //-----------------------------------------------------------------------------
3834 //-----------------------------------------------------------------------------
3835 
3836 #ifdef RYML_DBG // !!! <----------------------------------
3837 
3838 template<class EventHandler>
3839 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3840 {
3841  char buf1_[64], buf2_[64], buf3_[64];
3842  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3843  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3844  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3845  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3846  s->flags |= on;
3847 }
3848 
3849 template<class EventHandler>
3850 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3851 {
3852  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3853  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3854  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3855  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3856  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3857  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3858  s->flags |= on;
3859  s->flags &= ~off;
3860 }
3861 
3862 template<class EventHandler>
3863 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3864 {
3865  char buf1_[64], buf2_[64], buf3_[64];
3866  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3867  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3868  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3869  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3870  s->flags &= ~off;
3871 }
3872 
3873 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3874 {
3875  size_t pos = 0;
3876  bool gotone = false;
3877 
3878  #define _prflag(fl) \
3879  if((flags & fl) == (fl)) \
3880  { \
3881  if(gotone) \
3882  { \
3883  if(pos + 1 < buf.len) \
3884  buf[pos] = '|'; \
3885  ++pos; \
3886  } \
3887  csubstr fltxt = #fl; \
3888  if(pos + fltxt.len <= buf.len) \
3889  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3890  pos += fltxt.len; \
3891  gotone = true; \
3892  }
3893 
3894  _prflag(RTOP);
3895  _prflag(RUNK);
3896  _prflag(RMAP);
3897  _prflag(RSEQ);
3898  _prflag(FLOW);
3899  _prflag(BLCK);
3900  _prflag(QMRK);
3901  _prflag(RKEY);
3902  _prflag(RVAL);
3903  _prflag(RKCL);
3904  _prflag(RNXT);
3905  _prflag(SSCL);
3906  _prflag(QSCL);
3907  _prflag(RSET);
3908  _prflag(RDOC);
3909  _prflag(NDOC);
3910  _prflag(USTY);
3911  _prflag(RSEQIMAP);
3912 
3913  #undef _prflag
3914 
3915  if(pos == 0)
3916  if(buf.len > 0)
3917  buf[pos++] = '0';
3918 
3919  RYML_CHECK(pos <= buf.len);
3920 
3921  return buf.first(pos);
3922 }
3923 
3924 #endif // RYML_DBG !!! <----------------------------------
3925 
3926 
3927 //-----------------------------------------------------------------------------
3928 //-----------------------------------------------------------------------------
3929 //-----------------------------------------------------------------------------
3930 
3931 template<class EventHandler>
3932 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
3933 {
3934  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3935  return m_buf.sub(loc.offset);
3936 }
3937 
3938 template<class EventHandler>
3939 Location ParseEngine<EventHandler>::val_location(const char *val) const
3940 {
3941  if(C4_UNLIKELY(val == nullptr))
3942  return {m_file, 0, 0, 0};
3943  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3944  // NOTE: if any of these checks fails, the parser needs to be
3945  // instantiated with locations enabled.
3946  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3947  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3948  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3950  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3951  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3952  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3953  csubstr src = m_buf;
3954  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3955  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3956  // ok. search the first stored newline after the given ptr
3957  using lineptr_type = size_t const* C4_RESTRICT;
3958  lineptr_type lineptr = nullptr;
3959  size_t offset = (size_t)(val - src.begin());
3960  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3961  {
3962  // just do a linear search if the size is small.
3963  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3964  {
3965  if(*curr > offset)
3966  {
3967  lineptr = curr;
3968  break;
3969  }
3970  }
3971  }
3972  else
3973  {
3974  // do a bisection search if the size is not small.
3975  //
3976  // We could use std::lower_bound but this is simple enough and
3977  // spares the costly include of <algorithm>.
3978  size_t count = m_newline_offsets_size;
3979  size_t step;
3980  lineptr_type it;
3981  lineptr = m_newline_offsets;
3982  while(count)
3983  {
3984  step = count >> 1;
3985  it = lineptr + step;
3986  if(*it < offset)
3987  {
3988  lineptr = ++it;
3989  count -= step + 1;
3990  }
3991  else
3992  {
3993  count = step;
3994  }
3995  }
3996  }
3997  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
3998  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
3999  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4000  Location loc;
4001  loc.name = m_file;
4002  loc.offset = offset;
4003  loc.line = (size_t)(lineptr - m_newline_offsets);
4004  if(lineptr > m_newline_offsets)
4005  loc.col = (offset - *(lineptr-1) - 1u);
4006  else
4007  loc.col = offset;
4008  return loc;
4009 }
4010 
4011 template<class EventHandler>
4012 void ParseEngine<EventHandler>::_prepare_locations()
4013 {
4014  m_newline_offsets_buf = m_buf;
4015  size_t numnewlines = 1u + m_buf.count('\n');
4016  _resize_locations(numnewlines);
4017  m_newline_offsets_size = 0;
4018  for(size_t i = 0; i < m_buf.len; i++)
4019  if(m_buf[i] == '\n')
4020  m_newline_offsets[m_newline_offsets_size++] = i;
4021  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4022  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4023 }
4024 
4025 template<class EventHandler>
4026 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4027 {
4028  if(numnewlines > m_newline_offsets_capacity)
4029  {
4030  if(m_newline_offsets)
4031  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4032  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4033  m_newline_offsets_capacity = numnewlines;
4034  }
4035 }
4036 
4037 template<class EventHandler>
4038 bool ParseEngine<EventHandler>::_locations_dirty() const
4039 {
4040  return !m_newline_offsets_size;
4041 }
4042 
4043 
4044 //-----------------------------------------------------------------------------
4045 //-----------------------------------------------------------------------------
4046 //-----------------------------------------------------------------------------
4047 
// Skip blanks and a comment at the start of the remainder of the
// current line.
//
// When the remainder is not empty and starts with a space or a tab,
// _skipchars(" \t") is called (presumably consuming the run of blanks;
// see that function). Then, if the remainder starts with '#', the whole
// remainder is reported as progressed, ie the comment runs to the end
// of the line.
//
// The remainder is deliberately re-read from the current state at every
// use instead of being copied to a local csubstr; see the note below.
4048 template<class EventHandler>
4049 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4050 {
4051  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4052  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4053  {
4054  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4055  {
4056  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4057  _skipchars(" \t");
4058  }
4059  // comments
      // (this is tested on the remainder as it is after any blanks were skipped)
4060  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4061  {
4062  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4063  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4064  }
4065  }
4066 }
4067 
4068 
4069 //-----------------------------------------------------------------------------
4070 
4071 
4072 template<class EventHandler>
4073 void ParseEngine<EventHandler>::_handle_colon()
4074 {
4075  size_t curr = m_evt_handler->m_curr->pos.line;
4076  if(m_prev_colon != npos)
4077  {
4078  if(curr == m_prev_colon)
4079  _c4err("two colons on same line");
4080  }
4081  m_prev_colon = curr;
4082 }
4083 
4084 template<class EventHandler>
4085 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4086 {
4087  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4088  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
4089  _c4err("too many annotations");
4090  dst->annotations[dst->num_entries].str = str;
4091  dst->annotations[dst->num_entries].indentation = indentation;
4092  dst->annotations[dst->num_entries].line = line;
4093  ++dst->num_entries;
4094 }
4095 
4096 template<class EventHandler>
4097 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4098 {
4099  dst->num_entries = 0;
4100 }
4101 
4102 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4103 template<class EventHandler>
4104 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4105 {
4106  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4107  {
4108  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4109  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4110  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4111  size_t to_skip = m_evt_handler->m_curr->indref;
4112  if(m_pending_anchors.num_entries)
4113  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4114  if(m_pending_tags.num_entries)
4115  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4116  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4117  _maybe_skipchars_up_to(' ', to_skip);
4118  return true;
4119  }
4120  return false;
4121 }
4122 #endif
4123 
4124 template<class EventHandler>
4125 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4126 {
4127  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4128 }
4129 
4130 template<class EventHandler>
4131 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4132 {
4133  if(!tag.begins_with("!<"))
4134  {
4135  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4136  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4137  }
4138  else
4139  {
4140  if(C4_UNLIKELY(!tag.ends_with('>')))
4141  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
4142  }
4143 }
4144 
4145 template<class EventHandler>
4146 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4147 {
4148  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4149  if(m_pending_tags.num_entries)
4150  {
4151  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4152  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4153  {
4154  _check_tag(m_pending_tags.annotations[0].str);
4155  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4156  _clear_annotations(&m_pending_tags);
4157  }
4158  else
4159  {
4160  _c4err("too many tags");
4161  }
4162  }
4163  if(m_pending_anchors.num_entries)
4164  {
4165  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4166  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4167  {
4168  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4169  _clear_annotations(&m_pending_anchors);
4170  }
4171  else
4172  {
4173  _c4err("too many anchors");
4174  }
4175  }
4176 }
4177 
4178 template<class EventHandler>
4179 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4180 {
4181  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4182  if(m_pending_tags.num_entries)
4183  {
4184  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4185  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4186  {
4187  _check_tag(m_pending_tags.annotations[0].str);
4188  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4189  _clear_annotations(&m_pending_tags);
4190  }
4191  else
4192  {
4193  _c4err("too many tags");
4194  }
4195  }
4196  if(m_pending_anchors.num_entries)
4197  {
4198  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4199  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4200  {
4201  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4202  _clear_annotations(&m_pending_anchors);
4203  }
4204  else
4205  {
4206  _c4err("too many anchors");
4207  }
4208  }
4209 }
4210 
4211 template<class EventHandler>
4212 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4213 {
4214  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4215  if(m_pending_tags.num_entries == 2)
4216  {
4217  _c4dbgp("2 tags, setting entry 0");
4218  _check_tag(m_pending_tags.annotations[0].str);
4219  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4220  }
4221  else if(m_pending_tags.num_entries == 1)
4222  {
4223  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4224  if(m_pending_tags.annotations[0].line < current_line)
4225  {
4226  _c4dbgp("...tag is for the map. setting it.");
4227  _check_tag(m_pending_tags.annotations[0].str);
4228  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4229  _clear_annotations(&m_pending_tags);
4230  }
4231  }
4232  //
4233  if(m_pending_anchors.num_entries == 2)
4234  {
4235  _c4dbgp("2 anchors, setting entry 0");
4236  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4237  }
4238  else if(m_pending_anchors.num_entries == 1)
4239  {
4240  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4241  if(m_pending_anchors.annotations[0].line < current_line)
4242  {
4243  _c4dbgp("...anchor is for the map. setting it.");
4244  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4245  _clear_annotations(&m_pending_anchors);
4246  }
4247  }
4248 }
4249 
4250 template<class EventHandler>
4251 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4252 {
4253  _c4dbgp("annotations_before_start_mapblck_as_key");
4254  if(m_pending_tags.num_entries == 2)
4255  {
4256  _check_tag(m_pending_tags.annotations[0].str);
4257  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4258  }
4259  if(m_pending_anchors.num_entries == 2)
4260  {
4261  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4262  }
4263 }
4264 
4265 template<class EventHandler>
4266 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4267 {
4268  _c4dbgp("annotations_after_start_mapblck");
4269  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4270  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4271  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4272  {
4273  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4274  switch(m_pending_tags.num_entries)
4275  {
4276  case 1u:
4277  _check_tag(m_pending_tags.annotations[0].str);
4278  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4279  _clear_annotations(&m_pending_tags);
4280  break;
4281  case 2u:
4282  _check_tag(m_pending_tags.annotations[1].str);
4283  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4284  _clear_annotations(&m_pending_tags);
4285  break;
4286  }
4287  switch(m_pending_anchors.num_entries)
4288  {
4289  case 1u:
4290  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4291  _clear_annotations(&m_pending_anchors);
4292  break;
4293  case 2u:
4294  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4295  _clear_annotations(&m_pending_anchors);
4296  break;
4297  }
4298  }
4299  _set_indentation(key_indentation);
4300 }
4301 
4302 template<class EventHandler>
4303 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4304 {
4305  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4306  // select the left-most annotation on the max line
4307  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4308  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4309  {
4310  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4311  if(ann.line > curr->line)
4312  curr = &ann;
4313  else if(ann.indentation < curr->indentation)
4314  curr = &ann;
4315  }
4316  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4317  {
4318  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4319  if(ann.line > curr->line)
4320  curr = &ann;
4321  else if(ann.indentation < curr->indentation)
4322  curr = &ann;
4323  }
4324  return curr->line < val_line ? val_indentation : curr->indentation;
4325 }
4326 
4327 template<class EventHandler>
4328 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4329 {
4330  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4331  const size_t pos = rem.find('#');
4332  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4333  if(pos == npos) // no comments
4334  {
4335  m_evt_handler->add_directive(rem);
4336  _line_progressed(rem.len);
4337  }
4338  else
4339  {
4340  csubstr to_comment = rem.first(pos);
4341  csubstr trimmed = to_comment.trimr(" \t");
4342  m_evt_handler->add_directive(trimmed);
4343  _line_progressed(pos);
4344  _skip_comment();
4345  }
4346 }
4347 
4348 template<class EventHandler>
4349 bool ParseEngine<EventHandler>::_handle_bom()
4350 {
4351  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4352  if(rem.len)
4353  {
4354  const csubstr rest = rem.sub(1);
4355  // https://yaml.org/spec/1.2.2/#52-character-encodings
4356  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4357  if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4358  {
4359  _c4dbgp("byte order mark: UTF32BE");
4360  _handle_bom(UTF32BE);
4361  _line_progressed(4);
4362  return true;
4363  }
4364  else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4365  {
4366  _c4dbgp("byte order mark: UTF32LE");
4367  _handle_bom(UTF32LE);
4368  _line_progressed(4);
4369  return true;
4370  }
4371  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4372  {
4373  _c4dbgp("byte order mark: UTF16BE");
4374  _handle_bom(UTF16BE);
4375  _line_progressed(2);
4376  return true;
4377  }
4378  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4379  {
4380  _c4dbgp("byte order mark: UTF16LE");
4381  _handle_bom(UTF16LE);
4382  _line_progressed(2);
4383  return true;
4384  }
4385  else if(rem.begins_with("\xef\xbb\xbf"))
4386  {
4387  _c4dbgp("byte order mark: UTF8");
4388  _handle_bom(UTF8);
4389  _line_progressed(3);
4390  return true;
4391  }
4392  #undef _rymlisascii
4393  }
4394  return false;
4395 }
4396 
4397 template<class EventHandler>
4398 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4399 {
4400  if(m_encoding == NOBOM)
4401  {
4402  const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4403  if(enc == UTF8 || is_beginning_of_file)
4404  m_encoding = enc;
4405  else
4406  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4407  }
4408  else if(enc != m_encoding)
4409  {
4410  _c4err("byte order mark can only be set once");
4411  }
4412 }
4413 
4414 
4415 //-----------------------------------------------------------------------------
4416 
4417 template<class EventHandler>
4418 void ParseEngine<EventHandler>::_handle_seq_json()
4419 {
4420 seqjson_start:
4421  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4422 
4423  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4424  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4425  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4426  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4427  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4428 
4429  _handle_flow_skip_whitespace();
4430  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4431  if(!rem.len)
4432  goto seqjson_again;
4433 
4434  if(has_any(RVAL))
4435  {
4436  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4437  const char first = rem.str[0];
4438  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4439  switch(first)
4440  {
4441  case '"':
4442  {
4443  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4444  ScannedScalar sc = _scan_scalar_dquot();
4445  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4446  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4447  addrem_flags(RNXT, RVAL);
4448  break;
4449  }
4450  case '[':
4451  {
4452  _c4dbgp("seqjson[RVAL]: start child seqjson");
4453  addrem_flags(RNXT, RVAL);
4454  m_evt_handler->begin_seq_val_flow();
4455  addrem_flags(RVAL, RNXT);
4456  _line_progressed(1);
4457  break;
4458  }
4459  case '{':
4460  {
4461  _c4dbgp("seqjson[RVAL]: start child mapjson");
4462  addrem_flags(RNXT, RVAL);
4463  m_evt_handler->begin_map_val_flow();
4464  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4465  _line_progressed(1);
4466  goto seqjson_finish;
4467  }
4468  case ']': // this happens on a trailing comma like ", ]"
4469  {
4470  _c4dbgp("seqjson[RVAL]: end!");
4471  rem_flags(RSEQ);
4472  m_evt_handler->end_seq();
4473  _line_progressed(1);
4474  if(!has_all(RSEQ|FLOW))
4475  goto seqjson_finish;
4476  break;
4477  }
4478  default:
4479  {
4480  ScannedScalar sc;
4481  if(_scan_scalar_seq_json(&sc))
4482  {
4483  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4484  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4485  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4486  addrem_flags(RNXT, RVAL);
4487  }
4488  else
4489  {
4490  _c4err("parse error");
4491  }
4492  }
4493  }
4494  }
4495  else // RNXT
4496  {
4497  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4498  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4499  const char first = rem.str[0];
4500  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4501  switch(first)
4502  {
4503  case ',':
4504  {
4505  _c4dbgp("seqjson[RNXT]: expect next val");
4506  addrem_flags(RVAL, RNXT);
4507  m_evt_handler->add_sibling();
4508  _line_progressed(1);
4509  break;
4510  }
4511  case ']':
4512  {
4513  _c4dbgp("seqjson[RNXT]: end!");
4514  m_evt_handler->end_seq();
4515  _line_progressed(1);
4516  goto seqjson_finish;
4517  }
4518  default:
4519  _c4err("parse error");
4520  }
4521  }
4522 
4523  seqjson_again:
4524  _c4dbgt("seqjson: go again", 0);
4525  if(_finished_line())
4526  {
4527  if(C4_LIKELY(!_finished_file()))
4528  {
4529  _line_ended();
4530  _scan_line();
4531  _c4dbgnextline();
4532  }
4533  else
4534  {
4535  _c4err("missing terminating ]");
4536  }
4537  }
4538  goto seqjson_start;
4539 
4540  seqjson_finish:
4541  _c4dbgp("seqjson: finish");
4542 }
4543 
4544 
4545 //-----------------------------------------------------------------------------
4546 
// Handle the contents of a flow map while parsing JSON.
//
// This is a state machine driven by the flags of the current state:
// exactly one of RKEY (expecting a key), RKCL (expecting the colon
// after a key), RVAL (expecting a value) and RNXT (expecting what
// comes after a value) must be set. Each iteration skips
// blanks/comments, looks at the first character of the remainder of
// the line, and acts on it:
//
//  - RKEY: '"' scans a double-quoted key; '}' (after a trailing comma)
//    ends the map and leaves this function; anything else is a parse
//    error.
//  - RVAL: '"' scans a double-quoted value; '[' starts a child
//    sequence and leaves this function with the sequence flags set;
//    '{' starts a child map, which is then handled by this same loop;
//    anything else must scan as a plain scalar, or else a parse error
//    is raised.
//  - RKCL: only ':' is accepted.
//  - RNXT: ',' adds a sibling and expects the next key; '}' ends the
//    map and leaves this function; anything else is a parse error.
//
// When the line is exhausted the next line is scanned; reaching the
// end of the file inside the map raises "missing terminating }".
4547 template<class EventHandler>
4548 void ParseEngine<EventHandler>::_handle_map_json()
4549 {
4550 mapjson_start:
4551  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4552 
4553  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4554  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4555  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4556  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4557  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4558 
4559  _handle_flow_skip_whitespace();
4560  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: go fetch the next one
4561  if(!rem.len)
4562  goto mapjson_again;
4563 
4564  if(has_any(RKEY))
4565  {
4566  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4567  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4568  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4569  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4570  const char first = rem.str[0];
4571  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4572  switch(first)
4573  {
4574  case '"':
4575  {
4576  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4577  ScannedScalar sc = _scan_scalar_dquot();
4578  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4579  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
      // the key is set: the colon must come next
4580  addrem_flags(RKCL, RKEY);
4581  break;
4582  }
4583  case '}': // this happens on a trailing comma like ", }"
4584  {
4585  _c4dbgp("mapjson[RKEY]: end!");
4586  m_evt_handler->end_map();
4587  _line_progressed(1);
4588  goto mapjson_finish;
4589  }
4590  default:
4591  _c4err("parse error");
4592  }
4593  }
4594  else if(has_any(RVAL))
4595  {
4596  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4597  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4598  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4599  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4600  const char first = rem.str[0];
4601  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4602  switch(first)
4603  {
4604  case '"':
4605  {
4606  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4607  ScannedScalar sc = _scan_scalar_dquot();
4608  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4609  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4610  addrem_flags(RNXT, RVAL);
4611  break;
4612  }
4613  case '[':
4614  {
4615  _c4dbgp("mapjson[RVAL]: start val seqjson");
      // RNXT is set on the current state before the child begins; the
      // flags set after begin_seq_val_flow() turn the state into a
      // sequence expecting a value
4616  addrem_flags(RNXT, RVAL);
4617  m_evt_handler->begin_seq_val_flow();
4618  _set_indentation(m_evt_handler->m_parent->indref);
4619  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4620  _line_progressed(1);
      // the state now has sequence flags: leave this function
4621  goto mapjson_finish;
4622  }
4623  case '{':
4624  {
4625  _c4dbgp("mapjson[RVAL]: start val mapjson");
4626  addrem_flags(RNXT, RVAL);
4627  m_evt_handler->begin_map_val_flow();
4628  _set_indentation(m_evt_handler->m_parent->indref);
4629  addrem_flags(RKEY, RNXT);
4630  _line_progressed(1);
4631  // keep going in this function
4632  break;
4633  }
4634  default:
4635  {
4636  ScannedScalar sc;
4637  if(_scan_scalar_map_json(&sc))
4638  {
4639  _c4dbgp("mapjson[RVAL]: plain scalar.");
4640  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4641  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4642  addrem_flags(RNXT, RVAL);
4643  }
4644  else
4645  {
4646  _c4err("parse error");
4647  }
4648  break;
4649  }
4650  }
4651  }
4652  else if(has_any(RKCL)) // read the key colon
4653  {
4654  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4655  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4656  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4657  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4658  const char first = rem.str[0];
4659  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4660  if(first == ':')
4661  {
4662  _c4dbgp("mapjson[RKCL]: found the colon");
4663  addrem_flags(RVAL, RKCL);
4664  _line_progressed(1);
4665  }
4666  else
4667  {
4668  _c4err("parse error");
4669  }
4670  }
4671  else if(has_any(RNXT))
4672  {
4673  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4674  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4675  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4676  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4677  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4678  if(rem.begins_with(','))
4679  {
4680  _c4dbgp("mapjson[RNXT]: expect next keyval");
4681  m_evt_handler->add_sibling();
4682  addrem_flags(RKEY, RNXT);
4683  _line_progressed(1);
4684  }
4685  else if(rem.begins_with('}'))
4686  {
4687  _c4dbgp("mapjson[RNXT]: end!");
4688  m_evt_handler->end_map();
4689  _line_progressed(1);
4690  goto mapjson_finish;
4691  }
4692  else
4693  {
4694  _c4err("parse error");
4695  }
4696  }
4697 
      // end of one iteration: move to the next line if this one is
      // exhausted, then start over
4698  mapjson_again:
4699  _c4dbgt("mapjson: go again", 0);
4700  if(_finished_line())
4701  {
4702  if(C4_LIKELY(!_finished_file()))
4703  {
4704  _line_ended();
4705  _scan_line();
4706  _c4dbgnextline();
4707  }
4708  else
4709  {
4710  _c4err("missing terminating }");
4711  }
4712  }
4713  goto mapjson_start;
4714 
4715  mapjson_finish:
4716  _c4dbgp("mapjson: finish");
4717 }
4718 
4719 
4720 //-----------------------------------------------------------------------------
4721 
4722 template<class EventHandler>
4723 void ParseEngine<EventHandler>::_handle_seq_imap()
4724 {
4725 seqimap_start:
4726  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4727 
4728  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4729  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4730  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4731  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4732  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4733 
4734  _handle_flow_skip_whitespace();
4735  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4736  if(!rem.len)
4737  goto seqimap_again;
4738 
4739  if(has_any(RVAL))
4740  {
4741  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4742  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4743  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4744  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4745  const char first = rem.str[0];
4746  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4747  ScannedScalar sc;
4748  if(first == '\'')
4749  {
4750  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4751  sc = _scan_scalar_squot();
4752  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4753  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4754  m_evt_handler->end_map();
4755  goto seqimap_finish;
4756  }
4757  else if(first == '"')
4758  {
4759  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4760  sc = _scan_scalar_dquot();
4761  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4762  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4763  m_evt_handler->end_map();
4764  goto seqimap_finish;
4765  }
4766  // block scalars (ie | and >) cannot appear in flow containers
4767  else if(_scan_scalar_plain_map_flow(&sc))
4768  {
4769  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4770  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4771  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4772  m_evt_handler->end_map();
4773  goto seqimap_finish;
4774  }
4775  else if(first == '[')
4776  {
4777  _c4dbgp("seqimap[RVAL]: start child seqflow");
4778  addrem_flags(RNXT, RVAL);
4779  m_evt_handler->begin_seq_val_flow();
4780  addrem_flags(RVAL, RNXT|RSEQIMAP);
4781  _set_indentation(m_evt_handler->m_parent->indref);
4782  _line_progressed(1);
4783  goto seqimap_finish;
4784  }
4785  else if(first == '{')
4786  {
4787  _c4dbgp("seqimap[RVAL]: start child mapflow");
4788  addrem_flags(RNXT, RVAL);
4789  m_evt_handler->begin_map_val_flow();
4790  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4791  _set_indentation(m_evt_handler->m_parent->indref);
4792  _line_progressed(1);
4793  goto seqimap_finish;
4794  }
4795  else if(first == ',' || first == ']')
4796  {
4797  _c4dbgp("seqimap[RVAL]: finish without val.");
4798  m_evt_handler->set_val_scalar_plain_empty();
4799  m_evt_handler->end_map();
4800  goto seqimap_finish;
4801  }
4802  else if(first == '&')
4803  {
4804  csubstr anchor = _scan_anchor();
4805  _c4dbgp("seqimap[RVAL]: anchor!");
4806  m_evt_handler->set_val_anchor(anchor);
4807  }
4808  else if(first == '*')
4809  {
4810  csubstr ref = _scan_ref_seq();
4811  _c4dbgp("seqimap[RVAL]: ref!");
4812  m_evt_handler->set_val_ref(ref);
4813  addrem_flags(RNXT, RVAL);
4814  }
4815  else
4816  {
4817  _c4err("parse error");
4818  }
4819  }
4820  else if(has_any(RNXT))
4821  {
4822  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4823  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4824  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4825  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4826  const char first = rem.str[0];
4827  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4828  if(first == ',' || first == ']')
4829  {
4830  // we may get here because a map or a seq started and we
4831  // return later
4832  _c4dbgp("seqimap: done");
4833  m_evt_handler->end_map();
4834  goto seqimap_finish;
4835  }
4836  else
4837  {
4838  _c4err("parse error");
4839  }
4840  }
4841  else if(has_any(QMRK))
4842  {
4843  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4844  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4845  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4846  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4847  const char first = rem.str[0];
4848  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4849  ScannedScalar sc;
4850  if(first == '\'')
4851  {
4852  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4853  sc = _scan_scalar_squot();
4854  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4855  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4856  addrem_flags(RKCL, QMRK);
4857  goto seqimap_again;
4858  }
4859  else if(first == '"')
4860  {
4861  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4862  sc = _scan_scalar_dquot();
4863  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4864  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4865  addrem_flags(RKCL, QMRK);
4866  goto seqimap_again;
4867  }
4868  // block scalars (ie | and >) cannot appear in flow containers
4869  else if(_scan_scalar_plain_map_flow(&sc))
4870  {
4871  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4872  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4873  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4874  addrem_flags(RKCL, QMRK);
4875  goto seqimap_again;
4876  }
4877  else if(first == '[')
4878  {
4879  _c4dbgp("seqimap[QMRK]: start child seqflow");
4880  addrem_flags(RKCL, QMRK);
4881  m_evt_handler->begin_seq_key_flow();
4882  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4883  _set_indentation(m_evt_handler->m_parent->indref);
4884  _line_progressed(1);
4885  goto seqimap_finish;
4886  }
4887  else if(first == '{')
4888  {
4889  _c4dbgp("seqimap[QMRK]: start child mapflow");
4890  addrem_flags(RKCL, QMRK);
4891  m_evt_handler->begin_map_key_flow();
4892  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4893  _set_indentation(m_evt_handler->m_parent->indref);
4894  _line_progressed(1);
4895  goto seqimap_finish;
4896  }
4897  else if(first == ',' || first == ']')
4898  {
4899  _c4dbgp("seqimap[QMRK]: finish without key.");
4900  m_evt_handler->set_key_scalar_plain_empty();
4901  m_evt_handler->set_val_scalar_plain_empty();
4902  m_evt_handler->end_map();
4903  goto seqimap_finish;
4904  }
4905  else if(first == '&')
4906  {
4907  csubstr anchor = _scan_anchor();
4908  _c4dbgp("seqimap[QMRK]: anchor!");
4909  m_evt_handler->set_key_anchor(anchor);
4910  }
4911  else if(first == '*')
4912  {
4913  csubstr ref = _scan_ref_seq();
4914  _c4dbgp("seqimap[QMRK]: ref!");
4915  m_evt_handler->set_key_ref(ref);
4916  addrem_flags(RKCL, QMRK);
4917  }
4918  else
4919  {
4920  _c4err("parse error");
4921  }
4922  }
4923  else if(has_any(RKCL))
4924  {
4925  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4926  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4927  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4928  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4929  const char first = rem.str[0];
4930  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4931  if(first == ':')
4932  {
4933  _c4dbgp("seqimap[RKCL]: found ':'");
4934  addrem_flags(RVAL, RKCL);
4935  _line_progressed(1);
4936  goto seqimap_again;
4937  }
4938  else if(first == ',' || first == ']')
4939  {
4940  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4941  m_evt_handler->set_val_scalar_plain_empty();
4942  m_evt_handler->end_map();
4943  goto seqimap_finish;
4944  }
4945  else
4946  {
4947  _c4err("parse error");
4948  }
4949  }
4950 
4951  seqimap_again:
4952  _c4dbgt("seqimap: go again", 0);
4953  if(_finished_line())
4954  {
4955  if(C4_LIKELY(!_finished_file()))
4956  {
4957  _line_ended();
4958  _scan_line();
4959  _c4dbgnextline();
4960  }
4961  else
4962  {
4963  _c4err("parse error");
4964  }
4965  }
4966  goto seqimap_start;
4967 
4968  seqimap_finish:
4969  _c4dbgp("seqimap: finish");
4970 }
4971 
4972 
4973 //-----------------------------------------------------------------------------
4974 
/** Parse the contents of a flow sequence (`[...]`).
 *
 * On entry the current state must have RSEQ and FLOW set, RKEY clear,
 * exactly one of RVAL/RNXT set, and an indref different from npos (see
 * the asserts below). The function loops through `goto seqflow_start`,
 * handling one token (or small token group) per iteration and scanning
 * further lines when the current one is exhausted, until a branch
 * jumps to `seqflow_finish`. That happens when:
 *  - the sequence is closed by `]`,
 *  - a child flow map is opened by `{`, or
 *  - an implicit map entry is detected (`:` or `?`), in which case
 *    RSEQIMAP is set on the new state.
 * A nested `[` does not leave the function: the loop simply continues.
 * Reaching the end of the file before any of these exits reports
 * "missing terminating ]".
 */
4975 template<class EventHandler>
4976 void ParseEngine<EventHandler>::_handle_seq_flow()
4977 {
4978 seqflow_start:
4979  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4980 
      // state invariants; these are re-checked on every loop iteration
4981  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4982  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4983  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4984  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4985  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4986  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
4987 
4988  _handle_flow_skip_whitespace();
4989  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
      // nothing left on this line: go fetch the next one
4990  if(!m_evt_handler->m_curr->line_contents.rem.len)
4991  goto seqflow_again;
4992 
      // RVAL: the next token is expected to start a value
4993  if(has_any(RVAL))
4994  {
4995  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4996  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
4997  ScannedScalar sc;
      // scalars: scan, filter, hand to the event handler as the
      // value, then switch from RVAL to RNXT
4998  if(first == '\'')
4999  {
5000  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5001  sc = _scan_scalar_squot();
5002  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5003  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5004  addrem_flags(RNXT, RVAL);
5005  }
5006  else if(first == '"')
5007  {
5008  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5009  sc = _scan_scalar_dquot();
5010  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5011  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5012  addrem_flags(RNXT, RVAL);
5013  }
5014  // block scalars (ie | and >) cannot appear in flow containers
      // NOTE: the plain-scalar scan is attempted before the
      // structural characters tested below, so the order of these
      // branches matters. Presumably it returns false (without
      // consuming input) when no plain scalar starts here -- TODO
      // confirm against _scan_scalar_plain_seq_flow().
5015  else if(_scan_scalar_plain_seq_flow(&sc))
5016  {
5017  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5018  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5019  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5020  addrem_flags(RNXT, RVAL);
5021  }
      // nested flow sequence: the flags are flipped to RNXT before
      // begin_seq_val_flow() and back to RVAL after it. Presumably
      // the handler pushes a new state in between, leaving the
      // parent in RNXT and starting the child in RVAL -- TODO
      // confirm. No goto here: the child is handled by this loop.
5022  else if(first == '[')
5023  {
5024  _c4dbgp("seqflow[RVAL]: start child seqflow");
5025  addrem_flags(RNXT, RVAL);
5026  m_evt_handler->begin_seq_val_flow();
5027  _set_indentation(m_evt_handler->m_parent->indref);
5028  addrem_flags(RVAL, RNXT);
5029  _line_progressed(1);
5030  }
      // nested flow map: same flag dance, but the new state becomes
      // RMAP|RKEY and this function is left
5031  else if(first == '{')
5032  {
5033  _c4dbgp("seqflow[RVAL]: start child mapflow");
5034  addrem_flags(RNXT, RVAL);
5035  m_evt_handler->begin_map_val_flow();
5036  _set_indentation(m_evt_handler->m_parent->indref);
5037  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5038  _line_progressed(1);
5039  goto seqflow_finish;
5040  }
5041  else if(first == ']') // this happens on a trailing comma like ", ]"
5042  {
5043  _c4dbgp("seqflow[RVAL]: end!");
5044  _line_progressed(1);
5045  m_evt_handler->end_seq();
5046  goto seqflow_finish;
5047  }
      // alias: counts as the value, so move on to RNXT
5048  else if(first == '*')
5049  {
5050  csubstr ref = _scan_ref_seq();
5051  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5052  m_evt_handler->set_val_ref(ref);
5053  addrem_flags(RNXT, RVAL);
5054  }
      // anchor: stays in RVAL since the anchored value still has to
      // be read; if a comma follows right away, an empty scalar is
      // emitted and a sibling is added
5055  else if(first == '&')
5056  {
5057  csubstr anchor = _scan_anchor();
5058  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5059  m_evt_handler->set_val_anchor(anchor);
5060  if(_maybe_scan_following_comma())
5061  {
5062  _c4dbgp("seqflow[RVAL]: empty scalar!");
5063  m_evt_handler->set_val_scalar_plain_empty();
5064  m_evt_handler->add_sibling();
5065  }
5066  }
      // tag: handled like the anchor above, after checking the tag
5067  else if(first == '!')
5068  {
5069  csubstr tag = _scan_tag();
5070  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5071  _check_tag(tag);
5072  m_evt_handler->set_val_tag(tag);
5073  if(_maybe_scan_following_comma())
5074  {
5075  _c4dbgp("seqflow[RVAL]: empty scalar!");
5076  m_evt_handler->set_val_scalar_plain_empty();
5077  m_evt_handler->add_sibling();
5078  }
5079  }
      // ':' where a value was expected: begin a flow map with an
      // empty key; the new state is marked RSEQIMAP|RVAL
5080  else if(first == ':')
5081  {
5082  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5083  addrem_flags(RNXT, RVAL);
5084  m_evt_handler->begin_map_val_flow();
5085  _set_indentation(m_evt_handler->m_parent->indref);
5086  m_evt_handler->set_key_scalar_plain_empty();
5087  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5088  _line_progressed(1);
5089  goto seqflow_finish;
5090  }
      // '?': explicit key; begin a flow map whose state is marked
      // RSEQIMAP|QMRK and remember that a '?' was seen
5091  else if(first == '?')
5092  {
5093  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5094  addrem_flags(RNXT, RVAL);
5095  m_was_inside_qmrk = true;
5096  m_evt_handler->begin_map_val_flow();
5097  _set_indentation(m_evt_handler->m_parent->indref);
5098  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5099  _line_progressed(1);
5100  _maybe_skip_whitespace_tokens();
5101  goto seqflow_finish;
5102  }
5103  else
5104  {
5105  _c4err("parse error");
5106  }
5107  }
      // RNXT: a value was just completed; only ',', ']' or ':' may follow
5108  else // RNXT
5109  {
5110  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5111  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5112  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5113  if(first == ',')
5114  {
5115  _c4dbgp("seqflow[RNXT]: expect next val");
5116  addrem_flags(RVAL, RNXT);
5117  m_evt_handler->add_sibling();
5118  _line_progressed(1);
5119  }
5120  else if(first == ']')
5121  {
5122  _c4dbgp("seqflow[RNXT]: end!");
5123  m_evt_handler->end_seq();
5124  _line_progressed(1);
5125  goto seqflow_finish;
5126  }
      // ':' after a completed value: the handler is told that the
      // value was actually the first key of a new flow map
5127  else if(first == ':')
5128  {
5129  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5130  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5131  _set_indentation(m_evt_handler->m_parent->indref);
5132  _line_progressed(1);
5133  addrem_flags(RSEQIMAP|RVAL, RNXT);
5134  goto seqflow_finish;
5135  }
5136  else
5137  {
5138  _c4err("parse error");
5139  }
5140  }
5141 
      // loop tail: if the current line is exhausted, load the next one
      // (or fail at end of file), then dispatch again
5142  seqflow_again:
5143  _c4dbgt("seqflow: go again", 0);
5144  if(_finished_line())
5145  {
5146  if(C4_LIKELY(!_finished_file()))
5147  {
5148  _line_ended();
5149  _scan_line();
5150  _c4dbgnextline();
5151  }
5152  else
5153  {
5154  _c4err("missing terminating ]");
5155  }
5156  }
5157  goto seqflow_start;
5158 
5159  seqflow_finish:
5160  _c4dbgp("seqflow: finish");
5161 }
5162 
5163 
5164 //-----------------------------------------------------------------------------
5165 
5166 template<class EventHandler>
5167 void ParseEngine<EventHandler>::_handle_map_flow()
5168 {
5169 mapflow_start:
5170  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5171 
5172  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5173  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5174  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5175  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5176 
5177  _handle_flow_skip_whitespace();
5178  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5179  if(!rem.len)
5180  goto mapflow_again;
5181 
5182  if(has_any(RKEY))
5183  {
5184  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5185  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5186  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5187  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5188  const char first = rem.str[0];
5189  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5190  ScannedScalar sc;
5191  if(first == '\'')
5192  {
5193  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5194  sc = _scan_scalar_squot();
5195  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5196  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5197  addrem_flags(RKCL, RKEY|QMRK);
5198  }
5199  else if(first == '"')
5200  {
5201  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5202  sc = _scan_scalar_dquot();
5203  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5204  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5205  addrem_flags(RKCL, RKEY|QMRK);
5206  }
5207  // block scalars (ie | and >) cannot appear in flow containers
5208  else if(_scan_scalar_plain_map_flow(&sc))
5209  {
5210  _c4dbgp("mapflow[RKEY]: plain scalar");
5211  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5212  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5213  addrem_flags(RKCL, RKEY|QMRK);
5214  }
5215  else if(first == '?')
5216  {
5217  _c4dbgp("mapflow[RKEY]: explicit key");
5218  _line_progressed(1);
5219  addrem_flags(QMRK, RKEY);
5220  _maybe_skip_whitespace_tokens();
5221  }
5222  else if(first == ':')
5223  {
5224  _c4dbgp("mapflow[RKEY]: setting empty key");
5225  m_evt_handler->set_key_scalar_plain_empty();
5226  addrem_flags(RVAL, RKEY|QMRK);
5227  _line_progressed(1);
5228  _maybe_skip_whitespace_tokens();
5229  }
5230  else if(first == ',')
5231  {
5232  _c4dbgp("mapflow[RKEY]: empty key+val!");
5233  m_evt_handler->set_key_scalar_plain_empty();
5234  m_evt_handler->set_val_scalar_plain_empty();
5235  addrem_flags(RNXT, RKEY|QMRK);
5236  // keep going in this function
5237  }
5238  else if(first == '}') // this happens on a trailing comma like ", }"
5239  {
5240  _c4dbgp("mapflow[RKEY]: end!");
5241  m_evt_handler->end_map();
5242  _line_progressed(1);
5243  goto mapflow_finish;
5244  }
5245  else if(first == '&')
5246  {
5247  csubstr anchor = _scan_anchor();
5248  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5249  m_evt_handler->set_key_anchor(anchor);
5250  }
5251  else if(first == '*')
5252  {
5253  csubstr ref = _scan_ref_map();
5254  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5255  m_evt_handler->set_key_ref(ref);
5256  addrem_flags(RKCL, RKEY);
5257  }
5258  else if(first == '[')
5259  {
5260  // RYML's tree cannot store container keys, but that's
5261  // handled inside the tree sink. Other sink types may be
5262  // able to handle it.
5263  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5264  addrem_flags(RKCL, RKEY);
5265  m_evt_handler->begin_seq_key_flow();
5266  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5267  _set_indentation(m_evt_handler->m_parent->indref);
5268  _line_progressed(1);
5269  goto mapflow_finish;
5270  }
5271  else if(first == '{')
5272  {
5273  // RYML's tree cannot store container keys, but that's
5274  // handled inside the tree sink. Other sink types may be
5275  // able to handle it.
5276  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5277  addrem_flags(RKCL, RKEY);
5278  m_evt_handler->begin_map_key_flow();
5279  addrem_flags(RKEY, RVAL|RKCL);
5280  _set_indentation(m_evt_handler->m_parent->indref);
5281  _line_progressed(1);
5282  // keep going in this function
5283  }
5284  else if(first == '!')
5285  {
5286  csubstr tag = _scan_tag();
5287  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5288  _check_tag(tag);
5289  m_evt_handler->set_key_tag(tag);
5290  }
5291  else
5292  {
5293  _c4err("parse error");
5294  }
5295  }
5296  else if(has_any(RKCL)) // read the key colon
5297  {
5298  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5299  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5300  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5301  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5302  const char first = rem.str[0];
5303  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5304  if(first == ':')
5305  {
5306  _c4dbgp("mapflow[RKCL]: found the colon");
5307  addrem_flags(RVAL, RKCL);
5308  _line_progressed(1);
5309  }
5310  else if(first == '}')
5311  {
5312  _c4dbgp("mapflow[RKCL]: end with missing val!");
5313  addrem_flags(RVAL, RKCL);
5314  m_evt_handler->set_val_scalar_plain_empty();
5315  m_evt_handler->end_map();
5316  _line_progressed(1);
5317  goto mapflow_finish;
5318  }
5319  else if(first == ',')
5320  {
5321  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5322  m_evt_handler->set_val_scalar_plain_empty();
5323  m_evt_handler->add_sibling();
5324  addrem_flags(RKEY, RKCL);
5325  _line_progressed(1);
5326  }
5327  else
5328  {
5329  _c4err("parse error");
5330  }
5331  }
5332  else if(has_any(RVAL))
5333  {
5334  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5335  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5336  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5337  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5338  const char first = rem.str[0];
5339  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5340  ScannedScalar sc;
5341  if(first == '\'')
5342  {
5343  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5344  sc = _scan_scalar_squot();
5345  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5346  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5347  addrem_flags(RNXT, RVAL);
5348  }
5349  else if(first == '"')
5350  {
5351  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5352  sc = _scan_scalar_dquot();
5353  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5354  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5355  addrem_flags(RNXT, RVAL);
5356  }
5357  // block scalars (ie | and >) cannot appear in flow containers
5358  else if(_scan_scalar_plain_map_flow(&sc))
5359  {
5360  _c4dbgp("mapflow[RVAL]: plain scalar.");
5361  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5362  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5363  addrem_flags(RNXT, RVAL);
5364  }
5365  else if(first == '[')
5366  {
5367  _c4dbgp("mapflow[RVAL]: start val seqflow");
5368  addrem_flags(RNXT, RVAL);
5369  m_evt_handler->begin_seq_val_flow();
5370  _set_indentation(m_evt_handler->m_parent->indref);
5371  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5372  _line_progressed(1);
5373  goto mapflow_finish;
5374  }
5375  else if(first == '{')
5376  {
5377  _c4dbgp("mapflow[RVAL]: start val mapflow");
5378  addrem_flags(RNXT, RVAL);
5379  m_evt_handler->begin_map_val_flow();
5380  _set_indentation(m_evt_handler->m_parent->indref);
5381  addrem_flags(RKEY, RNXT);
5382  _line_progressed(1);
5383  // keep going in this function
5384  }
5385  else if(first == '}')
5386  {
5387  _c4dbgp("mapflow[RVAL]: end!");
5388  m_evt_handler->set_val_scalar_plain_empty();
5389  m_evt_handler->end_map();
5390  _line_progressed(1);
5391  goto mapflow_finish;
5392  }
5393  else if(first == ',')
5394  {
5395  _c4dbgp("mapflow[RVAL]: empty val!");
5396  m_evt_handler->set_val_scalar_plain_empty();
5397  addrem_flags(RNXT, RVAL);
5398  // keep going in this function
5399  }
5400  else if(first == '*')
5401  {
5402  csubstr ref = _scan_ref_map();
5403  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5404  m_evt_handler->set_val_ref(ref);
5405  addrem_flags(RNXT, RVAL);
5406  }
5407  else if(first == '&')
5408  {
5409  csubstr anchor = _scan_anchor();
5410  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5411  m_evt_handler->set_val_anchor(anchor);
5412  }
5413  else if(first == '!')
5414  {
5415  csubstr tag = _scan_tag();
5416  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5417  _check_tag(tag);
5418  m_evt_handler->set_val_tag(tag);
5419  }
5420  else
5421  {
5422  _c4err("parse error");
5423  }
5424  }
5425  else if(has_any(RNXT))
5426  {
5427  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5428  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5429  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5430  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5431  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5432  if(rem.begins_with(','))
5433  {
5434  _c4dbgp("mapflow[RNXT]: expect next keyval");
5435  m_evt_handler->add_sibling();
5436  addrem_flags(RKEY, RNXT);
5437  _line_progressed(1);
5438  }
5439  else if(rem.begins_with('}'))
5440  {
5441  _c4dbgp("mapflow[RNXT]: end!");
5442  m_evt_handler->end_map();
5443  _line_progressed(1);
5444  goto mapflow_finish;
5445  }
5446  else
5447  {
5448  _c4err("parse error");
5449  }
5450  }
5451  else if(has_any(QMRK))
5452  {
5453  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5454  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5455  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5456  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5457  const char first = rem.str[0];
5458  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5459  ScannedScalar sc;
5460  if(first == '\'')
5461  {
5462  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5463  sc = _scan_scalar_squot();
5464  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5465  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5466  addrem_flags(RKCL, QMRK);
5467  }
5468  else if(first == '"')
5469  {
5470  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5471  sc = _scan_scalar_dquot();
5472  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5473  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5474  addrem_flags(RKCL, QMRK);
5475  }
5476  // block scalars (ie | and >) cannot appear in flow containers
5477  else if(_scan_scalar_plain_map_flow(&sc))
5478  {
5479  _c4dbgp("mapflow[QMRK]: plain scalar");
5480  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5481  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5482  addrem_flags(RKCL, QMRK);
5483  }
5484  else if(first == ':')
5485  {
5486  _c4dbgp("mapflow[QMRK]: setting empty key");
5487  m_evt_handler->set_key_scalar_plain_empty();
5488  addrem_flags(RVAL, QMRK);
5489  _line_progressed(1);
5490  _maybe_skip_whitespace_tokens();
5491  }
5492  else if(first == '}') // this happens on a trailing comma like ", }"
5493  {
5494  _c4dbgp("mapflow[QMRK]: end!");
5495  m_evt_handler->set_key_scalar_plain_empty();
5496  m_evt_handler->set_val_scalar_plain_empty();
5497  m_evt_handler->end_map();
5498  _line_progressed(1);
5499  goto mapflow_finish;
5500  }
5501  else if(first == ',')
5502  {
5503  _c4dbgp("mapflow[QMRK]: empty key+val!");
5504  m_evt_handler->set_key_scalar_plain_empty();
5505  m_evt_handler->set_val_scalar_plain_empty();
5506  addrem_flags(RNXT, QMRK);
5507  }
5508  else if(first == '&')
5509  {
5510  csubstr anchor = _scan_anchor();
5511  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5512  m_evt_handler->set_key_anchor(anchor);
5513  }
5514  else if(first == '*')
5515  {
5516  csubstr ref = _scan_ref_map();
5517  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5518  m_evt_handler->set_key_ref(ref);
5519  addrem_flags(RKCL, QMRK);
5520  }
5521  else if(first == '[')
5522  {
5523  // RYML's tree cannot store container keys, but that's
5524  // handled inside the tree sink. Other sink types may be
5525  // able to handle it.
5526  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5527  addrem_flags(RKCL, QMRK);
5528  m_evt_handler->begin_seq_key_flow();
5529  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5530  _set_indentation(m_evt_handler->m_parent->indref);
5531  _line_progressed(1);
5532  goto mapflow_finish;
5533  }
5534  else if(first == '{')
5535  {
5536  // RYML's tree cannot store container keys, but that's
5537  // handled inside the tree sink. Other sink types may be
5538  // able to handle it.
5539  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5540  addrem_flags(RKCL, QMRK);
5541  m_evt_handler->begin_map_key_flow();
5542  _set_indentation(m_evt_handler->m_parent->indref);
5543  addrem_flags(RKEY, RKCL);
5544  _line_progressed(1);
5545  // keep going in this function
5546  }
5547  else if(first == '!')
5548  {
5549  csubstr tag = _scan_tag();
5550  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5551  _check_tag(tag);
5552  m_evt_handler->set_key_tag(tag);
5553  }
5554  else
5555  {
5556  _c4err("parse error");
5557  }
5558  }
5559 
5560  mapflow_again:
5561  _c4dbgt("mapflow: go again", 0);
5562  if(_finished_line())
5563  {
5564  if(C4_LIKELY(!_finished_file()))
5565  {
5566  _line_ended();
5567  _scan_line();
5568  _c4dbgnextline();
5569  }
5570  else
5571  {
5572  _c4err("missing terminating }");
5573  }
5574  }
5575  goto mapflow_start;
5576 
5577  mapflow_finish:
5578  _c4dbgp("mapflow: finish");
5579 }
5580 
5581 
5582 //-----------------------------------------------------------------------------
5583 
5584 template<class EventHandler>
5585 void ParseEngine<EventHandler>::_handle_seq_block()
5586 {
5587 seqblck_start:
5588  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5589 
5590  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5591  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5592  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5594 
5595  _maybe_skip_comment();
5596  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5597  if(!rem.len)
5598  goto seqblck_again;
5599 
5600  if(has_any(RVAL))
5601  {
5602  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5603  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5604  if(m_evt_handler->m_curr->at_line_beginning())
5605  {
5606  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5607  if(m_evt_handler->m_curr->indentation_ge())
5608  {
5609  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5610  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5611  rem = m_evt_handler->m_curr->line_contents.rem;
5612  if(!rem.len)
5613  goto seqblck_again;
5614  }
5615  else if(m_evt_handler->m_curr->indentation_lt())
5616  {
5617  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5618  _handle_indentation_pop_from_block_seq();
5619  goto seqblck_finish;
5620  }
5621  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5622  {
5623  _c4dbgp("seqblck[RVAL]: empty line!");
5624  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5625  goto seqblck_again;
5626  }
5627  }
5628  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5629  else
5630  {
5631  // accommodate annotation on the previous line. eg:
5632  // - &elm
5633  // foo # <-- on this line
5634  // - &elm
5635  // &foo foo: bar # <-- on this line
5636  if(rem.str[0] == ' ')
5637  {
5638  if(_handle_indentation_from_annotations())
5639  {
5640  _c4dbgp("seqblck[RVAL]: annotations!");
5641  rem = m_evt_handler->m_curr->line_contents.rem;
5642  if(!rem.len)
5643  goto seqblck_again;
5644  }
5645  }
5646  }
5647  #endif
5648  _RYML_CB_ASSERT(callbacks(), rem.len);
5649  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5650  const char first = rem.str[0];
5651  const size_t startline = m_evt_handler->m_curr->pos.line;
5652  // warning: the gcc optimizer on x86 builds is brittle with
5653  // this function:
5654  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
5655  ScannedScalar sc;
5656  if(first == '\'')
5657  {
5658  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5659  sc = _scan_scalar_squot();
5660  if(!_maybe_scan_following_colon())
5661  {
5662  _c4dbgp("seqblck[RVAL]: set as val");
5663  _handle_annotations_before_blck_val_scalar();
5664  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5665  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5666  addrem_flags(RNXT, RVAL);
5667  }
5668  else
5669  {
5670  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5671  addrem_flags(RNXT, RVAL);
5672  _handle_annotations_before_start_mapblck(startline);
5673  _handle_colon();
5674  m_evt_handler->begin_map_val_block();
5675  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5676  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5677  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5678  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5679  _maybe_skip_whitespace_tokens();
5680  goto seqblck_finish;
5681  }
5682  }
5683  else if(first == '"')
5684  {
5685  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5686  sc = _scan_scalar_dquot();
5687  if(!_maybe_scan_following_colon())
5688  {
5689  _c4dbgp("seqblck[RVAL]: set as val");
5690  _handle_annotations_before_blck_val_scalar();
5691  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5692  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5693  addrem_flags(RNXT, RVAL);
5694  }
5695  else
5696  {
5697  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5698  addrem_flags(RNXT, RVAL);
5699  _handle_annotations_before_start_mapblck(startline);
5700  _handle_colon();
5701  m_evt_handler->begin_map_val_block();
5702  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5703  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5704  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5705  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5706  _maybe_skip_whitespace_tokens();
5707  goto seqblck_finish;
5708  }
5709  }
5710  // block scalars can only appear as keys when in QMRK scope
5711  // (ie, after ? tokens), so no need to scan following colon in
5712  // here.
5713  else if(first == '|')
5714  {
5715  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5716  ScannedBlock sb;
5717  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5718  _handle_annotations_before_blck_val_scalar();
5719  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5720  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5721  addrem_flags(RNXT, RVAL);
5722  }
5723  else if(first == '>')
5724  {
5725  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5726  ScannedBlock sb;
5727  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5728  _handle_annotations_before_blck_val_scalar();
5729  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5730  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5731  addrem_flags(RNXT, RVAL);
5732  }
5733  else if(_scan_scalar_plain_seq_blck(&sc))
5734  {
5735  _c4dbgp("seqblck[RVAL]: plain scalar.");
5736  if(!_maybe_scan_following_colon())
5737  {
5738  _c4dbgp("seqblck[RVAL]: set as val");
5739  _handle_annotations_before_blck_val_scalar();
5740  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5741  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5742  addrem_flags(RNXT, RVAL);
5743  }
5744  else
5745  {
5746  if(startindent > m_evt_handler->m_curr->indref)
5747  {
5748  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5749  addrem_flags(RNXT, RVAL);
5750  _handle_annotations_before_start_mapblck(startline);
5751  _handle_colon();
5752  m_evt_handler->begin_map_val_block();
5753  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5754  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5755  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5756  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5757  _maybe_skip_whitespace_tokens();
5758  goto seqblck_finish;
5759  }
5760  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5761  {
5762  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5763  m_evt_handler->set_val_scalar_plain_empty();
5764  m_evt_handler->end_seq();
5765  m_evt_handler->add_sibling();
5766  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5767  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5768  addrem_flags(RVAL, RNXT|RKEY);
5769  _maybe_skip_whitespace_tokens();
5770  goto seqblck_finish;
5771  }
5772  else
5773  {
5774  _c4err("parse error");
5775  }
5776  }
5777  }
5778  else if(first == '[')
5779  {
5780  _c4dbgp("seqblck[RVAL]: start child seqflow");
5781  addrem_flags(RNXT, RVAL);
5782  _handle_annotations_before_blck_val_scalar();
5783  m_evt_handler->begin_seq_val_flow();
5784  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5785  _line_progressed(1);
5786  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5787  goto seqblck_finish;
5788  }
5789  else if(first == '{')
5790  {
5791  _c4dbgp("seqblck[RVAL]: start child mapflow");
5792  addrem_flags(RNXT, RVAL);
5793  _handle_annotations_before_blck_val_scalar();
5794  m_evt_handler->begin_map_val_flow();
5795  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5796  _line_progressed(1);
5797  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5798  goto seqblck_finish;
5799  }
5800  else if(first == '-')
5801  {
5802  if(startindent == m_evt_handler->m_curr->indref)
5803  {
5804  _c4dbgp("seqblck[RVAL]: prev val was empty");
5805  _handle_annotations_before_blck_val_scalar();
5806  m_evt_handler->set_val_scalar_plain_empty();
5807  // keep in RVAL, but for the next sibling
5808  m_evt_handler->add_sibling();
5809  }
5810  else
5811  {
5812  _c4dbgp("seqblck[RVAL]: start child seqblck");
5813  _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5814  addrem_flags(RNXT, RVAL);
5815  _handle_annotations_before_blck_val_scalar();
5816  m_evt_handler->begin_seq_val_block();
5817  addrem_flags(RVAL, RNXT);
5818  _save_indentation();
5819  // keep going on inside this function
5820  }
5821  _line_progressed(1);
5822  _maybe_skip_whitespace_tokens();
5823  }
5824  else if(first == ':')
5825  {
5826  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5827  addrem_flags(RNXT, RVAL);
5828  _handle_annotations_before_start_mapblck(startline);
5829  _handle_colon();
5830  m_evt_handler->begin_map_val_block();
5831  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5832  m_evt_handler->set_key_scalar_plain_empty();
5833  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5834  _line_progressed(1);
5835  _maybe_skip_whitespace_tokens();
5836  goto seqblck_finish;
5837  }
5838  else if(first == '&')
5839  {
5840  const csubstr anchor = _scan_anchor();
5841  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5842  // we need to buffer the anchors, as there may be two
5843  // consecutive anchors in here
5844  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5845  }
5846  else if(first == '*')
5847  {
5848  csubstr ref = _scan_ref_seq();
5849  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5850  if(!_maybe_scan_following_colon())
5851  {
5852  _c4dbgp("seqblck[RVAL]: set ref as val!");
5853  _handle_annotations_before_blck_val_scalar();
5854  m_evt_handler->set_val_ref(ref);
5855  addrem_flags(RNXT, RVAL);
5856  }
5857  else
5858  {
5859  _c4dbgp("seqblck[RVAL]: ref is key of map");
5860  addrem_flags(RNXT, RVAL);
5861  _handle_annotations_before_start_mapblck(startline);
5862  m_evt_handler->begin_map_val_block();
5863  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5864  m_evt_handler->set_key_ref(ref);
5865  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5866  _set_indentation(startindent);
5867  _maybe_skip_whitespace_tokens();
5868  goto seqblck_finish;
5869  }
5870  }
5871  else if(first == '!')
5872  {
5873  csubstr tag = _scan_tag();
5874  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5875  // we need to buffer the tags, as there may be two
5876  // consecutive tags in here
5877  _add_annotation(&m_pending_tags, tag, startindent, startline);
5878  }
5879  else if(first == '?')
5880  {
5881  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5882  addrem_flags(RNXT, RVAL);
5883  m_was_inside_qmrk = true;
5884  m_evt_handler->begin_map_val_block();
5885  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5886  _save_indentation();
5887  _line_progressed(1);
5888  _maybe_skip_whitespace_tokens();
5889  goto seqblck_finish;
5890  }
5891  else
5892  {
5893  _c4err("parse error");
5894  }
5895  }
5896  else // RNXT
5897  {
5898  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5899  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5900  //
5901  // handle indentation
5902  //
5903  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5904  if(C4_LIKELY(_at_line_begin()))
5905  {
5906  _c4dbgp("seqblck[RNXT]: at line begin");
5907  if(m_evt_handler->m_curr->indentation_ge())
5908  {
5909  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5910  _line_progressed(m_evt_handler->m_curr->indref);
5911  _maybe_skip_whitespace_tokens();
5912  rem = m_evt_handler->m_curr->line_contents.rem;
5913  if(!rem.len)
5914  goto seqblck_again;
5915  }
5916  else if(m_evt_handler->m_curr->indentation_lt())
5917  {
5918  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5919  _handle_indentation_pop_from_block_seq();
5920  if(has_all(RSEQ|BLCK))
5921  {
5922  _c4dbgp("seqblck[RNXT]: still seqblck!");
5923  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5924  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5925  rem = m_evt_handler->m_curr->line_contents.rem;
5926  if(!rem.len)
5927  goto seqblck_again;
5928  }
5929  else
5930  {
5931  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5932  goto seqblck_finish;
5933  }
5934  }
5935  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5936  {
5937  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5938  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5939  rem = m_evt_handler->m_curr->line_contents.rem;
5940  if(!rem.len)
5941  goto seqblck_again;
5942  }
5943  }
5944  else
5945  {
5946  _c4dbgp("seqblck[RNXT]: NOT at line begin");
5947  if(!rem.begins_with_any(" \t"))
5948  {
5949  _c4err("parse error");
5950  }
5951  else
5952  {
5953  _skipchars(" \t");
5954  rem = m_evt_handler->m_curr->line_contents.rem;
5955  if(!rem.len)
5956  {
5957  _c4dbgp("seqblck[RNXT]: again");
5958  goto seqblck_again;
5959  }
5960  }
5961  }
5962  //
5963  // now handle the tokens
5964  //
5965  const char first = rem.str[0];
5966  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
5967  if(first == '-')
5968  {
5969  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5970  {
5971  _c4dbgp("seqblck[RNXT]: expect next val");
5972  addrem_flags(RVAL, RNXT);
5973  m_evt_handler->add_sibling();
5974  _line_progressed(1);
5975  _maybe_skip_whitespace_tokens();
5976  }
5977  else
5978  {
5979  _c4dbgp("seqblck[RNXT]: start doc");
5980  _start_doc_suddenly();
5981  _line_progressed(3);
5982  _maybe_skip_whitespace_tokens();
5983  goto seqblck_finish;
5984  }
5985  }
5986  else if(first == ':')
5987  {
5988  // This happens for example in `- [a: b]: c` (after
5989  // terminating the seq, ie, after `]`). All other cases
5990  // (ie colon after scalars) are caught elsewhere (ie, in
5991  // RVAL state).
5992  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5993  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
5994  {
5995  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
5996  m_evt_handler->end_seq();
5997  goto seqblck_finish;
5998  }
5999  else
6000  {
6001  _c4err("parse error");
6002  }
6003  }
6004  else if(first == '.')
6005  {
6006  _c4dbgp("seqblck[RNXT]: maybe doc?");
6007  csubstr rs = rem.sub(1);
6008  if(rs == ".." || rs.begins_with(".. "))
6009  {
6010  _c4dbgp("seqblck[RNXT]: end+start doc");
6011  _end_doc_suddenly();
6012  _line_progressed(3);
6013  _maybe_skip_whitespace_tokens();
6014  goto seqblck_finish;
6015  }
6016  else
6017  {
6018  _c4err("parse error");
6019  }
6020  }
6021  else
6022  {
6023  // may be an indentless sequence nested in a map...
6024  //if(m_evt_handler->m_stack.size() >= 2)
6025  #ifdef RYML_DBG
6026  char flagbuf_[128];
6027  for(auto const& s : m_evt_handler->m_stack)
6028  {
6029  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
6030  }
6031  #endif
6032  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6033  {
6034  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6035  _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6036  _handle_indentation_pop(m_evt_handler->m_parent);
6037  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
6038  m_evt_handler->add_sibling();
6039  addrem_flags(RKEY, RNXT);
6040  goto seqblck_finish;
6041  }
6042  else //if(first != '*')
6043  {
6044  _c4err("parse error");
6045  }
6046  }
6047  }
6048 
6049  seqblck_again:
6050  _c4dbgt("seqblck: go again", 0);
6051  if(_finished_line())
6052  {
6053  _line_ended();
6054  _scan_line();
6055  if(_finished_file())
6056  {
6057  _c4dbgp("seqblck: finish!");
6058  _end_seq_blck();
6059  goto seqblck_finish;
6060  }
6061  _c4dbgnextline();
6062  }
6063  goto seqblck_start;
6064 
6065  seqblck_finish:
6066  _c4dbgp("seqblck: finish");
6067 }
6068 
6069 
6070 //-----------------------------------------------------------------------------
6071 
6072 template<class EventHandler>
6073 void ParseEngine<EventHandler>::_handle_map_block()
6074 {
6075 mapblck_start:
6076  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6077 
6078  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
6079  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
6080  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
6081  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
6082  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
6083 
6084  _maybe_skip_comment();
6085  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6086  if(!rem.len)
6087  goto mapblck_again;
6088 
6089  if(has_any(RKEY))
6090  {
6091  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6092  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6093  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6094  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6095  //
6096  // handle indentation
6097  //
6098  if(m_evt_handler->m_curr->at_line_beginning())
6099  {
6100  if(m_evt_handler->m_curr->indentation_eq())
6101  {
6102  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6103  _line_progressed(m_evt_handler->m_curr->indref);
6104  rem = m_evt_handler->m_curr->line_contents.rem;
6105  if(!rem.len)
6106  goto mapblck_again;
6107  }
6108  else if(m_evt_handler->m_curr->indentation_lt())
6109  {
6110  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6111  _handle_indentation_pop_from_block_map();
6112  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6113  if(has_all(RMAP|BLCK))
6114  {
6115  _c4dbgp("mapblck[RKEY]: still mapblck!");
6116  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6117  rem = m_evt_handler->m_curr->line_contents.rem;
6118  if(!rem.len)
6119  goto mapblck_again;
6120  }
6121  else
6122  {
6123  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6124  goto mapblck_finish;
6125  }
6126  }
6127  else
6128  {
6129  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6130  _c4err("invalid indentation");
6131  }
6132  }
6133  //
6134  // now handle the tokens
6135  //
6136  const char first = rem.str[0];
6137  const size_t startline = m_evt_handler->m_curr->pos.line;
6138  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6139  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6140  ScannedScalar sc;
6141  if(first == '\'')
6142  {
6143  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6144  sc = _scan_scalar_squot();
6145  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6146  _handle_annotations_before_blck_key_scalar();
6147  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6148  addrem_flags(RVAL, RKEY);
6149  if(!_maybe_scan_following_colon())
6150  _c4err("could not find ':' colon after key");
6151  _maybe_skip_whitespace_tokens();
6152  }
6153  else if(first == '"')
6154  {
6155  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6156  sc = _scan_scalar_dquot();
6157  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6158  _handle_annotations_before_blck_key_scalar();
6159  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6160  addrem_flags(RVAL, RKEY);
6161  if(!_maybe_scan_following_colon())
6162  _c4err("could not find ':' colon after key");
6163  _maybe_skip_whitespace_tokens();
6164  }
6165  // block scalars (| and >) cannot be used as keys unless they
6166  // appear in an explicit QMRK scope (i.e., after the ? token), so both are rejected with an error here.
6167  else if(C4_UNLIKELY(first == '|'))
6168  {
6169  _c4err("block literal keys must be enclosed in '?'");
6170  }
6171  else if(C4_UNLIKELY(first == '>'))
6172  {
6173  _c4err("block literal keys must be enclosed in '?'");
6174  }
6175  else if(_scan_scalar_plain_map_blck(&sc))
6176  {
6177  _c4dbgp("mapblck[RKEY]: plain scalar");
6178  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6179  _handle_annotations_before_blck_key_scalar();
6180  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6181  addrem_flags(RVAL, RKEY);
6182  if(!_maybe_scan_following_colon())
6183  _c4err("could not find ':' colon after key");
6184  _maybe_skip_whitespace_tokens();
6185  }
6186  else if(first == '?')
6187  {
6188  _c4dbgp("mapblck[RKEY]: key token!");
6189  addrem_flags(QMRK, RKEY);
6190  _line_progressed(1);
6191  _maybe_skip_whitespace_tokens();
6192  m_was_inside_qmrk = true;
6193  goto mapblck_again;
6194  }
6195  else if(first == ':')
6196  {
6197  _c4dbgp("mapblck[RKEY]: setting empty key");
6198  _handle_annotations_before_blck_key_scalar();
6199  m_evt_handler->set_key_scalar_plain_empty();
6200  addrem_flags(RVAL, RKEY);
6201  _line_progressed(1);
6202  _maybe_skip_whitespace_tokens();
6203  }
6204  else if(first == '*')
6205  {
6206  csubstr ref = _scan_ref_map();
6207  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6208  _handle_annotations_before_blck_key_scalar();
6209  m_evt_handler->set_key_ref(ref);
6210  addrem_flags(RVAL, RKEY);
6211  if(!_maybe_scan_following_colon())
6212  _c4err("could not find ':' colon after key");
6213  _maybe_skip_whitespace_tokens();
6214  }
6215  else if(first == '&')
6216  {
6217  csubstr anchor = _scan_anchor();
6218  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6219  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6220  }
6221  else if(first == '!')
6222  {
6223  csubstr tag = _scan_tag();
6224  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6225  _add_annotation(&m_pending_tags, tag, startindent, startline);
6226  }
6227  else if(first == '[')
6228  {
6229  // RYML's tree cannot store container keys, but that's
6230  // handled inside the tree handler. Other handlers may be
6231  // able to handle it.
6232  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6233  addrem_flags(RKCL, RKEY);
6234  _handle_annotations_before_blck_key_scalar();
6235  m_evt_handler->begin_seq_key_flow();
6236  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6237  _line_progressed(1);
6238  _set_indentation(startindent);
6239  goto mapblck_finish;
6240  }
6241  else if(first == '{')
6242  {
6243  // RYML's tree cannot store container keys, but that's
6244  // handled inside the tree handler. Other handlers may be
6245  // able to handle it.
6246  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6247  addrem_flags(RKCL, RKEY);
6248  _handle_annotations_before_blck_key_scalar();
6249  m_evt_handler->begin_map_key_flow();
6250  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6251  _line_progressed(1);
6252  _set_indentation(startindent);
6253  goto mapblck_finish;
6254  }
6255  else if(first == '-')
6256  {
6257  _c4dbgp("mapblck[RKEY]: maybe doc?");
6258  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6259  {
6260  _c4dbgp("mapblck[RKEY]: end+start doc");
6261  _start_doc_suddenly();
6262  _line_progressed(3);
6263  _maybe_skip_whitespace_tokens();
6264  goto mapblck_finish;
6265  }
6266  else
6267  {
6268  _c4err("parse error");
6269  }
6270  }
6271  else if(first == '.')
6272  {
6273  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6274  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6275  {
6276  _c4dbgp("mapblck[RKEY]: end doc");
6277  _end_doc_suddenly();
6278  _line_progressed(3);
6279  _maybe_skip_whitespace_tokens();
6280  goto mapblck_finish;
6281  }
6282  else
6283  {
6284  _c4err("parse error");
6285  }
6286  }
6288  else if(first == '\t')
6289  {
6290  _c4dbgp("mapblck[RKEY]: skip tabs");
6291  _maybe_skipchars('\t');
6292  })
6293  else
6294  {
6295  _c4err("parse error");
6296  }
6297  }
6298  else if(has_any(RKCL)) // read the key colon
6299  {
6300  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6301  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6302  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6303  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6304  //
6305  // handle indentation
6306  //
6307  if(m_evt_handler->m_curr->at_line_beginning())
6308  {
6309  if(m_evt_handler->m_curr->indentation_eq())
6310  {
6311  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6312  _line_progressed(m_evt_handler->m_curr->indref);
6313  rem = m_evt_handler->m_curr->line_contents.rem;
6314  if(!rem.len)
6315  goto mapblck_again;
6316  }
6317  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6318  {
6319  _c4err("invalid indentation");
6320  }
6321  }
6322  const char first = rem.str[0];
6323  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6324  if(first == ':')
6325  {
6326  _c4dbgp("mapblck[RKCL]: found the colon");
6327  addrem_flags(RVAL, RKCL);
6328  _line_progressed(1);
6329  _maybe_skip_whitespace_tokens();
6330  }
6331  else if(first == '?')
6332  {
6333  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6334  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6335  m_evt_handler->set_val_scalar_plain_empty();
6336  m_evt_handler->add_sibling();
6337  addrem_flags(QMRK, RKCL);
6338  _line_progressed(1);
6339  _maybe_skip_whitespace_tokens();
6340  }
6341  else if(first == '-')
6342  {
6343  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6344  {
6345  _c4dbgp("mapblck[RKCL]: end+start doc");
6346  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6347  _start_doc_suddenly();
6348  _line_progressed(3);
6349  _maybe_skip_whitespace_tokens();
6350  goto mapblck_finish;
6351  }
6352  else
6353  {
6354  _c4err("parse error");
6355  }
6356  }
6357  else if(first == '.')
6358  {
6359  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6360  csubstr rs = rem.sub(1);
6361  if(rs == ".." || rs.begins_with(".. "))
6362  {
6363  _c4dbgp("mapblck[RKCL]: end+start doc");
6364  _end_doc_suddenly();
6365  _line_progressed(3);
6366  goto mapblck_finish;
6367  }
6368  else
6369  {
6370  _c4err("parse error");
6371  }
6372  }
6373  else if(m_was_inside_qmrk)
6374  {
6375  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6376  _c4dbgp("mapblck[RKCL]: missing :");
6377  m_evt_handler->set_val_scalar_plain_empty();
6378  m_evt_handler->add_sibling();
6379  m_was_inside_qmrk = false;
6380  addrem_flags(RKEY, RKCL);
6381  }
6382  else
6383  {
6384  _c4err("parse error");
6385  }
6386  }
6387  else if(has_any(RVAL))
6388  {
6389  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6390  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6391  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6392  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6393  //
6394  // handle indentation
6395  //
6396  if(m_evt_handler->m_curr->at_line_beginning())
6397  {
6398  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6399  m_evt_handler->m_curr->more_indented = false;
6400  if(m_evt_handler->m_curr->indref == npos)
6401  {
6402  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6403  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6404  _line_progressed(m_evt_handler->m_curr->indref);
6405  rem = m_evt_handler->m_curr->line_contents.rem;
6406  if(!rem.len)
6407  goto mapblck_again;
6408  }
6409  else if(m_evt_handler->m_curr->indentation_eq())
6410  {
6411  _c4dbgp("mapblck[RVAL]: skip indentation!");
6412  _line_progressed(m_evt_handler->m_curr->indref);
6413  rem = m_evt_handler->m_curr->line_contents.rem;
6414  if(!rem.len)
6415  goto mapblck_again;
6416  // TODO: this is valid:
6417  //
6418  // ```yaml
6419  // a:
6420  // b:
6421  // ---
6422  // a:
6423  // b
6424  // ---
6425  // a:
6426  // b: c
6427  // ```
6428  //
6429  // ... but this is not:
6430  //
6431  // ```yaml
6432  // a:
6433  // v
6434  // ---
6435  // a: b: c
6436  // ```
6437  //
6438  // here, we probably need to set a boolean on the state
6439  // to disambiguate between these cases.
6440  }
6441  else if(m_evt_handler->m_curr->indentation_gt())
6442  {
6443  _c4dbgp("mapblck[RVAL]: more indented!");
6444  m_evt_handler->m_curr->more_indented = true;
6445  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6446  rem = m_evt_handler->m_curr->line_contents.rem;
6447  if(!rem.len)
6448  goto mapblck_again;
6449  }
6450  else if(m_evt_handler->m_curr->indentation_lt())
6451  {
6452  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6453  _handle_indentation_pop_from_block_map();
6454  if(has_all(RMAP|BLCK))
6455  {
6456  _c4dbgp("mapblck[RVAL]: still mapblck!");
6457  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6458  if(has_any(RNXT))
6459  {
6460  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6461  m_evt_handler->add_sibling();
6462  addrem_flags(RKEY, RNXT);
6463  }
6464  goto mapblck_again;
6465  }
6466  else
6467  {
6468  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6469  goto mapblck_finish;
6470  }
6471  }
6472  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6473  {
6474  _c4dbgp("mapblck[RVAL]: empty line!");
6475  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6476  goto mapblck_again;
6477  }
6478  }
6479  //
6480  // now handle the tokens
6481  //
6482  const char first = rem.str[0];
6483  const size_t startline = m_evt_handler->m_curr->pos.line;
6484  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6485  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6486  ScannedScalar sc;
6487  if(first == '\'')
6488  {
6489  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6490  sc = _scan_scalar_squot();
6491  if(!_maybe_scan_following_colon())
6492  {
6493  _c4dbgp("mapblck[RVAL]: set as val");
6494  _handle_annotations_before_blck_val_scalar();
6495  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6496  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6497  addrem_flags(RNXT, RVAL);
6498  }
6499  else
6500  {
6501  if(startindent != m_evt_handler->m_curr->indref)
6502  {
6503  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6504  _handle_annotations_before_start_mapblck(startline);
6505  addrem_flags(RNXT, RVAL);
6506  _handle_colon();
6507  m_evt_handler->begin_map_val_block();
6508  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6509  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6510  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6511  _maybe_skip_whitespace_tokens();
6512  // keep the child state on RVAL
6513  addrem_flags(RVAL, RNXT);
6514  }
6515  else
6516  {
6517  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6518  m_evt_handler->set_val_scalar_plain_empty();
6519  m_evt_handler->add_sibling();
6520  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6521  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6522  // keep going on RVAL
6523  _maybe_skip_whitespace_tokens();
6524  }
6525  }
6526  }
6527  else if(first == '"')
6528  {
6529  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6530  sc = _scan_scalar_dquot();
6531  if(!_maybe_scan_following_colon())
6532  {
6533  _c4dbgp("mapblck[RVAL]: set as val");
6534  _handle_annotations_before_blck_val_scalar();
6535  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6536  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6537  addrem_flags(RNXT, RVAL);
6538  }
6539  else
6540  {
6541  if(startindent != m_evt_handler->m_curr->indref)
6542  {
6543  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6544  _handle_annotations_before_start_mapblck(startline);
6545  addrem_flags(RNXT, RVAL);
6546  _handle_colon();
6547  m_evt_handler->begin_map_val_block();
6548  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6549  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6550  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6551  _maybe_skip_whitespace_tokens();
6552  // keep the child state on RVAL
6553  addrem_flags(RVAL, RNXT);
6554  }
6555  else
6556  {
6557  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6558  m_evt_handler->set_val_scalar_plain_empty();
6559  m_evt_handler->add_sibling();
6560  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6561  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6562  // keep going on RVAL
6563  _maybe_skip_whitespace_tokens();
6564  }
6565  }
6566  }
6567  // block scalars can only appear as keys when in QMRK scope
6568  // (ie, after ? tokens), so no need to scan following colon
6569  else if(first == '|')
6570  {
6571  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6572  ScannedBlock sb;
6573  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6574  _handle_annotations_before_blck_val_scalar();
6575  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6576  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6577  addrem_flags(RNXT, RVAL);
6578  }
6579  else if(first == '>')
6580  {
6581  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6582  ScannedBlock sb;
6583  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6584  _handle_annotations_before_blck_val_scalar();
6585  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6586  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6587  addrem_flags(RNXT, RVAL);
6588  }
6589  else if(_scan_scalar_plain_map_blck(&sc))
6590  {
6591  _c4dbgp("mapblck[RVAL]: plain scalar.");
6592  if(!_maybe_scan_following_colon())
6593  {
6594  _c4dbgp("mapblck[RVAL]: set as val");
6595  _handle_annotations_before_blck_val_scalar();
6596  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6597  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6598  addrem_flags(RNXT, RVAL);
6599  }
6600  else
6601  {
6602  if(startindent != m_evt_handler->m_curr->indref)
6603  {
6604  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6605  addrem_flags(RNXT, RVAL);
6606  _handle_annotations_before_start_mapblck(startline);
6607  _handle_colon();
6608  m_evt_handler->begin_map_val_block();
6609  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6610  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6611  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6612  _maybe_skip_whitespace_tokens();
6613  // keep the child state on RVAL
6614  addrem_flags(RVAL, RNXT);
6615  }
6616  else
6617  {
6618  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6619  _handle_annotations_before_blck_val_scalar();
6620  m_evt_handler->set_val_scalar_plain_empty();
6621  m_evt_handler->add_sibling();
6622  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6623  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6624  // keep going on RVAL
6625  _maybe_skip_whitespace_tokens();
6626  }
6627  }
6628  }
6629  else if(first == '-')
6630  {
6631  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6632  {
6633  _c4dbgp("mapblck[RVAL]: start val seqblck");
6634  addrem_flags(RNXT, RVAL);
6635  _handle_annotations_before_blck_val_scalar();
6636  m_evt_handler->begin_seq_val_block();
6637  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6638  _set_indentation(startindent);
6639  _line_progressed(1);
6640  _maybe_skip_whitespace_tokens();
6641  goto mapblck_finish;
6642  }
6643  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6644  {
6645  _c4dbgp("mapblck[RVAL]: end+start doc");
6646  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6647  _start_doc_suddenly();
6648  _line_progressed(3);
6649  _maybe_skip_whitespace_tokens();
6650  goto mapblck_finish;
6651  }
6652  else
6653  {
6654  _c4err("parse error");
6655  }
6656  }
6657  else if(first == '[')
6658  {
6659  _c4dbgp("mapblck[RVAL]: start val seqflow");
6660  addrem_flags(RNXT, RVAL);
6661  _handle_annotations_before_blck_val_scalar();
6662  m_evt_handler->begin_seq_val_flow();
6663  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT);
6664  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6665  _line_progressed(1);
6666  goto mapblck_finish;
6667  }
6668  else if(first == '{')
6669  {
6670  _c4dbgp("mapblck[RVAL]: start val mapflow");
6671  addrem_flags(RNXT, RVAL);
6672  _handle_annotations_before_blck_val_scalar();
6673  m_evt_handler->begin_map_val_flow();
6674  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6675  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6676  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6677  _line_progressed(1);
6678  goto mapblck_finish;
6679  }
6680  else if(first == '*')
6681  {
6682  csubstr ref = _scan_ref_map();
6683  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6684  if(startindent == m_evt_handler->m_curr->indref)
6685  {
6686  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6687  m_evt_handler->set_val_ref(ref);
6688  addrem_flags(RNXT, RVAL);
6689  }
6690  else
6691  {
6692  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6693  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6694  if(_maybe_scan_following_colon())
6695  {
6696  _c4dbgp("mapblck[RVAL]: start child map, block");
6697  addrem_flags(RNXT, RVAL);
6698  _handle_annotations_before_blck_val_scalar();
6699  m_evt_handler->begin_map_val_block();
6700  m_evt_handler->set_key_ref(ref);
6701  _set_indentation(startindent);
6702  // keep going in RVAL
6703  addrem_flags(RVAL, RNXT);
6704  }
6705  else
6706  {
6707  _c4dbgp("mapblck[RVAL]: was val ref");
6708  _handle_annotations_before_blck_val_scalar();
6709  m_evt_handler->set_val_ref(ref);
6710  addrem_flags(RNXT, RVAL);
6711  }
6712  }
6713  _maybe_skip_whitespace_tokens();
6714  }
6715  else if(first == '&')
6716  {
6717  csubstr anchor = _scan_anchor();
6718  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6719  if(startindent == m_evt_handler->m_curr->indref)
6720  {
6721  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6722  m_evt_handler->set_val_scalar_plain_empty();
6723  m_evt_handler->add_sibling();
6724  addrem_flags(RKEY, RVAL);
6725  }
6726  // we need to buffer the anchors, as there may be two
6727  // consecutive anchors in here
6728  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6729  }
6730  else if(first == '!')
6731  {
6732  csubstr tag = _scan_tag();
6733  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6734  if(startindent == m_evt_handler->m_curr->indref)
6735  {
6736  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6737  _handle_annotations_before_blck_val_scalar();
6738  m_evt_handler->set_val_scalar_plain_empty();
6739  m_evt_handler->add_sibling();
6740  addrem_flags(RKEY, RVAL);
6741  }
6742  // we need to buffer the tags, as there may be two
6743  // consecutive tags in here
6744  _add_annotation(&m_pending_tags, tag, startindent, startline);
6745  }
6746  else if(first == '?')
6747  {
6748  if(startindent == m_evt_handler->m_curr->indref)
6749  {
6750  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6751  _handle_annotations_before_blck_val_scalar();
6752  m_evt_handler->set_val_scalar_plain_empty();
6753  m_evt_handler->add_sibling();
6754  addrem_flags(QMRK, RVAL);
6755  }
6756  else if(startindent > m_evt_handler->m_curr->indref)
6757  {
6758  _c4dbgp("mapblck[RVAL]: start val mapblck");
6759  addrem_flags(RNXT, RVAL);
6760  _handle_annotations_before_blck_val_scalar();
6761  m_evt_handler->begin_map_val_block();
6762  addrem_flags(QMRK|BLCK, RNXT);
6763  _set_indentation(startindent);
6764  }
6765  else
6766  {
6767  _c4err("parse error");
6768  }
6769  m_was_inside_qmrk = true;
6770  _line_progressed(1);
6771  _maybe_skip_whitespace_tokens();
6772  goto mapblck_again;
6773  }
6774  else if(first == ':')
6775  {
6776  if(startindent == m_evt_handler->m_curr->indref)
6777  {
6778  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6779  m_evt_handler->set_val_scalar_plain_empty();
6780  m_evt_handler->add_sibling();
6781  m_evt_handler->set_key_scalar_plain_empty();
6782  }
6783  else if(startindent > m_evt_handler->m_curr->indref)
6784  {
6785  _c4dbgp("mapblck[RVAL]: start val mapblck");
6786  addrem_flags(RNXT, RVAL);
6787  _handle_annotations_before_start_mapblck(startline);
6788  _handle_colon();
6789  m_evt_handler->begin_map_val_block();
6790  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6791  m_evt_handler->set_key_scalar_plain_empty();
6792  // keep the child state on RVAL
6793  addrem_flags(RVAL, RNXT);
6794  }
6795  else
6796  {
6797  _c4err("parse error");
6798  }
6799  _line_progressed(1);
6800  _maybe_skip_whitespace_tokens();
6801  goto mapblck_again;
6802  }
6803  else if(first == '.')
6804  {
6805  _c4dbgp("mapblck[RVAL]: maybe doc?");
6806  csubstr rs = rem.sub(1);
6807  if(rs == ".." || rs.begins_with(".. "))
6808  {
6809  _c4dbgp("seqblck[RVAL]: end doc expl");
6810  _end_doc_suddenly();
6811  _line_progressed(3);
6812  _maybe_skip_whitespace_tokens();
6813  goto mapblck_finish;
6814  }
6815  else
6816  {
6817  _c4err("parse error");
6818  }
6819  }
6821  else if(first == '\t')
6822  {
6823  _c4dbgp("mapblck[RVAL]: skip tabs");
6824  _maybe_skipchars('\t');
6825  })
6826  else
6827  {
6828  _c4err("parse error");
6829  }
6830  }
6831  else if(has_any(RNXT))
6832  {
6833  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6834  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6835  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6836  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6837  //
6838  // handle indentation
6839  //
6840  if(m_evt_handler->m_curr->at_line_beginning())
6841  {
6842  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6843  if(m_evt_handler->m_curr->indentation_eq())
6844  {
6845  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6846  _line_progressed(m_evt_handler->m_curr->indref);
6847  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6848  m_evt_handler->add_sibling();
6849  addrem_flags(RKEY, RNXT);
6850  goto mapblck_again;
6851  }
6852  else if(m_evt_handler->m_curr->indentation_lt())
6853  {
6854  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6855  _handle_indentation_pop_from_block_map();
6856  if(has_all(RMAP|BLCK))
6857  {
6858  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6859  if(!has_any(RKCL))
6860  {
6861  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6862  m_evt_handler->add_sibling();
6863  addrem_flags(RKEY, RNXT);
6864  }
6865  goto mapblck_again;
6866  }
6867  else
6868  {
6869  goto mapblck_finish;
6870  }
6871  }
6872  }
6873  else
6874  {
6875  _c4dbgp("mapblck[RNXT]: NOT at line begin");
6876  if(!rem.begins_with_any(" \t"))
6877  {
6878  _c4err("parse error");
6879  }
6880  else
6881  {
6882  _skipchars(" \t");
6883  rem = m_evt_handler->m_curr->line_contents.rem;
6884  if(!rem.len)
6885  {
6886  _c4dbgp("seqblck[RNXT]: again");
6887  goto mapblck_again;
6888  }
6889  }
6890  }
6891  //
6892  // handle tokens
6893  //
6894  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6895  const char first = rem.str[0];
6896  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6897  if(first == ':')
6898  {
6899  if(m_evt_handler->m_curr->more_indented)
6900  {
6901  _c4dbgp("mapblck[RNXT]: start child block map");
6902  C4_NOT_IMPLEMENTED();
6903  //m_evt_handler->actually_as_block_map();
6904  _line_progressed(1);
6905  _set_indentation(m_evt_handler->m_curr->scalar_col);
6906  m_evt_handler->m_curr->more_indented = false;
6907  goto mapblck_again;
6908  }
6909  else
6910  {
6911  _c4err("parse error");
6912  }
6913  }
6914  else if(first == ' ')
6915  {
6916  _c4dbgp("mapblck[RNXT]: skip spaces");
6917  _maybe_skip_whitespace_tokens();
6918  }
6919  else
6920  {
6921  _c4err("parse error");
6922  }
6923  }
6924  else if(has_any(QMRK))
6925  {
6926  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6927  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6928  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6929  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6930  //
6931  // handle indentation
6932  //
6933  if(m_evt_handler->m_curr->at_line_beginning())
6934  {
6935  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6936  if(m_evt_handler->m_curr->indentation_eq())
6937  {
6938  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6939  _line_progressed(m_evt_handler->m_curr->indref);
6940  rem = m_evt_handler->m_curr->line_contents.rem;
6941  if(!rem.len)
6942  goto mapblck_again;
6943  }
6944  else if(m_evt_handler->m_curr->indentation_lt())
6945  {
6946  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6947  _handle_indentation_pop_from_block_map();
6948  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6949  if(has_all(RMAP|BLCK))
6950  {
6951  _c4dbgp("mapblck[QMRK]: still mapblck!");
6952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6953  rem = m_evt_handler->m_curr->line_contents.rem;
6954  if(!rem.len)
6955  goto mapblck_again;
6956  }
6957  else
6958  {
6959  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6960  goto mapblck_finish;
6961  }
6962  }
6963  // indentation can be larger in QMRK state
6964  else
6965  {
6966  _c4dbgp("mapblck[QMRK]: larger indentation !");
6967  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6968  rem = m_evt_handler->m_curr->line_contents.rem;
6969  if(!rem.len)
6970  goto mapblck_again;
6971  }
6972  }
6973  //
6974  // now handle the tokens
6975  //
6976  const char first = rem.str[0];
6977  const size_t startline = m_evt_handler->m_curr->pos.line;
6978  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6979  _c4dbgpf("mapblck[QMRK]: '{}'", first);
6980  ScannedScalar sc;
6981  if(first == '\'')
6982  {
6983  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
6984  sc = _scan_scalar_squot();
6985  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6986  if(!_maybe_scan_following_colon())
6987  {
6988  _c4dbgp("mapblck[QMRK]: set as key");
6989  _handle_annotations_before_blck_key_scalar();
6990  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6991  addrem_flags(RKCL, QMRK);
6992  }
6993  else
6994  {
6995  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6996  addrem_flags(RKCL, QMRK);
6997  _handle_annotations_before_start_mapblck_as_key();
6998  m_evt_handler->begin_map_key_block();
6999  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7000  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7001  _maybe_skip_whitespace_tokens();
7002  _set_indentation(startindent);
7003  // keep the child state on RVAL
7004  addrem_flags(RVAL, RKCL|QMRK);
7005  }
7006  }
7007  else if(first == '"')
7008  {
7009  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7010  sc = _scan_scalar_dquot();
7011  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7012  if(!_maybe_scan_following_colon())
7013  {
7014  _c4dbgp("mapblck[QMRK]: set as key");
7015  _handle_annotations_before_blck_key_scalar();
7016  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7017  addrem_flags(RKCL, QMRK);
7018  }
7019  else
7020  {
7021  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7022  addrem_flags(RKCL, QMRK);
7023  _handle_annotations_before_start_mapblck_as_key();
7024  m_evt_handler->begin_map_key_block();
7025  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7026  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7027  _maybe_skip_whitespace_tokens();
7028  _set_indentation(startindent);
7029  // keep the child state on RVAL
7030  addrem_flags(RVAL, RKCL|QMRK);
7031  }
7032  }
7033  else if(first == '|')
7034  {
7035  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7036  ScannedBlock sb;
7037  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7038  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7039  _handle_annotations_before_blck_key_scalar();
7040  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7041  addrem_flags(RKCL, QMRK);
7042  }
7043  else if(first == '>')
7044  {
7045  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7046  ScannedBlock sb;
7047  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7048  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7049  _handle_annotations_before_blck_key_scalar();
7050  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7051  addrem_flags(RKCL, QMRK);
7052  }
7053  else if(_scan_scalar_plain_map_blck(&sc))
7054  {
7055  _c4dbgp("mapblck[QMRK]: plain scalar");
7056  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7057  if(!_maybe_scan_following_colon())
7058  {
7059  _c4dbgp("mapblck[QMRK]: set as key");
7060  _handle_annotations_before_blck_key_scalar();
7061  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7062  addrem_flags(RKCL, QMRK);
7063  }
7064  else
7065  {
7066  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7067  addrem_flags(RKCL, QMRK);
7068  _handle_annotations_before_start_mapblck_as_key();
7069  m_evt_handler->begin_map_key_block();
7070  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7071  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7072  _maybe_skip_whitespace_tokens();
7073  _set_indentation(startindent);
7074  // keep the child state on RVAL
7075  addrem_flags(RVAL, RKCL|QMRK);
7076  }
7077  }
7078  else if(first == ':')
7079  {
7080  if(startindent == m_evt_handler->m_curr->indref)
7081  {
7082  _c4dbgp("mapblck[QMRK]: empty key");
7083  addrem_flags(RVAL, QMRK);
7084  _handle_annotations_before_blck_key_scalar();
7085  m_evt_handler->set_key_scalar_plain_empty();
7086  _line_progressed(1);
7087  _maybe_skip_whitespace_tokens();
7088  }
7089  else
7090  {
7091  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7092  addrem_flags(RKCL, QMRK);
7093  _handle_annotations_before_start_mapblck_as_key();
7094  m_evt_handler->begin_map_key_block();
7095  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7096  m_evt_handler->set_key_scalar_plain_empty();
7097  _line_progressed(1);
7098  _maybe_skip_whitespace_tokens();
7099  _set_indentation(startindent);
7100  // keep the child state on RVAL
7101  addrem_flags(RVAL, RKCL|QMRK);
7102  }
7103  }
7104  else if(first == '*')
7105  {
7106  csubstr ref = _scan_ref_map();
7107  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7108  if(!_maybe_scan_following_colon())
7109  {
7110  _c4dbgp("mapblck[QMRK]: set ref as key");
7111  _handle_annotations_before_blck_key_scalar();
7112  m_evt_handler->set_key_ref(ref);
7113  addrem_flags(RKCL, QMRK);
7114  }
7115  else
7116  {
7117  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7118  addrem_flags(RKCL, QMRK);
7119  _handle_annotations_before_blck_key_scalar();
7120  m_evt_handler->begin_map_key_block();
7121  m_evt_handler->set_key_ref(ref);
7122  _set_indentation(startindent);
7123  // keep the child state on RVAL
7124  addrem_flags(RVAL, RKCL|QMRK);
7125  }
7126  _maybe_skip_whitespace_tokens();
7127  }
7128  else if(first == '&')
7129  {
7130  csubstr anchor = _scan_anchor();
7131  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7132  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7133  }
7134  else if(first == '!')
7135  {
7136  csubstr tag = _scan_tag();
7137  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7138  _add_annotation(&m_pending_tags, tag, startindent, startline);
7139  }
7140  else if(first == '-')
7141  {
7142  _c4dbgp("mapblck[QMRK]: maybe doc?");
7143  csubstr rs = rem.sub(1);
7144  if(rs == "--" || rs.begins_with("-- "))
7145  {
7146  _c4dbgp("mapblck[QMRK]: end+start doc");
7147  _start_doc_suddenly();
7148  _line_progressed(3);
7149  }
7150  else
7151  {
7152  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7153  addrem_flags(RKCL, RKEY|QMRK);
7154  _handle_annotations_before_blck_key_scalar();
7155  m_evt_handler->begin_seq_key_block();
7156  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7157  _set_indentation(startindent);
7158  _line_progressed(1);
7159  }
7160  _maybe_skip_whitespace_tokens();
7161  goto mapblck_finish;
7162  }
7163  else if(first == '[')
7164  {
7165  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7166  addrem_flags(RKCL, RKEY|QMRK);
7167  m_evt_handler->begin_seq_key_flow();
7168  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
7169  _set_indentation(m_evt_handler->m_parent->indref);
7170  _line_progressed(1);
7171  goto mapblck_finish;
7172  }
7173  else if(first == '{')
7174  {
7175  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7176  addrem_flags(RKCL, RKEY|QMRK);
7177  m_evt_handler->begin_map_key_flow();
7178  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7179  _set_indentation(m_evt_handler->m_parent->indref);
7180  _line_progressed(1);
7181  goto mapblck_finish;
7182  }
7183  else if(first == '?')
7184  {
7185  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7186  m_evt_handler->set_key_scalar_plain_empty();
7187  m_evt_handler->set_val_scalar_plain_empty();
7188  m_evt_handler->add_sibling();
7189  _line_progressed(1);
7190  }
7191  else if(first == '.')
7192  {
7193  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7194  csubstr rs = rem.sub(1);
7195  if(rs == ".." || rs.begins_with(".. "))
7196  {
7197  _c4dbgp("mapblck[QMRK]: end+start doc");
7198  _end_doc_suddenly();
7199  _line_progressed(3);
7200  goto mapblck_finish;
7201  }
7202  else
7203  {
7204  _c4err("parse error");
7205  }
7206  }
7207  else
7208  {
7209  _c4err("parse error");
7210  }
7211  }
7212 
7213  mapblck_again:
7214  _c4dbgt("mapblck: again", 0);
7215  if(_finished_line())
7216  {
7217  _line_ended();
7218  _scan_line();
7219  if(_finished_file())
7220  {
7221  _c4dbgp("mapblck: file finished!");
7222  _end_map_blck();
7223  goto mapblck_finish;
7224  }
7225  _c4dbgnextline();
7226  }
7227  goto mapblck_start;
7228 
7229  mapblck_finish:
7230  _c4dbgp("mapblck: finish");
7231 }
7232 
7233 
7234 //-----------------------------------------------------------------------------
7235 
7236 template<class EventHandler>
7237 void ParseEngine<EventHandler>::_handle_unk_json()
7238 {
7239  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7240 
7241  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7242  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7243 
7244  _maybe_skip_comment();
7245  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7246  if(!rem.len)
7247  return;
7248 
7249  size_t pos = rem.first_not_of(" \t");
7250  if(pos)
7251  {
7252  pos = pos != npos ? pos : rem.len;
7253  _c4dbgpf("skipping indentation of {}", pos);
7254  _line_progressed(pos);
7255  rem = m_evt_handler->m_curr->line_contents.rem;
7256  if(!rem.len)
7257  return;
7258  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7259  }
7260 
7261  if(rem.begins_with('['))
7262  {
7263  _c4dbgp("it's a seq");
7264  m_evt_handler->check_trailing_doc_token();
7265  _maybe_begin_doc();
7266  m_evt_handler->begin_seq_val_flow();
7267  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7268  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7269  m_doc_empty = false;
7270  _line_progressed(1);
7271  }
7272  else if(rem.begins_with('{'))
7273  {
7274  _c4dbgp("it's a map");
7275  m_evt_handler->check_trailing_doc_token();
7276  _maybe_begin_doc();
7277  m_evt_handler->begin_map_val_flow();
7278  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7279  m_doc_empty = false;
7280  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7281  _line_progressed(1);
7282  }
7283  else if(_handle_bom())
7284  {
7285  _c4dbgp("byte order mark");
7286  }
7287  else
7288  {
7289  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7290  _maybe_skip_whitespace_tokens();
7291  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7292  if(!s.len)
7293  return;
7294  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7295  const char first = s.str[0];
7296  ScannedScalar sc;
7297  if(first == '"')
7298  {
7299  _c4dbgp("runk_json: scanning double-quoted scalar");
7300  m_evt_handler->check_trailing_doc_token();
7301  _maybe_begin_doc();
7302  add_flags(RDOC);
7303  m_doc_empty = false;
7304  sc = _scan_scalar_dquot();
7305  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7306  if(!_maybe_scan_following_colon())
7307  {
7308  _c4dbgp("runk_json: set as val");
7309  _handle_annotations_before_blck_val_scalar();
7310  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7311  }
7312  else
7313  {
7314  _c4err("parse error");
7315  }
7316  }
7317  else if(_scan_scalar_plain_unk(&sc))
7318  {
7319  _c4dbgp("runk_json: got a plain scalar");
7320  m_evt_handler->check_trailing_doc_token();
7321  _maybe_begin_doc();
7322  add_flags(RDOC);
7323  m_doc_empty = false;
7324  if(!_maybe_scan_following_colon())
7325  {
7326  _c4dbgp("runk_json: set as val");
7327  _handle_annotations_before_blck_val_scalar();
7328  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7329  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7330  }
7331  else
7332  {
7333  _c4err("parse error");
7334  }
7335  }
7336  else
7337  {
7338  _c4err("parse error");
7339  }
7340  }
7341 }
7342 
7343 
7344 //-----------------------------------------------------------------------------
7345 
7346 template<class EventHandler>
7347 void ParseEngine<EventHandler>::_handle_unk()
7348 {
7349  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7350 
7351  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7352  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7353 
7354  _maybe_skip_comment();
7355  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7356  if(!rem.len)
7357  return;
7358 
7359  size_t pos = rem.first_not_of(" \t");
7360  if(pos)
7361  {
7362  pos = pos != npos ? pos : rem.len;
7363  _c4dbgpf("skipping {} whitespace characters", pos);
7364  _line_progressed(pos);
7365  rem = m_evt_handler->m_curr->line_contents.rem;
7366  if(!rem.len)
7367  return;
7368  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7369  }
7370 
7371  if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7372  {
7373  _c4dbgp("rtop: zero indent + at line begin");
7374  if(_handle_bom())
7375  {
7376  _c4dbgp("byte order mark!");
7377  rem = m_evt_handler->m_curr->line_contents.rem;
7378  if(!rem.len)
7379  return;
7380  }
7381  const char first = rem.str[0];
7382  if(first == '-')
7383  {
7384  _c4dbgp("rtop: suspecting doc");
7385  if(_is_doc_begin_token(rem))
7386  {
7387  _c4dbgp("rtop: begin doc");
7388  _maybe_end_doc();
7389  _begin2_doc_expl();
7390  _set_indentation(0);
7391  addrem_flags(RDOC|RUNK, NDOC);
7392  _line_progressed(3u);
7393  _maybe_skip_whitespace_tokens();
7394  return;
7395  }
7396  }
7397  else if(first == '.')
7398  {
7399  _c4dbgp("rtop: suspecting doc end");
7400  if(_is_doc_end_token(rem))
7401  {
7402  _c4dbgp("rtop: end doc");
7403  if(has_any(RDOC))
7404  {
7405  _end2_doc_expl();
7406  }
7407  else
7408  {
7409  _c4dbgp("rtop: ignore end doc");
7410  }
7411  addrem_flags(NDOC|RUNK, RDOC);
7412  _line_progressed(3u);
7413  _maybe_skip_whitespace_tokens();
7414  return;
7415  }
7416  }
7417  else if(first == '%')
7418  {
7419  _c4dbgpf("directive: {}", rem);
7420  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7421  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7422  _handle_directive(rem);
7423  return;
7424  }
7425  }
7426 
7427  /* no else-if! */
7428  char first = rem.str[0];
7429 
7430  if(first == '[')
7431  {
7432  m_evt_handler->check_trailing_doc_token();
7433  _maybe_begin_doc();
7434  m_doc_empty = false;
7435  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7436  if(C4_LIKELY( ! _annotations_require_key_container()))
7437  {
7438  _c4dbgp("it's a seq, flow");
7439  _handle_annotations_before_blck_val_scalar();
7440  m_evt_handler->begin_seq_val_flow();
7441  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7442  _set_indentation(startindent);
7443  }
7444  else
7445  {
7446  _c4dbgp("start new block map, set flow seq as key (!)");
7447  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7448  m_evt_handler->begin_map_val_block();
7449  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7450  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7451  m_evt_handler->begin_seq_key_flow();
7452  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7453  _set_indentation(startindent);
7454  }
7455  _line_progressed(1);
7456  }
7457  else if(first == '{')
7458  {
7459  m_evt_handler->check_trailing_doc_token();
7460  _maybe_begin_doc();
7461  m_doc_empty = false;
7462  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7463  if(C4_LIKELY( ! _annotations_require_key_container()))
7464  {
7465  _c4dbgp("it's a map, flow");
7466  _handle_annotations_before_blck_val_scalar();
7467  m_evt_handler->begin_map_val_flow();
7468  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7469  _set_indentation(startindent);
7470  }
7471  else
7472  {
7473  _c4dbgp("start new block map, set flow map as key (!)");
7474  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7475  m_evt_handler->begin_map_val_block();
7476  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7477  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7478  m_evt_handler->begin_map_key_flow();
7479  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7480  _set_indentation(startindent);
7481  }
7482  _line_progressed(1);
7483  }
7484  else if(first == '-' && _is_blck_token(rem))
7485  {
7486  _c4dbgp("it's a seq, block");
7487  m_evt_handler->check_trailing_doc_token();
7488  _maybe_begin_doc();
7489  _handle_annotations_before_blck_val_scalar();
7490  m_evt_handler->begin_seq_val_block();
7491  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7492  m_doc_empty = false;
7493  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7494  _line_progressed(1);
7495  _maybe_skip_whitespace_tokens();
7496  }
7497  else if(first == '?' && _is_blck_token(rem))
7498  {
7499  _c4dbgp("it's a map + this key is complex");
7500  m_evt_handler->check_trailing_doc_token();
7501  _maybe_begin_doc();
7502  _handle_annotations_before_blck_val_scalar();
7503  m_evt_handler->begin_map_val_block();
7504  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7505  m_doc_empty = false;
7506  m_was_inside_qmrk = true;
7507  _save_indentation();
7508  _line_progressed(1);
7509  _maybe_skip_whitespace_tokens();
7510  }
7511  else if(first == ':' && _is_blck_token(rem))
7512  {
7513  if(m_doc_empty)
7514  {
7515  _c4dbgp("it's a map with an empty key");
7516  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7517  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7518  m_evt_handler->check_trailing_doc_token();
7519  _maybe_begin_doc();
7520  _handle_annotations_before_start_mapblck(startline);
7521  _handle_colon();
7522  m_evt_handler->begin_map_val_block();
7523  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7524  m_evt_handler->set_key_scalar_plain_empty();
7525  m_doc_empty = false;
7526  _set_indentation(startindent);
7527  }
7528  else
7529  {
7530  _c4dbgp("actually prev val is a key!");
7531  size_t prev_indentation = m_evt_handler->m_curr->indref;
7532  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7533  _set_indentation(prev_indentation);
7534  }
7535  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7536  _line_progressed(1);
7537  _maybe_skip_whitespace_tokens();
7538  }
7539  else if(first == '&')
7540  {
7541  csubstr anchor = _scan_anchor();
7542  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7543  m_evt_handler->check_trailing_doc_token();
7544  _maybe_begin_doc();
7545  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7546  const size_t line = m_evt_handler->m_curr->pos.line;
7547  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7548  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7549  m_doc_empty = false;
7550  }
7551  else if(first == '*')
7552  {
7553  csubstr ref = _scan_ref_map();
7554  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7555  m_evt_handler->check_trailing_doc_token();
7556  _maybe_begin_doc();
7557  m_doc_empty = false;
7558  if(!_maybe_scan_following_colon())
7559  {
7560  _c4dbgp("runk: set val ref");
7561  _handle_annotations_before_blck_val_scalar();
7562  m_evt_handler->set_val_ref(ref);
7563  }
7564  else
7565  {
7566  _c4dbgp("runk: start new block map, set ref as key");
7567  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7568  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7569  _handle_annotations_before_start_mapblck(startline);
7570  m_evt_handler->begin_map_val_block();
7571  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7572  m_evt_handler->set_key_ref(ref);
7573  _maybe_skip_whitespace_tokens();
7574  _set_indentation(startindent);
7575  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7576  }
7577  }
7578  else if(first == '!')
7579  {
7580  csubstr tag = _scan_tag();
7581  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7582  // we need to buffer the tags, as there may be two
7583  // consecutive tags in here
7584  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7585  const size_t line = m_evt_handler->m_curr->pos.line;
7586  _add_annotation(&m_pending_tags, tag, indentation, line);
7587  }
7588  else
7589  {
7590  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7591  _maybe_skip_whitespace_tokens();
7592  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7593  if(!s.len)
7594  return;
7595  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7596  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7597  first = s.str[0];
7598  ScannedScalar sc;
7599  if(first == '\'')
7600  {
7601  _c4dbgp("runk: scanning single-quoted scalar");
7602  m_evt_handler->check_trailing_doc_token();
7603  _maybe_begin_doc();
7604  add_flags(RDOC);
7605  m_doc_empty = false;
7606  sc = _scan_scalar_squot();
7607  if(!_maybe_scan_following_colon())
7608  {
7609  _c4dbgp("runk: set as val");
7610  _handle_annotations_before_blck_val_scalar();
7611  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7612  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7613  }
7614  else
7615  {
7616  _c4dbgp("runk: start new block map, set scalar as key");
7617  _handle_annotations_before_start_mapblck(startline);
7618  _handle_colon();
7619  m_evt_handler->begin_map_val_block();
7620  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7621  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7622  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7623  _maybe_skip_whitespace_tokens();
7624  _set_indentation(startindent);
7625  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7626  }
7627  }
7628  else if(first == '"')
7629  {
7630  _c4dbgp("runk: scanning double-quoted scalar");
7631  m_evt_handler->check_trailing_doc_token();
7632  _maybe_begin_doc();
7633  add_flags(RDOC);
7634  m_doc_empty = false;
7635  sc = _scan_scalar_dquot();
7636  if(!_maybe_scan_following_colon())
7637  {
7638  _c4dbgp("runk: set as val");
7639  _handle_annotations_before_blck_val_scalar();
7640  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7641  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7642  }
7643  else
7644  {
7645  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7646  _handle_annotations_before_start_mapblck(startline);
7647  m_evt_handler->begin_map_val_block();
7648  _handle_colon();
7649  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7650  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7651  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7652  _maybe_skip_whitespace_tokens();
7653  _set_indentation(startindent);
7654  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7655  }
7656  }
7657  else if(first == '|')
7658  {
7659  _c4dbgp("runk: scanning block-literal scalar");
7660  m_evt_handler->check_trailing_doc_token();
7661  _maybe_begin_doc();
7662  add_flags(RDOC);
7663  m_doc_empty = false;
7664  ScannedBlock sb;
7665  _scan_block(&sb, startindent);
7666  if(C4_LIKELY(!_maybe_scan_following_colon()))
7667  {
7668  _c4dbgp("runk: set as val");
7669  _handle_annotations_before_blck_val_scalar();
7670  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7671  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7672  }
7673  else
7674  {
7675  _c4err("block literal keys must be enclosed in '?'");
7676  }
7677  }
7678  else if(first == '>')
7679  {
7680  _c4dbgp("runk: scanning block-folded scalar");
7681  m_evt_handler->check_trailing_doc_token();
7682  _maybe_begin_doc();
7683  add_flags(RDOC);
7684  m_doc_empty = false;
7685  ScannedBlock sb;
7686  _scan_block(&sb, startindent);
7687  if(C4_LIKELY(!_maybe_scan_following_colon()))
7688  {
7689  _c4dbgp("runk: set as val");
7690  _handle_annotations_before_blck_val_scalar();
7691  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7692  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7693  }
7694  else
7695  {
7696  _c4err("block folded keys must be enclosed in '?'");
7697  }
7698  }
7699  else if(_scan_scalar_plain_unk(&sc))
7700  {
7701  _c4dbgp("runk: got a plain scalar");
7702  m_evt_handler->check_trailing_doc_token();
7703  _maybe_begin_doc();
7704  add_flags(RDOC);
7705  m_doc_empty = false;
7706  if(!_maybe_scan_following_colon())
7707  {
7708  _c4dbgp("runk: set as val");
7709  _handle_annotations_before_blck_val_scalar();
7710  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7711  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7712  }
7713  else
7714  {
7715  _c4dbgp("runk: start new block map, set scalar as key");
7716  _handle_annotations_before_start_mapblck(startline);
7717  _handle_colon();
7718  m_evt_handler->begin_map_val_block();
7719  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7720  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7721  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7722  _maybe_skip_whitespace_tokens();
7723  _set_indentation(startindent);
7724  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7725  }
7726  }
7727  }
7728 }
7729 
7730 
7731 //-----------------------------------------------------------------------------
7732 
7733 template<class EventHandler>
7734 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7735 {
7736  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7737 
7738  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7739 
7740  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7741  if(has_any(RNXT))
7742  {
7743  _c4dbgp("usty[RNXT]: finishing!");
7744  _end_stream();
7745  }
7746  #endif
7747 
7748  _maybe_skip_comment();
7749  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7750  if(!rem.len)
7751  return;
7752 
7753  size_t pos = rem.first_not_of(" \t");
7754  if(pos)
7755  {
7756  pos = pos != npos ? pos : rem.len;
7757  _c4dbgpf("skipping indentation of {}", pos);
7758  _line_progressed(pos);
7759  rem = m_evt_handler->m_curr->line_contents.rem;
7760  if(!rem.len)
7761  return;
7762  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7763  }
7764 
7765  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7766  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7767  char first = rem.str[0];
7768  if(has_any(RSEQ)) // destination is a sequence
7769  {
7770  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7771  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7772  if(first == '[')
7773  {
7774  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7775  add_flags(RNXT);
7776  m_evt_handler->_push();
7777  addrem_flags(FLOW|RVAL, RNXT|USTY);
7778  _set_indentation(startindent);
7779  _line_progressed(1);
7780  _maybe_skip_whitespace_tokens();
7781  }
7782  else if(first == '-' && _is_blck_token(rem))
7783  {
7784  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7785  add_flags(RNXT);
7786  m_evt_handler->_push();
7787  addrem_flags(BLCK|RVAL, RNXT|USTY);
7788  _set_indentation(startindent);
7789  _line_progressed(1);
7790  _maybe_skip_whitespace_tokens();
7791  }
7792  else
7793  {
7794  _c4err("can only parse a seq into an existing seq");
7795  }
7796  }
7797  else if(has_any(RMAP)) // destination is a map
7798  {
7799  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7800  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7801  if(first == '{')
7802  {
7803  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7804  add_flags(RNXT);
7805  _handle_annotations_before_blck_val_scalar();
7806  m_evt_handler->_push();
7807  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7808  _set_indentation(startindent);
7809  _line_progressed(1);
7810  _maybe_skip_whitespace_tokens();
7811  }
7812  else if(first == '?' && _is_blck_token(rem))
7813  {
7814  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7815  add_flags(RNXT);
7816  _handle_annotations_before_blck_val_scalar();
7817  m_evt_handler->_push();
7818  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7819  m_was_inside_qmrk = true;
7820  _save_indentation();
7821  _line_progressed(1);
7822  _maybe_skip_whitespace_tokens();
7823  }
7824  else if(first == ':' && _is_blck_token(rem))
7825  {
7826  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7827  add_flags(RNXT);
7828  _handle_annotations_before_blck_val_scalar();
7829  m_evt_handler->_push();
7830  m_evt_handler->set_key_scalar_plain_empty();
7831  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7832  _save_indentation();
7833  _line_progressed(1);
7834  _maybe_skip_whitespace_tokens();
7835  }
7836  else if(rem.begins_with('&'))
7837  {
7838  csubstr anchor = _scan_anchor();
7839  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7840  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7841  const size_t line = m_evt_handler->m_curr->pos.line;
7842  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7843  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7844  }
7845  else if(first == '*')
7846  {
7847  csubstr ref = _scan_ref_map();
7848  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7849  if(!_maybe_scan_following_colon())
7850  {
7851  _c4err("cannot read a VAL to a map");
7852  }
7853  else
7854  {
7855  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7856  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7857  add_flags(RNXT);
7858  _handle_annotations_before_start_mapblck(startline);
7859  m_evt_handler->_push();
7860  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7861  m_evt_handler->set_key_ref(ref);
7862  _maybe_skip_whitespace_tokens();
7863  _set_indentation(startindent);
7864  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7865  }
7866  }
7867  else if(first == '!')
7868  {
7869  csubstr tag = _scan_tag();
7870  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7871  // we need to buffer the tags, as there may be two
7872  // consecutive tags in here
7873  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7874  const size_t line = m_evt_handler->m_curr->pos.line;
7875  _add_annotation(&m_pending_tags, tag, indentation, line);
7876  }
7877  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7878  {
7879  _c4err("cannot parse a seq into an existing map");
7880  }
7881  else
7882  {
7883  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7884  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7885  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7886  ScannedScalar sc;
7887  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7888  if(first == '\'')
7889  {
7890  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7891  sc = _scan_scalar_squot();
7892  if(!_maybe_scan_following_colon())
7893  {
7894  _c4err("cannot read a VAL to a map");
7895  }
7896  else
7897  {
7898  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7899  add_flags(RNXT);
7900  _handle_annotations_before_start_mapblck(startline);
7901  m_evt_handler->_push();
7902  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7903  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7904  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7905  _set_indentation(startindent);
7906  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7907  _maybe_skip_whitespace_tokens();
7908  }
7909  }
7910  else if(first == '"')
7911  {
7912  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7913  sc = _scan_scalar_dquot();
7914  if(!_maybe_scan_following_colon())
7915  {
7916  _c4err("cannot read a VAL to a map");
7917  }
7918  else
7919  {
7920  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7921  add_flags(RNXT);
7922  _handle_annotations_before_start_mapblck(startline);
7923  m_evt_handler->_push();
7924  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7925  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7926  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7927  _set_indentation(startindent);
7928  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7929  _maybe_skip_whitespace_tokens();
7930  }
7931  }
7932  else if(first == '|')
7933  {
7934  _c4err("block literal keys must be enclosed in '?'");
7935  }
7936  else if(first == '>')
7937  {
7938  _c4err("block literal keys must be enclosed in '?'");
7939  }
7940  else if(_scan_scalar_plain_unk(&sc))
7941  {
7942  _c4dbgp("usty[RMAP]: got a plain scalar");
7943  if(!_maybe_scan_following_colon())
7944  {
7945  _c4err("cannot read a VAL to a map");
7946  }
7947  else
7948  {
7949  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7950  add_flags(RNXT);
7951  _handle_annotations_before_start_mapblck(startline);
7952  m_evt_handler->_push();
7953  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7954  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7955  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7956  _set_indentation(startindent);
7957  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7958  _maybe_skip_whitespace_tokens();
7959  }
7960  }
7961  else
7962  {
7963  _c4err("parse error");
7964  }
7965  }
7966  }
7967  else // destination is unknown
7968  {
7969  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7970  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
7971  if(first == '[')
7972  {
7973  _c4dbgp("usty[UNK]: it's a flow seq");
7974  add_flags(RNXT);
7975  _handle_annotations_before_blck_val_scalar();
7976  m_evt_handler->begin_seq_val_flow();
7977  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
7978  _set_indentation(startindent);
7979  _line_progressed(1);
7980  _maybe_skip_whitespace_tokens();
7981  }
7982  else if(first == '-' && _is_blck_token(rem))
7983  {
7984  _c4dbgp("usty[UNK]: it's a block seq");
7985  add_flags(RNXT);
7986  _handle_annotations_before_blck_val_scalar();
7987  m_evt_handler->begin_seq_val_block();
7988  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
7989  _set_indentation(startindent);
7990  _line_progressed(1);
7991  _maybe_skip_whitespace_tokens();
7992  }
7993  else if(first == '{')
7994  {
7995  _c4dbgp("usty[UNK]: it's a flow map");
7996  add_flags(RNXT);
7997  _handle_annotations_before_blck_val_scalar();
7998  m_evt_handler->begin_map_val_flow();
7999  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
8000  _set_indentation(startindent);
8001  _line_progressed(1);
8002  _maybe_skip_whitespace_tokens();
8003  }
8004  else if(first == '?' && _is_blck_token(rem))
8005  {
8006  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8007  add_flags(RNXT);
8008  _handle_annotations_before_blck_val_scalar();
8009  m_evt_handler->begin_map_val_block();
8010  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
8011  m_was_inside_qmrk = true;
8012  _save_indentation();
8013  _line_progressed(1);
8014  _maybe_skip_whitespace_tokens();
8015  }
8016  else if(first == ':' && _is_blck_token(rem))
8017  {
8018  _c4dbgp("usty[UNK]: it's a map with an empty key");
8019  add_flags(RNXT);
8020  _handle_annotations_before_blck_val_scalar();
8021  m_evt_handler->begin_map_val_block();
8022  m_evt_handler->set_key_scalar_plain_empty();
8023  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8024  _save_indentation();
8025  _line_progressed(1);
8026  _maybe_skip_whitespace_tokens();
8027  }
8028  else if(first == '&')
8029  {
8030  csubstr anchor = _scan_anchor();
8031  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8032  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8033  const size_t line = m_evt_handler->m_curr->pos.line;
8034  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8035  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8036  }
8037  else if(first == '*')
8038  {
8039  csubstr ref = _scan_ref_map();
8040  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8041  if(!_maybe_scan_following_colon())
8042  {
8043  _c4dbgp("usty[UNK]: set val ref");
8044  _handle_annotations_before_blck_val_scalar();
8045  m_evt_handler->set_val_ref(ref);
8046  }
8047  else
8048  {
8049  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8050  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8051  add_flags(RNXT);
8052  _handle_annotations_before_start_mapblck(startline);
8053  m_evt_handler->begin_map_val_block();
8054  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8055  m_evt_handler->set_key_ref(ref);
8056  _maybe_skip_whitespace_tokens();
8057  _set_indentation(startindent);
8058  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8059  }
8060  }
8061  else if(first == '!')
8062  {
8063  csubstr tag = _scan_tag();
8064  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8065  // we need to buffer the tags, as there may be two
8066  // consecutive tags in here
8067  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8068  const size_t line = m_evt_handler->m_curr->pos.line;
8069  _add_annotation(&m_pending_tags, tag, indentation, line);
8070  }
8071  else
8072  {
8073  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
8074  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8075  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8076  first = rem.str[0];
8077  ScannedScalar sc;
8078  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8079  if(first == '\'')
8080  {
8081  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8082  sc = _scan_scalar_squot();
8083  if(!_maybe_scan_following_colon())
8084  {
8085  _c4dbgp("usty[UNK]: set as val");
8086  _handle_annotations_before_blck_val_scalar();
8087  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8088  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8089  _end_stream();
8090  }
8091  else
8092  {
8093  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8094  add_flags(RNXT);
8095  _handle_annotations_before_start_mapblck(startline);
8096  m_evt_handler->begin_map_val_block();
8097  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8098  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8099  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8100  _set_indentation(startindent);
8101  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8102  _maybe_skip_whitespace_tokens();
8103  }
8104  }
8105  else if(first == '"')
8106  {
8107  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8108  sc = _scan_scalar_dquot();
8109  if(!_maybe_scan_following_colon())
8110  {
8111  _c4dbgp("usty[UNK]: set as val");
8112  _handle_annotations_before_blck_val_scalar();
8113  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8114  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8115  _end_stream();
8116  }
8117  else
8118  {
8119  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8120  add_flags(RNXT);
8121  _handle_annotations_before_start_mapblck(startline);
8122  m_evt_handler->begin_map_val_block();
8123  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8124  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8125  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126  _set_indentation(startindent);
8127  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8128  _maybe_skip_whitespace_tokens();
8129  }
8130  }
8131  else if(first == '|')
8132  {
8133  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8134  ScannedBlock sb;
8135  _scan_block(&sb, startindent);
8136  _c4dbgp("usty[UNK]: set as val");
8137  _handle_annotations_before_blck_val_scalar();
8138  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8139  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8140  _end_stream();
8141  }
8142  else if(first == '>')
8143  {
8144  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8145  ScannedBlock sb;
8146  _scan_block(&sb, startindent);
8147  _c4dbgp("usty[UNK]: set as val");
8148  _handle_annotations_before_blck_val_scalar();
8149  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8150  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8151  _end_stream();
8152  }
8153  else if(_scan_scalar_plain_unk(&sc))
8154  {
8155  _c4dbgp("usty[UNK]: got a plain scalar");
8156  if(!_maybe_scan_following_colon())
8157  {
8158  _c4dbgp("usty[UNK]: set as val");
8159  _handle_annotations_before_blck_val_scalar();
8160  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8161  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8162  _end_stream();
8163  }
8164  else
8165  {
8166  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8167  add_flags(RNXT);
8168  _handle_annotations_before_start_mapblck(startline);
8169  m_evt_handler->begin_map_val_block();
8170  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8171  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8172  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8173  _set_indentation(startindent);
8174  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8175  _maybe_skip_whitespace_tokens();
8176  }
8177  }
8178  else
8179  {
8180  _c4err("parse error");
8181  }
8182  }
8183  }
8184 }
8185 
8186 
8187 //-----------------------------------------------------------------------------
8188 
8189 template<class EventHandler>
8190 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8191 {
8192  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8193  m_file = filename;
8194  m_buf = src;
8195  _reset();
8196  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8197  m_evt_handler->begin_stream();
8198  while( ! _finished_file())
8199  {
8200  _scan_line();
8201  while( ! _finished_line())
8202  {
8203  _c4dbgnextline();
8204  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8205  if(has_any(RSEQ))
8206  {
8207  _handle_seq_json();
8208  }
8209  else if(has_any(RMAP))
8210  {
8211  _handle_map_json();
8212  }
8213  else if(has_any(RUNK))
8214  {
8215  _handle_unk_json();
8216  }
8217  else
8218  {
8219  _c4err("internal error");
8220  }
8221  }
8222  if(_finished_file())
8223  break; // it may have finished because of multiline blocks
8224  _line_ended();
8225  }
8226  _end_stream();
8227  m_evt_handler->finish_parse();
8228 }
8229 
8230 
8231 //-----------------------------------------------------------------------------
8232 
8233 template<class EventHandler>
8234 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8235 {
8236  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8237  m_file = filename;
8238  m_buf = src;
8239  _reset();
8240  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8241  m_evt_handler->begin_stream();
8242  while( ! _finished_file())
8243  {
8244  _scan_line();
8245  while( ! _finished_line())
8246  {
8247  _c4dbgnextline();
8248  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8249  if(has_any(FLOW))
8250  {
8251  if(has_none(RSEQIMAP))
8252  {
8253  if(has_any(RSEQ))
8254  {
8255  _handle_seq_flow();
8256  }
8257  else
8258  {
8259  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8260  _handle_map_flow();
8261  }
8262  }
8263  else
8264  {
8265  _handle_seq_imap();
8266  }
8267  }
8268  else if(has_any(BLCK))
8269  {
8270  if(has_any(RSEQ))
8271  {
8272  _handle_seq_block();
8273  }
8274  else
8275  {
8276  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8277  _handle_map_block();
8278  }
8279  }
8280  else if(has_any(RUNK))
8281  {
8282  _handle_unk();
8283  }
8284  else if(has_any(USTY))
8285  {
8286  _handle_usty();
8287  }
8288  else
8289  {
8290  _c4err("internal error");
8291  }
8292  }
8293  if(_finished_file())
8294  break; // it may have finished because of multiline blocks
8295  _line_ended();
8296  }
8297  _end_stream();
8298  m_evt_handler->finish_parse();
8299 }
8300 /** @endcond */
8301 
8302 } // namespace yml
8303 } // namespace c4
8304 
8305 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
8306 
8307 #undef _c4dbgnextline
8308 
8309 #if defined(_MSC_VER)
8310 # pragma warning(pop)
8311 #elif defined(__clang__)
8312 # pragma clang diagnostic pop
8313 #elif defined(__GNUC__)
8314 # pragma GCC diagnostic pop
8315 #endif
8316 
8317 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:24
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:49
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1547
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:890
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
Definition: common.hpp:267
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
Encoding_e
Definition: common.hpp:428
@ UTF16BE
Definition: common.hpp:432
@ UTF8
Definition: common.hpp:430
@ UTF16LE
Definition: common.hpp:431
@ NOBOM
Definition: common.hpp:429
@ UTF32BE
Definition: common.hpp:434
@ UTF32LE
Definition: common.hpp:433
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4err_(fmt,...)
#define _c4err(fmt)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark