rapidyaml  0.8.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 
9 #include <ctype.h>
10 
11 #include "c4/yml/detail/parser_dbg.hpp"
13 #ifdef RYML_DBG
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
16 #endif
17 
18 
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
23 #else
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
27 #endif
28 
29 
30 // scaffold:
31 #define _c4dbgnextline() \
32  do { \
33  _c4dbgq("\n-----------"); \
34  _c4dbgt("handling line={}, offset={}B", \
35  m_evt_handler->m_curr->pos.line, \
36  m_evt_handler->m_curr->pos.offset); \
37  } while(0)
38 
39 
40 #if defined(_MSC_VER)
41 # pragma warning(push)
42 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
43 # pragma warning(disable: 4702/*unreachable code*/)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
54 # if __GNUC__ >= 7
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
56 # endif
57 #endif
58 
59 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
60 
61 namespace c4 {
62 namespace yml {
63 
64 namespace { // NOLINT
65 
66 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
67 {
68  RYML_ASSERT(s.len > 0);
69  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
70  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
71 }
72 
73 inline bool _is_doc_begin_token(csubstr s)
74 {
75  RYML_ASSERT(s.begins_with('-'));
76  RYML_ASSERT(!s.ends_with("\n"));
77  RYML_ASSERT(!s.ends_with("\r"));
78  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
79  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
80 }
81 
82 inline bool _is_doc_end_token(csubstr s)
83 {
84  RYML_ASSERT(s.begins_with('.'));
85  RYML_ASSERT(!s.ends_with("\n"));
86  RYML_ASSERT(!s.ends_with("\r"));
87  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
88  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
89 }
90 
/** Tell whether @p s begins with a document marker.
 *
 * Returns true when @p s has at least three characters, starts with
 * either "---" or "...", and the marker is either the whole string or
 * is followed by a space (or a tab when RYML_WITH_TAB_TOKENS is
 * defined). Returns false in every other case, including strings
 * shorter than three characters.
 *
 * The checks are deliberately written inline instead of delegating to
 * _is_doc_begin_token() / _is_doc_end_token(); see the note below. */
91 inline bool _is_doc_token(csubstr s) noexcept
92 {
93  //
94  // NOTE: this function was failing under some scenarios when
95  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
96  // related to optimizer assumptions on the input string and
97  // possibly caused from UB around assignment to that string (the
98  // call site was in _scan_block()). For more details see:
99  //
100  // https://github.com/biojppm/rapidyaml/issues/440
101  //
102  // The current version does not suffer this problem, but it may
103  // appear again.
104  //
105  //
106  // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
107  // (but not any other optimization level, nor any other compiler
108  // or version), because the assignment to s is being hoisted out
109  // of the loop which calls this function. Then the length doesn't
110  // enter the s.len >= 3 when it should. Adding a
111  // C4_DONT_OPTIMIZE(var) makes the problem go away.
112  //
    // the length check guards every s.str[1..2] access below
113  if(s.len >= 3)
114  {
115  switch(s.str[0])
116  {
117  case '-':
118  //return _is_doc_begin_token(s); // this was failing with gcc -O2
119  return (s.str[1] == '-' && s.str[2] == '-')
120  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
121  case '.':
122  //return _is_doc_end_token(s); // this was failing with gcc -O2
123  return (s.str[1] == '.' && s.str[2] == '.')
124  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
125  }
126  }
    // too short, or does not start with '-' or '.'
127  return false;
128 }
129 
130 inline size_t _is_special_json_scalar(csubstr s)
131 {
132  RYML_ASSERT(s.len);
133  switch(s.str[0])
134  {
135  case 'f':
136  if(s.len >= 5 && s.begins_with("false"))
137  return 5u;
138  break;
139  case 't':
140  if(s.len >= 4 && s.begins_with("true"))
141  return 4u;
142  break;
143  case 'n':
144  if(s.len >= 4 && s.begins_with("null"))
145  return 4u;
146  break;
147  }
148  return 0u;
149 }
150 
151 
152 //-----------------------------------------------------------------------------
153 
154 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
155 {
156  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
157 }
158 
159 //! look for the next newline chars, and jump to the right of those
160 inline substr from_next_line(substr rem)
161 {
162  size_t nlpos = rem.first_of("\r\n");
163  if(nlpos == csubstr::npos)
164  return {};
165  const char nl = rem[nlpos];
166  rem = rem.right_of(nlpos);
167  if(rem.empty())
168  return {};
169  if(_extend_from_combined_newline(nl, rem.front()))
170  rem = rem.sub(1);
171  return rem;
172 }
173 
174 
175 //-----------------------------------------------------------------------------
176 
177 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
178 {
179  RYML_ASSERT(r[*i] == '\n');
180  size_t numnl_following = 0;
181  ++(*i);
182  for( ; *i < r.len; ++(*i))
183  {
184  if(r.str[*i] == '\n')
185  ++numnl_following;
186  // skip leading whitespace
187  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
188  ;
189  else
190  break;
191  }
192  return numnl_following;
193 }
194 
195 /** @p i is set to the first non whitespace character after the line
196  * @return the number of empty lines after the initial position */
197 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
198 {
199  RYML_ASSERT(r[*i] == '\n');
200  size_t numnl_following = 0;
201  ++(*i);
202  if(indentation == 0)
203  {
204  for( ; *i < r.len; ++(*i))
205  {
206  if(r.str[*i] == '\n')
207  ++numnl_following;
208  // skip leading whitespace
209  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
210  ;
211  else
212  break;
213  }
214  }
215  else
216  {
217  for( ; *i < r.len; ++(*i))
218  {
219  if(r.str[*i] == '\n')
220  {
221  ++numnl_following;
222  // skip the indentation after the newline
223  size_t stop = *i + indentation;
224  for( ; *i < r.len; ++(*i))
225  {
226  if(r.str[*i] != ' ' && r.str[*i] != '\r')
227  break;
228  RYML_ASSERT(*i < stop);
229  }
230  C4_UNUSED(stop);
231  }
232  // skip leading whitespace
233  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
234  ;
235  else
236  break;
237  }
238  }
239  return numnl_following;
240 }
241 
242 } // anon namespace
243 
244 
245 //-----------------------------------------------------------------------------
246 //-----------------------------------------------------------------------------
247 //-----------------------------------------------------------------------------
248 
// NOTE(review): the declarator line of this definition (listing line 250)
// is absent from this extract. The body releases the newline-offsets
// storage through _free() and then resets every member through _clr();
// presumably this is the ParseEngine destructor -- confirm against the
// complete header.
249 template<class EventHandler>
251 {
252  _free();
253  _clr();
254 }
255 
// NOTE(review): the declarator line of this definition (listing line 257)
// is absent from this extract; from the initializer list it takes an
// `opts` value and an `evt_handler` pointer -- presumably the main
// ParseEngine constructor, confirm against the complete header.
//
// Stores the options and the event handler, leaves the file name, the
// source buffer, the pending annotations and the newline-offsets
// bookkeeping in their empty state, and then checks (RYML_CHECK) that
// the event handler is not null.
256 template<class EventHandler>
258  : m_options(opts)
259  , m_file()
260  , m_buf()
261  , m_evt_handler(evt_handler)
262  , m_pending_anchors()
263  , m_pending_tags()
264  , m_was_inside_qmrk(false)
265  , m_doc_empty(false)
266  , m_encoding(NOBOM)
267  , m_newline_offsets()
268  , m_newline_offsets_size(0)
269  , m_newline_offsets_capacity(0)
270  , m_newline_offsets_buf()
271 {
272  RYML_CHECK(evt_handler);
273 }
274 
// NOTE(review): the declarator line of this definition (listing line 276)
// is absent from this extract; since it takes over the members of `that`
// and then clears `that`, this is presumably the move constructor --
// confirm against the complete header.
//
// Every member is taken from `that`, including the newline-offsets array
// pointer (no new allocation is made here), except m_was_inside_qmrk,
// m_doc_empty and m_encoding, which receive fixed values.
// NOTE(review): the assignment operators further down copy those three
// members from `that` instead -- confirm the difference is intended.
275 template<class EventHandler>
277  : m_options(that.m_options)
278  , m_file(that.m_file)
279  , m_buf(that.m_buf)
280  , m_evt_handler(that.m_evt_handler)
281  , m_pending_anchors(that.m_pending_anchors)
282  , m_pending_tags(that.m_pending_tags)
283  , m_was_inside_qmrk(false)
284  , m_doc_empty(false)
285  , m_encoding(NOBOM)
286  , m_newline_offsets(that.m_newline_offsets)
287  , m_newline_offsets_size(that.m_newline_offsets_size)
288  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
289  , m_newline_offsets_buf(that.m_newline_offsets_buf)
290 {
    // `that` must no longer refer to the array now held by this object
291  that._clr();
292 }
293 
// NOTE(review): the declarator line of this definition (listing line 295)
// is absent from this extract; since `that` is read but never modified,
// this is presumably the copy constructor -- confirm against the complete
// header.
//
// Options, file name, buffer, event handler and pending annotations are
// copied from `that`; m_was_inside_qmrk, m_doc_empty and m_encoding get
// fixed values. The newline-offsets array is not shared: when `that` has
// capacity, storage of the same capacity is obtained through
// _resize_locations() and the used entries are copied into it.
// NOTE(review): m_newline_offsets_buf stays empty here, whereas the copy
// assignment further down copies it from `that` -- confirm this is
// intended.
294 template<class EventHandler>
296  : m_options(that.m_options)
297  , m_file(that.m_file)
298  , m_buf(that.m_buf)
299  , m_evt_handler(that.m_evt_handler)
300  , m_pending_anchors(that.m_pending_anchors)
301  , m_pending_tags(that.m_pending_tags)
302  , m_was_inside_qmrk(false)
303  , m_doc_empty(false)
304  , m_encoding(NOBOM)
305  , m_newline_offsets()
306  , m_newline_offsets_size()
307  , m_newline_offsets_capacity()
308  , m_newline_offsets_buf()
309 {
310  if(that.m_newline_offsets_capacity)
311  {
312  _resize_locations(that.m_newline_offsets_capacity);
    // the copy below relies on the resize having produced enough room
313  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
    // only the entries in use are copied
314  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
315  m_newline_offsets_size = that.m_newline_offsets_size;
316  }
317 }
318 
// NOTE(review): the declarator line of this definition (listing line 320)
// is absent from this extract; since it takes over the members of `that`,
// clears `that` and returns *this, this is presumably the move assignment
// operator -- confirm against the complete header.
//
// Releases the newline-offsets storage currently held, takes every member
// from `that` (including the newline-offsets array pointer), and resets
// `that` through _clr().
// NOTE(review): there is no `&that != this` guard here, although the
// other assignment operator further down has one.
319 template<class EventHandler>
321 {
322  _free();
323  m_options = (that.m_options);
324  m_file = (that.m_file);
325  m_buf = (that.m_buf);
326  m_evt_handler = that.m_evt_handler;
327  m_pending_anchors = that.m_pending_anchors;
328  m_pending_tags = that.m_pending_tags;
329  m_was_inside_qmrk = that.m_was_inside_qmrk;
330  m_doc_empty = that.m_doc_empty;
331  m_encoding = that.m_encoding;
332  m_newline_offsets = (that.m_newline_offsets);
333  m_newline_offsets_size = (that.m_newline_offsets_size);
334  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
335  m_newline_offsets_buf = (that.m_newline_offsets_buf);
    // `that` must no longer refer to the array now held by this object
336  that._clr();
337  return *this;
338 }
339 
// NOTE(review): the declarator line of this definition (listing line 341)
// is absent from this extract; since `that` is read but never modified
// and *this is returned, this is presumably the copy assignment operator
// -- confirm against the complete header.
//
// Does nothing on self-assignment. Otherwise releases the newline-offsets
// storage currently held, copies the scalar members from `that`, makes
// sure there is room for the newline offsets of `that` (growing through
// _resize_locations() when needed) and copies the entries in use.
340 template<class EventHandler>
342 {
343  if(&that != this)
344  {
345  _free();
346  m_options = (that.m_options);
347  m_file = (that.m_file);
348  m_buf = (that.m_buf);
349  m_evt_handler = that.m_evt_handler;
350  m_pending_anchors = that.m_pending_anchors;
351  m_pending_tags = that.m_pending_tags;
352  m_was_inside_qmrk = that.m_was_inside_qmrk;
353  m_doc_empty = that.m_doc_empty;
354  m_encoding = that.m_encoding;
355  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
356  _resize_locations(that.m_newline_offsets_capacity);
    // both checks guard the memcpy() below against a short destination
357  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
358  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
    // only the entries in use are copied
359  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
360  m_newline_offsets_size = that.m_newline_offsets_size;
361  m_newline_offsets_buf = that.m_newline_offsets_buf;
362  }
363  return *this;
364 }
365 
// NOTE(review): the declarator line of this definition (listing line 367)
// is absent from this extract; the destructor-like and move-like bodies
// above call `_clr()`, and this is presumably its definition -- confirm
// against the complete header.
//
// Resets every member to its empty value without releasing anything:
// the newline-offsets pointer is simply overwritten, so any storage it
// referred to must have been released or handed over beforehand.
// Note that m_doc_empty is set to true here.
366 template<class EventHandler>
368 {
369  m_options = {};
370  m_file = {};
371  m_buf = {};
372  m_evt_handler = {};
373  m_pending_anchors = {};
374  m_pending_tags = {};
375  m_was_inside_qmrk = false;
376  m_doc_empty = true;
377  m_encoding = NOBOM;
378  m_newline_offsets = {};
379  m_newline_offsets_size = {};
380  m_newline_offsets_capacity = {};
381  m_newline_offsets_buf = {};
382 }
383 
384 template<class EventHandler>
385 void ParseEngine<EventHandler>::_free()
386 {
387  if(m_newline_offsets)
388  {
389  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
390  m_newline_offsets = nullptr;
391  m_newline_offsets_size = 0u;
392  m_newline_offsets_capacity = 0u;
393  m_newline_offsets_buf = nullptr;
394  }
395 }
396 
397 
398 //-----------------------------------------------------------------------------
399 
400 template<class EventHandler>
401 void ParseEngine<EventHandler>::_reset()
402 {
403  m_pending_anchors = {};
404  m_pending_tags = {};
405  m_doc_empty = true;
406  m_was_inside_qmrk = false;
407  m_encoding = NOBOM;
408  if(m_options.locations())
409  {
410  _prepare_locations();
411  }
412 }
413 
414 
415 //-----------------------------------------------------------------------------
416 
417 template<class EventHandler>
418 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
419 {
420  #define _ryml_relocate(s) \
421  if((s).is_sub(prev_arena)) \
422  { \
423  (s).str = next_arena.str + ((s).str - prev_arena.str); \
424  }
425  _ryml_relocate(m_buf);
426  _ryml_relocate(m_newline_offsets_buf);
427  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
428  _ryml_relocate(m_pending_tags.annotations[i].str);
429  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
430  _ryml_relocate(m_pending_anchors.annotations[i].str);
431  #undef _ryml_relocate
432 }
433 
434 template<class EventHandler>
435 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
436 {
437  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
438 }
439 
440 
441 //-----------------------------------------------------------------------------
442 
443 template<class EventHandler>
444 template<class DumpFn>
445 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
446 {
447  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
448  auto const& lc = st->line_contents;
449  csubstr contents = lc.stripped;
450  if(contents.len)
451  {
452  // print the yaml src line
453  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
454  if(m_file.len)
455  {
456  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
457  offs += m_file.len + 1;
458  }
459  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
460  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
461  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
462  detail::_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
463  // highlight the remaining portion of the previous line
464  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
465  size_t lastcol = firstcol + lc.rem.len;
466  for(size_t i = 0; i < offs + firstcol; ++i)
467  std::forward<DumpFn>(dumpfn)(" ");
468  std::forward<DumpFn>(dumpfn)("^");
469  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
470  std::forward<DumpFn>(dumpfn)("~");
471  detail::_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
472  }
473  else
474  {
475  std::forward<DumpFn>(dumpfn)("\n");
476  }
477 
478 #ifdef RYML_DBG
479  // next line: print the state flags
480  {
481  char flagbuf_[128];
482  detail::_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
483  }
484 #endif
485 }
486 
487 
488 //-----------------------------------------------------------------------------
489 
490 template<class EventHandler>
491 template<class ...Args>
492 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
493 {
494  char errmsg[RYML_ERRMSG_SIZE];
495  detail::_SubstrWriter writer(errmsg);
496  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
497  detail::_dump(dumpfn, fmt, args...);
498  writer.append('\n');
499  _fmt_msg(dumpfn);
500  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
501  m_evt_handler->cancel_parse();
502  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
503 }
504 
505 
506 //-----------------------------------------------------------------------------
507 #ifdef RYML_DBG
508 template<class EventHandler>
509 template<class ...Args>
510 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
511 {
512  if(_dbg_enabled())
513  {
514  auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
515  detail::_dump(dumpfn, fmt, args...);
516  dumpfn("\n");
517  _fmt_msg(dumpfn);
518  }
519 }
520 #endif
521 
522 
523 //-----------------------------------------------------------------------------
524 template<class EventHandler>
525 bool ParseEngine<EventHandler>::_finished_file() const
526 {
527  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
528  if(ret)
529  {
530  _c4dbgp("finished file!!!");
531  }
532  return ret;
533 }
534 
535 template<class EventHandler>
536 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
537 {
538  return m_evt_handler->m_curr->line_contents.rem.empty();
539 }
540 
541 
542 //-----------------------------------------------------------------------------
543 
544 template<class EventHandler>
545 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
546 {
547  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
548  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
549  {
550  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
551  if(pos == npos)
552  pos = rem.len; // maybe the line is just all whitespace
553  _c4dbgpf("skip {} whitespace characters", pos);
554  _line_progressed(pos);
555  }
556 }
557 
558 template<class EventHandler>
559 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
560 {
561  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
562  if(rem.len && rem.str[0] == c)
563  {
564  size_t pos = rem.first_not_of(c);
565  if(pos == npos)
566  pos = rem.len; // maybe the line is just all c
567  _c4dbgpf("skip {}x'{}'", pos, c);
568  _line_progressed(pos);
569  }
570 }
571 
572 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
573 template<class EventHandler>
574 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
575 {
576  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
577  if(rem.len && rem.str[0] == c)
578  {
579  size_t pos = rem.first_not_of(c);
580  if(pos == npos)
581  pos = rem.len; // maybe the line is just all c
582  if(pos > max_to_skip)
583  pos = max_to_skip;
584  _c4dbgpf("skip {}x'{}'", pos, c);
585  _line_progressed(pos);
586  }
587 }
588 #endif
589 
590 template<class EventHandler>
591 template<size_t N>
592 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
593 {
594  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
595  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
596  if(pos == npos)
597  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
598  _c4dbgpf("skip {} characters", pos);
599  _line_progressed(pos);
600 }
601 
602 template<class EventHandler>
603 void ParseEngine<EventHandler>::_skip_comment()
604 {
605  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
606  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
607  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
608  csubstr full = m_evt_handler->m_curr->line_contents.full;
609  // raise an error if the comment is not preceded by whitespace
610  if(!full.begins_with('#'))
611  {
612  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
613  const char c = full[(size_t)(rem.str - full.str - 1)];
614  if(C4_UNLIKELY(c != ' ' && c != '\t'))
615  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
616  }
617  else
618  {
619  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
620  }
621  _c4dbgpf("comment was '{}'", rem);
622  _line_progressed(rem.len);
623 }
624 
625 template<class EventHandler>
626 void ParseEngine<EventHandler>::_maybe_skip_comment()
627 {
628  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
629  if(s.begins_with('#'))
630  {
631  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
632  _skip_comment();
633  }
634 }
635 
636 template<class EventHandler>
637 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
638 {
639  if(m_evt_handler->m_curr->line_contents.rem.len)
640  {
641  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
642  {
643  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
644  if(pos == npos)
645  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
646  _c4dbgpf("skip {}x'{}'", pos, ' ');
647  _line_progressed(pos);
648  }
649  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
650  {
651  _c4dbgp("found ':' colon next");
652  _line_progressed(1);
653  return true;
654  }
655  }
656  return false;
657 }
658 
659 template<class EventHandler>
660 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
661 {
662  if(m_evt_handler->m_curr->line_contents.rem.len)
663  {
664  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
665  {
666  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
667  if(pos == npos)
668  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
669  _c4dbgpf("skip {}x'{}'", pos, ' ');
670  _line_progressed(pos);
671  }
672  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
673  {
674  _c4dbgp("found ',' comma next");
675  _line_progressed(1);
676  return true;
677  }
678  }
679  return false;
680 }
681 
682 
683 //-----------------------------------------------------------------------------
684 
685 template<class EventHandler>
686 csubstr ParseEngine<EventHandler>::_scan_anchor()
687 {
688  csubstr s = m_evt_handler->m_curr->line_contents.rem;
689  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
690  csubstr anchor = s.range(1, s.first_of(' '));
691  _line_progressed(1u + anchor.len);
692  _maybe_skipchars(' ');
693  return anchor;
694 }
695 
696 template<class EventHandler>
697 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
698 {
699  csubstr s = m_evt_handler->m_curr->line_contents.rem;
700  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
701  csubstr ref = s.first(s.first_of(",] :"));
702  _line_progressed(ref.len);
703  return ref;
704 }
705 
706 template<class EventHandler>
707 csubstr ParseEngine<EventHandler>::_scan_ref_map()
708 {
709  csubstr s = m_evt_handler->m_curr->line_contents.rem;
710  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
711  csubstr ref = s.first(s.first_of(",} "));
712  _line_progressed(ref.len);
713  return ref;
714 }
715 
716 template<class EventHandler>
717 csubstr ParseEngine<EventHandler>::_scan_tag()
718 {
719  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
720  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
721  csubstr t;
722  if(rem.begins_with("!!"))
723  {
724  _c4dbgp("begins with '!!'");
725  if(has_any(FLOW))
726  t = rem.left_of(rem.first_of(" ,"));
727  else
728  t = rem.left_of(rem.first_of(' '));
729  }
730  else if(rem.begins_with("!<"))
731  {
732  _c4dbgp("begins with '!<'");
733  t = rem.left_of(rem.first_of('>'), true);
734  }
735  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
736  else if(rem.begins_with("!h!"))
737  {
738  _c4dbgp("begins with '!h!'");
739  t = rem.left_of(rem.first_of(' '));
740  }
741  #endif
742  else
743  {
744  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
745  _c4dbgp("begins with '!'");
746  if(has_any(FLOW))
747  t = rem.left_of(rem.first_of(" ,"));
748  else
749  t = rem.left_of(rem.first_of(' '));
750  }
751  _line_progressed(t.len);
752  _maybe_skip_whitespace_tokens();
753  return t;
754 }
755 
756 
757 //-----------------------------------------------------------------------------
758 
759 template<class EventHandler>
760 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
761 {
762  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
763 
764  // it's not a scalar if it starts with any of these characters:
765  switch(s.str[0])
766  {
767  // these are all legal tokens which mean no scalar is starting:
768  case '[':
769  case ']':
770  case '{':
771  case '}':
772  case '!':
773  case '&':
774  case '*':
775  case '|':
776  case '>':
777  case '#':
778  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
779  return false;
780  // '-' and ':' are illegal at the beginning if not followed by a scalar character
781  case '-':
782  case ':':
783  if(s.len > 1)
784  {
785  switch(s.str[1])
786  {
787  case '\n':
788  case '\r':
789  case '{':
790  case '[':
791  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
792  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
793  break;
794  case ' ':
795  case '}':
796  case ']':
797  if(s.str[0] == ':')
798  {
799  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
800  return false;
801  }
802  break;
803  default:
804  break;
805  }
806  }
807  else
808  {
809  return false;
810  }
811  break;
812  case '?':
813  if(s.len > 1)
814  {
815  switch(s.str[1])
816  {
817  case ' ':
818  case '\n':
819  case '\r':
820  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
821  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
822  return false;
823  case '{':
824  case '}':
825  case '[':
826  case ']':
827  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
828  break;
829  default:
830  break;
831  }
832  }
833  else
834  {
835  return false;
836  }
837  break;
838  // everything else is a legal starting character
839  default:
840  break;
841  }
842 
843  return true;
844 }
845 
/** Scan a plain (unquoted) scalar while reading a value in a flow
 * sequence.
 *
 * Starting at the current position, characters are examined one line
 * at a time until a terminator is found: ',' or ']', a '#' at the
 * start of the scanned text or preceded by a space (or a tab with
 * RYML_WITH_TAB_TOKENS), or a ':' followed by a space or ',' (or a tab
 * with RYML_WITH_TAB_TOKENS). The scan also stops when the file ends.
 * '[', '{' and '}' inside the scalar are reported as errors.
 *
 * @param sc receives the result on success: `scalar` is the range of
 *        the source buffer from the starting offset to the offset
 *        where the scan stopped, with trailing spaces removed (and
 *        tabs, with RYML_WITH_TAB_TOKENS); `needs_filter` is true when
 *        the scan went past the first line.
 * @return true when a scalar was scanned; false when no scalar starts
 *         here, in which case @p sc is left untouched. */
846 template<class EventHandler>
847 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
848 {
849  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
850  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
851  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
852  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
853  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
854 
855  substr s = m_evt_handler->m_curr->line_contents.rem;
856  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
857  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
858 
859  if(!s.len)
860  return false;
861 
862  if(!_is_valid_start_scalar_plain_flow(s))
863  return false;
864 
865  _c4dbgp("scanning seqflow scalar...");
866 
    // the scalar is later taken from the buffer, starting at this offset
867  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
868  bool needs_filter = false;
    // one iteration per line of the scalar
869  while(true)
870  {
871  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
872  for(size_t i = 0; i < s.len; ++i)
873  {
874  const char c = s.str[i];
875  switch(c)
876  {
877  case ',':
878  _c4dbgpf("found terminating character at {}: '{}'", i, c);
879  _line_progressed(i);
    // NOTE(review): the line was already advanced by i just above, so if
    // _line_progressed() moves pos.offset by its argument (confirm), i is
    // counted twice here; the test still fails only for i == 0 on the
    // first line.
880  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
881  {
882  goto ended_scalar;
883  }
884  else
885  {
886  _c4dbgp("at the beginning. no scalar here.");
887  return false;
888  }
889  break;
890  case ']':
891  _c4dbgpf("found terminating character at {}: '{}'", i, c);
892  _line_progressed(i);
893  goto ended_scalar;
894  break;
895  case '#':
896  _c4dbgp("found suspicious '#'");
    // '#' ends the scalar only at the start of the scanned text or after
    // whitespace; otherwise it is part of the scalar
897  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
898  {
899  _c4dbgpf("found terminating character at {}: '{}'", i, c);
900  _line_progressed(i);
901  goto ended_scalar;
902  }
903  break;
904  case ':':
905  _c4dbgp("found suspicious ':'");
906  if(s.len > i+1)
907  {
908  const char next = s.str[i+1];
909  _c4dbgpf("next char is '{}'", _c4prc(next));
910  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
911  {
912  _c4dbgp("map starting!");
    // here the line has not been advanced yet, so offset + i is the
    // position of the ':'
913  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
914  {
915  _c4dbgp("scalar finished!");
916  _line_progressed(i);
917  goto ended_scalar;
918  }
919  else
920  {
921  _c4dbgp("at the beginning. no scalar here.");
922  return false;
923  }
924  }
925  else
926  {
927  _c4dbgp("it's a scalar indeed.");
928  ++i; // skip the next char
929  }
930  }
931  else if(s.len == i+1)
932  {
933  _c4dbgp("':' at line end. map starting!");
934  return false;
935  }
936  break;
937  case '[':
938  case '{':
939  case '}':
940  _line_progressed(i);
941  _c4err("invalid character: '{}'", c); // noreturn
942  default:
943  ;
944  }
945  }
    // no terminator in this line: consume it and continue on the next
946  _line_progressed(s.len);
947  if(!_finished_file())
948  {
949  _c4dbgp("next line!");
950  _line_ended();
951  _scan_line();
952  }
953  else
954  {
955  _c4dbgp("file finished!");
956  goto ended_scalar;
957  }
958  s = m_evt_handler->m_curr->line_contents.rem;
    // the scalar spans more than one line
959  needs_filter = true;
960  }
961 
962 ended_scalar:
963 
964  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
965  sc->needs_filter = needs_filter;
966 
967  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
968 
969  return true;
970 }
971 
/** Scan a plain (unquoted) scalar while reading a key or a value in a
 * flow map (or in an implicit map inside a flow sequence).
 *
 * Starting at the current position, characters are examined one line
 * at a time until a terminator is found: ',' or '}', a ':' at the end
 * of the line or followed by ' ', ',' or '}' (or a tab with
 * RYML_WITH_TAB_TOKENS), a ']' when in RSEQIMAP state, or a '#' at the
 * start of the scanned text or preceded by a space (or a tab with
 * RYML_WITH_TAB_TOKENS). The scan also stops when the file ends.
 * '{' and '[' inside the scalar, and ']' outside of RSEQIMAP state,
 * are reported as errors.
 *
 * @param sc receives the result: `scalar` is the range of the source
 *        buffer from the starting offset to the offset where the scan
 *        stopped, with trailing spaces and newline characters removed
 *        (and tabs, with RYML_WITH_TAB_TOKENS); `needs_filter` is true
 *        when the scan went past the first line.
 * @return true when the resulting scalar is not empty. False is also
 *         returned, with @p sc untouched, when the line is empty or no
 *         scalar can start here. */
972 template<class EventHandler>
973 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
974 {
975  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
976  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
977  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
978  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
979  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
980 
981  substr s = m_evt_handler->m_curr->line_contents.rem;
982  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
983 
984  if(!s.len)
985  return false;
986 
987  if(!_is_valid_start_scalar_plain_flow(s))
988  return false;
989 
990  _c4dbgp("scanning scalar...");
991 
    // the scalar is later taken from the buffer, starting at this offset
992  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
993  bool needs_filter = false;
    // one iteration per line of the scalar
994  while(true)
995  {
996  for(size_t i = 0; i < s.len; ++i)
997  {
998  const char c = s.str[i];
999  switch(c)
1000  {
1001  case ',':
1002  case '}':
1003  _line_progressed(i);
1004  _c4dbgpf("found terminating character: '{}'", c);
1005  goto ended_scalar;
1006  case ':':
    // ':' ends the scalar only at the end of the line or before one of
    // these characters; otherwise it is part of the scalar
1007  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
1008  {
1009  _line_progressed(i);
1010  _c4dbgpf("found terminating character: '{}'", c);
1011  goto ended_scalar;
1012  }
1013  break;
1014  case '{':
1015  case '[':
1016  _line_progressed(i);
1017  _c4err("invalid character: '{}'", c); // noreturn
1018  break;
1019  case ']':
1020  _line_progressed(i);
    // ']' is a terminator only for an implicit map inside a sequence
1021  if(has_any(RSEQIMAP))
1022  goto ended_scalar;
1023  else
1024  _c4err("invalid character: '{}'", c); // noreturn
1025  break;
1026  case '#':
    // '#' ends the scalar only at the start of the scanned text or after
    // whitespace; otherwise it is part of the scalar
1027  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
1028  {
1029  _line_progressed(i);
1030  _c4dbgpf("found terminating character: '{}'", c);
1031  goto ended_scalar;
1032  }
1033  break;
1034  default:
1035  ;
1036  }
1037  }
    // NOTE(review): this debug message is emitted before it is known
    // whether there is a next line, and it is repeated just below.
1038  _c4dbgp("next line!");
    // no terminator in this line: consume it and continue on the next
1039  _line_progressed(s.len);
1040  if(!_finished_file())
1041  {
1042  _c4dbgp("next line!");
1043  _line_ended();
1044  _scan_line();
1045  }
1046  else
1047  {
1048  _c4dbgp("file finished!");
1049  goto ended_scalar;
1050  }
1051  s = m_evt_handler->m_curr->line_contents.rem;
    // the scalar spans more than one line
1052  needs_filter = true;
1053  }
1054 
1055 ended_scalar:
1056 
1057  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1058  sc->needs_filter = needs_filter;
1059 
1060  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1061 
    // unlike the early returns above, sc has been written even when this
    // returns false
1062  return sc->scalar.len > 0u;
1063 }
1064 
1065 template<class EventHandler>
1066 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1067 {
1068  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1069  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1070  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1071  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1072 
1073  substr s = m_evt_handler->m_curr->line_contents.rem;
1074  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1075 
1076  if(!s.len)
1077  return false;
1078 
1079  _c4dbgp("scanning scalar...");
1080 
1081  switch(s.str[0])
1082  {
1083  case ']':
1084  case '{':
1085  case ',':
1086  _c4dbgp("not a scalar.");
1087  return false;
1088  }
1089 
1090  {
1091  const size_t len = _is_special_json_scalar(s);
1092  if(len)
1093  {
1094  sc->scalar = s.first(len);
1095  sc->needs_filter = false;
1096  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1097  _line_progressed(len);
1098  return true;
1099  }
1100  }
1101 
1102  // must be a number
1103  size_t i = 0;
1104  for( ; i < s.len; ++i)
1105  {
1106  const char c = s.str[i];
1107  switch(c)
1108  {
1109  case ',':
1110  case ']':
1111  case ' ':
1112  case '\t':
1113  _c4dbgpf("found terminating character: '{}'", c);
1114  goto ended_scalar;
1115  case '#':
1116  if(!i || s.str[i-1] == ' ')
1117  {
1118  _c4dbgpf("found terminating character: '{}'", c);
1119  goto ended_scalar;
1120  }
1121  break;
1122  default:
1123  ;
1124  }
1125  }
1126 
1127 ended_scalar:
1128 
1129  if(C4_LIKELY(i > 0))
1130  {
1131  _line_progressed(i);
1132  sc->scalar = s.first(i);
1133  sc->needs_filter = false;
1134  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1135  return true;
1136  }
1137 
1138  return false;
1139 }
1140 
1141 template<class EventHandler>
1142 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1143 {
1144  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1145  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1146  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1147  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1148  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1149 
1150  substr s = m_evt_handler->m_curr->line_contents.rem;
1151  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1152 
1153  if(!s.len)
1154  return false;
1155 
1156  _c4dbgp("scanning scalar...");
1157 
1158  {
1159  const size_t len = _is_special_json_scalar(s);
1160  if(len)
1161  {
1162  sc->scalar = s.first(len);
1163  sc->needs_filter = false;
1164  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1165  _line_progressed(len);
1166  return true;
1167  }
1168  }
1169 
1170  // must be a number
1171  size_t i = 0;
1172  for( ; i < s.len; ++i)
1173  {
1174  const char c = s.str[i];
1175  switch(c)
1176  {
1177  case ',':
1178  case '}':
1179  case ' ':
1180  case '\t':
1181  _c4dbgpf("found terminating character: '{}'", c);
1182  goto ended_scalar;
1183  case '#':
1184  if(!i || s.str[i-1] == ' ')
1185  {
1186  _c4dbgpf("found terminating character: '{}'", c);
1187  goto ended_scalar;
1188  }
1189  break;
1190  default:
1191  ;
1192  }
1193  }
1194 
1195 ended_scalar:
1196 
1197  if(C4_LIKELY(i > 0))
1198  {
1199  _line_progressed(i);
1200  sc->scalar = s.first(i);
1201  sc->needs_filter = false;
1202  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1203  return true;
1204  }
1205 
1206  return false;
1207 }
1208 
1209 template<class EventHandler>
1210 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1211 {
1212  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1213  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1214 }
1215 
1216 template<class EventHandler>
1217 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1218 {
1219  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1220  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1221 }
1222 
1223 template<class EventHandler>
1224 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1225 {
1226  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1227  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1228  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
1229 
1230  substr s = m_evt_handler->m_curr->line_contents.rem;
1231  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1232 
1233  if(!s.len)
1234  return false;
1235 
1236  switch(s.str[0])
1237  {
1238  case '-':
1239  if(_is_blck_token(s))
1240  {
1241  return false;
1242  }
1243  else if(_is_doc_begin(s))
1244  {
1245  _c4dbgp("token is doc start");
1246  return false;
1247  }
1248  break;
1249  case ':':
1250  case '?':
1251  if(_is_blck_token(s))
1252  return false;
1253  break;
1254  case '[':
1255  case '{':
1256  case '&':
1257  case '*':
1258  case '!':
1259  _RYML_WITH_TAB_TOKENS(case '\t':)
1260  return false;
1261  case '.':
1262  if(_is_doc_end(s))
1263  {
1264  _c4dbgp("token is doc end");
1265  return false;
1266  }
1267  break;
1268  }
1269 
1270  _c4dbgpf("plain scalar! indentation={}", indentation);
1271 
1272  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1273  const size_t start_line = m_evt_handler->m_curr->pos.line;
1274 
1275  bool needs_filter = false;
1276  while(true)
1277  {
1278  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1279  for(size_t i = 0; i < s.len; ++i)
1280  {
1281  const char curr = s.str[i];
1282  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1283  switch(curr)
1284  {
1285  case ':':
1286  _c4dbgpf("[{}]: got suspicious ':'", i);
1287  // are there more characters?
1288  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1289  {
1290  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1291  _line_progressed(i);
1292  // ': ' is accepted only on the first line
1293  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1294  {
1295  _c4dbgp("start line. scalar ends here");
1296  goto ended_scalar;
1297  }
1298  else
1299  {
1300  _c4err("parse error");
1301  }
1302  }
1303  else
1304  {
1305  size_t j = i;
1306  while(j + 1 < s.len && s.str[j+1] == ':')
1307  {
1308  _c4dbgp("skip colon");
1309  ++j;
1310  }
1311  i = j > i ? j-1 : i;
1312  _c4dbgp("nothing to see here");
1313  }
1314  break;
1315  case '#':
1316  _c4dbgp("got suspicious '#'");
1317  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1318  {
1319  _c4dbgp("comment! scalar ends here");
1320  _line_progressed(i);
1321  goto ended_scalar;
1322  }
1323  else
1324  {
1325  _c4dbgp("nothing to see here");
1326  }
1327  break;
1328  }
1329  }
1330  _line_progressed(s.len);
1331  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1332  next_peeked = next_peeked.trimr("\n\r");
1333  const size_t next_indentation = next_peeked.first_not_of(' ');
1334  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1335  if(next_indentation < indentation)
1336  {
1337  _c4dbgp("smaller indentation! scalar ended");
1338  goto ended_scalar;
1339  }
1340  else if(next_indentation == 0 && next_peeked.len > 0)
1341  {
1342  const char first = next_peeked.str[0];
1343  switch(first)
1344  {
1345  case '-':
1346  next_peeked = next_peeked.trimr("\n\r");
1347  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1348  if(_is_doc_begin_token(next_peeked))
1349  {
1350  _c4dbgp("doc begin! scalar ended");
1351  goto ended_scalar;
1352  }
1353  break;
1354  case '.':
1355  next_peeked = next_peeked.trimr("\n\r");
1356  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1357  if(_is_doc_end_token(next_peeked))
1358  {
1359  _c4dbgp("doc end! scalar ended");
1360  goto ended_scalar;
1361  }
1362  break;
1363  }
1364  }
1365  // load with next line
1366  _c4dbgp("next line!");
1367  if(!_finished_file())
1368  {
1369  _c4dbgp("next line!");
1370  _line_ended();
1371  _scan_line();
1372  }
1373  else
1374  {
1375  _c4dbgp("file finished!");
1376  goto ended_scalar;
1377  }
1378  s = m_evt_handler->m_curr->line_contents.rem;
1379  needs_filter = true;
1380  }
1381 
1382 ended_scalar:
1383 
1384  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1385  sc->needs_filter = needs_filter;
1386 
1387  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1388 
1389  return true;
1390 }
1391 
1392 template<class EventHandler>
1393 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1394 {
1395  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1396  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1397  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1398  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1399  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1400  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1401  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1402 }
1403 
1404 template<class EventHandler>
1405 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1406 {
1407  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1408  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1409  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1410  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1411  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1412  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1413 }
1414 
1415 template<class EventHandler>
1416 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1417 {
1418  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1419  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1420 }
1421 
1422 
1423 //-----------------------------------------------------------------------------
1424 
1425 template<class EventHandler>
1426 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1427 {
1428  substr rem{}; // declare here because of the goto
1429  size_t nlpos{}; // declare here because of the goto
1430  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1431  if(pos >= m_buf.len)
1432  goto next_is_empty;
1433 
1434  // look for the next newline chars, and jump to the right of those
1435  rem = from_next_line(m_buf.sub(pos));
1436  if(rem.empty())
1437  goto next_is_empty;
1438 
1439  // now get everything up to and including the following newline chars
1440  nlpos = rem.first_of("\r\n");
1441  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1442  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1443  rem = rem.left_of(nlpos, /*include_pos*/true);
1444 
1445  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1446  return rem;
1447 
1448 next_is_empty:
1449  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1450  return {};
1451 }
1452 
1453 //-----------------------------------------------------------------------------
1454 
1455 template<class EventHandler>
1456 void ParseEngine<EventHandler>::_scan_line()
1457 {
1458  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1459  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1460  else
1461  m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
1462 }
1463 
1464 template<class EventHandler>
1465 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1466 {
1467  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1468  m_evt_handler->m_curr->pos.offset += ahead;
1469  m_evt_handler->m_curr->pos.col += ahead;
1470  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1471  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1472 }
1473 
1474 template<class EventHandler>
1475 void ParseEngine<EventHandler>::_line_ended()
1476 {
1477  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1478  m_evt_handler->m_curr->pos.line,
1479  m_evt_handler->m_curr->line_contents.full.len,
1480  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1481  m_evt_handler->m_curr->pos.col, 1);
1482  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1483  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1484  ++m_evt_handler->m_curr->pos.line;
1485  m_evt_handler->m_curr->pos.col = 1;
1486 }
1487 
1488 template<class EventHandler>
1489 void ParseEngine<EventHandler>::_line_ended_undo()
1490 {
1491  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1492  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1493  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1494  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1495  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1496  m_evt_handler->m_curr->pos.offset -= delta;
1497  --m_evt_handler->m_curr->pos.line;
1498  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1499  // don't forget to undo also the changes to the remainder of the line
1500  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1501  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1502 }
1503 
1504 
1505 //-----------------------------------------------------------------------------
1506 template<class EventHandler>
1507 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1508 {
1509  m_evt_handler->m_curr->indref = indentation;
1510  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1511 }
1512 
1513 template<class EventHandler>
1514 void ParseEngine<EventHandler>::_save_indentation()
1515 {
1516  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1517  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1518  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1519 }
1520 
1521 
1522 //-----------------------------------------------------------------------------
1523 
1524 template<class EventHandler>
1525 void ParseEngine<EventHandler>::_end_map_blck()
1526 {
1527  _c4dbgp("mapblck: end");
1528  if(has_any(RKCL|RVAL))
1529  {
1530  _c4dbgp("mapblck: set missing val");
1531  _handle_annotations_before_blck_val_scalar();
1532  m_evt_handler->set_val_scalar_plain_empty();
1533  }
1534  else if(has_any(QMRK))
1535  {
1536  _c4dbgp("mapblck: set missing keyval");
1537  _handle_annotations_before_blck_key_scalar();
1538  m_evt_handler->set_key_scalar_plain_empty();
1539  _handle_annotations_before_blck_val_scalar();
1540  m_evt_handler->set_val_scalar_plain_empty();
1541  }
1542  m_evt_handler->end_map();
1543 }
1544 
1545 template<class EventHandler>
1546 void ParseEngine<EventHandler>::_end_seq_blck()
1547 {
1548  if(has_any(RVAL))
1549  {
1550  _c4dbgp("seqblck: set missing val");
1551  _handle_annotations_before_blck_val_scalar();
1552  m_evt_handler->set_val_scalar_plain_empty();
1553  }
1554  m_evt_handler->end_seq();
1555 }
1556 
1557 template<class EventHandler>
1558 void ParseEngine<EventHandler>::_end2_map()
1559 {
1560  _c4dbgp("map: end");
1561  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1562  if(has_any(BLCK))
1563  {
1564  _end_map_blck();
1565  }
1566  else
1567  {
1568  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1569  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1570  m_evt_handler->_pop();
1571  }
1572 }
1573 
1574 template<class EventHandler>
1575 void ParseEngine<EventHandler>::_end2_seq()
1576 {
1577  _c4dbgp("seq: end");
1578  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1579  if(has_any(BLCK))
1580  {
1581  _end_seq_blck();
1582  }
1583  else
1584  {
1585  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1586  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1587  m_evt_handler->_pop();
1588  }
1589 }
1590 
1591 template<class EventHandler>
1592 void ParseEngine<EventHandler>::_begin2_doc()
1593 {
1594  m_doc_empty = true;
1595  add_flags(RDOC);
1596  m_evt_handler->begin_doc();
1597  m_evt_handler->m_curr->indref = 0; // ?
1598 }
1599 
1600 template<class EventHandler>
1601 void ParseEngine<EventHandler>::_begin2_doc_expl()
1602 {
1603  m_doc_empty = true;
1604  add_flags(RDOC);
1605  m_evt_handler->begin_doc_expl();
1606  m_evt_handler->m_curr->indref = 0; // ?
1607 }
1608 
1609 template<class EventHandler>
1610 void ParseEngine<EventHandler>::_end2_doc()
1611 {
1612  _c4dbgp("doc: end");
1613  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1614  if(m_doc_empty)
1615  {
1616  _c4dbgp("doc was empty; add empty val");
1617  m_evt_handler->set_val_scalar_plain_empty();
1618  }
1619  m_evt_handler->end_doc();
1620 }
1621 
1622 template<class EventHandler>
1623 void ParseEngine<EventHandler>::_end2_doc_expl()
1624 {
1625  _c4dbgp("doc: end");
1626  if(m_doc_empty)
1627  {
1628  _c4dbgp("doc: no children; add empty val");
1629  m_evt_handler->set_val_scalar_plain_empty();
1630  }
1631  m_evt_handler->end_doc_expl();
1632 }
1633 
1634 template<class EventHandler>
1635 void ParseEngine<EventHandler>::_maybe_begin_doc()
1636 {
1637  if(has_none(RDOC))
1638  {
1639  _c4dbgp("doc must be started");
1640  _begin2_doc();
1641  }
1642 }
1643 template<class EventHandler>
1644 void ParseEngine<EventHandler>::_maybe_end_doc()
1645 {
1646  if(has_any(RDOC))
1647  {
1648  _c4dbgp("doc must be finished");
1649  _end2_doc();
1650  }
1651 }
1652 
1653 template<class EventHandler>
1654 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1655 {
1656  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1657  if(m_evt_handler->m_stack[0].flags & RDOC)
1658  {
1659  _c4dbgp("root is RDOC");
1660  if(m_evt_handler->m_curr->level != 0)
1661  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1662  }
1663  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1664  {
1665  _c4dbgp("root is STREAM");
1666  if(m_evt_handler->m_curr->level != 1)
1667  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1668  }
1669  else
1670  {
1671  _c4err("internal error");
1672  }
1673  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1674 }
1675 
1676 template<class EventHandler>
1677 void ParseEngine<EventHandler>::_end_doc_suddenly()
1678 {
1679  _c4dbgp("end doc suddenly");
1680  _end_doc_suddenly__pop();
1681  _end2_doc_expl();
1682  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1683 }
1684 
1685 template<class EventHandler>
1686 void ParseEngine<EventHandler>::_start_doc_suddenly()
1687 {
1688  _c4dbgp("start doc suddenly");
1689  _end_doc_suddenly__pop();
1690  _end2_doc();
1691  _begin2_doc_expl();
1692 }
1693 
1694 template<class EventHandler>
1695 void ParseEngine<EventHandler>::_end_stream()
1696 {
1697  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1698  if(has_all(RSEQ|FLOW))
1699  _c4err("missing terminating ]");
1700  else if(has_all(RMAP|FLOW))
1701  _c4err("missing terminating }");
1702  if(m_evt_handler->m_stack.size() > 1)
1703  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1704  if(has_all(RDOC))
1705  {
1706  _end2_doc();
1707  }
1708  else if(has_all(RTOP|RUNK))
1709  {
1710  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1711  {
1712  if(m_doc_empty)
1713  {
1714  m_evt_handler->begin_doc();
1715  _handle_annotations_before_blck_val_scalar();
1716  m_evt_handler->set_val_scalar_plain_empty();
1717  m_evt_handler->end_doc();
1718  }
1719  }
1720  }
1721  m_evt_handler->end_stream();
1722 }
1723 
1724 
1725 template<class EventHandler>
1726 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1727 {
1728  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1729  while(m_evt_handler->m_curr != popto)
1730  {
1731  if(has_any(RSEQ))
1732  {
1733  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1734  _end2_seq();
1735  }
1736  else if(has_any(RMAP))
1737  {
1738  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1739  _end2_map();
1740  }
1741  else
1742  {
1743  break;
1744  }
1745  }
1746  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1747 }
1748 
1749 template<class EventHandler>
1750 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1751 {
1752  // search the stack frame to jump to based on its indentation
1753  using state_type = typename EventHandler::state;
1754  state_type const* popto = nullptr;
1755  auto &stack = m_evt_handler->m_stack;
1756  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1757  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1758  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1759  #ifdef RYML_DBG
1760  if(_dbg_enabled())
1761  {
1762  char flagbuf_[128];
1763  for(state_type const& s : stack)
1764  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1765  }
1766  #endif
1767  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1768  {
1769  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1770  if(s->indref == ind)
1771  {
1772  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1773  popto = s;
1774  break;
1775  }
1776  }
1777  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1778  {
1779  _c4err("parse error: incorrect indentation?");
1780  }
1781  _handle_indentation_pop(popto);
1782 }
1783 
1784 template<class EventHandler>
1785 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1786 {
1787  // search the stack frame to jump to based on its indentation
1788  using state_type = typename EventHandler::state;
1789  auto &stack = m_evt_handler->m_stack;
1790  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1791  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1792  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1793  state_type const* popto = nullptr;
1794  #ifdef RYML_DBG
1795  char flagbuf_[128];
1796  if(_dbg_enabled())
1797  {
1798  for(state_type const& s : stack)
1799  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1800  }
1801  #endif
1802  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1803  {
1804  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1805  if(s->indref < ind)
1806  {
1807  break;
1808  }
1809  else if(s->indref == ind)
1810  {
1811  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
1812  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1813  {
1814  break;
1815  }
1816  popto = s;
1817  if(has_all(RSEQ|BLCK, s))
1818  {
1819  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1820  const size_t first = rem.first_not_of(' ');
1821  _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
1822  rem = rem.right_of(first, true);
1823  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1824  if(rem.begins_with('-') && _is_blck_token(rem))
1825  {
1826  _c4dbgp("parent was indentless seq");
1827  break;
1828  }
1829  }
1830  }
1831  }
1832  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1833  {
1834  _c4err("parse error: incorrect indentation?");
1835  }
1836  _handle_indentation_pop(popto);
1837 }
1838 
1839 
1840 //-----------------------------------------------------------------------------
1841 template<class EventHandler>
1842 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1843 {
1844  // quoted scalars can spread over multiple lines!
1845  // nice explanation here: http://yaml-multiline.info/
1846 
1847  // a span to the end of the file
1848  size_t b = m_evt_handler->m_curr->pos.offset;
1849  substr s = m_buf.sub(b);
1850  if(s.begins_with(' '))
1851  {
1852  s = s.triml(' ');
1853  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1854  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1855  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1856  }
1857  b = m_evt_handler->m_curr->pos.offset; // take this into account
1858  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1859 
1860  // skip the opening quote
1861  _line_progressed(1);
1862  s = s.sub(1);
1863 
1864  bool needs_filter = false;
1865 
1866  size_t numlines = 1; // we already have one line
1867  size_t pos = npos; // find the pos of the matching quote
1868  while( ! _finished_file())
1869  {
1870  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1871  bool line_is_blank = true;
1872  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1873  for(size_t i = 0; i < line.len; ++i)
1874  {
1875  const char curr = line.str[i];
1876  if(curr == '\'') // single quotes are escaped with two single quotes
1877  {
1878  const char next = i+1 < line.len ? line.str[i+1] : '~';
1879  if(next != '\'') // so just look for the first quote
1880  { // without another after it
1881  pos = i;
1882  break;
1883  }
1884  else
1885  {
1886  needs_filter = true; // needs filter to remove escaped quotes
1887  ++i; // skip the escaped quote
1888  }
1889  }
1890  else if(curr != ' ')
1891  {
1892  line_is_blank = false;
1893  }
1894  }
1895 
1896  // leading whitespace also needs filtering
1897  needs_filter = needs_filter
1898  || (numlines > 1)
1899  || line_is_blank
1900  || (_at_line_begin() && line.begins_with(' '));
1901 
1902  if(pos == npos)
1903  {
1904  _line_progressed(line.len);
1905  ++numlines;
1906  }
1907  else
1908  {
1909  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1910  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1911  _line_progressed(pos + 1); // progress beyond the quote
1912  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1913  break;
1914  }
1915 
1916  _line_ended();
1917  _scan_line();
1918  }
1919 
1920  if(pos == npos)
1921  {
1922  _c4err("reached end of file while looking for closing quote");
1923  }
1924  else
1925  {
1926  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1927  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1928  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
1929  s = s.sub(0, pos-1);
1930  }
1931 
1932  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1933 
1934  return ScannedScalar { s, needs_filter };
1935 }
1936 
1937 
1938 //-----------------------------------------------------------------------------
1939 template<class EventHandler>
1940 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1941 {
1942  // quoted scalars can spread over multiple lines!
1943  // nice explanation here: http://yaml-multiline.info/
1944 
1945  // a span to the end of the file
1946  size_t b = m_evt_handler->m_curr->pos.offset;
1947  substr s = m_buf.sub(b);
1948  if(s.begins_with(' '))
1949  {
1950  s = s.triml(' ');
1951  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1953  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1954  }
1955  b = m_evt_handler->m_curr->pos.offset; // take this into account
1956  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1957 
1958  // skip the opening quote
1959  _line_progressed(1);
1960  s = s.sub(1);
1961 
1962  bool needs_filter = false;
1963 
1964  size_t numlines = 1; // we already have one line
1965  size_t pos = npos; // find the pos of the matching quote
1966  while( ! _finished_file())
1967  {
1968  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1969  #if defined(__GNUC__) && __GNUC__ == 11
1970  C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
1971  #endif
1972  bool line_is_blank = true;
1973  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1974  for(size_t i = 0; i < line.len; ++i)
1975  {
1976  const char curr = line.str[i];
1977  if(curr != ' ')
1978  line_is_blank = false;
1979  // every \ is an escape
1980  if(curr == '\\')
1981  {
1982  const char next = i+1 < line.len ? line.str[i+1] : '~';
1983  needs_filter = true;
1984  if(next == '"' || next == '\\')
1985  ++i;
1986  }
1987  else if(curr == '"')
1988  {
1989  pos = i;
1990  break;
1991  }
1992  }
1993 
1994  // leading whitespace also needs filtering
1995  needs_filter = needs_filter
1996  || (numlines > 1)
1997  || line_is_blank
1998  || (_at_line_begin() && line.begins_with(' '));
1999 
2000  if(pos == npos)
2001  {
2002  _line_progressed(line.len);
2003  ++numlines;
2004  }
2005  else
2006  {
2007  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2008  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
2009  _line_progressed(pos + 1); // progress beyond the quote
2010  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
2011  break;
2012  }
2013 
2014  _line_ended();
2015  _scan_line();
2016  }
2017 
2018  if(pos == npos)
2019  {
2020  _c4err("reached end of file looking for closing quote");
2021  }
2022  else
2023  {
2024  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
2025  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
2026  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2027  s = s.sub(0, pos-1);
2028  }
2029 
2030  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2031 
2032  return ScannedScalar { s, needs_filter };
2033 }
2034 
2035 
2036 //-----------------------------------------------------------------------------
2037 template<class EventHandler>
2038 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2039 {
2040  _c4dbgpf("blck: indref={}", indref);
2041  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
2042 
2043  // nice explanation here: http://yaml-multiline.info/
2044  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2045  csubstr trimmed = s.triml(' ');
2046  if(trimmed.str > s.str)
2047  {
2048  _c4dbgp("skipping whitespace");
2049  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2050  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2051  s = trimmed;
2052  }
2053  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2054 
2055  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2056 
2057  // parse the spec
2058  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2059  size_t indentation = npos; // have to find out if no spec is given
2060  csubstr digits;
2061  if(s.len > 1)
2062  {
2063  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2064  csubstr t = s.sub(1);
2065  _c4dbgpf("blck: spec is multichar: '{}'", t);
2066  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2067  size_t pos = t.first_of("-+");
2068  _c4dbgpf("blck: spec chomp char at {}", pos);
2069  if(pos != npos)
2070  {
2071  if(t[pos] == '-')
2072  chomp = CHOMP_STRIP;
2073  else if(t[pos] == '+')
2074  chomp = CHOMP_KEEP;
2075  if(pos == 0)
2076  t = t.sub(1);
2077  else
2078  t = t.first(pos);
2079  }
2080  // from here to the end, only digits are considered
2081  digits = t.left_of(t.first_not_of("0123456789"));
2082  if( ! digits.empty())
2083  {
2084  if(C4_UNLIKELY(digits.len > 1))
2085  _c4err("parse error: invalid indentation");
2086  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2087  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2088  _c4err("parse error: could not read indentation as decimal");
2089  if(C4_UNLIKELY( ! indentation))
2090  _c4err("parse error: null indentation");
2091  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2092  indentation += m_evt_handler->m_curr->indref;
2093  }
2094  }
2095 
2096  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2097 
2098  // finish the current line
2099  _line_progressed(s.len);
2100  _line_ended();
2101  _scan_line();
2102 
2103  // start with a zero-length block, already pointing at the right place
2104  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2105  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2106 
2107  // read every full line into a raw block,
2108  // from which newlines are to be stripped as needed.
2109  //
2110  // If no explicit indentation was given, pick it from the first
2111  // non-empty line. See
2112  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2113  size_t num_lines = 0;
2114  size_t first = m_evt_handler->m_curr->pos.line;
2115  size_t provisional_indentation = npos;
2116  LineContents lc;
2117  while(( ! _finished_file()))
2118  {
2119  // peek next line, but do not advance immediately
2120  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2121  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2122  C4_DONT_OPTIMIZE(lc.rem);
2123  #endif
2124  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2125  // evaluate termination conditions
2126  if(indentation != npos)
2127  {
2128  _c4dbgpf("blck: indentation={}", indentation);
2129  // stop when the line is deindented and not empty
2130  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2131  {
2132  if(raw_block.len)
2133  {
2134  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2135  }
2136  else
2137  {
2138  _c4err("indentation decreased without any scalar");
2139  }
2140  break;
2141  }
2142  else if(indentation == 0)
2143  {
2144  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2145  if(_is_doc_token(lc.rem))
2146  {
2147  _c4dbgp("blck: stop. indentation=0 and doc ended");
2148  break;
2149  }
2150  }
2151  }
2152  else
2153  {
2154  const size_t fns = lc.stripped.first_not_of(' ');
2155  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2156  if(fns != npos) // non-empty line
2157  {
2159  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2160  _c4err("parse error");
2161  )
2162  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2163  if(provisional_indentation == npos)
2164  {
2165  if(lc.indentation < indref)
2166  {
2167  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2168  if(raw_block.len == 0)
2169  {
2170  _c4dbgp("blck: was empty, undo next line");
2171  _line_ended_undo();
2172  }
2173  break;
2174  }
2175  else if(lc.indentation == m_evt_handler->m_curr->indref)
2176  {
2177  if(has_any(RSEQ|RMAP))
2178  {
2179  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2180  break;
2181  }
2182  }
2183  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2184  indentation = lc.indentation;
2185  }
2186  else
2187  {
2188  if(lc.indentation >= provisional_indentation)
2189  {
2190  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2191  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2192  indentation = lc.indentation;
2193  }
2194  else
2195  {
2196  break;
2197  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2198  }
2199  }
2200  }
2201  else // empty line
2202  {
2203  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2204  if(provisional_indentation != npos)
2205  {
2206  if(lc.stripped.len >= provisional_indentation)
2207  {
2208  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2209  provisional_indentation = lc.stripped.len;
2210  }
2211  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2212  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2213  {
2214  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2215  provisional_indentation = lc.indentation;
2216  }
2217  #endif
2218  }
2219  else
2220  {
2221  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2222  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2223  if(provisional_indentation == npos)
2224  {
2225  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2226  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2227  }
2228  if(provisional_indentation < indref)
2229  {
2230  provisional_indentation = indref;
2231  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2232  }
2233  }
2234  }
2235  }
2236  // advance now that we know the folded scalar continues
2237  m_evt_handler->m_curr->line_contents = lc;
2238  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2239  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2240  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2241  _line_ended();
2242  ++num_lines;
2243  }
2244  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2245  C4_UNUSED(num_lines);
2246  C4_UNUSED(first);
2247 
2248  if(indentation == npos)
2249  {
2250  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2251  indentation = provisional_indentation;
2252  }
2253 
2254  if(num_lines)
2255  _line_ended_undo();
2256 
2257  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2258 
2259  sb->scalar = raw_block;
2260  sb->indentation = indentation;
2261  sb->chomp = chomp;
2262 }
2263 
2264 
2265 //-----------------------------------------------------------------------------
2266 //-----------------------------------------------------------------------------
2267 //-----------------------------------------------------------------------------
2268 /** @cond dev */
2269 
2270 // a debugging scaffold:
2271 #if 0
2272 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2273 #else
2274 #define _c4dbgfws(...)
2275 #endif
2276 
2277 template<class EventHandler>
2278 template<class FilterProcessor>
2279 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2280 {
2281  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2282  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2283 
2284  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2285  if(first_pos != npos)
2286  {
2287  const char first_char = proc.src[first_pos];
2288  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2289  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2290  {
2291  _c4dbgfws("whitespace is trailing on line", "");
2292  proc.skip(first_pos - proc.rpos);
2293  }
2294  else // a legit whitespace
2295  {
2296  proc.copy();
2297  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2298  }
2299  return true;
2300  }
2301  _c4dbgfws("whitespace is trailing on line", "");
2302  return false;
2303 }
2304 
2305 template<class EventHandler>
2306 template<class FilterProcessor>
2307 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2308 {
2309  if(!_filter_ws_handle_to_first_non_space(proc))
2310  {
2311  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2312  proc.copy(proc.src.len - proc.rpos);
2313  }
2314 }
2315 
2316 template<class EventHandler>
2317 template<class FilterProcessor>
2318 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2319 {
2320  if(!_filter_ws_handle_to_first_non_space(proc))
2321  {
2322  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2323  proc.skip(proc.src.len - proc.rpos);
2324  }
2325 }
2326 
2327 #undef _c4dbgfws
2328 
2329 
2330 //-----------------------------------------------------------------------------
2331 //-----------------------------------------------------------------------------
2332 //-----------------------------------------------------------------------------
2333 /* plain scalars */
2334 
2335 // a debugging scaffold:
2336 #if 0
2337 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2338 #else
2339 #define _c4dbgfps(fmt, ...)
2340 #endif
2341 
2342 template<class EventHandler>
2343 template<class FilterProcessor>
2344 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2345 {
2346  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2347 
2348  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2349  size_t ii = proc.rpos;
2350  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2351  if(numnl_following)
2352  {
2353  proc.set('\n', numnl_following);
2354  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2355  }
2356  else
2357  {
2358  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2359  if(ret != npos)
2360  {
2361  proc.set(' ');
2362  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2363  }
2364  else
2365  {
2366  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2367  ii = proc.src.len;
2368  }
2369  }
2370  proc.rpos = ii;
2371 }
2372 
2373 template<class EventHandler>
2374 template<class FilterProcessor>
2375 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2376 {
2377  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2378  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2379 
2380  while(proc.has_more_chars())
2381  {
2382  const char curr = proc.curr();
2383  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2384  switch(curr)
2385  {
2386  case ' ':
2387  _RYML_WITH_TAB_TOKENS(case '\t':)
2388  _c4dbgfps("whitespace", curr);
2389  _filter_ws_skip_trailing(proc);
2390  break;
2391  case '\n':
2392  _c4dbgfps("newline", curr);
2393  _filter_nl_plain(proc, /*indentation*/indentation);
2394  break;
2395  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2396  _c4dbgfps("carriage return, ignore", curr);
2397  proc.skip();
2398  break;
2399  default:
2400  proc.copy();
2401  break;
2402  }
2403  }
2404 
2405  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2406 
2407  return proc.result();
2408 }
2409 
2410 #undef _c4dbgfps
2411 
2412 
2413 template<class EventHandler>
2414 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2415 {
2416  FilterProcessorSrcDst proc(scalar, dst);
2417  return _filter_plain(proc, indentation);
2418 }
2419 
2420 template<class EventHandler>
2421 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2422 {
2423  FilterProcessorInplaceEndExtending proc(dst, cap);
2424  return _filter_plain(proc, indentation);
2425 }
2426 
2427 
2428 //-----------------------------------------------------------------------------
2429 //-----------------------------------------------------------------------------
2430 //-----------------------------------------------------------------------------
2431 /* single quoted */
2432 
2433 // a debugging scaffold:
2434 #if 0
2435 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2436 #else
2437 #define _c4dbgfsq(fmt, ...)
2438 #endif
2439 
2440 template<class EventHandler>
2441 template<class FilterProcessor>
2442 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2443 {
2444  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2445 
2446  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2447  size_t ii = proc.rpos;
2448  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2449  if(numnl_following)
2450  {
2451  proc.set('\n', numnl_following);
2452  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2453  }
2454  else
2455  {
2456  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2457  if(ret != npos)
2458  {
2459  proc.set(' ');
2460  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2461  }
2462  else
2463  {
2464  proc.set(' ');
2465  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2466  }
2467  }
2468  proc.rpos = ii;
2469 }
2470 
2471 template<class EventHandler>
2472 template<class FilterProcessor>
2473 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2474 {
2475  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2476 
2477  // from the YAML spec for double-quoted scalars:
2478  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2479  while(proc.has_more_chars())
2480  {
2481  const char curr = proc.curr();
2482  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2483  switch(curr)
2484  {
2485  case ' ':
2486  case '\t':
2487  _c4dbgfsq("whitespace", curr);
2488  _filter_ws_copy_trailing(proc);
2489  break;
2490  case '\n':
2491  _c4dbgfsq("newline", curr);
2492  _filter_nl_squoted(proc);
2493  break;
2494  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2495  _c4dbgfsq("skip cr", curr);
2496  proc.skip();
2497  break;
2498  case '\'':
2499  _c4dbgfsq("squote", curr);
2500  if(proc.next() == '\'')
2501  {
2502  _c4dbgfsq("two consecutive squotes", curr);
2503  proc.skip();
2504  proc.copy();
2505  }
2506  else
2507  {
2508  _c4err("filter error");
2509  }
2510  break;
2511  default:
2512  proc.copy();
2513  break;
2514  }
2515  }
2516 
2517  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2518 
2519  return proc.result();
2520 }
2521 
2522 #undef _c4dbgfsq
2523 
2524 template<class EventHandler>
2525 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2526 {
2527  FilterProcessorSrcDst proc(scalar, dst);
2528  return _filter_squoted(proc);
2529 }
2530 
2531 template<class EventHandler>
2532 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2533 {
2534  FilterProcessorInplaceEndExtending proc(dst, cap);
2535  return _filter_squoted(proc);
2536 }
2537 
2538 
2539 //-----------------------------------------------------------------------------
2540 //-----------------------------------------------------------------------------
2541 //-----------------------------------------------------------------------------
2542 /* double quoted */
2543 
2544 // a debugging scaffold:
2545 #if 0
2546 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2547 #else
2548 #define _c4dbgfdq(...)
2549 #endif
2550 
2551 template<class EventHandler>
2552 template<class FilterProcessor>
2553 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2554 {
2555  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2556 
2557  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2558  size_t ii = proc.rpos;
2559  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2560  if(numnl_following)
2561  {
2562  proc.set('\n', numnl_following);
2563  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2564  }
2565  else
2566  {
2567  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2568  if(ret != npos)
2569  {
2570  proc.set(' ');
2571  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2572  }
2573  else
2574  {
2575  proc.set(' ');
2576  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2577  }
2578  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2579  {
2580  _c4dbgfdq("backslash at [{}]", ii);
2581  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2582  if(next == ' ' || next == '\t')
2583  {
2584  _c4dbgfdq("extend skip to backslash", "");
2585  ++ii;
2586  }
2587  }
2588  }
2589  proc.rpos = ii;
2590 }
2591 
2592 template<class EventHandler>
2593 template<class FilterProcessor>
2594 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2595 {
2596  char next = proc.next();
2597  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2598  if(next == '\r')
2599  {
2600  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2601  {
2602  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2603  next = '\n';
2604  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2605  }
2606  }
2607 
2608  if(next == '\n')
2609  {
2610  size_t ii = proc.rpos + 2;
2611  for( ; ii < proc.src.len; ++ii)
2612  {
2613  // skip leading whitespace
2614  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2615  ;
2616  else
2617  break;
2618  }
2619  proc.skip(ii - proc.rpos);
2620  }
2621  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2622  {
2623  // escapes for json compatibility
2624  proc.translate_esc(next);
2625  _c4dbgfdq("here, used '{}'", _c4prc(next));
2626  }
2627  else if(next == '\r')
2628  {
2629  proc.skip();
2630  }
2631  else if(next == 'n')
2632  {
2633  proc.translate_esc('\n');
2634  }
2635  else if(next == 'r')
2636  {
2637  proc.translate_esc('\r');
2638  }
2639  else if(next == 't')
2640  {
2641  proc.translate_esc('\t');
2642  }
2643  else if(next == '\\')
2644  {
2645  proc.translate_esc('\\');
2646  }
2647  else if(next == 'x') // UTF8
2648  {
2649  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2650  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2651  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2652  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2653  uint8_t byteval = {};
2654  if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
2655  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2656  proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
2657  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2658  }
2659  else if(next == 'u') // UTF16
2660  {
2661  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2662  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2663  char readbuf[8];
2664  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2665  uint32_t codepoint_val = {};
2666  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2667  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2668  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2669  if(C4_UNLIKELY(numbytes == 0))
2670  _c4err("failed to decode code point={}", proc.rpos);
2671  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2672  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2673  }
2674  else if(next == 'U') // UTF32
2675  {
2676  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2677  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2678  char readbuf[8];
2679  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2680  uint32_t codepoint_val = {};
2681  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2682  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2683  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2684  if(C4_UNLIKELY(numbytes == 0))
2685  _c4err("failed to decode code point={}", proc.rpos);
2686  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2687  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2688  }
2689  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2690  else if(next == '0')
2691  {
2692  proc.translate_esc('\0');
2693  }
2694  else if(next == 'b') // backspace
2695  {
2696  proc.translate_esc('\b');
2697  }
2698  else if(next == 'f') // form feed
2699  {
2700  proc.translate_esc('\f');
2701  }
2702  else if(next == 'a') // bell character
2703  {
2704  proc.translate_esc('\a');
2705  }
2706  else if(next == 'v') // vertical tab
2707  {
2708  proc.translate_esc('\v');
2709  }
2710  else if(next == 'e') // escape character
2711  {
2712  proc.translate_esc('\x1b');
2713  }
2714  else if(next == '_') // unicode non breaking space \u00a0
2715  {
2716  // https://www.compart.com/en/unicode/U+00a0
2717  const char payload[] = {
2718  _RYML_CHCONST(-0x3e, 0xc2),
2719  _RYML_CHCONST(-0x60, 0xa0),
2720  };
2721  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2722  }
2723  else if(next == 'N') // unicode next line \u0085
2724  {
2725  // https://www.compart.com/en/unicode/U+0085
2726  const char payload[] = {
2727  _RYML_CHCONST(-0x3e, 0xc2),
2728  _RYML_CHCONST(-0x7b, 0x85),
2729  };
2730  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2731  }
2732  else if(next == 'L') // unicode line separator \u2028
2733  {
2734  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2735  const char payload[] = {
2736  _RYML_CHCONST(-0x1e, 0xe2),
2737  _RYML_CHCONST(-0x80, 0x80),
2738  _RYML_CHCONST(-0x58, 0xa8),
2739  };
2740  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2741  }
2742  else if(next == 'P') // unicode paragraph separator \u2029
2743  {
2744  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2745  const char payload[] = {
2746  _RYML_CHCONST(-0x1e, 0xe2),
2747  _RYML_CHCONST(-0x80, 0x80),
2748  _RYML_CHCONST(-0x57, 0xa9),
2749  };
2750  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2751  }
2752  else if(next == '\0')
2753  {
2754  proc.skip();
2755  }
2756  else
2757  {
2758  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2759  }
2760  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2761 }
2762 
2763 
2764 template<class EventHandler>
2765 template<class FilterProcessor>
2766 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2767 {
2768  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2769  // from the YAML spec for double-quoted scalars:
2770  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2771  while(proc.has_more_chars())
2772  {
2773  const char curr = proc.curr();
2774  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2775  switch(curr)
2776  {
2777  case ' ':
2778  case '\t':
2779  {
2780  _c4dbgfdq("whitespace", curr);
2781  _filter_ws_copy_trailing(proc);
2782  break;
2783  }
2784  case '\n':
2785  {
2786  _c4dbgfdq("newline", curr);
2787  _filter_nl_dquoted(proc);
2788  break;
2789  }
2790  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2791  {
2792  _c4dbgfdq("carriage return, ignore", curr);
2793  proc.skip();
2794  break;
2795  }
2796  case '\\':
2797  {
2798  _filter_dquoted_backslash(proc);
2799  break;
2800  }
2801  default:
2802  {
2803  proc.copy();
2804  break;
2805  }
2806  }
2807  }
2808  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2809  return proc.result();
2810 }
2811 
2812 #undef _c4dbgfdq
2813 
2814 
2815 template<class EventHandler>
2816 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2817 {
2818  FilterProcessorSrcDst proc(scalar, dst);
2819  return _filter_dquoted(proc);
2820 }
2821 
2822 template<class EventHandler>
2823 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2824 {
2825  FilterProcessorInplaceMidExtending proc(dst, cap);
2826  return _filter_dquoted(proc);
2827 }
2828 
2829 
2830 //-----------------------------------------------------------------------------
2831 //-----------------------------------------------------------------------------
2832 //-----------------------------------------------------------------------------
2833 // block filtering helpers
2834 
2835 template<class EventHandler>
2836 template<class FilterProcessor>
2837 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2838 {
2839  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2840  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2841 
2842  // a debugging scaffold:
2843  #if 0
2844  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2845  #else
2846  #define _c4dbgchomp(...)
2847  #endif
2848 
2849  // advance to the last line having spaces beyond the indentation
2850  {
2851  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2852  if(last != npos)
2853  {
2854  _c4dbgchomp("found newline and larger indentation. last={}", last);
2855  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2856  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2857  // remove indentation spaces, copy the rest
2858  while((proc.rpos < last) && proc.has_more_chars())
2859  {
2860  const char curr = proc.curr();
2861  _c4dbgchomp("curr='{}'", _c4prc(curr));
2862  switch(curr)
2863  {
2864  case '\n':
2865  {
2866  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2867  proc.copy();
2868  // are there spaces after the newline?
2869  csubstr at_next_line = proc.rem();
2870  if(at_next_line.begins_with(' '))
2871  {
2872  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2873  // there are spaces.
2874  size_t first_non_space = at_next_line.first_not_of(' ');
2875  _c4dbgchomp("first_non_space={}", first_non_space);
2876  if(first_non_space == npos)
2877  {
2878  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2879  first_non_space = at_next_line.len;
2880  }
2881  if(first_non_space <= indentation)
2882  {
2883  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2884  proc.skip(first_non_space);
2885  }
2886  else
2887  {
2888  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2889  proc.skip(indentation);
2890  // copy the spaces after the indentation
2891  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2892  proc.copy(first_non_space - indentation);
2893  }
2894  }
2895  break;
2896  }
2897  case '\r':
2898  proc.skip();
2899  break;
2900  default:
2901  _c4err("parse error");
2902  break;
2903  }
2904  }
2905  }
2906  }
2907 
2908  // from now on, we only have line ends (or indentation spaces)
2909  switch(chomp)
2910  {
2911  case CHOMP_CLIP:
2912  {
2913  bool had_one = false;
2914  while(proc.has_more_chars())
2915  {
2916  const char curr = proc.curr();
2917  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2918  switch(curr)
2919  {
2920  case '\n':
2921  {
2922  _c4dbgchomp("copy newline!", curr);
2923  proc.copy();
2924  proc.set_at_end();
2925  had_one = true;
2926  break;
2927  }
2928  case ' ':
2929  case '\r':
2930  _c4dbgchomp("skip!", curr);
2931  proc.skip();
2932  break;
2933  }
2934  }
2935  if(!had_one) // there were no newline characters. add one.
2936  {
2937  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2938  proc.set('\n');
2939  }
2940  break;
2941  }
2942  case CHOMP_KEEP:
2943  {
2944  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2945  while(proc.has_more_chars())
2946  {
2947  const char curr = proc.curr();
2948  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2949  switch(curr)
2950  {
2951  case '\n':
2952  _c4dbgchomp("copy newline!", curr);
2953  proc.copy();
2954  break;
2955  case ' ':
2956  case '\r':
2957  _c4dbgchomp("skip!", curr);
2958  proc.skip();
2959  break;
2960  }
2961  }
2962  break;
2963  }
2964  case CHOMP_STRIP:
2965  {
2966  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
2967  // nothing to do!
2968  break;
2969  }
2970  }
2971 
2972  #undef _c4dbgchomp
2973 }
2974 
2975 
2976 // a debugging scaffold:
2977 #if 0
2978 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2979 #else
2980 #define _c4dbgfb(...)
2981 #endif
2982 
2983 template<class EventHandler>
2984 template<class FilterProcessor>
2985 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2986 {
2987  csubstr rem = proc.rem(); // remaining
2988  if(rem.len)
2989  {
2990  size_t first = rem.first_not_of(' ');
2991  if(first != npos)
2992  {
2993  _c4dbgfb("{} spaces follow before next nonws character", first);
2994  if(first < indentation)
2995  {
2996  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
2997  proc.skip(first);
2998  }
2999  else
3000  {
3001  _c4dbgfb("skip {} spaces from indentation", indentation);
3002  proc.skip(indentation);
3003  }
3004  }
3005  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3006  else
3007  {
3008  _c4dbgfb("all spaces to the end: {} spaces", first);
3009  first = rem.len;
3010  if(first)
3011  {
3012  if(first < indentation)
3013  {
3014  _c4dbgfb("skip everything", first);
3015  proc.skip(proc.src.len - proc.rpos);
3016  }
3017  else
3018  {
3019  _c4dbgfb("skip {} spaces from indentation", indentation);
3020  proc.skip(indentation);
3021  }
3022  }
3023  }
3024  #endif
3025  }
3026 }
3027 
3028 template<class EventHandler>
3029 template<class FilterProcessor>
3030 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3031 {
3032  csubstr contents = proc.src.trimr(" \n\r");
3033  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3034  if(!contents.len)
3035  {
3036  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3037  if(chomp == CHOMP_KEEP && proc.src.len)
3038  {
3039  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3040  while(proc.has_more_chars())
3041  {
3042  const char curr = proc.curr();
3043  if(curr == '\n')
3044  proc.copy();
3045  else
3046  proc.skip();
3047  }
3048  if(!proc.wpos)
3049  {
3050  proc.set('\n');
3051  }
3052  }
3053  }
3054  return contents.len;
3055 }
3056 
3057 template<class EventHandler>
3058 template<class FilterProcessor>
3059 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3060 {
3061  _c4dbgfb("contents_len={}", contents_len);
3062 
3063  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3064 
3065  // extend contents to just before the first newline at the end,
3066  // in case it is preceded by spaces
3067  size_t firstnewl = proc.src.first_of('\n', contents_len);
3068  if(firstnewl != npos)
3069  {
3070  contents_len = firstnewl;
3071  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3072  }
3073  else
3074  {
3075  contents_len = proc.src.len;
3076  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3077  }
3078 
3079  return contents_len;
3080 }
3081 
3082 #undef _c4dbgfb
3083 
3084 
3085 //-----------------------------------------------------------------------------
3086 //-----------------------------------------------------------------------------
3087 //-----------------------------------------------------------------------------
3088 
3089 // a debugging scaffold:
3090 #if 0
3091 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3092 #else
3093 #define _c4dbgfbl(...)
3094 #endif
3095 
3096 template<class EventHandler>
3097 template<class FilterProcessor>
3098 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3099 {
3100  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3101 
3102  size_t contents_len = _handle_all_whitespace(proc, chomp);
3103  if(!contents_len)
3104  return proc.result();
3105 
3106  contents_len = _extend_to_chomp(proc, contents_len);
3107 
3108  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3109 
3110  _filter_block_indentation(proc, indentation);
3111 
3112  // now filter the bulk
3113  while(proc.has_more_chars(/*maxpos*/contents_len))
3114  {
3115  const char curr = proc.curr();
3116  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3117  switch(curr)
3118  {
3119  case '\n':
3120  {
3121  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3122  proc.copy(); // copy the newline
3123  _filter_block_indentation(proc, indentation);
3124  break;
3125  }
3126  case '\r':
3127  proc.skip();
3128  break;
3129  default:
3130  proc.copy();
3131  break;
3132  }
3133  }
3134 
3135  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3136 
3137  _filter_chomp(proc, chomp, indentation);
3138 
3139  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3140 
3141  return proc.result();
3142 }
3143 
3144 #undef _c4dbgfbl
3145 
3146 template<class EventHandler>
3147 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3148 {
3149  FilterProcessorSrcDst proc(scalar, dst);
3150  return _filter_block_literal(proc, indentation, chomp);
3151 }
3152 
3153 template<class EventHandler>
3154 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3155 {
3156  FilterProcessorInplaceEndExtending proc(scalar, cap);
3157  return _filter_block_literal(proc, indentation, chomp);
3158 }
3159 
3160 
3161 //-----------------------------------------------------------------------------
3162 //-----------------------------------------------------------------------------
3163 //-----------------------------------------------------------------------------
3164 
3165 // a debugging scaffold:
3166 #if 0
3167 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3168 #else
3169 #define _c4dbgfbf(...)
3170 #endif
3171 
3172 
3173 template<class EventHandler>
3174 template<class FilterProcessor>
3175 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3176 {
3177  _filter_block_indentation(proc, indentation);
3178  while(proc.has_more_chars(len))
3179  {
3180  const char curr = proc.curr();
3181  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3182  switch(curr)
3183  {
3184  case '\n':
3185  _c4dbgfbf("newline.", curr);
3186  proc.copy();
3187  _filter_block_indentation(proc, indentation);
3188  break;
3189  case '\r':
3190  proc.skip();
3191  break;
3192  case ' ':
3193  case '\t':
3194  {
3195  size_t first = proc.rem().first_not_of(" \t");
3196  _c4dbgfbf("space. first={}", first);
3197  if(first == npos)
3198  first = proc.rem().len;
3199  _c4dbgfbf("... indentation increased to {}", first);
3200  _filter_block_folded_indented_block(proc, indentation, len, first);
3201  break;
3202  }
3203  default:
3204  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3205  return;
3206  }
3207  }
3208 }
3209 
3210 template<class EventHandler>
3211 template<class FilterProcessor>
3212 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3213 {
3214  switch(num_newl)
3215  {
3216  case 1u:
3217  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3218  wpos_at_first_newl = proc.wpos;
3219  proc.skip();
3220  proc.set(' ');
3221  break;
3222  case 2u:
3223  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3224  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3225  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3226  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3227  proc.skip();
3228  proc.set_at(wpos_at_first_newl, '\n');
3229  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3230  break;
3231  default:
3232  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3233  proc.copy();
3234  break;
3235  }
3236  return wpos_at_first_newl;
3237 }
3238 
3239 template<class EventHandler>
3240 template<class FilterProcessor>
3241 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3242 {
3243  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
3244  size_t num_newl = 0;
3245  size_t wpos_at_first_newl = npos;
3246  while(proc.has_more_chars(len))
3247  {
3248  const char curr = proc.curr();
3249  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3250  switch(curr)
3251  {
3252  case '\n':
3253  {
3254  _c4dbgfbf("newline. sofar={}", num_newl);
3255  // NOTE: vs2022-32bit-release builds were giving wrong
3256  // results in this block, if it was written as either
3257  // as a switch(num_newl) or its equivalent if-form.
3258  //
3259  // For this reason, we're using a dedicated function
3260  // (**_compress), which seems to work around the issue.
3261  //
3262  // The manifested problem was that somewhere between the
3263  // assignment to curr and this point, proc.wpos (the
3264  // write-position of the processor) jumped to npos, which
3265  // made the write wrap-around! To make things worse,
3266  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3267  // problem go away!
3268  //
3269  // The only way to make the problem appear with prints
3270  // enabled was by disabling all prints in this function
3271  // (including in the block which was moved to the compress
3272  // function) and then selectively enabling only some of
3273  // those prints.
3274  //
3275  // This may be due to some bug in the cl-x86 optimizer; or
3276  // it may be triggered by some UB which may be
3277  // inadvertedly present in this function or in the filter
3278  // processor. This is despite our best efforts to weed out
3279  // any such UB problem: neither clang-tidy nor none of the
3280  // sanitizers, or gcc's -fanalyzer pointed to any problems
3281  // in this code.
3282  //
3283  // In the end, moving this block to a separate function
3284  // was the only way to bury the problem. But it may
3285  // resurface again, as The Undead, rising to from the
3286  // grave to haunt us with his terrible presence.
3287  //
3288  // We may have to revisit this. With a stake, and lots of
3289  // garlic.
3290  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3291  _filter_block_indentation(proc, indentation);
3292  break;
3293  }
3294  case ' ':
3295  case '\t':
3296  {
3297  size_t first = proc.rem().first_not_of(" \t");
3298  _c4dbgfbf("space. first={}", first);
3299  if(first == npos)
3300  first = proc.rem().len;
3301  _c4dbgfbf("... indentation increased to {}", first);
3302  if(num_newl)
3303  {
3304  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3305  proc.set_at(wpos_at_first_newl, '\n');
3306  }
3307  if(num_newl > 1u)
3308  {
3309  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3310  proc.set('\n');
3311  }
3312  _filter_block_folded_indented_block(proc, indentation, len, first);
3313  num_newl = 0;
3314  wpos_at_first_newl = npos;
3315  break;
3316  }
3317  case '\r':
3318  proc.skip();
3319  break;
3320  default:
3321  _c4dbgfbf("not space, not newline. stop.", 0);
3322  return;
3323  }
3324  }
3325 }
3326 
3327 
3328 template<class EventHandler>
3329 template<class FilterProcessor>
3330 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3331 {
3332  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3333  if(curr_indentation)
3334  proc.copy(curr_indentation);
3335  while(proc.has_more_chars(len))
3336  {
3337  const char curr = proc.curr();
3338  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3339  switch(curr)
3340  {
3341  case '\n':
3342  {
3343  proc.copy();
3344  _filter_block_indentation(proc, indentation);
3345  csubstr rem = proc.rem();
3346  const size_t first = rem.first_not_of(' ');
3347  _c4dbgfbf("newline. firstns={}", first);
3348  if(first == 0)
3349  {
3350  const char c = rem[first];
3351  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3352  if(c == '\n' || c == '\r')
3353  {
3354  ;
3355  }
3356  else
3357  {
3358  _c4dbgfbf("done with indented block", first);
3359  goto endloop;
3360  }
3361  }
3362  else if(first != npos)
3363  {
3364  proc.copy(first);
3365  _c4dbgfbf("copy all {} spaces", first);
3366  }
3367  break;
3368  }
3369  break;
3370  case '\r':
3371  proc.skip();
3372  break;
3373  default:
3374  proc.copy();
3375  break;
3376  }
3377  }
3378  endloop:
3379  return;
3380 }
3381 
3382 
3383 template<class EventHandler>
3384 template<class FilterProcessor>
3385 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3386 {
3387  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3388 
3389  size_t contents_len = _handle_all_whitespace(proc, chomp);
3390  if(!contents_len)
3391  return proc.result();
3392 
3393  contents_len = _extend_to_chomp(proc, contents_len);
3394 
3395  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3396 
3397  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3398 
3399  // now filter the bulk
3400  while(proc.has_more_chars(/*maxpos*/contents_len))
3401  {
3402  const char curr = proc.curr();
3403  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3404  switch(curr)
3405  {
3406  case '\n':
3407  {
3408  _c4dbgfbf("found newline", curr);
3409  _filter_block_folded_newlines(proc, indentation, contents_len);
3410  break;
3411  }
3412  case '\r':
3413  proc.skip();
3414  break;
3415  default:
3416  proc.copy();
3417  break;
3418  }
3419  }
3420 
3421  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3422 
3423  _filter_chomp(proc, chomp, indentation);
3424 
3425  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3426 
3427  return proc.result();
3428 }
3429 
3430 #undef _c4dbgfbf
3431 
3432 template<class EventHandler>
3433 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3434 {
3435  FilterProcessorSrcDst proc(scalar, dst);
3436  return _filter_block_folded(proc, indentation, chomp);
3437 }
3438 
3439 template<class EventHandler>
3440 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3441 {
3442  FilterProcessorInplaceEndExtending proc(scalar, cap);
3443  return _filter_block_folded(proc, indentation, chomp);
3444 }
3445 
3446 
3447 //-----------------------------------------------------------------------------
3448 //-----------------------------------------------------------------------------
3449 //-----------------------------------------------------------------------------
3450 
3451 template<class EventHandler>
3452 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3453 {
3454  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3455  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3456  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3457  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3458  return r.get();
3459 }
3460 
3461 //-----------------------------------------------------------------------------
3462 
3463 template<class EventHandler>
3464 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3465 {
3466  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3467  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3468  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3469  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3470  return r.get();
3471 }
3472 
3473 
3474 //-----------------------------------------------------------------------------
3475 
3476 template<class EventHandler>
3477 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3478 {
3479  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3480  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3481  if(C4_LIKELY(r.valid()))
3482  {
3483  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3484  return r.get();
3485  }
3486  else
3487  {
3488  const size_t len = r.required_len();
3489  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3490  substr dst = m_evt_handler->alloc_arena(len, &s);
3491  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3492  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3493  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3494  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3495  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3496  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3497  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3498  return rsd.get();
3499  }
3500 }
3501 
3502 
3503 //-----------------------------------------------------------------------------
3504 template<class EventHandler>
3505 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3506 {
3507  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3508  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3509  if(C4_LIKELY(r.valid()))
3510  {
3511  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3512  return r.get();
3513  }
3514  else
3515  {
3516  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3517  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3518  FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3519  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3520  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3521  return rsd.get();
3522  }
3523 }
3524 
3525 
3526 //-----------------------------------------------------------------------------
3527 template<class EventHandler>
3528 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3529 {
3530  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3531  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3532  if(C4_LIKELY(r.valid()))
3533  {
3534  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3535  return r.get();
3536  }
3537  else
3538  {
3539  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3540  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3541  FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3542  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3543  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3544  return rsd.get();
3545  }
3546 }
3547 
3548 
3549 //-----------------------------------------------------------------------------
3550 
3551 template<class EventHandler>
3552 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3553 {
3554  if(sc.needs_filter)
3555  {
3556  if(m_options.scalar_filtering())
3557  {
3558  return _filter_scalar_plain(sc.scalar, indentation);
3559  }
3560  else
3561  {
3562  _c4dbgp("plain scalar left unfiltered");
3563  m_evt_handler->mark_key_scalar_unfiltered();
3564  }
3565  }
3566  else
3567  {
3568  _c4dbgp("plain scalar doesn't need filtering");
3569  }
3570  return sc.scalar;
3571 }
3572 
3573 template<class EventHandler>
3574 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3575 {
3576  if(sc.needs_filter)
3577  {
3578  if(m_options.scalar_filtering())
3579  {
3580  return _filter_scalar_plain(sc.scalar, indentation);
3581  }
3582  else
3583  {
3584  _c4dbgp("plain scalar left unfiltered");
3585  m_evt_handler->mark_val_scalar_unfiltered();
3586  }
3587  }
3588  else
3589  {
3590  _c4dbgp("plain scalar doesn't need filtering");
3591  }
3592  return sc.scalar;
3593 }
3594 
3595 
3596 //-----------------------------------------------------------------------------
3597 
3598 template<class EventHandler>
3599 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3600 {
3601  if(sc.needs_filter)
3602  {
3603  if(m_options.scalar_filtering())
3604  {
3605  return _filter_scalar_squot(sc.scalar);
3606  }
3607  else
3608  {
3609  _c4dbgp("squo key scalar left unfiltered");
3610  m_evt_handler->mark_key_scalar_unfiltered();
3611  }
3612  }
3613  else
3614  {
3615  _c4dbgp("squo key scalar doesn't need filtering");
3616  }
3617  return sc.scalar;
3618 }
3619 
3620 template<class EventHandler>
3621 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3622 {
3623  if(sc.needs_filter)
3624  {
3625  if(m_options.scalar_filtering())
3626  {
3627  return _filter_scalar_squot(sc.scalar);
3628  }
3629  else
3630  {
3631  _c4dbgp("squo val scalar left unfiltered");
3632  m_evt_handler->mark_val_scalar_unfiltered();
3633  }
3634  }
3635  else
3636  {
3637  _c4dbgp("squo val scalar doesn't need filtering");
3638  }
3639  return sc.scalar;
3640 }
3641 
3642 
3643 //-----------------------------------------------------------------------------
3644 
3645 template<class EventHandler>
3646 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3647 {
3648  if(sc.needs_filter)
3649  {
3650  if(m_options.scalar_filtering())
3651  {
3652  return _filter_scalar_dquot(sc.scalar);
3653  }
3654  else
3655  {
3656  _c4dbgp("dquo scalar left unfiltered");
3657  m_evt_handler->mark_key_scalar_unfiltered();
3658  }
3659  }
3660  else
3661  {
3662  _c4dbgp("dquo scalar doesn't need filtering");
3663  }
3664  return sc.scalar;
3665 }
3666 
3667 template<class EventHandler>
3668 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3669 {
3670  if(sc.needs_filter)
3671  {
3672  if(m_options.scalar_filtering())
3673  {
3674  return _filter_scalar_dquot(sc.scalar);
3675  }
3676  else
3677  {
3678  _c4dbgp("dquo scalar left unfiltered");
3679  m_evt_handler->mark_val_scalar_unfiltered();
3680  }
3681  }
3682  else
3683  {
3684  _c4dbgp("dquo scalar doesn't need filtering");
3685  }
3686  return sc.scalar;
3687 }
3688 
3689 
3690 //-----------------------------------------------------------------------------
3691 
3692 template<class EventHandler>
3693 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3694 {
3695  if(m_options.scalar_filtering())
3696  {
3697  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3698  }
3699  else
3700  {
3701  _c4dbgp("literal scalar left unfiltered");
3702  m_evt_handler->mark_key_scalar_unfiltered();
3703  }
3704  return sb.scalar;
3705 }
3706 
3707 template<class EventHandler>
3708 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3709 {
3710  if(m_options.scalar_filtering())
3711  {
3712  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3713  }
3714  else
3715  {
3716  _c4dbgp("literal scalar left unfiltered");
3717  m_evt_handler->mark_val_scalar_unfiltered();
3718  }
3719  return sb.scalar;
3720 }
3721 
3722 
3723 //-----------------------------------------------------------------------------
3724 
3725 template<class EventHandler>
3726 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3727 {
3728  if(m_options.scalar_filtering())
3729  {
3730  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3731  }
3732  else
3733  {
3734  _c4dbgp("folded scalar left unfiltered");
3735  m_evt_handler->mark_key_scalar_unfiltered();
3736  }
3737  return sb.scalar;
3738 }
3739 
3740 template<class EventHandler>
3741 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3742 {
3743  if(m_options.scalar_filtering())
3744  {
3745  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3746  }
3747  else
3748  {
3749  _c4dbgp("folded scalar left unfiltered");
3750  m_evt_handler->mark_val_scalar_unfiltered();
3751  }
3752  return sb.scalar;
3753 }
3754 
3755 
3756 //-----------------------------------------------------------------------------
3757 //-----------------------------------------------------------------------------
3758 //-----------------------------------------------------------------------------
3759 
3760 #ifdef RYML_DBG // !!! <----------------------------------
3761 
3762 template<class EventHandler>
3763 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3764 {
3765  char buf1_[64], buf2_[64], buf3_[64];
3766  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3767  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3768  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3769  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3770  s->flags |= on;
3771 }
3772 
3773 template<class EventHandler>
3774 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3775 {
3776  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3777  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3778  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3779  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3780  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3781  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3782  s->flags |= on;
3783  s->flags &= ~off;
3784 }
3785 
3786 template<class EventHandler>
3787 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3788 {
3789  char buf1_[64], buf2_[64], buf3_[64];
3790  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3791  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3792  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3793  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3794  s->flags &= ~off;
3795 }
3796 
3797 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3798 {
3799  size_t pos = 0;
3800  bool gotone = false;
3801 
3802  #define _prflag(fl) \
3803  if((flags & fl) == (fl)) \
3804  { \
3805  if(gotone) \
3806  { \
3807  if(pos + 1 < buf.len) \
3808  buf[pos] = '|'; \
3809  ++pos; \
3810  } \
3811  csubstr fltxt = #fl; \
3812  if(pos + fltxt.len <= buf.len) \
3813  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3814  pos += fltxt.len; \
3815  gotone = true; \
3816  }
3817 
3818  _prflag(RTOP);
3819  _prflag(RUNK);
3820  _prflag(RMAP);
3821  _prflag(RSEQ);
3822  _prflag(FLOW);
3823  _prflag(BLCK);
3824  _prflag(QMRK);
3825  _prflag(RKEY);
3826  _prflag(RVAL);
3827  _prflag(RKCL);
3828  _prflag(RNXT);
3829  _prflag(SSCL);
3830  _prflag(QSCL);
3831  _prflag(RSET);
3832  _prflag(RDOC);
3833  _prflag(NDOC);
3834  _prflag(USTY);
3835  _prflag(RSEQIMAP);
3836 
3837  #undef _prflag
3838 
3839  if(pos == 0)
3840  if(buf.len > 0)
3841  buf[pos++] = '0';
3842 
3843  RYML_CHECK(pos <= buf.len);
3844 
3845  return buf.first(pos);
3846 }
3847 
3848 #endif // RYML_DBG !!! <----------------------------------
3849 
3850 
3851 //-----------------------------------------------------------------------------
3852 //-----------------------------------------------------------------------------
3853 //-----------------------------------------------------------------------------
3854 
3855 template<class EventHandler>
3856 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
3857 {
3858  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3859  return m_buf.sub(loc.offset);
3860 }
3861 
3862 template<class EventHandler>
3863 Location ParseEngine<EventHandler>::location(ConstNodeRef node) const
3864 {
3865  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3866  return location(*node.tree(), node.id());
3867 }
3868 
3869 template<class EventHandler>
3870 Location ParseEngine<EventHandler>::location(Tree const& tree, id_type node) const
3871 {
3872  // try hard to avoid getting the location from a null string.
3873  Location loc;
3874  if(_location_from_node(tree, node, &loc, 0))
3875  return loc;
3876  return val_location(m_buf.str);
3877 }
3878 
3879 template<class EventHandler>
3880 bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
3881 {
3882  if(tree.has_key(node))
3883  {
3884  csubstr k = tree.key(node);
3885  if(C4_LIKELY(k.str != nullptr))
3886  {
3887  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3888  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3889  *loc = val_location(k.str);
3890  return true;
3891  }
3892  }
3893 
3894  if(tree.has_val(node))
3895  {
3896  csubstr v = tree.val(node);
3897  if(C4_LIKELY(v.str != nullptr))
3898  {
3899  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3900  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3901  *loc = val_location(v.str);
3902  return true;
3903  }
3904  }
3905 
3906  if(tree.is_container(node))
3907  {
3908  if(_location_from_cont(tree, node, loc))
3909  return true;
3910  }
3911 
3912  if(tree.type(node) != NOTYPE && level == 0)
3913  {
3914  // try the prev sibling
3915  {
3916  const id_type prev = tree.prev_sibling(node);
3917  if(prev != NONE)
3918  {
3919  if(_location_from_node(tree, prev, loc, level+1))
3920  return true;
3921  }
3922  }
3923  // try the next sibling
3924  {
3925  const id_type next = tree.next_sibling(node);
3926  if(next != NONE)
3927  {
3928  if(_location_from_node(tree, next, loc, level+1))
3929  return true;
3930  }
3931  }
3932  // try the parent
3933  {
3934  const id_type parent = tree.parent(node);
3935  if(parent != NONE)
3936  {
3937  if(_location_from_node(tree, parent, loc, level+1))
3938  return true;
3939  }
3940  }
3941  }
3942 
3943  return false;
3944 }
3945 
3946 template<class EventHandler>
3947 bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
3948 {
3949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3950  if(!tree.is_stream(node))
3951  {
3952  const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
3953  if(tree.has_children(node))
3954  {
3955  id_type child = tree.first_child(node);
3956  if(tree.has_key(child))
3957  {
3958  // when a map starts, the container was set after the key
3959  csubstr k = tree.key(child);
3960  if(k.str && node_start > k.str)
3961  node_start = k.str;
3962  }
3963  }
3964  *loc = val_location(node_start);
3965  return true;
3966  }
3967  else // it's a stream
3968  {
3969  *loc = val_location(m_buf.str); // just return the front of the buffer
3970  }
3971  return true;
3972 }
3973 
3974 
3975 template<class EventHandler>
3976 Location ParseEngine<EventHandler>::val_location(const char *val) const
3977 {
3978  if(C4_UNLIKELY(val == nullptr))
3979  return {m_file, 0, 0, 0};
3980  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3981  // NOTE: if any of these checks fails, the parser needs to be
3982  // instantiated with locations enabled.
3983  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3984  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3985  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3986  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3987  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3988  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3989  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3990  csubstr src = m_buf;
3991  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3992  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3993  // ok. search the first stored newline after the given ptr
3994  using lineptr_type = size_t const* C4_RESTRICT;
3995  lineptr_type lineptr = nullptr;
3996  size_t offset = (size_t)(val - src.begin());
3997  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3998  {
3999  // just do a linear search if the size is small.
4000  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4001  {
4002  if(*curr > offset)
4003  {
4004  lineptr = curr;
4005  break;
4006  }
4007  }
4008  }
4009  else
4010  {
4011  // do a bisection search if the size is not small.
4012  //
4013  // We could use std::lower_bound but this is simple enough and
4014  // spares the costly include of <algorithm>.
4015  size_t count = m_newline_offsets_size;
4016  size_t step;
4017  lineptr_type it;
4018  lineptr = m_newline_offsets;
4019  while(count)
4020  {
4021  step = count >> 1;
4022  it = lineptr + step;
4023  if(*it < offset)
4024  {
4025  lineptr = ++it;
4026  count -= step + 1;
4027  }
4028  else
4029  {
4030  count = step;
4031  }
4032  }
4033  }
4034  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4035  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4036  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4037  Location loc;
4038  loc.name = m_file;
4039  loc.offset = offset;
4040  loc.line = (size_t)(lineptr - m_newline_offsets);
4041  if(lineptr > m_newline_offsets)
4042  loc.col = (offset - *(lineptr-1) - 1u);
4043  else
4044  loc.col = offset;
4045  return loc;
4046 }
4047 
4048 template<class EventHandler>
4049 void ParseEngine<EventHandler>::_prepare_locations()
4050 {
4051  m_newline_offsets_buf = m_buf;
4052  size_t numnewlines = 1u + m_buf.count('\n');
4053  _resize_locations(numnewlines);
4054  m_newline_offsets_size = 0;
4055  for(size_t i = 0; i < m_buf.len; i++)
4056  if(m_buf[i] == '\n')
4057  m_newline_offsets[m_newline_offsets_size++] = i;
4058  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4059  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4060 }
4061 
4062 template<class EventHandler>
4063 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4064 {
4065  if(numnewlines > m_newline_offsets_capacity)
4066  {
4067  if(m_newline_offsets)
4068  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4069  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4070  m_newline_offsets_capacity = numnewlines;
4071  }
4072 }
4073 
4074 template<class EventHandler>
4075 bool ParseEngine<EventHandler>::_locations_dirty() const
4076 {
4077  return !m_newline_offsets_size;
4078 }
4079 
4080 
4081 //-----------------------------------------------------------------------------
4082 //-----------------------------------------------------------------------------
4083 //-----------------------------------------------------------------------------
4084 
4085 template<class EventHandler>
4086 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4087 {
4088  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4089  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4090  {
4091  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4092  {
4093  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4094  _skipchars(" \t");
4095  }
4096  // comments
4097  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4098  {
4099  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4100  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4101  }
4102  }
4103 }
4104 
4105 
4106 //-----------------------------------------------------------------------------
4107 
4108 
4109 template<class EventHandler>
4110 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4111 {
4112  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4113  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
4114  _c4err("too many annotations");
4115  dst->annotations[dst->num_entries].str = str;
4116  dst->annotations[dst->num_entries].indentation = indentation;
4117  dst->annotations[dst->num_entries].line = line;
4118  ++dst->num_entries;
4119 }
4120 
4121 template<class EventHandler>
4122 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4123 {
4124  dst->num_entries = 0;
4125 }
4126 
4127 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4128 template<class EventHandler>
4129 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4130 {
4131  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4132  {
4133  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4134  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4135  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4136  size_t to_skip = m_evt_handler->m_curr->indref;
4137  if(m_pending_anchors.num_entries)
4138  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4139  if(m_pending_tags.num_entries)
4140  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4141  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4142  _maybe_skipchars_up_to(' ', to_skip);
4143  return true;
4144  }
4145  return false;
4146 }
4147 #endif
4148 
4149 template<class EventHandler>
4150 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4151 {
4152  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4153 }
4154 
4155 template<class EventHandler>
4156 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4157 {
4158  if(!tag.begins_with("!<"))
4159  {
4160  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4161  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4162  }
4163  else
4164  {
4165  if(C4_UNLIKELY(!tag.ends_with('>')))
4166  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
4167  }
4168 }
4169 
4170 template<class EventHandler>
4171 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4172 {
4173  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4174  if(m_pending_tags.num_entries)
4175  {
4176  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4177  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4178  {
4179  _check_tag(m_pending_tags.annotations[0].str);
4180  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4181  _clear_annotations(&m_pending_tags);
4182  }
4183  else
4184  {
4185  _c4err("too many tags");
4186  }
4187  }
4188  if(m_pending_anchors.num_entries)
4189  {
4190  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4191  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4192  {
4193  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4194  _clear_annotations(&m_pending_anchors);
4195  }
4196  else
4197  {
4198  _c4err("too many anchors");
4199  }
4200  }
4201 }
4202 
4203 template<class EventHandler>
4204 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4205 {
4206  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4207  if(m_pending_tags.num_entries)
4208  {
4209  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4210  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4211  {
4212  _check_tag(m_pending_tags.annotations[0].str);
4213  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4214  _clear_annotations(&m_pending_tags);
4215  }
4216  else
4217  {
4218  _c4err("too many tags");
4219  }
4220  }
4221  if(m_pending_anchors.num_entries)
4222  {
4223  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4224  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4225  {
4226  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4227  _clear_annotations(&m_pending_anchors);
4228  }
4229  else
4230  {
4231  _c4err("too many anchors");
4232  }
4233  }
4234 }
4235 
4236 template<class EventHandler>
4237 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4238 {
4239  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4240  if(m_pending_tags.num_entries == 2)
4241  {
4242  _c4dbgp("2 tags, setting entry 0");
4243  _check_tag(m_pending_tags.annotations[0].str);
4244  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4245  }
4246  else if(m_pending_tags.num_entries == 1)
4247  {
4248  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4249  if(m_pending_tags.annotations[0].line < current_line)
4250  {
4251  _c4dbgp("...tag is for the map. setting it.");
4252  _check_tag(m_pending_tags.annotations[0].str);
4253  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4254  _clear_annotations(&m_pending_tags);
4255  }
4256  }
4257  //
4258  if(m_pending_anchors.num_entries == 2)
4259  {
4260  _c4dbgp("2 anchors, setting entry 0");
4261  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4262  }
4263  else if(m_pending_anchors.num_entries == 1)
4264  {
4265  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4266  if(m_pending_anchors.annotations[0].line < current_line)
4267  {
4268  _c4dbgp("...anchor is for the map. setting it.");
4269  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4270  _clear_annotations(&m_pending_anchors);
4271  }
4272  }
4273 }
4274 
4275 template<class EventHandler>
4276 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4277 {
4278  _c4dbgp("annotations_before_start_mapblck_as_key");
4279  if(m_pending_tags.num_entries == 2)
4280  {
4281  _check_tag(m_pending_tags.annotations[0].str);
4282  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4283  }
4284  if(m_pending_anchors.num_entries == 2)
4285  {
4286  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4287  }
4288 }
4289 
4290 template<class EventHandler>
4291 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4292 {
4293  _c4dbgp("annotations_after_start_mapblck");
4294  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4295  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4296  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4297  {
4298  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4299  switch(m_pending_tags.num_entries)
4300  {
4301  case 1u:
4302  _check_tag(m_pending_tags.annotations[0].str);
4303  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4304  _clear_annotations(&m_pending_tags);
4305  break;
4306  case 2u:
4307  _check_tag(m_pending_tags.annotations[1].str);
4308  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4309  _clear_annotations(&m_pending_tags);
4310  break;
4311  }
4312  switch(m_pending_anchors.num_entries)
4313  {
4314  case 1u:
4315  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4316  _clear_annotations(&m_pending_anchors);
4317  break;
4318  case 2u:
4319  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4320  _clear_annotations(&m_pending_anchors);
4321  break;
4322  }
4323  }
4324  _set_indentation(key_indentation);
4325 }
4326 
4327 template<class EventHandler>
4328 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4329 {
4330  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4331  // select the left-most annotation on the max line
4332  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4333  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4334  {
4335  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4336  if(ann.line > curr->line)
4337  curr = &ann;
4338  else if(ann.indentation < curr->indentation)
4339  curr = &ann;
4340  }
4341  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4342  {
4343  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4344  if(ann.line > curr->line)
4345  curr = &ann;
4346  else if(ann.indentation < curr->indentation)
4347  curr = &ann;
4348  }
4349  return curr->line < val_line ? val_indentation : curr->indentation;
4350 }
4351 
4352 template<class EventHandler>
4353 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4354 {
4355  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4356  const size_t pos = rem.find('#');
4357  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4358  if(pos == npos) // no comments
4359  {
4360  m_evt_handler->add_directive(rem);
4361  _line_progressed(rem.len);
4362  }
4363  else
4364  {
4365  csubstr to_comment = rem.first(pos);
4366  csubstr trimmed = to_comment.trimr(" \t");
4367  m_evt_handler->add_directive(trimmed);
4368  _line_progressed(pos);
4369  _skip_comment();
4370  }
4371 }
4372 
4373 template<class EventHandler>
4374 bool ParseEngine<EventHandler>::_handle_bom()
4375 {
4376  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4377  if(rem.len)
4378  {
4379  const csubstr rest = rem.sub(1);
4380  // https://yaml.org/spec/1.2.2/#52-character-encodings
4381  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4382  if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4383  {
4384  _c4dbgp("byte order mark: UTF32BE");
4385  _handle_bom(UTF32BE);
4386  _line_progressed(4);
4387  return true;
4388  }
4389  else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4390  {
4391  _c4dbgp("byte order mark: UTF32LE");
4392  _handle_bom(UTF32LE);
4393  _line_progressed(4);
4394  return true;
4395  }
4396  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4397  {
4398  _c4dbgp("byte order mark: UTF16BE");
4399  _handle_bom(UTF16BE);
4400  _line_progressed(2);
4401  return true;
4402  }
4403  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4404  {
4405  _c4dbgp("byte order mark: UTF16LE");
4406  _handle_bom(UTF16LE);
4407  _line_progressed(2);
4408  return true;
4409  }
4410  else if(rem.begins_with("\xef\xbb\xbf"))
4411  {
4412  _c4dbgp("byte order mark: UTF8");
4413  _handle_bom(UTF8);
4414  _line_progressed(3);
4415  return true;
4416  }
4417  #undef _rymlisascii
4418  }
4419  return false;
4420 }
4421 
4422 template<class EventHandler>
4423 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4424 {
4425  if(m_encoding == NOBOM)
4426  {
4427  const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4428  if(enc == UTF8 || is_beginning_of_file)
4429  m_encoding = enc;
4430  else
4431  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4432  }
4433  else if(enc != m_encoding)
4434  {
4435  _c4err("byte order mark can only be set once");
4436  }
4437 }
4438 
4439 
4440 //-----------------------------------------------------------------------------
4441 
4442 template<class EventHandler>
4443 void ParseEngine<EventHandler>::_handle_seq_json()
4444 {
4445 seqjson_start:
4446  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4447 
4448  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4449  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4450  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4451  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4452  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4453 
4454  _handle_flow_skip_whitespace();
4455  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4456  if(!rem.len)
4457  goto seqjson_again;
4458 
4459  if(has_any(RVAL))
4460  {
4461  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4462  const char first = rem.str[0];
4463  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4464  switch(first)
4465  {
4466  case '"':
4467  {
4468  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4469  ScannedScalar sc = _scan_scalar_dquot();
4470  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4471  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4472  addrem_flags(RNXT, RVAL);
4473  break;
4474  }
4475  case '[':
4476  {
4477  _c4dbgp("seqjson[RVAL]: start child seqjson");
4478  addrem_flags(RNXT, RVAL);
4479  m_evt_handler->begin_seq_val_flow();
4480  addrem_flags(RVAL, RNXT);
4481  _line_progressed(1);
4482  break;
4483  }
4484  case '{':
4485  {
4486  _c4dbgp("seqjson[RVAL]: start child mapjson");
4487  addrem_flags(RNXT, RVAL);
4488  m_evt_handler->begin_map_val_flow();
4489  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4490  _line_progressed(1);
4491  goto seqjson_finish;
4492  }
4493  case ']': // this happens on a trailing comma like ", ]"
4494  {
4495  _c4dbgp("seqjson[RVAL]: end!");
4496  rem_flags(RSEQ);
4497  m_evt_handler->end_seq();
4498  _line_progressed(1);
4499  if(!has_all(RSEQ|FLOW))
4500  goto seqjson_finish;
4501  break;
4502  }
4503  default:
4504  {
4505  ScannedScalar sc;
4506  if(_scan_scalar_seq_json(&sc))
4507  {
4508  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4509  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4510  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4511  addrem_flags(RNXT, RVAL);
4512  }
4513  else
4514  {
4515  _c4err("parse error");
4516  }
4517  }
4518  }
4519  }
4520  else // RNXT
4521  {
4522  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4523  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4524  const char first = rem.str[0];
4525  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4526  switch(first)
4527  {
4528  case ',':
4529  {
4530  _c4dbgp("seqjson[RNXT]: expect next val");
4531  addrem_flags(RVAL, RNXT);
4532  m_evt_handler->add_sibling();
4533  _line_progressed(1);
4534  break;
4535  }
4536  case ']':
4537  {
4538  _c4dbgp("seqjson[RNXT]: end!");
4539  m_evt_handler->end_seq();
4540  _line_progressed(1);
4541  goto seqjson_finish;
4542  }
4543  default:
4544  _c4err("parse error");
4545  }
4546  }
4547 
4548  seqjson_again:
4549  _c4dbgt("seqjson: go again", 0);
4550  if(_finished_line())
4551  {
4552  if(C4_LIKELY(!_finished_file()))
4553  {
4554  _line_ended();
4555  _scan_line();
4556  _c4dbgnextline();
4557  }
4558  else
4559  {
4560  _c4err("missing terminating ]");
4561  }
4562  }
4563  goto seqjson_start;
4564 
4565  seqjson_finish:
4566  _c4dbgp("seqjson: finish");
4567 }
4568 
4569 
4570 //-----------------------------------------------------------------------------
4571 
4572 template<class EventHandler>
4573 void ParseEngine<EventHandler>::_handle_map_json()
4574 {
4575 mapjson_start:
4576  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4577 
4578  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4579  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4580  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4581  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4582  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4583 
4584  _handle_flow_skip_whitespace();
4585  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4586  if(!rem.len)
4587  goto mapjson_again;
4588 
4589  if(has_any(RKEY))
4590  {
4591  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4592  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4594  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4595  const char first = rem.str[0];
4596  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4597  switch(first)
4598  {
4599  case '"':
4600  {
4601  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4602  ScannedScalar sc = _scan_scalar_dquot();
4603  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4604  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4605  addrem_flags(RKCL, RKEY);
4606  break;
4607  }
4608  case '}': // this happens on a trailing comma like ", }"
4609  {
4610  _c4dbgp("mapjson[RKEY]: end!");
4611  m_evt_handler->end_map();
4612  _line_progressed(1);
4613  goto mapjson_finish;
4614  }
4615  default:
4616  _c4err("parse error");
4617  }
4618  }
4619  else if(has_any(RVAL))
4620  {
4621  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4622  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4623  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4624  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4625  const char first = rem.str[0];
4626  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4627  switch(first)
4628  {
4629  case '"':
4630  {
4631  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4632  ScannedScalar sc = _scan_scalar_dquot();
4633  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4634  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4635  addrem_flags(RNXT, RVAL);
4636  break;
4637  }
4638  case '[':
4639  {
4640  _c4dbgp("mapjson[RVAL]: start val seqjson");
4641  addrem_flags(RNXT, RVAL);
4642  m_evt_handler->begin_seq_val_flow();
4643  _set_indentation(m_evt_handler->m_parent->indref);
4644  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4645  _line_progressed(1);
4646  goto mapjson_finish;
4647  }
4648  case '{':
4649  {
4650  _c4dbgp("mapjson[RVAL]: start val mapjson");
4651  addrem_flags(RNXT, RVAL);
4652  m_evt_handler->begin_map_val_flow();
4653  _set_indentation(m_evt_handler->m_parent->indref);
4654  addrem_flags(RKEY, RNXT);
4655  _line_progressed(1);
4656  // keep going in this function
4657  break;
4658  }
4659  default:
4660  {
4661  ScannedScalar sc;
4662  if(_scan_scalar_map_json(&sc))
4663  {
4664  _c4dbgp("mapjson[RVAL]: plain scalar.");
4665  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4666  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4667  addrem_flags(RNXT, RVAL);
4668  }
4669  else
4670  {
4671  _c4err("parse error");
4672  }
4673  break;
4674  }
4675  }
4676  }
4677  else if(has_any(RKCL)) // read the key colon
4678  {
4679  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4680  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4681  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4682  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4683  const char first = rem.str[0];
4684  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4685  if(first == ':')
4686  {
4687  _c4dbgp("mapjson[RKCL]: found the colon");
4688  addrem_flags(RVAL, RKCL);
4689  _line_progressed(1);
4690  }
4691  else
4692  {
4693  _c4err("parse error");
4694  }
4695  }
4696  else if(has_any(RNXT))
4697  {
4698  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4699  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4700  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4701  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4702  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4703  if(rem.begins_with(','))
4704  {
4705  _c4dbgp("mapjson[RNXT]: expect next keyval");
4706  m_evt_handler->add_sibling();
4707  addrem_flags(RKEY, RNXT);
4708  _line_progressed(1);
4709  }
4710  else if(rem.begins_with('}'))
4711  {
4712  _c4dbgp("mapjson[RNXT]: end!");
4713  m_evt_handler->end_map();
4714  _line_progressed(1);
4715  goto mapjson_finish;
4716  }
4717  else
4718  {
4719  _c4err("parse error");
4720  }
4721  }
4722 
4723  mapjson_again:
4724  _c4dbgt("mapjson: go again", 0);
4725  if(_finished_line())
4726  {
4727  if(C4_LIKELY(!_finished_file()))
4728  {
4729  _line_ended();
4730  _scan_line();
4731  _c4dbgnextline();
4732  }
4733  else
4734  {
4735  _c4err("missing terminating }");
4736  }
4737  }
4738  goto mapjson_start;
4739 
4740  mapjson_finish:
4741  _c4dbgp("mapjson: finish");
4742 }
4743 
4744 
4745 //-----------------------------------------------------------------------------
4746 
4747 template<class EventHandler>
4748 void ParseEngine<EventHandler>::_handle_seq_imap()
4749 {
4750 seqimap_start:
4751  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4752 
4753  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4754  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4755  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4756  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4757  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4758 
4759  _handle_flow_skip_whitespace();
4760  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4761  if(!rem.len)
4762  goto seqimap_again;
4763 
4764  if(has_any(RVAL))
4765  {
4766  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4767  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4768  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4769  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4770  const char first = rem.str[0];
4771  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4772  ScannedScalar sc;
4773  if(first == '\'')
4774  {
4775  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4776  sc = _scan_scalar_squot();
4777  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4778  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4779  m_evt_handler->end_map();
4780  goto seqimap_finish;
4781  }
4782  else if(first == '"')
4783  {
4784  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4785  sc = _scan_scalar_dquot();
4786  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4787  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4788  m_evt_handler->end_map();
4789  goto seqimap_finish;
4790  }
4791  // block scalars (ie | and >) cannot appear in flow containers
4792  else if(_scan_scalar_plain_map_flow(&sc))
4793  {
4794  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4795  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4796  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4797  m_evt_handler->end_map();
4798  goto seqimap_finish;
4799  }
4800  else if(first == '[')
4801  {
4802  _c4dbgp("seqimap[RVAL]: start child seqflow");
4803  addrem_flags(RNXT, RVAL);
4804  m_evt_handler->begin_seq_val_flow();
4805  addrem_flags(RVAL, RNXT|RSEQIMAP);
4806  _set_indentation(m_evt_handler->m_parent->indref);
4807  _line_progressed(1);
4808  goto seqimap_finish;
4809  }
4810  else if(first == '{')
4811  {
4812  _c4dbgp("seqimap[RVAL]: start child mapflow");
4813  addrem_flags(RNXT, RVAL);
4814  m_evt_handler->begin_map_val_flow();
4815  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4816  _set_indentation(m_evt_handler->m_parent->indref);
4817  _line_progressed(1);
4818  goto seqimap_finish;
4819  }
4820  else if(first == ',' || first == ']')
4821  {
4822  _c4dbgp("seqimap[RVAL]: finish without val.");
4823  m_evt_handler->set_val_scalar_plain_empty();
4824  m_evt_handler->end_map();
4825  goto seqimap_finish;
4826  }
4827  else if(first == '&')
4828  {
4829  csubstr anchor = _scan_anchor();
4830  _c4dbgp("seqimap[RVAL]: anchor!");
4831  m_evt_handler->set_val_anchor(anchor);
4832  }
4833  else if(first == '*')
4834  {
4835  csubstr ref = _scan_ref_seq();
4836  _c4dbgp("seqimap[RVAL]: ref!");
4837  m_evt_handler->set_val_ref(ref);
4838  addrem_flags(RNXT, RVAL);
4839  }
4840  else
4841  {
4842  _c4err("parse error");
4843  }
4844  }
4845  else if(has_any(RNXT))
4846  {
4847  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4848  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4849  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4850  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4851  const char first = rem.str[0];
4852  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4853  if(first == ',' || first == ']')
4854  {
4855  // we may get here because a map or a seq started and we
4856  // return later
4857  _c4dbgp("seqimap: done");
4858  m_evt_handler->end_map();
4859  goto seqimap_finish;
4860  }
4861  else
4862  {
4863  _c4err("parse error");
4864  }
4865  }
4866  else if(has_any(QMRK))
4867  {
4868  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4869  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4870  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4871  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4872  const char first = rem.str[0];
4873  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4874  ScannedScalar sc;
4875  if(first == '\'')
4876  {
4877  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4878  sc = _scan_scalar_squot();
4879  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4880  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4881  addrem_flags(RKCL, QMRK);
4882  goto seqimap_again;
4883  }
4884  else if(first == '"')
4885  {
4886  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4887  sc = _scan_scalar_dquot();
4888  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4889  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4890  addrem_flags(RKCL, QMRK);
4891  goto seqimap_again;
4892  }
4893  // block scalars (ie | and >) cannot appear in flow containers
4894  else if(_scan_scalar_plain_map_flow(&sc))
4895  {
4896  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4897  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4898  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4899  addrem_flags(RKCL, QMRK);
4900  goto seqimap_again;
4901  }
4902  else if(first == '[')
4903  {
4904  _c4dbgp("seqimap[QMRK]: start child seqflow");
4905  addrem_flags(RKCL, QMRK);
4906  m_evt_handler->begin_seq_key_flow();
4907  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4908  _set_indentation(m_evt_handler->m_parent->indref);
4909  _line_progressed(1);
4910  goto seqimap_finish;
4911  }
4912  else if(first == '{')
4913  {
4914  _c4dbgp("seqimap[QMRK]: start child mapflow");
4915  addrem_flags(RKCL, QMRK);
4916  m_evt_handler->begin_map_key_flow();
4917  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4918  _set_indentation(m_evt_handler->m_parent->indref);
4919  _line_progressed(1);
4920  goto seqimap_finish;
4921  }
4922  else if(first == ',' || first == ']')
4923  {
4924  _c4dbgp("seqimap[QMRK]: finish without key.");
4925  m_evt_handler->set_key_scalar_plain_empty();
4926  m_evt_handler->set_val_scalar_plain_empty();
4927  m_evt_handler->end_map();
4928  goto seqimap_finish;
4929  }
4930  else if(first == '&')
4931  {
4932  csubstr anchor = _scan_anchor();
4933  _c4dbgp("seqimap[QMRK]: anchor!");
4934  m_evt_handler->set_key_anchor(anchor);
4935  }
4936  else if(first == '*')
4937  {
4938  csubstr ref = _scan_ref_seq();
4939  _c4dbgp("seqimap[QMRK]: ref!");
4940  m_evt_handler->set_key_ref(ref);
4941  addrem_flags(RKCL, QMRK);
4942  }
4943  else
4944  {
4945  _c4err("parse error");
4946  }
4947  }
4948  else if(has_any(RKCL))
4949  {
4950  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4951  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4953  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4954  const char first = rem.str[0];
4955  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4956  if(first == ':')
4957  {
4958  _c4dbgp("seqimap[RKCL]: found ':'");
4959  addrem_flags(RVAL, RKCL);
4960  _line_progressed(1);
4961  goto seqimap_again;
4962  }
4963  else if(first == ',' || first == ']')
4964  {
4965  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4966  m_evt_handler->set_val_scalar_plain_empty();
4967  m_evt_handler->end_map();
4968  goto seqimap_finish;
4969  }
4970  else
4971  {
4972  _c4err("parse error");
4973  }
4974  }
4975 
4976  seqimap_again:
4977  _c4dbgt("seqimap: go again", 0);
4978  if(_finished_line())
4979  {
4980  if(C4_LIKELY(!_finished_file()))
4981  {
4982  _line_ended();
4983  _scan_line();
4984  _c4dbgnextline();
4985  }
4986  else
4987  {
4988  _c4err("parse error");
4989  }
4990  }
4991  goto seqimap_start;
4992 
4993  seqimap_finish:
4994  _c4dbgp("seqimap: finish");
4995 }
4996 
4997 
4998 //-----------------------------------------------------------------------------
4999 
// Parse the interior of a flow sequence, ie the text following an opening [.
//
// The function is a small state machine driven by gotos. Each pass starts at
// seqflow_start, skips flow whitespace, and dispatches on the first remaining
// character of the current line. Exactly one of two flags is set on each pass
// (asserted below):
//   - RVAL: a value is expected next
//   - RNXT: a value was just read, so a separator or terminator is expected
//
// The function returns through seqflow_finish when the sequence is closed by ]
// or when a map-like child is started (by {, by : or by ?). A nested [ does
// not return: the loop simply continues for the child sequence. When the
// current line is exhausted the next line is scanned; reaching the end of the
// file before the closing ] is reported as an error.
5000 template<class EventHandler>
5001 void ParseEngine<EventHandler>::_handle_seq_flow()
5002 {
5003 seqflow_start:
5004  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5005 
      // state invariants for every pass: a flow sequence, not reading a key,
      // and exactly one of RVAL or RNXT set
5006  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5007  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5008  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5009  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5010  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
5011  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
5012 
5013  _handle_flow_skip_whitespace();
5014  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
5015  if(!m_evt_handler->m_curr->line_contents.rem.len)
5016  goto seqflow_again;
5017 
5018  if(has_any(RVAL))
5019  {
5020  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5021  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5022  ScannedScalar sc;
      // scalar values (quoted or plain): emit the value and move to RNXT
5023  if(first == '\'')
5024  {
5025  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5026  sc = _scan_scalar_squot();
5027  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5028  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5029  addrem_flags(RNXT, RVAL);
5030  }
5031  else if(first == '"')
5032  {
5033  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5034  sc = _scan_scalar_dquot();
5035  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5036  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5037  addrem_flags(RNXT, RVAL);
5038  }
5039  // block scalars (ie | and >) cannot appear in flow containers
5040  else if(_scan_scalar_plain_seq_flow(&sc))
5041  {
5042  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5043  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5044  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5045  addrem_flags(RNXT, RVAL);
5046  }
5047  else if(first == '[')
5048  {
      // nested flow sequence. Flags are set to RNXT before the begin call and
      // back to RVAL after it; presumably begin_seq_val_flow pushes a new
      // state, so the first change lands on the parent and the second on the
      // child - TODO confirm in the event handler. There is no goto here, so
      // the loop carries on and parses the child in this same function.
5049  _c4dbgp("seqflow[RVAL]: start child seqflow");
5050  addrem_flags(RNXT, RVAL);
5051  m_evt_handler->begin_seq_val_flow();
5052  _set_indentation(m_evt_handler->m_parent->indref);
5053  addrem_flags(RVAL, RNXT);
5054  _line_progressed(1);
5055  }
5056  else if(first == '{')
5057  {
      // nested flow map: switch the new state to RMAP|RKEY and return, since
      // this function asserts RSEQ and cannot continue in a map state
5058  _c4dbgp("seqflow[RVAL]: start child mapflow");
5059  addrem_flags(RNXT, RVAL);
5060  m_evt_handler->begin_map_val_flow();
5061  _set_indentation(m_evt_handler->m_parent->indref);
5062  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5063  _line_progressed(1);
5064  goto seqflow_finish;
5065  }
5066  else if(first == ']') // this happens on a trailing comma like ", ]"
5067  {
      // NOTE(review): the line is advanced before end_seq here, whereas the
      // RNXT branch further down calls end_seq first - confirm that the order
      // is immaterial.
5068  _c4dbgp("seqflow[RVAL]: end!");
5069  _line_progressed(1);
5070  m_evt_handler->end_seq();
5071  goto seqflow_finish;
5072  }
5073  else if(first == '*')
5074  {
      // a reference is a complete value, so move on to RNXT
5075  csubstr ref = _scan_ref_seq();
5076  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5077  m_evt_handler->set_val_ref(ref);
5078  addrem_flags(RNXT, RVAL);
5079  }
5080  else if(first == '&')
5081  {
      // an anchor decorates the value that follows, so the state stays RVAL.
      // If _maybe_scan_following_comma reports a comma, the anchored value is
      // emitted as an empty plain scalar and a sibling is added.
5082  csubstr anchor = _scan_anchor();
5083  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5084  m_evt_handler->set_val_anchor(anchor);
5085  if(_maybe_scan_following_comma())
5086  {
5087  _c4dbgp("seqflow[RVAL]: empty scalar!");
5088  m_evt_handler->set_val_scalar_plain_empty();
5089  m_evt_handler->add_sibling();
5090  }
5091  }
5092  else if(first == '!')
5093  {
      // tags are handled like anchors above, with an extra _check_tag call
5094  csubstr tag = _scan_tag();
5095  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5096  _check_tag(tag);
5097  m_evt_handler->set_val_tag(tag);
5098  if(_maybe_scan_following_comma())
5099  {
5100  _c4dbgp("seqflow[RVAL]: empty scalar!");
5101  m_evt_handler->set_val_scalar_plain_empty();
5102  m_evt_handler->add_sibling();
5103  }
5104  }
5105  else if(first == ':')
5106  {
      // a colon where a value was expected: start a map with an empty key and
      // hand over to the RSEQIMAP state, which expects the value (RVAL)
5107  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5108  addrem_flags(RNXT, RVAL);
5109  m_evt_handler->begin_map_val_flow();
5110  _set_indentation(m_evt_handler->m_parent->indref);
5111  m_evt_handler->set_key_scalar_plain_empty();
5112  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5113  _line_progressed(1);
5114  goto seqflow_finish;
5115  }
5116  else if(first == '?')
5117  {
      // explicit key token: start a map and hand over to the RSEQIMAP state
      // with QMRK set, so that the key is read next
5118  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5119  addrem_flags(RNXT, RVAL);
5120  m_was_inside_qmrk = true;
5121  m_evt_handler->begin_map_val_flow();
5122  _set_indentation(m_evt_handler->m_parent->indref);
5123  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5124  _line_progressed(1);
5125  _maybe_skip_whitespace_tokens();
5126  goto seqflow_finish;
5127  }
5128  else
5129  {
5130  _c4err("parse error");
5131  }
5132  }
5133  else // RNXT
5134  {
5135  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5136  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5137  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5138  if(first == ',')
5139  {
      // separator: add a sibling and expect its value
5140  _c4dbgp("seqflow[RNXT]: expect next val");
5141  addrem_flags(RVAL, RNXT);
5142  m_evt_handler->add_sibling();
5143  _line_progressed(1);
5144  }
5145  else if(first == ']')
5146  {
5147  _c4dbgp("seqflow[RNXT]: end!");
5148  m_evt_handler->end_seq();
5149  _line_progressed(1);
5150  goto seqflow_finish;
5151  }
5152  else if(first == ':')
5153  {
      // a colon after a value: going by the name of the handler call, the
      // value just read is turned into the first key of a new flow map; the
      // RSEQIMAP state then expects the value (RVAL)
5154  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5155  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5156  _set_indentation(m_evt_handler->m_parent->indref);
5157  _line_progressed(1);
5158  addrem_flags(RSEQIMAP|RVAL, RNXT);
5159  goto seqflow_finish;
5160  }
5161  else
5162  {
5163  _c4err("parse error");
5164  }
5165  }
5166 
      // loop tail: if the current line is used up, scan the next one (or fail
      // at end of file because the closing ] was never seen), then start
      // another pass
5167  seqflow_again:
5168  _c4dbgt("seqflow: go again", 0);
5169  if(_finished_line())
5170  {
5171  if(C4_LIKELY(!_finished_file()))
5172  {
5173  _line_ended();
5174  _scan_line();
5175  _c4dbgnextline();
5176  }
5177  else
5178  {
5179  _c4err("missing terminating ]");
5180  }
5181  }
5182  goto seqflow_start;
5183 
5184  seqflow_finish:
5185  _c4dbgp("seqflow: finish");
5186 }
5187 
5188 
5189 //-----------------------------------------------------------------------------
5190 
5191 template<class EventHandler>
5192 void ParseEngine<EventHandler>::_handle_map_flow()
5193 {
5194 mapflow_start:
5195  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5196 
5197  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5198  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5199  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5200  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5201 
5202  _handle_flow_skip_whitespace();
5203  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5204  if(!rem.len)
5205  goto mapflow_again;
5206 
5207  if(has_any(RKEY))
5208  {
5209  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5210  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5211  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5212  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5213  const char first = rem.str[0];
5214  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5215  ScannedScalar sc;
5216  if(first == '\'')
5217  {
5218  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5219  sc = _scan_scalar_squot();
5220  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5221  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5222  addrem_flags(RKCL, RKEY|QMRK);
5223  }
5224  else if(first == '"')
5225  {
5226  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5227  sc = _scan_scalar_dquot();
5228  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5229  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5230  addrem_flags(RKCL, RKEY|QMRK);
5231  }
5232  // block scalars (ie | and >) cannot appear in flow containers
5233  else if(_scan_scalar_plain_map_flow(&sc))
5234  {
5235  _c4dbgp("mapflow[RKEY]: plain scalar");
5236  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5237  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5238  addrem_flags(RKCL, RKEY|QMRK);
5239  }
5240  else if(first == '?')
5241  {
5242  _c4dbgp("mapflow[RKEY]: explicit key");
5243  _line_progressed(1);
5244  addrem_flags(QMRK, RKEY);
5245  _maybe_skip_whitespace_tokens();
5246  }
5247  else if(first == ':')
5248  {
5249  _c4dbgp("mapflow[RKEY]: setting empty key");
5250  m_evt_handler->set_key_scalar_plain_empty();
5251  addrem_flags(RVAL, RKEY|QMRK);
5252  _line_progressed(1);
5253  _maybe_skip_whitespace_tokens();
5254  }
5255  else if(first == ',')
5256  {
5257  _c4dbgp("mapflow[RKEY]: empty key+val!");
5258  m_evt_handler->set_key_scalar_plain_empty();
5259  m_evt_handler->set_val_scalar_plain_empty();
5260  addrem_flags(RNXT, RKEY|QMRK);
5261  // keep going in this function
5262  }
5263  else if(first == '}') // this happens on a trailing comma like ", }"
5264  {
5265  _c4dbgp("mapflow[RKEY]: end!");
5266  m_evt_handler->end_map();
5267  _line_progressed(1);
5268  goto mapflow_finish;
5269  }
5270  else if(first == '&')
5271  {
5272  csubstr anchor = _scan_anchor();
5273  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5274  m_evt_handler->set_key_anchor(anchor);
5275  }
5276  else if(first == '*')
5277  {
5278  csubstr ref = _scan_ref_map();
5279  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5280  m_evt_handler->set_key_ref(ref);
5281  addrem_flags(RKCL, RKEY);
5282  }
5283  else if(first == '[')
5284  {
5285  // RYML's tree cannot store container keys, but that's
5286  // handled inside the tree sink. Other sink types may be
5287  // able to handle it.
5288  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5289  addrem_flags(RKCL, RKEY);
5290  m_evt_handler->begin_seq_key_flow();
5291  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5292  _set_indentation(m_evt_handler->m_parent->indref);
5293  _line_progressed(1);
5294  goto mapflow_finish;
5295  }
5296  else if(first == '{')
5297  {
5298  // RYML's tree cannot store container keys, but that's
5299  // handled inside the tree sink. Other sink types may be
5300  // able to handle it.
5301  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5302  addrem_flags(RKCL, RKEY);
5303  m_evt_handler->begin_map_key_flow();
5304  addrem_flags(RKEY, RVAL|RKCL);
5305  _set_indentation(m_evt_handler->m_parent->indref);
5306  _line_progressed(1);
5307  // keep going in this function
5308  }
5309  else if(first == '!')
5310  {
5311  csubstr tag = _scan_tag();
5312  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5313  _check_tag(tag);
5314  m_evt_handler->set_key_tag(tag);
5315  }
5316  else
5317  {
5318  _c4err("parse error");
5319  }
5320  }
5321  else if(has_any(RKCL)) // read the key colon
5322  {
5323  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5324  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5325  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5326  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5327  const char first = rem.str[0];
5328  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5329  if(first == ':')
5330  {
5331  _c4dbgp("mapflow[RKCL]: found the colon");
5332  addrem_flags(RVAL, RKCL);
5333  _line_progressed(1);
5334  }
5335  else if(first == '}')
5336  {
5337  _c4dbgp("mapflow[RKCL]: end with missing val!");
5338  addrem_flags(RVAL, RKCL);
5339  m_evt_handler->set_val_scalar_plain_empty();
5340  m_evt_handler->end_map();
5341  _line_progressed(1);
5342  goto mapflow_finish;
5343  }
5344  else if(first == ',')
5345  {
5346  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5347  m_evt_handler->set_val_scalar_plain_empty();
5348  m_evt_handler->add_sibling();
5349  addrem_flags(RKEY, RKCL);
5350  _line_progressed(1);
5351  }
5352  else
5353  {
5354  _c4err("parse error");
5355  }
5356  }
5357  else if(has_any(RVAL))
5358  {
5359  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5360  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5361  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5362  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5363  const char first = rem.str[0];
5364  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5365  ScannedScalar sc;
5366  if(first == '\'')
5367  {
5368  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5369  sc = _scan_scalar_squot();
5370  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5371  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5372  addrem_flags(RNXT, RVAL);
5373  }
5374  else if(first == '"')
5375  {
5376  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5377  sc = _scan_scalar_dquot();
5378  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5379  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5380  addrem_flags(RNXT, RVAL);
5381  }
5382  // block scalars (ie | and >) cannot appear in flow containers
5383  else if(_scan_scalar_plain_map_flow(&sc))
5384  {
5385  _c4dbgp("mapflow[RVAL]: plain scalar.");
5386  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5387  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5388  addrem_flags(RNXT, RVAL);
5389  }
5390  else if(first == '[')
5391  {
5392  _c4dbgp("mapflow[RVAL]: start val seqflow");
5393  addrem_flags(RNXT, RVAL);
5394  m_evt_handler->begin_seq_val_flow();
5395  _set_indentation(m_evt_handler->m_parent->indref);
5396  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5397  _line_progressed(1);
5398  goto mapflow_finish;
5399  }
5400  else if(first == '{')
5401  {
5402  _c4dbgp("mapflow[RVAL]: start val mapflow");
5403  addrem_flags(RNXT, RVAL);
5404  m_evt_handler->begin_map_val_flow();
5405  _set_indentation(m_evt_handler->m_parent->indref);
5406  addrem_flags(RKEY, RNXT);
5407  _line_progressed(1);
5408  // keep going in this function
5409  }
5410  else if(first == '}')
5411  {
5412  _c4dbgp("mapflow[RVAL]: end!");
5413  m_evt_handler->set_val_scalar_plain_empty();
5414  m_evt_handler->end_map();
5415  _line_progressed(1);
5416  goto mapflow_finish;
5417  }
5418  else if(first == ',')
5419  {
5420  _c4dbgp("mapflow[RVAL]: empty val!");
5421  m_evt_handler->set_val_scalar_plain_empty();
5422  addrem_flags(RNXT, RVAL);
5423  // keep going in this function
5424  }
5425  else if(first == '*')
5426  {
5427  csubstr ref = _scan_ref_map();
5428  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5429  m_evt_handler->set_val_ref(ref);
5430  addrem_flags(RNXT, RVAL);
5431  }
5432  else if(first == '&')
5433  {
5434  csubstr anchor = _scan_anchor();
5435  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5436  m_evt_handler->set_val_anchor(anchor);
5437  }
5438  else if(first == '!')
5439  {
5440  csubstr tag = _scan_tag();
5441  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5442  _check_tag(tag);
5443  m_evt_handler->set_val_tag(tag);
5444  }
5445  else
5446  {
5447  _c4err("parse error");
5448  }
5449  }
5450  else if(has_any(RNXT))
5451  {
5452  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5453  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5454  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5455  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5456  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5457  if(rem.begins_with(','))
5458  {
5459  _c4dbgp("mapflow[RNXT]: expect next keyval");
5460  m_evt_handler->add_sibling();
5461  addrem_flags(RKEY, RNXT);
5462  _line_progressed(1);
5463  }
5464  else if(rem.begins_with('}'))
5465  {
5466  _c4dbgp("mapflow[RNXT]: end!");
5467  m_evt_handler->end_map();
5468  _line_progressed(1);
5469  goto mapflow_finish;
5470  }
5471  else
5472  {
5473  _c4err("parse error");
5474  }
5475  }
5476  else if(has_any(QMRK))
5477  {
5478  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5479  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5480  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5481  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5482  const char first = rem.str[0];
5483  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5484  ScannedScalar sc;
5485  if(first == '\'')
5486  {
5487  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5488  sc = _scan_scalar_squot();
5489  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5490  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5491  addrem_flags(RKCL, QMRK);
5492  }
5493  else if(first == '"')
5494  {
5495  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5496  sc = _scan_scalar_dquot();
5497  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5498  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5499  addrem_flags(RKCL, QMRK);
5500  }
5501  // block scalars (ie | and >) cannot appear in flow containers
5502  else if(_scan_scalar_plain_map_flow(&sc))
5503  {
5504  _c4dbgp("mapflow[QMRK]: plain scalar");
5505  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5506  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5507  addrem_flags(RKCL, QMRK);
5508  }
5509  else if(first == ':')
5510  {
5511  _c4dbgp("mapflow[QMRK]: setting empty key");
5512  m_evt_handler->set_key_scalar_plain_empty();
5513  addrem_flags(RVAL, QMRK);
5514  _line_progressed(1);
5515  _maybe_skip_whitespace_tokens();
5516  }
5517  else if(first == '}') // this happens on a trailing comma like ", }"
5518  {
5519  _c4dbgp("mapflow[QMRK]: end!");
5520  m_evt_handler->set_key_scalar_plain_empty();
5521  m_evt_handler->set_val_scalar_plain_empty();
5522  m_evt_handler->end_map();
5523  _line_progressed(1);
5524  goto mapflow_finish;
5525  }
5526  else if(first == ',')
5527  {
5528  _c4dbgp("mapflow[QMRK]: empty key+val!");
5529  m_evt_handler->set_key_scalar_plain_empty();
5530  m_evt_handler->set_val_scalar_plain_empty();
5531  addrem_flags(RNXT, QMRK);
5532  }
5533  else if(first == '&')
5534  {
5535  csubstr anchor = _scan_anchor();
5536  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5537  m_evt_handler->set_key_anchor(anchor);
5538  }
5539  else if(first == '*')
5540  {
5541  csubstr ref = _scan_ref_map();
5542  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5543  m_evt_handler->set_key_ref(ref);
5544  addrem_flags(RKCL, QMRK);
5545  }
5546  else if(first == '[')
5547  {
5548  // RYML's tree cannot store container keys, but that's
5549  // handled inside the tree sink. Other sink types may be
5550  // able to handle it.
5551  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5552  addrem_flags(RKCL, QMRK);
5553  m_evt_handler->begin_seq_key_flow();
5554  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5555  _set_indentation(m_evt_handler->m_parent->indref);
5556  _line_progressed(1);
5557  goto mapflow_finish;
5558  }
5559  else if(first == '{')
5560  {
5561  // RYML's tree cannot store container keys, but that's
5562  // handled inside the tree sink. Other sink types may be
5563  // able to handle it.
5564  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5565  addrem_flags(RKCL, QMRK);
5566  m_evt_handler->begin_map_key_flow();
5567  _set_indentation(m_evt_handler->m_parent->indref);
5568  addrem_flags(RKEY, RKCL);
5569  _line_progressed(1);
5570  // keep going in this function
5571  }
5572  else if(first == '!')
5573  {
5574  csubstr tag = _scan_tag();
5575  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5576  _check_tag(tag);
5577  m_evt_handler->set_key_tag(tag);
5578  }
5579  else
5580  {
5581  _c4err("parse error");
5582  }
5583  }
5584 
5585  mapflow_again:
5586  _c4dbgt("mapflow: go again", 0);
5587  if(_finished_line())
5588  {
5589  if(C4_LIKELY(!_finished_file()))
5590  {
5591  _line_ended();
5592  _scan_line();
5593  _c4dbgnextline();
5594  }
5595  else
5596  {
5597  _c4err("missing terminating }");
5598  }
5599  }
5600  goto mapflow_start;
5601 
5602  mapflow_finish:
5603  _c4dbgp("mapflow: finish");
5604 }
5605 
5606 
5607 //-----------------------------------------------------------------------------
5608 
5609 template<class EventHandler>
5610 void ParseEngine<EventHandler>::_handle_seq_block()
5611 {
5612 seqblck_start:
5613  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5614 
5615  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5616  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5617  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5618  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5619 
5620  _maybe_skip_comment();
5621  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5622  if(!rem.len)
5623  goto seqblck_again;
5624 
5625  if(has_any(RVAL))
5626  {
5627  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5628  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5629  if(m_evt_handler->m_curr->at_line_beginning())
5630  {
5631  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5632  if(m_evt_handler->m_curr->indentation_ge())
5633  {
5634  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5635  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5636  rem = m_evt_handler->m_curr->line_contents.rem;
5637  if(!rem.len)
5638  goto seqblck_again;
5639  }
5640  else if(m_evt_handler->m_curr->indentation_lt())
5641  {
5642  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5643  _handle_indentation_pop_from_block_seq();
5644  goto seqblck_finish;
5645  }
5646  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5647  {
5648  _c4dbgp("seqblck[RVAL]: empty line!");
5649  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5650  goto seqblck_again;
5651  }
5652  }
5653  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5654  else
5655  {
5656  // accommodate annotation on the previous line. eg:
5657  // - &elm
5658  // foo # <-- on this line
5659  // - &elm
5660  // &foo foo: bar # <-- on this line
5661  if(rem.str[0] == ' ')
5662  {
5663  if(_handle_indentation_from_annotations())
5664  {
5665  _c4dbgp("seqblck[RVAL]: annotations!");
5666  rem = m_evt_handler->m_curr->line_contents.rem;
5667  if(!rem.len)
5668  goto seqblck_again;
5669  }
5670  }
5671  }
5672  #endif
5673  _RYML_CB_ASSERT(callbacks(), rem.len);
5674  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5675  const char first = rem.str[0];
5676  const size_t startline = m_evt_handler->m_curr->pos.line;
5677  // warning: the gcc optimizer on x86 builds is brittle with
5678  // this function:
5679  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
5680  ScannedScalar sc;
5681  if(first == '\'')
5682  {
5683  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5684  sc = _scan_scalar_squot();
5685  if(!_maybe_scan_following_colon())
5686  {
5687  _c4dbgp("seqblck[RVAL]: set as val");
5688  _handle_annotations_before_blck_val_scalar();
5689  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5690  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5691  addrem_flags(RNXT, RVAL);
5692  }
5693  else
5694  {
5695  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5696  addrem_flags(RNXT, RVAL);
5697  _handle_annotations_before_start_mapblck(startline);
5698  m_evt_handler->begin_map_val_block();
5699  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5700  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5701  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5702  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5703  _maybe_skip_whitespace_tokens();
5704  goto seqblck_finish;
5705  }
5706  }
5707  else if(first == '"')
5708  {
5709  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5710  sc = _scan_scalar_dquot();
5711  if(!_maybe_scan_following_colon())
5712  {
5713  _c4dbgp("seqblck[RVAL]: set as val");
5714  _handle_annotations_before_blck_val_scalar();
5715  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5716  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5717  addrem_flags(RNXT, RVAL);
5718  }
5719  else
5720  {
5721  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5722  addrem_flags(RNXT, RVAL);
5723  _handle_annotations_before_start_mapblck(startline);
5724  m_evt_handler->begin_map_val_block();
5725  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5726  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5727  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5728  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5729  _maybe_skip_whitespace_tokens();
5730  goto seqblck_finish;
5731  }
5732  }
5733  // block scalars can only appear as keys when in QMRK scope
5734  // (ie, after ? tokens), so no need to scan following colon in
5735  // here.
5736  else if(first == '|')
5737  {
5738  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5739  ScannedBlock sb;
5740  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5741  _handle_annotations_before_blck_val_scalar();
5742  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5743  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5744  addrem_flags(RNXT, RVAL);
5745  }
5746  else if(first == '>')
5747  {
5748  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5749  ScannedBlock sb;
5750  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5751  _handle_annotations_before_blck_val_scalar();
5752  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5753  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5754  addrem_flags(RNXT, RVAL);
5755  }
5756  else if(_scan_scalar_plain_seq_blck(&sc))
5757  {
5758  _c4dbgp("seqblck[RVAL]: plain scalar.");
5759  if(!_maybe_scan_following_colon())
5760  {
5761  _c4dbgp("seqblck[RVAL]: set as val");
5762  _handle_annotations_before_blck_val_scalar();
5763  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5764  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5765  addrem_flags(RNXT, RVAL);
5766  }
5767  else
5768  {
5769  if(startindent > m_evt_handler->m_curr->indref)
5770  {
5771  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5772  addrem_flags(RNXT, RVAL);
5773  _handle_annotations_before_start_mapblck(startline);
5774  m_evt_handler->begin_map_val_block();
5775  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5776  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5777  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5778  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5779  _maybe_skip_whitespace_tokens();
5780  goto seqblck_finish;
5781  }
5782  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5783  {
5784  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5785  m_evt_handler->set_val_scalar_plain_empty();
5786  m_evt_handler->end_seq();
5787  m_evt_handler->add_sibling();
5788  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5789  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5790  addrem_flags(RVAL, RNXT|RKEY);
5791  _maybe_skip_whitespace_tokens();
5792  goto seqblck_finish;
5793  }
5794  else
5795  {
5796  _c4err("parse error");
5797  }
5798  }
5799  }
5800  else if(first == '[')
5801  {
5802  _c4dbgp("seqblck[RVAL]: start child seqflow");
5803  addrem_flags(RNXT, RVAL);
5804  m_evt_handler->begin_seq_val_flow();
5805  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5806  _line_progressed(1);
5807  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5808  goto seqblck_finish;
5809  }
5810  else if(first == '{')
5811  {
5812  _c4dbgp("seqblck[RVAL]: start child mapflow");
5813  addrem_flags(RNXT, RVAL);
5814  _handle_annotations_before_blck_val_scalar();
5815  m_evt_handler->begin_map_val_flow();
5816  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5817  _line_progressed(1);
5818  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5819  goto seqblck_finish;
5820  }
5821  else if(first == '-')
5822  {
5823  if(startindent == m_evt_handler->m_curr->indref)
5824  {
5825  _c4dbgp("seqblck[RVAL]: prev val was empty");
5826  _handle_annotations_before_blck_val_scalar();
5827  m_evt_handler->set_val_scalar_plain_empty();
5828  // keep in RVAL, but for the next sibling
5829  m_evt_handler->add_sibling();
5830  }
5831  else
5832  {
5833  _c4dbgp("seqblck[RVAL]: start child seqblck");
5834  _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5835  addrem_flags(RNXT, RVAL);
5836  _handle_annotations_before_blck_val_scalar();
5837  m_evt_handler->begin_seq_val_block();
5838  addrem_flags(RVAL, RNXT);
5839  _save_indentation();
5840  // keep going on inside this function
5841  }
5842  _line_progressed(1);
5843  _maybe_skip_whitespace_tokens();
5844  }
5845  else if(first == ':')
5846  {
5847  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5848  addrem_flags(RNXT, RVAL);
5849  _handle_annotations_before_start_mapblck(startline);
5850  m_evt_handler->begin_map_val_block();
5851  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5852  m_evt_handler->set_key_scalar_plain_empty();
5853  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5854  _line_progressed(1);
5855  _maybe_skip_whitespace_tokens();
5856  goto seqblck_finish;
5857  }
5858  else if(first == '&')
5859  {
5860  const csubstr anchor = _scan_anchor();
5861  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5862  // we need to buffer the anchors, as there may be two
5863  // consecutive anchors in here
5864  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5865  }
5866  else if(first == '*')
5867  {
5868  csubstr ref = _scan_ref_seq();
5869  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5870  if(!_maybe_scan_following_colon())
5871  {
5872  _c4dbgp("seqblck[RVAL]: set ref as val!");
5873  _handle_annotations_before_blck_val_scalar();
5874  m_evt_handler->set_val_ref(ref);
5875  addrem_flags(RNXT, RVAL);
5876  }
5877  else
5878  {
5879  _c4dbgp("seqblck[RVAL]: ref is key of map");
5880  addrem_flags(RNXT, RVAL);
5881  _handle_annotations_before_start_mapblck(startline);
5882  m_evt_handler->begin_map_val_block();
5883  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5884  m_evt_handler->set_key_ref(ref);
5885  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5886  _set_indentation(startindent);
5887  _maybe_skip_whitespace_tokens();
5888  goto seqblck_finish;
5889  }
5890  }
5891  else if(first == '!')
5892  {
5893  csubstr tag = _scan_tag();
5894  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5895  // we need to buffer the tags, as there may be two
5896  // consecutive tags in here
5897  _add_annotation(&m_pending_tags, tag, startindent, startline);
5898  }
5899  else if(first == '?')
5900  {
5901  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5902  addrem_flags(RNXT, RVAL);
5903  m_was_inside_qmrk = true;
5904  m_evt_handler->begin_map_val_block();
5905  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5906  _save_indentation();
5907  _line_progressed(1);
5908  _maybe_skip_whitespace_tokens();
5909  goto seqblck_finish;
5910  }
5911  else
5912  {
5913  _c4err("parse error");
5914  }
5915  }
5916  else // RNXT
5917  {
5918  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5919  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5920  //
5921  // handle indentation
5922  //
5923  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5924  if(C4_LIKELY(_at_line_begin()))
5925  {
5926  _c4dbgp("seqblck[RNXT]: at line begin");
5927  if(m_evt_handler->m_curr->indentation_ge())
5928  {
5929  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5930  _line_progressed(m_evt_handler->m_curr->indref);
5931  _maybe_skip_whitespace_tokens();
5932  rem = m_evt_handler->m_curr->line_contents.rem;
5933  if(!rem.len)
5934  goto seqblck_again;
5935  }
5936  else if(m_evt_handler->m_curr->indentation_lt())
5937  {
5938  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5939  _handle_indentation_pop_from_block_seq();
5940  if(has_all(RSEQ|BLCK))
5941  {
5942  _c4dbgp("seqblck[RNXT]: still seqblck!");
5943  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5944  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5945  rem = m_evt_handler->m_curr->line_contents.rem;
5946  if(!rem.len)
5947  goto seqblck_again;
5948  }
5949  else
5950  {
5951  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5952  goto seqblck_finish;
5953  }
5954  }
5955  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5956  {
5957  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5958  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5959  rem = m_evt_handler->m_curr->line_contents.rem;
5960  if(!rem.len)
5961  goto seqblck_again;
5962  }
5963  }
5964  else
5965  {
5966  _c4dbgp("seqblck[RNXT]: NOT at line begin");
5967  if(!rem.begins_with_any(" \t"))
5968  {
5969  _c4err("parse error");
5970  }
5971  else
5972  {
5973  _skipchars(" \t");
5974  rem = m_evt_handler->m_curr->line_contents.rem;
5975  if(!rem.len)
5976  {
5977  _c4dbgp("seqblck[RNXT]: again");
5978  goto seqblck_again;
5979  }
5980  }
5981  }
5982  //
5983  // now handle the tokens
5984  //
5985  const char first = rem.str[0];
5986  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
5987  if(first == '-')
5988  {
5989  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5990  {
5991  _c4dbgp("seqblck[RNXT]: expect next val");
5992  addrem_flags(RVAL, RNXT);
5993  m_evt_handler->add_sibling();
5994  _line_progressed(1);
5995  _maybe_skip_whitespace_tokens();
5996  }
5997  else
5998  {
5999  _c4dbgp("seqblck[RNXT]: start doc");
6000  _start_doc_suddenly();
6001  _line_progressed(3);
6002  _maybe_skip_whitespace_tokens();
6003  goto seqblck_finish;
6004  }
6005  }
6006  else if(first == ':')
6007  {
6008  // This happens for example in `- [a: b]: c` (after
6009  // terminating the seq, ie, after `]`). All other cases
6010  // (ie colon after scalars) are caught elsewhere (ie, in
6011  // RVAL state).
6012  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6013  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
6014  {
6015  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6016  m_evt_handler->end_seq();
6017  goto seqblck_finish;
6018  }
6019  else
6020  {
6021  _c4err("parse error");
6022  }
6023  }
6024  else if(first == '.')
6025  {
6026  _c4dbgp("seqblck[RNXT]: maybe doc?");
6027  csubstr rs = rem.sub(1);
6028  if(rs == ".." || rs.begins_with(".. "))
6029  {
6030  _c4dbgp("seqblck[RNXT]: end+start doc");
6031  _end_doc_suddenly();
6032  _line_progressed(3);
6033  _maybe_skip_whitespace_tokens();
6034  goto seqblck_finish;
6035  }
6036  else
6037  {
6038  _c4err("parse error");
6039  }
6040  }
6041  else
6042  {
6043  // may be an indentless sequence nested in a map...
6044  //if(m_evt_handler->m_stack.size() >= 2)
6045  #ifdef RYML_DBG
6046  char flagbuf_[128];
6047  for(auto const& s : m_evt_handler->m_stack)
6048  {
6049  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
6050  }
6051  #endif
6052  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6053  {
6054  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6055  _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6056  _handle_indentation_pop(m_evt_handler->m_parent);
6057  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
6058  m_evt_handler->add_sibling();
6059  addrem_flags(RKEY, RNXT);
6060  goto seqblck_finish;
6061  }
6062  else //if(first != '*')
6063  {
6064  _c4err("parse error");
6065  }
6066  }
6067  }
6068 
6069  seqblck_again:
6070  _c4dbgt("seqblck: go again", 0);
6071  if(_finished_line())
6072  {
6073  _line_ended();
6074  _scan_line();
6075  if(_finished_file())
6076  {
6077  _c4dbgp("seqblck: finish!");
6078  _end_seq_blck();
6079  goto seqblck_finish;
6080  }
6081  _c4dbgnextline();
6082  }
6083  goto seqblck_start;
6084 
6085  seqblck_finish:
6086  _c4dbgp("seqblck: finish");
6087 }
6088 
6089 
6090 //-----------------------------------------------------------------------------
6091 
6092 template<class EventHandler>
6093 void ParseEngine<EventHandler>::_handle_map_block()
6094 {
6095 mapblck_start:
6096  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6097 
6098  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
6099  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
6100  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
6101  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
6102  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
6103 
6104  _maybe_skip_comment();
6105  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6106  if(!rem.len)
6107  goto mapblck_again;
6108 
6109  if(has_any(RKEY))
6110  {
6111  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6112  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6113  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6114  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6115  //
6116  // handle indentation
6117  //
6118  if(m_evt_handler->m_curr->at_line_beginning())
6119  {
6120  if(m_evt_handler->m_curr->indentation_eq())
6121  {
6122  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6123  _line_progressed(m_evt_handler->m_curr->indref);
6124  rem = m_evt_handler->m_curr->line_contents.rem;
6125  if(!rem.len)
6126  goto mapblck_again;
6127  }
6128  else if(m_evt_handler->m_curr->indentation_lt())
6129  {
6130  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6131  _handle_indentation_pop_from_block_map();
6132  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6133  if(has_all(RMAP|BLCK))
6134  {
6135  _c4dbgp("mapblck[RKEY]: still mapblck!");
6136  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6137  rem = m_evt_handler->m_curr->line_contents.rem;
6138  if(!rem.len)
6139  goto mapblck_again;
6140  }
6141  else
6142  {
6143  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6144  goto mapblck_finish;
6145  }
6146  }
6147  else
6148  {
6149  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6150  _c4err("invalid indentation");
6151  }
6152  }
6153  //
6154  // now handle the tokens
6155  //
6156  const char first = rem.str[0];
6157  const size_t startline = m_evt_handler->m_curr->pos.line;
6158  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6159  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6160  ScannedScalar sc;
6161  if(first == '\'')
6162  {
6163  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6164  sc = _scan_scalar_squot();
6165  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6166  _handle_annotations_before_blck_key_scalar();
6167  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6168  addrem_flags(RVAL, RKEY);
6169  if(!_maybe_scan_following_colon())
6170  _c4err("could not find ':' colon after key");
6171  _maybe_skip_whitespace_tokens();
6172  }
6173  else if(first == '"')
6174  {
6175  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6176  sc = _scan_scalar_dquot();
6177  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6178  _handle_annotations_before_blck_key_scalar();
6179  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6180  addrem_flags(RVAL, RKEY);
6181  if(!_maybe_scan_following_colon())
6182  _c4err("could not find ':' colon after key");
6183  _maybe_skip_whitespace_tokens();
6184  }
6185  // block scalars (| and >) can not be used as keys unless they
6186  // appear in an explicit QMRK scope (ie, after the ? token),
6187  else if(C4_UNLIKELY(first == '|'))
6188  {
6189  _c4err("block literal keys must be enclosed in '?'");
6190  }
6191  else if(C4_UNLIKELY(first == '>'))
6192  {
6193  _c4err("block literal keys must be enclosed in '?'");
6194  }
6195  else if(_scan_scalar_plain_map_blck(&sc))
6196  {
6197  _c4dbgp("mapblck[RKEY]: plain scalar");
6198  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6199  _handle_annotations_before_blck_key_scalar();
6200  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6201  addrem_flags(RVAL, RKEY);
6202  if(!_maybe_scan_following_colon())
6203  _c4err("could not find ':' colon after key");
6204  _maybe_skip_whitespace_tokens();
6205  }
6206  else if(first == '?')
6207  {
6208  _c4dbgp("mapblck[RKEY]: key token!");
6209  addrem_flags(QMRK, RKEY);
6210  _line_progressed(1);
6211  _maybe_skip_whitespace_tokens();
6212  m_was_inside_qmrk = true;
6213  goto mapblck_again;
6214  }
6215  else if(first == ':')
6216  {
6217  _c4dbgp("mapblck[RKEY]: setting empty key");
6218  _handle_annotations_before_blck_key_scalar();
6219  m_evt_handler->set_key_scalar_plain_empty();
6220  addrem_flags(RVAL, RKEY);
6221  _line_progressed(1);
6222  _maybe_skip_whitespace_tokens();
6223  }
6224  else if(first == '*')
6225  {
6226  csubstr ref = _scan_ref_map();
6227  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6228  _handle_annotations_before_blck_key_scalar();
6229  m_evt_handler->set_key_ref(ref);
6230  addrem_flags(RVAL, RKEY);
6231  if(!_maybe_scan_following_colon())
6232  _c4err("could not find ':' colon after key");
6233  _maybe_skip_whitespace_tokens();
6234  }
6235  else if(first == '&')
6236  {
6237  csubstr anchor = _scan_anchor();
6238  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6239  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6240  }
6241  else if(first == '!')
6242  {
6243  csubstr tag = _scan_tag();
6244  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6245  _add_annotation(&m_pending_tags, tag, startindent, startline);
6246  }
6247  else if(first == '[')
6248  {
6249  // RYML's tree cannot store container keys, but that's
6250  // handled inside the tree handler. Other handlers may be
6251  // able to handle it.
6252  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6253  addrem_flags(RKCL, RKEY);
6254  _handle_annotations_before_blck_key_scalar();
6255  m_evt_handler->begin_seq_key_flow();
6256  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6257  _line_progressed(1);
6258  _set_indentation(startindent);
6259  goto mapblck_finish;
6260  }
6261  else if(first == '{')
6262  {
6263  // RYML's tree cannot store container keys, but that's
6264  // handled inside the tree handler. Other handlers may be
6265  // able to handle it.
6266  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6267  addrem_flags(RKCL, RKEY);
6268  _handle_annotations_before_blck_key_scalar();
6269  m_evt_handler->begin_map_key_flow();
6270  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6271  _line_progressed(1);
6272  _set_indentation(startindent);
6273  goto mapblck_finish;
6274  }
6275  else if(first == '-')
6276  {
6277  _c4dbgp("mapblck[RKEY]: maybe doc?");
6278  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6279  {
6280  _c4dbgp("mapblck[RKEY]: end+start doc");
6281  _start_doc_suddenly();
6282  _line_progressed(3);
6283  _maybe_skip_whitespace_tokens();
6284  goto mapblck_finish;
6285  }
6286  else
6287  {
6288  _c4err("parse error");
6289  }
6290  }
6291  else if(first == '.')
6292  {
6293  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6294  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6295  {
6296  _c4dbgp("mapblck[RKEY]: end doc");
6297  _end_doc_suddenly();
6298  _line_progressed(3);
6299  _maybe_skip_whitespace_tokens();
6300  goto mapblck_finish;
6301  }
6302  else
6303  {
6304  _c4err("parse error");
6305  }
6306  }
6307  _RYML_WITH_TAB_TOKENS(
6308  else if(first == '\t')
6309  {
6310  _c4dbgp("mapblck[RKEY]: skip tabs");
6311  _maybe_skipchars('\t');
6312  })
6313  else
6314  {
6315  _c4err("parse error");
6316  }
6317  }
6318  else if(has_any(RKCL)) // read the key colon
6319  {
6320  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6321  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6322  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6323  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6324  //
6325  // handle indentation
6326  //
6327  if(m_evt_handler->m_curr->at_line_beginning())
6328  {
6329  if(m_evt_handler->m_curr->indentation_eq())
6330  {
6331  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6332  _line_progressed(m_evt_handler->m_curr->indref);
6333  rem = m_evt_handler->m_curr->line_contents.rem;
6334  if(!rem.len)
6335  goto mapblck_again;
6336  }
6337  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6338  {
6339  _c4err("invalid indentation");
6340  }
6341  }
6342  const char first = rem.str[0];
6343  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6344  if(first == ':')
6345  {
6346  _c4dbgp("mapblck[RKCL]: found the colon");
6347  addrem_flags(RVAL, RKCL);
6348  _line_progressed(1);
6349  _maybe_skip_whitespace_tokens();
6350  }
6351  else if(first == '?')
6352  {
6353  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6354  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6355  m_evt_handler->set_val_scalar_plain_empty();
6356  m_evt_handler->add_sibling();
6357  addrem_flags(QMRK, RKCL);
6358  _line_progressed(1);
6359  _maybe_skip_whitespace_tokens();
6360  }
6361  else if(first == '-')
6362  {
6363  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6364  {
6365  _c4dbgp("mapblck[RKCL]: end+start doc");
6366  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6367  _start_doc_suddenly();
6368  _line_progressed(3);
6369  _maybe_skip_whitespace_tokens();
6370  goto mapblck_finish;
6371  }
6372  else
6373  {
6374  _c4err("parse error");
6375  }
6376  }
6377  else if(first == '.')
6378  {
6379  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6380  csubstr rs = rem.sub(1);
6381  if(rs == ".." || rs.begins_with(".. "))
6382  {
6383  _c4dbgp("mapblck[RKCL]: end+start doc");
6384  _end_doc_suddenly();
6385  _line_progressed(3);
6386  goto mapblck_finish;
6387  }
6388  else
6389  {
6390  _c4err("parse error");
6391  }
6392  }
6393  else if(m_was_inside_qmrk)
6394  {
6395  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6396  _c4dbgp("mapblck[RKCL]: missing :");
6397  m_evt_handler->set_val_scalar_plain_empty();
6398  m_evt_handler->add_sibling();
6399  m_was_inside_qmrk = false;
6400  addrem_flags(RKEY, RKCL);
6401  }
6402  else
6403  {
6404  _c4err("parse error");
6405  }
6406  }
6407  else if(has_any(RVAL))
6408  {
6409  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6410  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6411  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6412  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6413  //
6414  // handle indentation
6415  //
6416  if(m_evt_handler->m_curr->at_line_beginning())
6417  {
6418  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6419  m_evt_handler->m_curr->more_indented = false;
6420  if(m_evt_handler->m_curr->indref == npos)
6421  {
6422  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6423  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6424  _line_progressed(m_evt_handler->m_curr->indref);
6425  rem = m_evt_handler->m_curr->line_contents.rem;
6426  if(!rem.len)
6427  goto mapblck_again;
6428  }
6429  else if(m_evt_handler->m_curr->indentation_eq())
6430  {
6431  _c4dbgp("mapblck[RVAL]: skip indentation!");
6432  _line_progressed(m_evt_handler->m_curr->indref);
6433  rem = m_evt_handler->m_curr->line_contents.rem;
6434  if(!rem.len)
6435  goto mapblck_again;
6436  // TODO: this is valid:
6437  //
6438  // ```yaml
6439  // a:
6440  // b:
6441  // ---
6442  // a:
6443  // b
6444  // ---
6445  // a:
6446  // b: c
6447  // ```
6448  //
6449  // ... but this is not:
6450  //
6451  // ```yaml
6452  // a:
6453  // v
6454  // ---
6455  // a: b: c
6456  // ```
6457  //
6458  // here, we probably need to set a boolean on the state
6459  // to disambiguate between these cases.
6460  }
6461  else if(m_evt_handler->m_curr->indentation_gt())
6462  {
6463  _c4dbgp("mapblck[RVAL]: more indented!");
6464  m_evt_handler->m_curr->more_indented = true;
6465  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6466  rem = m_evt_handler->m_curr->line_contents.rem;
6467  if(!rem.len)
6468  goto mapblck_again;
6469  }
6470  else if(m_evt_handler->m_curr->indentation_lt())
6471  {
6472  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6473  _handle_indentation_pop_from_block_map();
6474  if(has_all(RMAP|BLCK))
6475  {
6476  _c4dbgp("mapblck[RVAL]: still mapblck!");
6477  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6478  if(has_any(RNXT))
6479  {
6480  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6481  m_evt_handler->add_sibling();
6482  addrem_flags(RKEY, RNXT);
6483  }
6484  goto mapblck_again;
6485  }
6486  else
6487  {
6488  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6489  goto mapblck_finish;
6490  }
6491  }
6492  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6493  {
6494  _c4dbgp("mapblck[RVAL]: empty line!");
6495  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6496  goto mapblck_again;
6497  }
6498  }
6499  //
6500  // now handle the tokens
6501  //
6502  const char first = rem.str[0];
6503  const size_t startline = m_evt_handler->m_curr->pos.line;
6504  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6505  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6506  ScannedScalar sc;
6507  if(first == '\'')
6508  {
6509  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6510  sc = _scan_scalar_squot();
6511  if(!_maybe_scan_following_colon())
6512  {
6513  _c4dbgp("mapblck[RVAL]: set as val");
6514  _handle_annotations_before_blck_val_scalar();
6515  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6516  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6517  addrem_flags(RNXT, RVAL);
6518  }
6519  else
6520  {
6521  if(startindent != m_evt_handler->m_curr->indref)
6522  {
6523  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6524  _handle_annotations_before_start_mapblck(startline);
6525  addrem_flags(RNXT, RVAL);
6526  m_evt_handler->begin_map_val_block();
6527  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6528  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6529  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6530  _maybe_skip_whitespace_tokens();
6531  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6532  // keep the child state on RVAL
6533  addrem_flags(RVAL, RNXT);
6534  }
6535  else
6536  {
6537  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6538  m_evt_handler->set_val_scalar_plain_empty();
6539  m_evt_handler->add_sibling();
6540  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6541  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6542  // keep going on RVAL
6543  _maybe_skip_whitespace_tokens();
6544  }
6545  }
6546  }
6547  else if(first == '"')
6548  {
6549  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6550  sc = _scan_scalar_dquot();
6551  if(!_maybe_scan_following_colon())
6552  {
6553  _c4dbgp("mapblck[RVAL]: set as val");
6554  _handle_annotations_before_blck_val_scalar();
6555  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6556  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6557  addrem_flags(RNXT, RVAL);
6558  }
6559  else
6560  {
6561  if(startindent != m_evt_handler->m_curr->indref)
6562  {
6563  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6564  _handle_annotations_before_start_mapblck(startline);
6565  addrem_flags(RNXT, RVAL);
6566  m_evt_handler->begin_map_val_block();
6567  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6568  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6569  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6570  _maybe_skip_whitespace_tokens();
6571  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6572  // keep the child state on RVAL
6573  addrem_flags(RVAL, RNXT);
6574  }
6575  else
6576  {
6577  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6578  m_evt_handler->set_val_scalar_plain_empty();
6579  m_evt_handler->add_sibling();
6580  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6581  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6582  // keep going on RVAL
6583  _maybe_skip_whitespace_tokens();
6584  }
6585  }
6586  }
6587  // block scalars can only appear as keys when in QMRK scope
6588  // (ie, after ? tokens), so no need to scan following colon
6589  else if(first == '|')
6590  {
6591  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6592  ScannedBlock sb;
6593  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6594  _handle_annotations_before_blck_val_scalar();
6595  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6596  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6597  addrem_flags(RNXT, RVAL);
6598  }
6599  else if(first == '>')
6600  {
6601  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6602  ScannedBlock sb;
6603  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6604  _handle_annotations_before_blck_val_scalar();
6605  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6606  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6607  addrem_flags(RNXT, RVAL);
6608  }
6609  else if(_scan_scalar_plain_map_blck(&sc))
6610  {
6611  _c4dbgp("mapblck[RVAL]: plain scalar.");
6612  if(!_maybe_scan_following_colon())
6613  {
6614  _c4dbgp("mapblck[RVAL]: set as val");
6615  _handle_annotations_before_blck_val_scalar();
6616  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6617  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6618  addrem_flags(RNXT, RVAL);
6619  }
6620  else
6621  {
6622  if(startindent != m_evt_handler->m_curr->indref)
6623  {
6624  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6625  addrem_flags(RNXT, RVAL);
6626  _handle_annotations_before_start_mapblck(startline);
6627  m_evt_handler->begin_map_val_block();
6628  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6629  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6630  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6631  _maybe_skip_whitespace_tokens();
6632  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6633  // keep the child state on RVAL
6634  addrem_flags(RVAL, RNXT);
6635  }
6636  else
6637  {
6638  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6639  _handle_annotations_before_blck_val_scalar();
6640  m_evt_handler->set_val_scalar_plain_empty();
6641  m_evt_handler->add_sibling();
6642  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6643  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6644  // keep going on RVAL
6645  _maybe_skip_whitespace_tokens();
6646  }
6647  }
6648  }
6649  else if(first == '-')
6650  {
6651  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6652  {
6653  _c4dbgp("mapblck[RVAL]: start val seqblck");
6654  addrem_flags(RNXT, RVAL);
6655  _handle_annotations_before_blck_val_scalar();
6656  m_evt_handler->begin_seq_val_block();
6657  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6658  _set_indentation(startindent);
6659  _line_progressed(1);
6660  _maybe_skip_whitespace_tokens();
6661  goto mapblck_finish;
6662  }
6663  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6664  {
6665  _c4dbgp("mapblck[RVAL]: end+start doc");
6666  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6667  _start_doc_suddenly();
6668  _line_progressed(3);
6669  _maybe_skip_whitespace_tokens();
6670  goto mapblck_finish;
6671  }
6672  else
6673  {
6674  _c4err("parse error");
6675  }
6676  }
6677  else if(first == '[')
6678  {
6679  _c4dbgp("mapblck[RVAL]: start val seqflow");
6680  addrem_flags(RNXT, RVAL);
6681  _handle_annotations_before_blck_val_scalar();
6682  m_evt_handler->begin_seq_val_flow();
6683  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT);
6684  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6685  _line_progressed(1);
6686  goto mapblck_finish;
6687  }
6688  else if(first == '{')
6689  {
6690  _c4dbgp("mapblck[RVAL]: start val mapflow");
6691  addrem_flags(RNXT, RVAL);
6692  _handle_annotations_before_blck_val_scalar();
6693  m_evt_handler->begin_map_val_flow();
6694  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6695  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6696  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6697  _line_progressed(1);
6698  goto mapblck_finish;
6699  }
6700  else if(first == '*')
6701  {
6702  csubstr ref = _scan_ref_map();
6703  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6704  if(startindent == m_evt_handler->m_curr->indref)
6705  {
6706  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6707  m_evt_handler->set_val_ref(ref);
6708  addrem_flags(RNXT, RVAL);
6709  }
6710  else
6711  {
6712  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6713  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6714  if(_maybe_scan_following_colon())
6715  {
6716  _c4dbgp("mapblck[RVAL]: start child map, block");
6717  addrem_flags(RNXT, RVAL);
6718  _handle_annotations_before_blck_val_scalar();
6719  m_evt_handler->begin_map_val_block();
6720  m_evt_handler->set_key_ref(ref);
6721  _set_indentation(startindent);
6722  // keep going in RVAL
6723  addrem_flags(RVAL, RNXT);
6724  }
6725  else
6726  {
6727  _c4dbgp("mapblck[RVAL]: was val ref");
6728  _handle_annotations_before_blck_val_scalar();
6729  m_evt_handler->set_val_ref(ref);
6730  addrem_flags(RNXT, RVAL);
6731  }
6732  }
6733  _maybe_skip_whitespace_tokens();
6734  }
6735  else if(first == '&')
6736  {
6737  csubstr anchor = _scan_anchor();
6738  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6739  if(startindent == m_evt_handler->m_curr->indref)
6740  {
6741  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6742  m_evt_handler->set_val_scalar_plain_empty();
6743  m_evt_handler->add_sibling();
6744  addrem_flags(RKEY, RVAL);
6745  }
6746  // we need to buffer the anchors, as there may be two
6747  // consecutive anchors in here
6748  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6749  }
6750  else if(first == '!')
6751  {
6752  csubstr tag = _scan_tag();
6753  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6754  if(startindent == m_evt_handler->m_curr->indref)
6755  {
6756  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6757  _handle_annotations_before_blck_val_scalar();
6758  m_evt_handler->set_val_scalar_plain_empty();
6759  m_evt_handler->add_sibling();
6760  addrem_flags(RKEY, RVAL);
6761  }
6762  // we need to buffer the tags, as there may be two
6763  // consecutive tags in here
6764  _add_annotation(&m_pending_tags, tag, startindent, startline);
6765  }
6766  else if(first == '?')
6767  {
6768  if(startindent == m_evt_handler->m_curr->indref)
6769  {
6770  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6771  _handle_annotations_before_blck_val_scalar();
6772  m_evt_handler->set_val_scalar_plain_empty();
6773  m_evt_handler->add_sibling();
6774  addrem_flags(QMRK, RVAL);
6775  }
6776  else if(startindent > m_evt_handler->m_curr->indref)
6777  {
6778  _c4dbgp("mapblck[RVAL]: start val mapblck");
6779  addrem_flags(RNXT, RVAL);
6780  _handle_annotations_before_blck_val_scalar();
6781  m_evt_handler->begin_map_val_block();
6782  addrem_flags(QMRK|BLCK, RNXT);
6783  _set_indentation(startindent);
6784  }
6785  else
6786  {
6787  _c4err("parse error");
6788  }
6789  m_was_inside_qmrk = true;
6790  _line_progressed(1);
6791  _maybe_skip_whitespace_tokens();
6792  goto mapblck_again;
6793  }
6794  else if(first == ':')
6795  {
6796  if(startindent == m_evt_handler->m_curr->indref)
6797  {
6798  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6799  m_evt_handler->set_val_scalar_plain_empty();
6800  m_evt_handler->add_sibling();
6801  m_evt_handler->set_key_scalar_plain_empty();
6802  _line_progressed(1);
6803  _maybe_skip_whitespace_tokens();
6804  goto mapblck_again;
6805  }
6806  else
6807  {
6808  _c4err("parse error");
6809  }
6810  }
6811  else if(first == '.')
6812  {
6813  _c4dbgp("mapblck[RVAL]: maybe doc?");
6814  csubstr rs = rem.sub(1);
6815  if(rs == ".." || rs.begins_with(".. "))
6816  {
6817  _c4dbgp("seqblck[RVAL]: end doc expl");
6818  _end_doc_suddenly();
6819  _line_progressed(3);
6820  _maybe_skip_whitespace_tokens();
6821  goto mapblck_finish;
6822  }
6823  else
6824  {
6825  _c4err("parse error");
6826  }
6827  }
6829  else if(first == '\t')
6830  {
6831  _c4dbgp("mapblck[RVAL]: skip tabs");
6832  _maybe_skipchars('\t');
6833  })
6834  else
6835  {
6836  _c4err("parse error");
6837  }
6838  }
6839  else if(has_any(RNXT))
6840  {
6841  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6842  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6843  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6844  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6845  //
6846  // handle indentation
6847  //
6848  if(m_evt_handler->m_curr->at_line_beginning())
6849  {
6850  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6851  if(m_evt_handler->m_curr->indentation_eq())
6852  {
6853  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6854  _line_progressed(m_evt_handler->m_curr->indref);
6855  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6856  m_evt_handler->add_sibling();
6857  addrem_flags(RKEY, RNXT);
6858  goto mapblck_again;
6859  }
6860  else if(m_evt_handler->m_curr->indentation_lt())
6861  {
6862  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6863  _handle_indentation_pop_from_block_map();
6864  if(has_all(RMAP|BLCK))
6865  {
6866  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6867  if(!has_any(RKCL))
6868  {
6869  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6870  m_evt_handler->add_sibling();
6871  addrem_flags(RKEY, RNXT);
6872  }
6873  goto mapblck_again;
6874  }
6875  else
6876  {
6877  goto mapblck_finish;
6878  }
6879  }
6880  }
6881  else
6882  {
6883  _c4dbgp("mapblck[RNXT]: NOT at line begin");
6884  if(!rem.begins_with_any(" \t"))
6885  {
6886  _c4err("parse error");
6887  }
6888  else
6889  {
6890  _skipchars(" \t");
6891  rem = m_evt_handler->m_curr->line_contents.rem;
6892  if(!rem.len)
6893  {
6894  _c4dbgp("seqblck[RNXT]: again");
6895  goto mapblck_again;
6896  }
6897  }
6898  }
6899  //
6900  // handle tokens
6901  //
6902  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6903  const char first = rem.str[0];
6904  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6905  if(first == ':')
6906  {
6907  if(m_evt_handler->m_curr->more_indented)
6908  {
6909  _c4dbgp("mapblck[RNXT]: start child block map");
6910  C4_NOT_IMPLEMENTED();
6911  //m_evt_handler->actually_as_block_map();
6912  _line_progressed(1);
6913  _set_indentation(m_evt_handler->m_curr->scalar_col);
6914  m_evt_handler->m_curr->more_indented = false;
6915  goto mapblck_again;
6916  }
6917  else
6918  {
6919  _c4err("parse error");
6920  }
6921  }
6922  else if(first == ' ')
6923  {
6924  _c4dbgp("mapblck[RNXT]: skip spaces");
6925  _maybe_skip_whitespace_tokens();
6926  }
6927  else
6928  {
6929  _c4err("parse error");
6930  }
6931  }
6932  else if(has_any(QMRK))
6933  {
6934  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6935  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6936  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6937  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6938  //
6939  // handle indentation
6940  //
6941  if(m_evt_handler->m_curr->at_line_beginning())
6942  {
6943  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6944  if(m_evt_handler->m_curr->indentation_eq())
6945  {
6946  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6947  _line_progressed(m_evt_handler->m_curr->indref);
6948  rem = m_evt_handler->m_curr->line_contents.rem;
6949  if(!rem.len)
6950  goto mapblck_again;
6951  }
6952  else if(m_evt_handler->m_curr->indentation_lt())
6953  {
6954  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6955  _handle_indentation_pop_from_block_map();
6956  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6957  if(has_all(RMAP|BLCK))
6958  {
6959  _c4dbgp("mapblck[QMRK]: still mapblck!");
6960  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6961  rem = m_evt_handler->m_curr->line_contents.rem;
6962  if(!rem.len)
6963  goto mapblck_again;
6964  }
6965  else
6966  {
6967  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6968  goto mapblck_finish;
6969  }
6970  }
6971  // indentation can be larger in QMRK state
6972  else
6973  {
6974  _c4dbgp("mapblck[QMRK]: larger indentation !");
6975  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6976  rem = m_evt_handler->m_curr->line_contents.rem;
6977  if(!rem.len)
6978  goto mapblck_again;
6979  }
6980  }
6981  //
6982  // now handle the tokens
6983  //
6984  const char first = rem.str[0];
6985  const size_t startline = m_evt_handler->m_curr->pos.line;
6986  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6987  _c4dbgpf("mapblck[QMRK]: '{}'", first);
6988  ScannedScalar sc;
6989  if(first == '\'')
6990  {
6991  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
6992  sc = _scan_scalar_squot();
6993  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6994  if(!_maybe_scan_following_colon())
6995  {
6996  _c4dbgp("mapblck[QMRK]: set as key");
6997  _handle_annotations_before_blck_key_scalar();
6998  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6999  addrem_flags(RKCL, QMRK);
7000  }
7001  else
7002  {
7003  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7004  addrem_flags(RKCL, QMRK);
7005  _handle_annotations_before_start_mapblck_as_key();
7006  m_evt_handler->begin_map_key_block();
7007  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7008  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7009  _maybe_skip_whitespace_tokens();
7010  _set_indentation(startindent);
7011  // keep the child state on RVAL
7012  addrem_flags(RVAL, RKCL|QMRK);
7013  }
7014  }
7015  else if(first == '"')
7016  {
7017  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7018  sc = _scan_scalar_dquot();
7019  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7020  if(!_maybe_scan_following_colon())
7021  {
7022  _c4dbgp("mapblck[QMRK]: set as key");
7023  _handle_annotations_before_blck_key_scalar();
7024  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7025  addrem_flags(RKCL, QMRK);
7026  }
7027  else
7028  {
7029  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7030  addrem_flags(RKCL, QMRK);
7031  _handle_annotations_before_start_mapblck_as_key();
7032  m_evt_handler->begin_map_key_block();
7033  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7034  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7035  _maybe_skip_whitespace_tokens();
7036  _set_indentation(startindent);
7037  // keep the child state on RVAL
7038  addrem_flags(RVAL, RKCL|QMRK);
7039  }
7040  }
7041  else if(first == '|')
7042  {
7043  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7044  ScannedBlock sb;
7045  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7046  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7047  _handle_annotations_before_blck_key_scalar();
7048  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7049  addrem_flags(RKCL, QMRK);
7050  }
7051  else if(first == '>')
7052  {
7053  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7054  ScannedBlock sb;
7055  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7056  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7057  _handle_annotations_before_blck_key_scalar();
7058  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7059  addrem_flags(RKCL, QMRK);
7060  }
7061  else if(_scan_scalar_plain_map_blck(&sc))
7062  {
7063  _c4dbgp("mapblck[QMRK]: plain scalar");
7064  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7065  if(!_maybe_scan_following_colon())
7066  {
7067  _c4dbgp("mapblck[QMRK]: set as key");
7068  _handle_annotations_before_blck_key_scalar();
7069  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7070  addrem_flags(RKCL, QMRK);
7071  }
7072  else
7073  {
7074  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7075  addrem_flags(RKCL, QMRK);
7076  _handle_annotations_before_start_mapblck_as_key();
7077  m_evt_handler->begin_map_key_block();
7078  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7079  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7080  _maybe_skip_whitespace_tokens();
7081  _set_indentation(startindent);
7082  // keep the child state on RVAL
7083  addrem_flags(RVAL, RKCL|QMRK);
7084  }
7085  }
7086  else if(first == ':')
7087  {
7088  if(startindent == m_evt_handler->m_curr->indref)
7089  {
7090  _c4dbgp("mapblck[QMRK]: empty key");
7091  addrem_flags(RVAL, QMRK);
7092  _handle_annotations_before_blck_key_scalar();
7093  m_evt_handler->set_key_scalar_plain_empty();
7094  _line_progressed(1);
7095  _maybe_skip_whitespace_tokens();
7096  }
7097  else
7098  {
7099  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7100  addrem_flags(RKCL, QMRK);
7101  _handle_annotations_before_start_mapblck_as_key();
7102  m_evt_handler->begin_map_key_block();
7103  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7104  m_evt_handler->set_key_scalar_plain_empty();
7105  _line_progressed(1);
7106  _maybe_skip_whitespace_tokens();
7107  _set_indentation(startindent);
7108  // keep the child state on RVAL
7109  addrem_flags(RVAL, RKCL|QMRK);
7110  }
7111  }
7112  else if(first == '*')
7113  {
7114  csubstr ref = _scan_ref_map();
7115  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7116  if(!_maybe_scan_following_colon())
7117  {
7118  _c4dbgp("mapblck[QMRK]: set ref as key");
7119  _handle_annotations_before_blck_key_scalar();
7120  m_evt_handler->set_key_ref(ref);
7121  addrem_flags(RKCL, QMRK);
7122  }
7123  else
7124  {
7125  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7126  addrem_flags(RKCL, QMRK);
7127  _handle_annotations_before_blck_key_scalar();
7128  m_evt_handler->begin_map_key_block();
7129  m_evt_handler->set_key_ref(ref);
7130  _set_indentation(startindent);
7131  // keep the child state on RVAL
7132  addrem_flags(RVAL, RKCL|QMRK);
7133  }
7134  _maybe_skip_whitespace_tokens();
7135  }
7136  else if(first == '&')
7137  {
7138  csubstr anchor = _scan_anchor();
7139  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7140  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7141  }
7142  else if(first == '!')
7143  {
7144  csubstr tag = _scan_tag();
7145  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7146  _add_annotation(&m_pending_tags, tag, startindent, startline);
7147  }
7148  else if(first == '-')
7149  {
7150  _c4dbgp("mapblck[QMRK]: maybe doc?");
7151  csubstr rs = rem.sub(1);
7152  if(rs == "--" || rs.begins_with("-- "))
7153  {
7154  _c4dbgp("mapblck[QMRK]: end+start doc");
7155  _start_doc_suddenly();
7156  _line_progressed(3);
7157  }
7158  else
7159  {
7160  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7161  addrem_flags(RKCL, RKEY|QMRK);
7162  m_evt_handler->begin_seq_key_block();
7163  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7164  _set_indentation(startindent);
7165  _line_progressed(1);
7166  }
7167  _maybe_skip_whitespace_tokens();
7168  goto mapblck_finish;
7169  }
7170  else if(first == '[')
7171  {
7172  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7173  addrem_flags(RKCL, RKEY|QMRK);
7174  m_evt_handler->begin_seq_key_flow();
7175  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
7176  _set_indentation(m_evt_handler->m_parent->indref);
7177  _line_progressed(1);
7178  goto mapblck_finish;
7179  }
7180  else if(first == '{')
7181  {
7182  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7183  addrem_flags(RKCL, RKEY|QMRK);
7184  m_evt_handler->begin_map_key_flow();
7185  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7186  _set_indentation(m_evt_handler->m_parent->indref);
7187  _line_progressed(1);
7188  goto mapblck_finish;
7189  }
7190  else if(first == '?')
7191  {
7192  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7193  m_evt_handler->set_key_scalar_plain_empty();
7194  m_evt_handler->set_val_scalar_plain_empty();
7195  m_evt_handler->add_sibling();
7196  _line_progressed(1);
7197  }
7198  else if(first == '.')
7199  {
7200  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7201  csubstr rs = rem.sub(1);
7202  if(rs == ".." || rs.begins_with(".. "))
7203  {
7204  _c4dbgp("mapblck[QMRK]: end+start doc");
7205  _end_doc_suddenly();
7206  _line_progressed(3);
7207  goto mapblck_finish;
7208  }
7209  else
7210  {
7211  _c4err("parse error");
7212  }
7213  }
7214  else
7215  {
7216  _c4err("parse error");
7217  }
7218  }
7219 
7220  mapblck_again:
7221  _c4dbgt("mapblck: again", 0);
7222  if(_finished_line())
7223  {
7224  _line_ended();
7225  _scan_line();
7226  if(_finished_file())
7227  {
7228  _c4dbgp("mapblck: file finished!");
7229  _end_map_blck();
7230  goto mapblck_finish;
7231  }
7232  _c4dbgnextline();
7233  }
7234  goto mapblck_start;
7235 
7236  mapblck_finish:
7237  _c4dbgp("mapblck: finish");
7238 }
7239 
7240 
7241 //-----------------------------------------------------------------------------
7242 
7243 template<class EventHandler>
7244 void ParseEngine<EventHandler>::_handle_unk_json()
7245 {
7246  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7247 
7248  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7249  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7250 
7251  _maybe_skip_comment();
7252  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7253  if(!rem.len)
7254  return;
7255 
7256  size_t pos = rem.first_not_of(" \t");
7257  if(pos)
7258  {
7259  pos = pos != npos ? pos : rem.len;
7260  _c4dbgpf("skipping indentation of {}", pos);
7261  _line_progressed(pos);
7262  rem = m_evt_handler->m_curr->line_contents.rem;
7263  if(!rem.len)
7264  return;
7265  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7266  }
7267 
7268  if(rem.begins_with('['))
7269  {
7270  _c4dbgp("it's a seq");
7271  m_evt_handler->check_trailing_doc_token();
7272  _maybe_begin_doc();
7273  m_evt_handler->begin_seq_val_flow();
7274  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7275  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7276  m_doc_empty = false;
7277  _line_progressed(1);
7278  }
7279  else if(rem.begins_with('{'))
7280  {
7281  _c4dbgp("it's a map");
7282  m_evt_handler->check_trailing_doc_token();
7283  _maybe_begin_doc();
7284  m_evt_handler->begin_map_val_flow();
7285  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7286  m_doc_empty = false;
7287  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7288  _line_progressed(1);
7289  }
7290  else if(_handle_bom())
7291  {
7292  _c4dbgp("byte order mark");
7293  }
7294  else
7295  {
7296  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7297  _maybe_skip_whitespace_tokens();
7298  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7299  if(!s.len)
7300  return;
7301  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7302  const char first = s.str[0];
7303  ScannedScalar sc;
7304  if(first == '"')
7305  {
7306  _c4dbgp("runk_json: scanning double-quoted scalar");
7307  m_evt_handler->check_trailing_doc_token();
7308  _maybe_begin_doc();
7309  add_flags(RDOC);
7310  m_doc_empty = false;
7311  sc = _scan_scalar_dquot();
7312  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7313  if(!_maybe_scan_following_colon())
7314  {
7315  _c4dbgp("runk_json: set as val");
7316  _handle_annotations_before_blck_val_scalar();
7317  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7318  }
7319  else
7320  {
7321  _c4err("parse error");
7322  }
7323  }
7324  else if(_scan_scalar_plain_unk(&sc))
7325  {
7326  _c4dbgp("runk_json: got a plain scalar");
7327  m_evt_handler->check_trailing_doc_token();
7328  _maybe_begin_doc();
7329  add_flags(RDOC);
7330  m_doc_empty = false;
7331  if(!_maybe_scan_following_colon())
7332  {
7333  _c4dbgp("runk_json: set as val");
7334  _handle_annotations_before_blck_val_scalar();
7335  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7336  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7337  }
7338  else
7339  {
7340  _c4err("parse error");
7341  }
7342  }
7343  else
7344  {
7345  _c4err("parse error");
7346  }
7347  }
7348 }
7349 
7350 
7351 //-----------------------------------------------------------------------------
7352 
7353 template<class EventHandler>
7354 void ParseEngine<EventHandler>::_handle_unk()
7355 {
7356  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7357 
7358  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7359  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7360 
7361  _maybe_skip_comment();
7362  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7363  if(!rem.len)
7364  return;
7365 
7366  size_t pos = rem.first_not_of(" \t");
7367  if(pos)
7368  {
7369  pos = pos != npos ? pos : rem.len;
7370  _c4dbgpf("skipping {} whitespace characters", pos);
7371  _line_progressed(pos);
7372  rem = m_evt_handler->m_curr->line_contents.rem;
7373  if(!rem.len)
7374  return;
7375  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7376  }
7377 
7378  if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7379  {
7380  _c4dbgp("rtop: zero indent + at line begin");
7381  if(_handle_bom())
7382  {
7383  _c4dbgp("byte order mark!");
7384  rem = m_evt_handler->m_curr->line_contents.rem;
7385  if(!rem.len)
7386  return;
7387  }
7388  const char first = rem.str[0];
7389  if(first == '-')
7390  {
7391  _c4dbgp("rtop: suspecting doc");
7392  if(_is_doc_begin_token(rem))
7393  {
7394  _c4dbgp("rtop: begin doc");
7395  _maybe_end_doc();
7396  _begin2_doc_expl();
7397  _set_indentation(0);
7398  addrem_flags(RDOC|RUNK, NDOC);
7399  _line_progressed(3u);
7400  _maybe_skip_whitespace_tokens();
7401  return;
7402  }
7403  }
7404  else if(first == '.')
7405  {
7406  _c4dbgp("rtop: suspecting doc end");
7407  if(_is_doc_end_token(rem))
7408  {
7409  _c4dbgp("rtop: end doc");
7410  if(has_any(RDOC))
7411  {
7412  _end2_doc_expl();
7413  }
7414  else
7415  {
7416  _c4dbgp("rtop: ignore end doc");
7417  }
7418  addrem_flags(NDOC|RUNK, RDOC);
7419  _line_progressed(3u);
7420  _maybe_skip_whitespace_tokens();
7421  return;
7422  }
7423  }
7424  else if(first == '%')
7425  {
7426  _c4dbgpf("directive: {}", rem);
7427  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7428  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7429  _handle_directive(rem);
7430  return;
7431  }
7432  }
7433 
7434  /* no else-if! */
7435  char first = rem.str[0];
7436 
7437  if(first == '[')
7438  {
7439  m_evt_handler->check_trailing_doc_token();
7440  _maybe_begin_doc();
7441  m_doc_empty = false;
7442  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7443  if(C4_LIKELY( ! _annotations_require_key_container()))
7444  {
7445  _c4dbgp("it's a seq, flow");
7446  _handle_annotations_before_blck_val_scalar();
7447  m_evt_handler->begin_seq_val_flow();
7448  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7449  _set_indentation(startindent);
7450  }
7451  else
7452  {
7453  _c4dbgp("start new block map, set flow seq as key (!)");
7454  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7455  m_evt_handler->begin_map_val_block();
7456  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7457  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7458  m_evt_handler->begin_seq_key_flow();
7459  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7460  _set_indentation(startindent);
7461  }
7462  _line_progressed(1);
7463  }
7464  else if(first == '{')
7465  {
7466  m_evt_handler->check_trailing_doc_token();
7467  _maybe_begin_doc();
7468  m_doc_empty = false;
7469  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7470  if(C4_LIKELY( ! _annotations_require_key_container()))
7471  {
7472  _c4dbgp("it's a map, flow");
7473  _handle_annotations_before_blck_val_scalar();
7474  m_evt_handler->begin_map_val_flow();
7475  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7476  _set_indentation(startindent);
7477  }
7478  else
7479  {
7480  _c4dbgp("start new block map, set flow map as key (!)");
7481  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7482  m_evt_handler->begin_map_val_block();
7483  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7484  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7485  m_evt_handler->begin_map_key_flow();
7486  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7487  _set_indentation(startindent);
7488  }
7489  _line_progressed(1);
7490  }
7491  else if(first == '-' && _is_blck_token(rem))
7492  {
7493  _c4dbgp("it's a seq, block");
7494  m_evt_handler->check_trailing_doc_token();
7495  _maybe_begin_doc();
7496  _handle_annotations_before_blck_val_scalar();
7497  m_evt_handler->begin_seq_val_block();
7498  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7499  m_doc_empty = false;
7500  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7501  _line_progressed(1);
7502  _maybe_skip_whitespace_tokens();
7503  }
7504  else if(first == '?' && _is_blck_token(rem))
7505  {
7506  _c4dbgp("it's a map + this key is complex");
7507  m_evt_handler->check_trailing_doc_token();
7508  _maybe_begin_doc();
7509  _handle_annotations_before_blck_val_scalar();
7510  m_evt_handler->begin_map_val_block();
7511  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7512  m_doc_empty = false;
7513  m_was_inside_qmrk = true;
7514  _save_indentation();
7515  _line_progressed(1);
7516  _maybe_skip_whitespace_tokens();
7517  }
7518  else if(first == ':' && _is_blck_token(rem))
7519  {
7520  if(m_doc_empty)
7521  {
7522  _c4dbgp("it's a map with an empty key");
7523  m_evt_handler->check_trailing_doc_token();
7524  _maybe_begin_doc();
7525  _handle_annotations_before_blck_val_scalar();
7526  m_evt_handler->begin_map_val_block();
7527  m_evt_handler->set_key_scalar_plain_empty();
7528  m_doc_empty = false;
7529  _save_indentation();
7530  }
7531  else
7532  {
7533  _c4dbgp("actually prev val is a key!");
7534  size_t prev_indentation = m_evt_handler->m_curr->indref;
7535  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7536  _set_indentation(prev_indentation);
7537  }
7538  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7539  _line_progressed(1);
7540  _maybe_skip_whitespace_tokens();
7541  }
7542  else if(first == '&')
7543  {
7544  csubstr anchor = _scan_anchor();
7545  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7546  m_evt_handler->check_trailing_doc_token();
7547  _maybe_begin_doc();
7548  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7549  const size_t line = m_evt_handler->m_curr->pos.line;
7550  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7551  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7552  m_doc_empty = false;
7553  }
7554  else if(first == '*')
7555  {
7556  csubstr ref = _scan_ref_map();
7557  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7558  m_evt_handler->check_trailing_doc_token();
7559  _maybe_begin_doc();
7560  m_doc_empty = false;
7561  if(!_maybe_scan_following_colon())
7562  {
7563  _c4dbgp("runk: set val ref");
7564  _handle_annotations_before_blck_val_scalar();
7565  m_evt_handler->set_val_ref(ref);
7566  }
7567  else
7568  {
7569  _c4dbgp("runk: start new block map, set ref as key");
7570  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7571  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7572  _handle_annotations_before_start_mapblck(startline);
7573  m_evt_handler->begin_map_val_block();
7574  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7575  m_evt_handler->set_key_ref(ref);
7576  _maybe_skip_whitespace_tokens();
7577  _set_indentation(startindent);
7578  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7579  }
7580  }
7581  else if(first == '!')
7582  {
7583  csubstr tag = _scan_tag();
7584  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7585  // we need to buffer the tags, as there may be two
7586  // consecutive tags in here
7587  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7588  const size_t line = m_evt_handler->m_curr->pos.line;
7589  _add_annotation(&m_pending_tags, tag, indentation, line);
7590  }
7591  else
7592  {
7593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7594  _maybe_skip_whitespace_tokens();
7595  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7596  if(!s.len)
7597  return;
7598  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7599  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7600  first = s.str[0];
7601  ScannedScalar sc;
7602  if(first == '\'')
7603  {
7604  _c4dbgp("runk: scanning single-quoted scalar");
7605  m_evt_handler->check_trailing_doc_token();
7606  _maybe_begin_doc();
7607  add_flags(RDOC);
7608  m_doc_empty = false;
7609  sc = _scan_scalar_squot();
7610  if(!_maybe_scan_following_colon())
7611  {
7612  _c4dbgp("runk: set as val");
7613  _handle_annotations_before_blck_val_scalar();
7614  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7615  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7616  }
7617  else
7618  {
7619  _c4dbgp("runk: start new block map, set scalar as key");
7620  _handle_annotations_before_start_mapblck(startline);
7621  m_evt_handler->begin_map_val_block();
7622  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7623  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7624  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7625  _maybe_skip_whitespace_tokens();
7626  _set_indentation(startindent);
7627  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7628  }
7629  }
7630  else if(first == '"')
7631  {
7632  _c4dbgp("runk: scanning double-quoted scalar");
7633  m_evt_handler->check_trailing_doc_token();
7634  _maybe_begin_doc();
7635  add_flags(RDOC);
7636  m_doc_empty = false;
7637  sc = _scan_scalar_dquot();
7638  if(!_maybe_scan_following_colon())
7639  {
7640  _c4dbgp("runk: set as val");
7641  _handle_annotations_before_blck_val_scalar();
7642  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7643  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7644  }
7645  else
7646  {
7647  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7648  _handle_annotations_before_start_mapblck(startline);
7649  m_evt_handler->begin_map_val_block();
7650  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7651  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7652  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7653  _maybe_skip_whitespace_tokens();
7654  _set_indentation(startindent);
7655  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7656  }
7657  }
7658  else if(first == '|')
7659  {
7660  _c4dbgp("runk: scanning block-literal scalar");
7661  m_evt_handler->check_trailing_doc_token();
7662  _maybe_begin_doc();
7663  add_flags(RDOC);
7664  m_doc_empty = false;
7665  ScannedBlock sb;
7666  _scan_block(&sb, startindent);
7667  if(C4_LIKELY(!_maybe_scan_following_colon()))
7668  {
7669  _c4dbgp("runk: set as val");
7670  _handle_annotations_before_blck_val_scalar();
7671  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7672  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7673  }
7674  else
7675  {
7676  _c4err("block literal keys must be enclosed in '?'");
7677  }
7678  }
7679  else if(first == '>')
7680  {
7681  _c4dbgp("runk: scanning block-folded scalar");
7682  m_evt_handler->check_trailing_doc_token();
7683  _maybe_begin_doc();
7684  add_flags(RDOC);
7685  m_doc_empty = false;
7686  ScannedBlock sb;
7687  _scan_block(&sb, startindent);
7688  if(C4_LIKELY(!_maybe_scan_following_colon()))
7689  {
7690  _c4dbgp("runk: set as val");
7691  _handle_annotations_before_blck_val_scalar();
7692  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7693  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7694  }
7695  else
7696  {
7697  _c4err("block folded keys must be enclosed in '?'");
7698  }
7699  }
7700  else if(_scan_scalar_plain_unk(&sc))
7701  {
7702  _c4dbgp("runk: got a plain scalar");
7703  m_evt_handler->check_trailing_doc_token();
7704  _maybe_begin_doc();
7705  add_flags(RDOC);
7706  m_doc_empty = false;
7707  if(!_maybe_scan_following_colon())
7708  {
7709  _c4dbgp("runk: set as val");
7710  _handle_annotations_before_blck_val_scalar();
7711  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7712  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7713  }
7714  else
7715  {
7716  _c4dbgp("runk: start new block map, set scalar as key");
7717  _handle_annotations_before_start_mapblck(startline);
7718  m_evt_handler->begin_map_val_block();
7719  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7720  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7721  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7722  _maybe_skip_whitespace_tokens();
7723  _set_indentation(startindent);
7724  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7725  }
7726  }
7727  }
7728 }
7729 
7730 
7731 //-----------------------------------------------------------------------------
7732 
7733 template<class EventHandler>
7734 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7735 {
7736  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7737 
7738  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7739 
7740  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7741  if(has_any(RNXT))
7742  {
7743  _c4dbgp("usty[RNXT]: finishing!");
7744  _end_stream();
7745  }
7746  #endif
7747 
7748  _maybe_skip_comment();
7749  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7750  if(!rem.len)
7751  return;
7752 
7753  size_t pos = rem.first_not_of(" \t");
7754  if(pos)
7755  {
7756  pos = pos != npos ? pos : rem.len;
7757  _c4dbgpf("skipping indentation of {}", pos);
7758  _line_progressed(pos);
7759  rem = m_evt_handler->m_curr->line_contents.rem;
7760  if(!rem.len)
7761  return;
7762  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7763  }
7764 
7765  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7766  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7767  char first = rem.str[0];
7768  if(has_any(RSEQ)) // destination is a sequence
7769  {
7770  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7771  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7772  if(first == '[')
7773  {
7774  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7775  add_flags(RNXT);
7776  m_evt_handler->_push();
7777  addrem_flags(FLOW|RVAL, RNXT|USTY);
7778  _set_indentation(startindent);
7779  _line_progressed(1);
7780  _maybe_skip_whitespace_tokens();
7781  }
7782  else if(first == '-' && _is_blck_token(rem))
7783  {
7784  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7785  add_flags(RNXT);
7786  m_evt_handler->_push();
7787  addrem_flags(BLCK|RVAL, RNXT|USTY);
7788  _set_indentation(startindent);
7789  _line_progressed(1);
7790  _maybe_skip_whitespace_tokens();
7791  }
7792  else
7793  {
7794  _c4err("can only parse a seq into an existing seq");
7795  }
7796  }
7797  else if(has_any(RMAP)) // destination is a map
7798  {
7799  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7800  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7801  if(first == '{')
7802  {
7803  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7804  add_flags(RNXT);
7805  _handle_annotations_before_blck_val_scalar();
7806  m_evt_handler->_push();
7807  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7808  _set_indentation(startindent);
7809  _line_progressed(1);
7810  _maybe_skip_whitespace_tokens();
7811  }
7812  else if(first == '?' && _is_blck_token(rem))
7813  {
7814  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7815  add_flags(RNXT);
7816  _handle_annotations_before_blck_val_scalar();
7817  m_evt_handler->_push();
7818  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7819  m_was_inside_qmrk = true;
7820  _save_indentation();
7821  _line_progressed(1);
7822  _maybe_skip_whitespace_tokens();
7823  }
7824  else if(first == ':' && _is_blck_token(rem))
7825  {
7826  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7827  add_flags(RNXT);
7828  _handle_annotations_before_blck_val_scalar();
7829  m_evt_handler->_push();
7830  m_evt_handler->set_key_scalar_plain_empty();
7831  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7832  _save_indentation();
7833  _line_progressed(1);
7834  _maybe_skip_whitespace_tokens();
7835  }
7836  else if(rem.begins_with('&'))
7837  {
7838  csubstr anchor = _scan_anchor();
7839  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7840  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7841  const size_t line = m_evt_handler->m_curr->pos.line;
7842  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7843  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7844  }
7845  else if(first == '*')
7846  {
7847  csubstr ref = _scan_ref_map();
7848  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7849  if(!_maybe_scan_following_colon())
7850  {
7851  _c4err("cannot read a VAL to a map");
7852  }
7853  else
7854  {
7855  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7856  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7857  add_flags(RNXT);
7858  _handle_annotations_before_start_mapblck(startline);
7859  m_evt_handler->_push();
7860  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7861  m_evt_handler->set_key_ref(ref);
7862  _maybe_skip_whitespace_tokens();
7863  _set_indentation(startindent);
7864  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7865  }
7866  }
7867  else if(first == '!')
7868  {
7869  csubstr tag = _scan_tag();
7870  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7871  // we need to buffer the tags, as there may be two
7872  // consecutive tags in here
7873  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7874  const size_t line = m_evt_handler->m_curr->pos.line;
7875  _add_annotation(&m_pending_tags, tag, indentation, line);
7876  }
7877  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7878  {
7879  _c4err("cannot parse a seq into an existing map");
7880  }
7881  else
7882  {
7883  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7884  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7885  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7886  ScannedScalar sc;
7887  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7888  if(first == '\'')
7889  {
7890  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7891  sc = _scan_scalar_squot();
7892  if(!_maybe_scan_following_colon())
7893  {
7894  _c4err("cannot read a VAL to a map");
7895  }
7896  else
7897  {
7898  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7899  add_flags(RNXT);
7900  _handle_annotations_before_start_mapblck(startline);
7901  m_evt_handler->_push();
7902  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7903  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7904  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7905  _set_indentation(startindent);
7906  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7907  _maybe_skip_whitespace_tokens();
7908  }
7909  }
7910  else if(first == '"')
7911  {
7912  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7913  sc = _scan_scalar_dquot();
7914  if(!_maybe_scan_following_colon())
7915  {
7916  _c4err("cannot read a VAL to a map");
7917  }
7918  else
7919  {
7920  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7921  add_flags(RNXT);
7922  _handle_annotations_before_start_mapblck(startline);
7923  m_evt_handler->_push();
7924  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7925  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7926  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7927  _set_indentation(startindent);
7928  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7929  _maybe_skip_whitespace_tokens();
7930  }
7931  }
7932  else if(first == '|')
7933  {
7934  _c4err("block literal keys must be enclosed in '?'");
7935  }
7936  else if(first == '>')
7937  {
7938  _c4err("block literal keys must be enclosed in '?'");
7939  }
7940  else if(_scan_scalar_plain_unk(&sc))
7941  {
7942  _c4dbgp("usty[RMAP]: got a plain scalar");
7943  if(!_maybe_scan_following_colon())
7944  {
7945  _c4err("cannot read a VAL to a map");
7946  }
7947  else
7948  {
7949  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7950  add_flags(RNXT);
7951  _handle_annotations_before_start_mapblck(startline);
7952  m_evt_handler->_push();
7953  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7954  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7955  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7956  _set_indentation(startindent);
7957  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7958  _maybe_skip_whitespace_tokens();
7959  }
7960  }
7961  else
7962  {
7963  _c4err("parse error");
7964  }
7965  }
7966  }
7967  else // destination is unknown
7968  {
7969  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7970  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
7971  if(first == '[')
7972  {
7973  _c4dbgp("usty[UNK]: it's a flow seq");
7974  add_flags(RNXT);
7975  _handle_annotations_before_blck_val_scalar();
7976  m_evt_handler->begin_seq_val_flow();
7977  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
7978  _set_indentation(startindent);
7979  _line_progressed(1);
7980  _maybe_skip_whitespace_tokens();
7981  }
7982  else if(first == '-' && _is_blck_token(rem))
7983  {
7984  _c4dbgp("usty[UNK]: it's a block seq");
7985  add_flags(RNXT);
7986  _handle_annotations_before_blck_val_scalar();
7987  m_evt_handler->begin_seq_val_block();
7988  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
7989  _set_indentation(startindent);
7990  _line_progressed(1);
7991  _maybe_skip_whitespace_tokens();
7992  }
7993  else if(first == '{')
7994  {
7995  _c4dbgp("usty[UNK]: it's a flow map");
7996  add_flags(RNXT);
7997  _handle_annotations_before_blck_val_scalar();
7998  m_evt_handler->begin_map_val_flow();
7999  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
8000  _set_indentation(startindent);
8001  _line_progressed(1);
8002  _maybe_skip_whitespace_tokens();
8003  }
8004  else if(first == '?' && _is_blck_token(rem))
8005  {
8006  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8007  add_flags(RNXT);
8008  _handle_annotations_before_blck_val_scalar();
8009  m_evt_handler->begin_map_val_block();
8010  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
8011  m_was_inside_qmrk = true;
8012  _save_indentation();
8013  _line_progressed(1);
8014  _maybe_skip_whitespace_tokens();
8015  }
8016  else if(first == ':' && _is_blck_token(rem))
8017  {
8018  _c4dbgp("usty[UNK]: it's a map with an empty key");
8019  add_flags(RNXT);
8020  _handle_annotations_before_blck_val_scalar();
8021  m_evt_handler->begin_map_val_block();
8022  m_evt_handler->set_key_scalar_plain_empty();
8023  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8024  _save_indentation();
8025  _line_progressed(1);
8026  _maybe_skip_whitespace_tokens();
8027  }
8028  else if(first == '&')
8029  {
8030  csubstr anchor = _scan_anchor();
8031  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8032  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8033  const size_t line = m_evt_handler->m_curr->pos.line;
8034  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8035  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8036  }
8037  else if(first == '*')
8038  {
8039  csubstr ref = _scan_ref_map();
8040  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8041  if(!_maybe_scan_following_colon())
8042  {
8043  _c4dbgp("usty[UNK]: set val ref");
8044  _handle_annotations_before_blck_val_scalar();
8045  m_evt_handler->set_val_ref(ref);
8046  }
8047  else
8048  {
8049  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8050  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8051  add_flags(RNXT);
8052  _handle_annotations_before_start_mapblck(startline);
8053  m_evt_handler->begin_map_val_block();
8054  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8055  m_evt_handler->set_key_ref(ref);
8056  _maybe_skip_whitespace_tokens();
8057  _set_indentation(startindent);
8058  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8059  }
8060  }
8061  else if(first == '!')
8062  {
8063  csubstr tag = _scan_tag();
8064  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8065  // we need to buffer the tags, as there may be two
8066  // consecutive tags in here
8067  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8068  const size_t line = m_evt_handler->m_curr->pos.line;
8069  _add_annotation(&m_pending_tags, tag, indentation, line);
8070  }
8071  else
8072  {
8073  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
8074  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8075  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8076  first = rem.str[0];
8077  ScannedScalar sc;
8078  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8079  if(first == '\'')
8080  {
8081  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8082  sc = _scan_scalar_squot();
8083  if(!_maybe_scan_following_colon())
8084  {
8085  _c4dbgp("usty[UNK]: set as val");
8086  _handle_annotations_before_blck_val_scalar();
8087  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8088  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8089  _end_stream();
8090  }
8091  else
8092  {
8093  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8094  add_flags(RNXT);
8095  _handle_annotations_before_start_mapblck(startline);
8096  m_evt_handler->begin_map_val_block();
8097  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8098  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8099  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8100  _set_indentation(startindent);
8101  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8102  _maybe_skip_whitespace_tokens();
8103  }
8104  }
8105  else if(first == '"')
8106  {
8107  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8108  sc = _scan_scalar_dquot();
8109  if(!_maybe_scan_following_colon())
8110  {
8111  _c4dbgp("usty[UNK]: set as val");
8112  _handle_annotations_before_blck_val_scalar();
8113  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8114  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8115  _end_stream();
8116  }
8117  else
8118  {
8119  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8120  add_flags(RNXT);
8121  _handle_annotations_before_start_mapblck(startline);
8122  m_evt_handler->begin_map_val_block();
8123  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8124  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8125  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126  _set_indentation(startindent);
8127  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8128  _maybe_skip_whitespace_tokens();
8129  }
8130  }
8131  else if(first == '|')
8132  {
8133  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8134  ScannedBlock sb;
8135  _scan_block(&sb, startindent);
8136  _c4dbgp("usty[UNK]: set as val");
8137  _handle_annotations_before_blck_val_scalar();
8138  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8139  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8140  _end_stream();
8141  }
8142  else if(first == '>')
8143  {
8144  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8145  ScannedBlock sb;
8146  _scan_block(&sb, startindent);
8147  _c4dbgp("usty[UNK]: set as val");
8148  _handle_annotations_before_blck_val_scalar();
8149  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8150  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8151  _end_stream();
8152  }
8153  else if(_scan_scalar_plain_unk(&sc))
8154  {
8155  _c4dbgp("usty[UNK]: got a plain scalar");
8156  if(!_maybe_scan_following_colon())
8157  {
8158  _c4dbgp("usty[UNK]: set as val");
8159  _handle_annotations_before_blck_val_scalar();
8160  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8161  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8162  _end_stream();
8163  }
8164  else
8165  {
8166  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8167  add_flags(RNXT);
8168  _handle_annotations_before_start_mapblck(startline);
8169  m_evt_handler->begin_map_val_block();
8170  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8171  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8172  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8173  _set_indentation(startindent);
8174  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8175  _maybe_skip_whitespace_tokens();
8176  }
8177  }
8178  else
8179  {
8180  _c4err("parse error");
8181  }
8182  }
8183  }
8184 }
8185 
8186 
8187 //-----------------------------------------------------------------------------
8188 
8189 template<class EventHandler>
8190 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8191 {
8192  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8193  m_file = filename;
8194  m_buf = src;
8195  _reset();
8196  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8197  m_evt_handler->begin_stream();
8198  while( ! _finished_file())
8199  {
8200  _scan_line();
8201  while( ! _finished_line())
8202  {
8203  _c4dbgnextline();
8204  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8205  if(has_any(RSEQ))
8206  {
8207  _handle_seq_json();
8208  }
8209  else if(has_any(RMAP))
8210  {
8211  _handle_map_json();
8212  }
8213  else if(has_any(RUNK))
8214  {
8215  _handle_unk_json();
8216  }
8217  else
8218  {
8219  _c4err("internal error");
8220  }
8221  }
8222  if(_finished_file())
8223  break; // it may have finished because of multiline blocks
8224  _line_ended();
8225  }
8226  _end_stream();
8227  m_evt_handler->finish_parse();
8228 }
8229 
8230 
8231 //-----------------------------------------------------------------------------
8232 
8233 template<class EventHandler>
8234 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8235 {
8236  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8237  m_file = filename;
8238  m_buf = src;
8239  _reset();
8240  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8241  m_evt_handler->begin_stream();
8242  while( ! _finished_file())
8243  {
8244  _scan_line();
8245  while( ! _finished_line())
8246  {
8247  _c4dbgnextline();
8248  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8249  if(has_any(FLOW))
8250  {
8251  if(has_none(RSEQIMAP))
8252  {
8253  if(has_any(RSEQ))
8254  {
8255  _handle_seq_flow();
8256  }
8257  else
8258  {
8259  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8260  _handle_map_flow();
8261  }
8262  }
8263  else
8264  {
8265  _handle_seq_imap();
8266  }
8267  }
8268  else if(has_any(BLCK))
8269  {
8270  if(has_any(RSEQ))
8271  {
8272  _handle_seq_block();
8273  }
8274  else
8275  {
8276  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8277  _handle_map_block();
8278  }
8279  }
8280  else if(has_any(RUNK))
8281  {
8282  _handle_unk();
8283  }
8284  else if(has_any(USTY))
8285  {
8286  _handle_usty();
8287  }
8288  else
8289  {
8290  _c4err("internal error");
8291  }
8292  }
8293  if(_finished_file())
8294  break; // it may have finished because of multiline blocks
8295  _line_ended();
8296  }
8297  _end_stream();
8298  m_evt_handler->finish_parse();
8299 }
8300 /** @endcond */
8301 
8302 } // namespace yml
8303 } // namespace c4
8304 
8305 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
8306 
8307 #undef _c4dbgnextline
8308 
8309 #if defined(_MSC_VER)
8310 # pragma warning(pop)
8311 #elif defined(__clang__)
8312 # pragma clang diagnostic pop
8313 #elif defined(__GNUC__)
8314 # pragma GCC diagnostic pop
8315 #endif
8316 
8317 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:24
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:49
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1547
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:890
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ npos
a null string position
Definition: common.hpp:267
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
Definition: parse.cpp:132
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
Encoding_e
Definition: common.hpp:427
@ UTF16BE
Definition: common.hpp:431
@ UTF8
Definition: common.hpp:429
@ UTF16LE
Definition: common.hpp:430
@ NOBOM
Definition: common.hpp:428
@ UTF32BE
Definition: common.hpp:433
@ UTF32LE
Definition: common.hpp:432
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark