rapidyaml  0.9.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 
9 #include <ctype.h>
10 
11 #include "c4/yml/detail/parser_dbg.hpp"
13 #ifdef RYML_DBG
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
16 #endif
17 
18 
// Helper macros that keep or drop code depending on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments only when
//    RYML_WITH_TAB_TOKENS is defined, and to nothing otherwise;
//  - _RYML_WITHOUT_TAB_TOKENS(...) is the opposite;
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to `with` when
//    RYML_WITH_TAB_TOKENS is defined, and to `without` otherwise.
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
23 #else
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
27 #endif
28 
29 
30 // scaffold:
// Emits a separator through _c4dbgq() and then, through _c4dbgt(), the line
// number and byte offset stored in the current state of the event handler.
// Must be used where m_evt_handler is in scope.
31 #define _c4dbgnextline() \
32  do { \
33  _c4dbgq("\n-----------"); \
34  _c4dbgt("handling line={}, offset={}B", \
35  m_evt_handler->m_curr->pos.line, \
36  m_evt_handler->m_curr->pos.offset); \
37  } while(0)
38 
39 
40 #if defined(_MSC_VER)
41 # pragma warning(push)
42 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
43 # pragma warning(disable: 4702/*unreachable code*/)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
54 # if __GNUC__ >= 7
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
56 # endif
57 #endif
58 
59 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
60 
61 namespace c4 {
62 namespace yml {
63 
64 namespace { // NOLINT
65 
66 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
67 {
68  RYML_ASSERT(s.len > 0);
69  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
70  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
71 }
72 
73 inline bool _is_doc_begin_token(csubstr s)
74 {
75  RYML_ASSERT(s.begins_with('-'));
76  RYML_ASSERT(!s.ends_with("\n"));
77  RYML_ASSERT(!s.ends_with("\r"));
78  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
79  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
80 }
81 
82 inline bool _is_doc_end_token(csubstr s)
83 {
84  RYML_ASSERT(s.begins_with('.'));
85  RYML_ASSERT(!s.ends_with("\n"));
86  RYML_ASSERT(!s.ends_with("\r"));
87  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
88  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
89 }
90 
/** Whether @p s starts with a document token: "---" or "..." which either
 * spans all of @p s or is followed by a space (or by a tab when
 * RYML_WITH_TAB_TOKENS is defined). Returns false when @p s has fewer than
 * 3 characters or starts with any other character.
 *
 * The checks are written out inline instead of delegating to
 * _is_doc_begin_token() / _is_doc_end_token(); see the note below. */
91 inline bool _is_doc_token(csubstr s) noexcept
92 {
93  //
94  // NOTE: this function was failing under some scenarios when
95  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
96  // related to optimizer assumptions on the input string and
97  // possibly caused from UB around assignment to that string (the
98  // call site was in _scan_block()). For more details see:
99  //
100  // https://github.com/biojppm/rapidyaml/issues/440
101  //
102  // The current version does not suffer this problem, but it may
103  // appear again.
104  //
105  //
106  // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
107  // (but not any other optimization level, nor any other compiler
108  // or version), because the assignment to s is being hoisted out
109  // of the loop which calls this function. Then the length doesn't
110  // enter the s.len >= 3 when it should. Adding a
111  // C4_DONT_OPTIMIZE(var) makes the problem go away.
112  //
113  if(s.len >= 3)
114  {
115  switch(s.str[0])
116  {
117  case '-':
118  //return _is_doc_begin_token(s); // this was failing with gcc -O2
119  return (s.str[1] == '-' && s.str[2] == '-')
120  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
121  case '.':
122  //return _is_doc_end_token(s); // this was failing with gcc -O2
123  return (s.str[1] == '.' && s.str[2] == '.')
124  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
125  }
126  }
127  return false;
128 }
129 
130 inline size_t _is_special_json_scalar(csubstr s)
131 {
132  RYML_ASSERT(s.len);
133  switch(s.str[0])
134  {
135  case 'f':
136  if(s.len >= 5 && s.begins_with("false"))
137  return 5u;
138  break;
139  case 't':
140  if(s.len >= 4 && s.begins_with("true"))
141  return 4u;
142  break;
143  case 'n':
144  if(s.len >= 4 && s.begins_with("null"))
145  return 4u;
146  break;
147  }
148  return 0u;
149 }
150 
151 
152 //-----------------------------------------------------------------------------
153 
154 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
155 {
156  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
157 }
158 
159 //! look for the next newline chars, and jump to the right of those
160 inline substr from_next_line(substr rem)
161 {
162  size_t nlpos = rem.first_of("\r\n");
163  if(nlpos == csubstr::npos)
164  return {};
165  const char nl = rem[nlpos];
166  rem = rem.right_of(nlpos);
167  if(rem.empty())
168  return {};
169  if(_extend_from_combined_newline(nl, rem.front()))
170  rem = rem.sub(1);
171  return rem;
172 }
173 
174 
175 //-----------------------------------------------------------------------------
176 
177 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
178 {
179  RYML_ASSERT(r[*i] == '\n');
180  size_t numnl_following = 0;
181  ++(*i);
182  for( ; *i < r.len; ++(*i))
183  {
184  if(r.str[*i] == '\n')
185  ++numnl_following;
186  // skip leading whitespace
187  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
188  ;
189  else
190  break;
191  }
192  return numnl_following;
193 }
194 
195 /** @p i is set to the first non whitespace character after the line
196  * @return the number of empty lines after the initial position */
197 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
198 {
199  RYML_ASSERT(r[*i] == '\n');
200  size_t numnl_following = 0;
201  ++(*i);
202  if(indentation == 0)
203  {
204  for( ; *i < r.len; ++(*i))
205  {
206  if(r.str[*i] == '\n')
207  ++numnl_following;
208  // skip leading whitespace
209  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
210  ;
211  else
212  break;
213  }
214  }
215  else
216  {
217  for( ; *i < r.len; ++(*i))
218  {
219  if(r.str[*i] == '\n')
220  {
221  ++numnl_following;
222  // skip the indentation after the newline
223  size_t stop = *i + indentation;
224  for( ; *i < r.len; ++(*i))
225  {
226  if(r.str[*i] != ' ' && r.str[*i] != '\r')
227  break;
228  RYML_ASSERT(*i < stop);
229  }
230  C4_UNUSED(stop);
231  }
232  // skip leading whitespace
233  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
234  ;
235  else
236  break;
237  }
238  }
239  return numnl_following;
240 }
241 
242 } // anon namespace
243 
244 
245 //-----------------------------------------------------------------------------
246 //-----------------------------------------------------------------------------
247 //-----------------------------------------------------------------------------
248 
// NOTE(review): the embedded numbering jumps from 249 to 251, so the
// declarator line of this definition is missing from the listing. The body
// calls _free() and then _clr(); presumably this is the destructor -- confirm
// against the original file.
249 template<class EventHandler>
251 {
252  _free();
253  _clr();
254 }
255 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 256 to 258); presumably the constructor receiving `evt_handler` and
// `opts` -- confirm against the original file.
// Stores the options and the handler pointer, value-initializes the file name,
// the buffer, the pending annotations and the newline-offset bookkeeping, and
// verifies through RYML_CHECK that evt_handler is not null.
256 template<class EventHandler>
258  : m_options(opts)
259  , m_file()
260  , m_buf()
261  , m_evt_handler(evt_handler)
262  , m_pending_anchors()
263  , m_pending_tags()
264  , m_was_inside_qmrk(false)
265  , m_doc_empty(false)
266  , m_prev_colon(npos)
267  , m_encoding(NOBOM)
268  , m_newline_offsets()
269  , m_newline_offsets_size(0)
270  , m_newline_offsets_capacity(0)
271  , m_newline_offsets_buf()
272 {
273  RYML_CHECK(evt_handler);
274 }
275 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 276 to 278); presumably the move constructor, since `that` is cleared
// at the end -- confirm against the original file.
// Takes the options, file name, buffer, handler pointer, pending annotations
// and the newline-offset array (pointer, size, capacity, buffer) from `that`,
// then calls that._clr() so that `that` no longer refers to them.
// m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding are set to fixed
// values here instead of being taken from `that`.
276 template<class EventHandler>
278  : m_options(that.m_options)
279  , m_file(that.m_file)
280  , m_buf(that.m_buf)
281  , m_evt_handler(that.m_evt_handler)
282  , m_pending_anchors(that.m_pending_anchors)
283  , m_pending_tags(that.m_pending_tags)
284  , m_was_inside_qmrk(false)
285  , m_doc_empty(false)
286  , m_prev_colon(npos)
287  , m_encoding(NOBOM)
288  , m_newline_offsets(that.m_newline_offsets)
289  , m_newline_offsets_size(that.m_newline_offsets_size)
290  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
291  , m_newline_offsets_buf(that.m_newline_offsets_buf)
292 {
293  that._clr();
294 }
295 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 296 to 298); presumably the copy constructor, since `that` is only
// read -- confirm against the original file.
// Copies the options, file name, buffer, handler pointer and pending
// annotations. The newline-offset array is not shared: when `that` has
// capacity, an array of the same capacity is obtained through
// _resize_locations() and the used entries are copied with memcpy.
// NOTE(review): m_newline_offsets_buf is value-initialized here rather than
// copied from `that` -- confirm this is intended.
296 template<class EventHandler>
298  : m_options(that.m_options)
299  , m_file(that.m_file)
300  , m_buf(that.m_buf)
301  , m_evt_handler(that.m_evt_handler)
302  , m_pending_anchors(that.m_pending_anchors)
303  , m_pending_tags(that.m_pending_tags)
304  , m_was_inside_qmrk(false)
305  , m_doc_empty(false)
306  , m_prev_colon(npos)
307  , m_encoding(NOBOM)
308  , m_newline_offsets()
309  , m_newline_offsets_size()
310  , m_newline_offsets_capacity()
311  , m_newline_offsets_buf()
312 {
313  if(that.m_newline_offsets_capacity)
314  {
315  _resize_locations(that.m_newline_offsets_capacity);
316  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
317  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
318  m_newline_offsets_size = that.m_newline_offsets_size;
319  }
320 }
321 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 322 to 324); presumably the move assignment operator, since `that` is
// cleared at the end and *this is returned -- confirm against the original file.
// Releases the newline-offset array owned by this object through _free(),
// takes every member from `that`, and then calls that._clr().
// NOTE(review): there is no check for `&that == this`; on self-assignment the
// array would be freed and the members cleared -- confirm callers never do that.
322 template<class EventHandler>
324 {
325  _free();
326  m_options = (that.m_options);
327  m_file = (that.m_file);
328  m_buf = (that.m_buf);
329  m_evt_handler = that.m_evt_handler;
330  m_pending_anchors = that.m_pending_anchors;
331  m_pending_tags = that.m_pending_tags;
332  m_was_inside_qmrk = that.m_was_inside_qmrk;
333  m_doc_empty = that.m_doc_empty;
334  m_prev_colon = that.m_prev_colon;
335  m_encoding = that.m_encoding;
336  m_newline_offsets = (that.m_newline_offsets);
337  m_newline_offsets_size = (that.m_newline_offsets_size);
338  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
339  m_newline_offsets_buf = (that.m_newline_offsets_buf);
340  that._clr();
341  return *this;
342 }
343 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 344 to 346); presumably the copy assignment operator, since `that` is
// only read and *this is returned -- confirm against the original file.
// Does nothing on self-assignment. Otherwise releases the current
// newline-offset array through _free(), copies the scalar members, makes sure
// the capacity is at least that of `that` (through _resize_locations()), and
// copies the used newline offsets with memcpy.
// NOTE(review): when neither object has a newline-offset array, memcpy appears
// to be called with null pointers and a size of 0 -- confirm this is acceptable
// on the supported toolchains.
344 template<class EventHandler>
346 {
347  if(&that != this)
348  {
349  _free();
350  m_options = (that.m_options);
351  m_file = (that.m_file);
352  m_buf = (that.m_buf);
353  m_evt_handler = that.m_evt_handler;
354  m_pending_anchors = that.m_pending_anchors;
355  m_pending_tags = that.m_pending_tags;
356  m_was_inside_qmrk = that.m_was_inside_qmrk;
357  m_doc_empty = that.m_doc_empty;
358  m_prev_colon = that.m_prev_colon;
359  m_encoding = that.m_encoding;
360  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
361  _resize_locations(that.m_newline_offsets_capacity);
362  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
363  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
364  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
365  m_newline_offsets_size = that.m_newline_offsets_size;
366  m_newline_offsets_buf = that.m_newline_offsets_buf;
367  }
368  return *this;
369 }
370 
// NOTE(review): declarator line missing from the listing (the numbering jumps
// from 371 to 373); presumably this is _clr(), which is called elsewhere in
// this file after _free() or on a moved-from object -- confirm.
// Resets every member to an empty/default value. Nothing is released here:
// m_newline_offsets is only overwritten, so any array it points to must have
// been freed or handed over to another object beforehand.
371 template<class EventHandler>
373 {
374  m_options = {};
375  m_file = {};
376  m_buf = {};
377  m_evt_handler = {};
378  m_pending_anchors = {};
379  m_pending_tags = {};
380  m_was_inside_qmrk = false;
381  m_doc_empty = true;
382  m_prev_colon = npos;
383  m_encoding = NOBOM;
384  m_newline_offsets = {};
385  m_newline_offsets_size = {};
386  m_newline_offsets_capacity = {};
387  m_newline_offsets_buf = {};
388 }
389 
390 template<class EventHandler>
391 void ParseEngine<EventHandler>::_free()
392 {
393  if(m_newline_offsets)
394  {
395  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
396  m_newline_offsets = nullptr;
397  m_newline_offsets_size = 0u;
398  m_newline_offsets_capacity = 0u;
399  m_newline_offsets_buf = nullptr;
400  }
401 }
402 
403 
404 //-----------------------------------------------------------------------------
405 
406 template<class EventHandler>
407 void ParseEngine<EventHandler>::_reset()
408 {
409  m_pending_anchors = {};
410  m_pending_tags = {};
411  m_doc_empty = true;
412  m_was_inside_qmrk = false;
413  m_prev_colon = npos;
414  m_encoding = NOBOM;
415  if(m_options.locations())
416  {
417  _prepare_locations();
418  }
419 }
420 
421 
422 //-----------------------------------------------------------------------------
423 
424 template<class EventHandler>
425 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
426 {
427  #define _ryml_relocate(s) \
428  if((s).is_sub(prev_arena)) \
429  { \
430  (s).str = next_arena.str + ((s).str - prev_arena.str); \
431  }
432  _ryml_relocate(m_buf);
433  _ryml_relocate(m_newline_offsets_buf);
434  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
435  _ryml_relocate(m_pending_tags.annotations[i].str);
436  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
437  _ryml_relocate(m_pending_anchors.annotations[i].str);
438  #undef _ryml_relocate
439 }
440 
441 template<class EventHandler>
442 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
443 {
444  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
445 }
446 
447 
448 //-----------------------------------------------------------------------------
449 
450 template<class EventHandler>
451 template<class DumpFn>
452 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
453 {
454  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
455  auto const& lc = st->line_contents;
456  csubstr contents = lc.stripped;
457  if(contents.len)
458  {
459  // print the yaml src line
460  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
461  if(m_file.len)
462  {
463  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
464  offs += m_file.len + 1;
465  }
466  detail::_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
467  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
468  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
469  detail::_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
470  // highlight the remaining portion of the previous line
471  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
472  size_t lastcol = firstcol + lc.rem.len;
473  for(size_t i = 0; i < offs + firstcol; ++i)
474  std::forward<DumpFn>(dumpfn)(" ");
475  std::forward<DumpFn>(dumpfn)("^");
476  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
477  std::forward<DumpFn>(dumpfn)("~");
478  detail::_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
479  }
480  else
481  {
482  std::forward<DumpFn>(dumpfn)("\n");
483  }
484 
485 #ifdef RYML_DBG
486  // next line: print the state flags
487  {
488  char flagbuf_[128];
489  detail::_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
490  }
491 #endif
492 }
493 
494 
495 //-----------------------------------------------------------------------------
496 
497 template<class EventHandler>
498 template<class ...Args>
499 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
500 {
501  char errmsg[RYML_ERRMSG_SIZE];
502  detail::_SubstrWriter writer(errmsg);
503  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
504  detail::_dump(dumpfn, fmt, args...);
505  writer.append('\n');
506  _fmt_msg(dumpfn);
507  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
508  m_evt_handler->cancel_parse();
509  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
510 }
511 
512 
513 //-----------------------------------------------------------------------------
514 #ifdef RYML_DBG
515 template<class EventHandler>
516 template<class ...Args>
517 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
518 {
519  if(_dbg_enabled())
520  {
521  auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
522  detail::_dump(dumpfn, fmt, args...);
523  dumpfn("\n");
524  _fmt_msg(dumpfn);
525  }
526 }
527 #endif
528 
529 
530 //-----------------------------------------------------------------------------
531 template<class EventHandler>
532 bool ParseEngine<EventHandler>::_finished_file() const
533 {
534  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
535  if(ret)
536  {
537  _c4dbgp("finished file!!!");
538  }
539  return ret;
540 }
541 
542 template<class EventHandler>
543 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
544 {
545  return m_evt_handler->m_curr->line_contents.rem.empty();
546 }
547 
548 
549 //-----------------------------------------------------------------------------
550 
551 template<class EventHandler>
552 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
553 {
554  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
555  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
556  {
557  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
558  if(pos == npos)
559  pos = rem.len; // maybe the line is just all whitespace
560  _c4dbgpf("skip {} whitespace characters", pos);
561  _line_progressed(pos);
562  }
563 }
564 
565 template<class EventHandler>
566 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
567 {
568  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
569  if(rem.len && rem.str[0] == c)
570  {
571  size_t pos = rem.first_not_of(c);
572  if(pos == npos)
573  pos = rem.len; // maybe the line is just all c
574  _c4dbgpf("skip {}x'{}'", pos, c);
575  _line_progressed(pos);
576  }
577 }
578 
579 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
580 template<class EventHandler>
581 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
582 {
583  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
584  if(rem.len && rem.str[0] == c)
585  {
586  size_t pos = rem.first_not_of(c);
587  if(pos == npos)
588  pos = rem.len; // maybe the line is just all c
589  if(pos > max_to_skip)
590  pos = max_to_skip;
591  _c4dbgpf("skip {}x'{}'", pos, c);
592  _line_progressed(pos);
593  }
594 }
595 #endif
596 
597 template<class EventHandler>
598 template<size_t N>
599 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
600 {
601  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
602  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
603  if(pos == npos)
604  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
605  _c4dbgpf("skip {} characters", pos);
606  _line_progressed(pos);
607 }
608 
609 template<class EventHandler>
610 void ParseEngine<EventHandler>::_skip_comment()
611 {
612  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
613  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
614  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
615  csubstr full = m_evt_handler->m_curr->line_contents.full;
616  // raise an error if the comment is not preceded by whitespace
617  if(!full.begins_with('#'))
618  {
619  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
620  const char c = full[(size_t)(rem.str - full.str - 1)];
621  if(C4_UNLIKELY(c != ' ' && c != '\t'))
622  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
623  }
624  else
625  {
626  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
627  }
628  _c4dbgpf("comment was '{}'", rem);
629  _line_progressed(rem.len);
630 }
631 
632 template<class EventHandler>
633 void ParseEngine<EventHandler>::_maybe_skip_comment()
634 {
635  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
636  if(s.begins_with('#'))
637  {
638  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
639  _skip_comment();
640  }
641 }
642 
643 template<class EventHandler>
644 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
645 {
646  if(m_evt_handler->m_curr->line_contents.rem.len)
647  {
648  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
649  {
650  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
651  if(pos == npos)
652  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
653  _c4dbgpf("skip {}x'{}'", pos, ' ');
654  _line_progressed(pos);
655  }
656  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
657  {
658  _c4dbgp("found ':' colon next");
659  _line_progressed(1);
660  return true;
661  }
662  }
663  return false;
664 }
665 
666 template<class EventHandler>
667 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
668 {
669  if(m_evt_handler->m_curr->line_contents.rem.len)
670  {
671  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
672  {
673  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
674  if(pos == npos)
675  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
676  _c4dbgpf("skip {}x'{}'", pos, ' ');
677  _line_progressed(pos);
678  }
679  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
680  {
681  _c4dbgp("found ',' comma next");
682  _line_progressed(1);
683  return true;
684  }
685  }
686  return false;
687 }
688 
689 
690 //-----------------------------------------------------------------------------
691 
692 template<class EventHandler>
693 csubstr ParseEngine<EventHandler>::_scan_anchor()
694 {
695  csubstr s = m_evt_handler->m_curr->line_contents.rem;
696  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
697  csubstr anchor = s.range(1, s.first_of(' '));
698  _line_progressed(1u + anchor.len);
699  _maybe_skipchars(' ');
700  return anchor;
701 }
702 
703 template<class EventHandler>
704 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
705 {
706  csubstr s = m_evt_handler->m_curr->line_contents.rem;
707  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
708  csubstr ref = s.first(s.first_of(",] :"));
709  _line_progressed(ref.len);
710  return ref;
711 }
712 
713 template<class EventHandler>
714 csubstr ParseEngine<EventHandler>::_scan_ref_map()
715 {
716  csubstr s = m_evt_handler->m_curr->line_contents.rem;
717  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
718  csubstr ref = s.first(s.first_of(",} "));
719  _line_progressed(ref.len);
720  return ref;
721 }
722 
723 template<class EventHandler>
724 csubstr ParseEngine<EventHandler>::_scan_tag()
725 {
726  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
727  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
728  csubstr t;
729  if(rem.begins_with("!!"))
730  {
731  _c4dbgp("begins with '!!'");
732  if(has_any(FLOW))
733  t = rem.left_of(rem.first_of(" ,"));
734  else
735  t = rem.left_of(rem.first_of(' '));
736  }
737  else if(rem.begins_with("!<"))
738  {
739  _c4dbgp("begins with '!<'");
740  t = rem.left_of(rem.first_of('>'), true);
741  }
742  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
743  else if(rem.begins_with("!h!"))
744  {
745  _c4dbgp("begins with '!h!'");
746  t = rem.left_of(rem.first_of(' '));
747  }
748  #endif
749  else
750  {
751  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
752  _c4dbgp("begins with '!'");
753  if(has_any(FLOW))
754  t = rem.left_of(rem.first_of(" ,"));
755  else
756  t = rem.left_of(rem.first_of(' '));
757  }
758  _line_progressed(t.len);
759  _maybe_skip_whitespace_tokens();
760  return t;
761 }
762 
763 
764 //-----------------------------------------------------------------------------
765 
766 template<class EventHandler>
767 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
768 {
769  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
770 
771  // it's not a scalar if it starts with any of these characters:
772  switch(s.str[0])
773  {
774  // these are all legal tokens which mean no scalar is starting:
775  case '[':
776  case ']':
777  case '{':
778  case '}':
779  case '!':
780  case '&':
781  case '*':
782  case '|':
783  case '>':
784  case '#':
785  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
786  return false;
787  // '-' and ':' are illegal at the beginning if not followed by a scalar character
788  case '-':
789  case ':':
790  if(s.len > 1)
791  {
792  switch(s.str[1])
793  {
794  case '\n':
795  case '\r':
796  case '{':
797  case '[':
798  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
799  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
800  break;
801  case ' ':
802  case '}':
803  case ']':
804  if(s.str[0] == ':')
805  {
806  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
807  return false;
808  }
809  break;
810  default:
811  break;
812  }
813  }
814  else
815  {
816  return false;
817  }
818  break;
819  case '?':
820  if(s.len > 1)
821  {
822  switch(s.str[1])
823  {
824  case ' ':
825  case '\n':
826  case '\r':
827  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
828  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
829  return false;
830  case '{':
831  case '}':
832  case '[':
833  case ']':
834  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
835  break;
836  default:
837  break;
838  }
839  }
840  else
841  {
842  return false;
843  }
844  break;
845  // everything else is a legal starting character
846  default:
847  break;
848  }
849 
850  return true;
851 }
852 
/** Scan a plain (unquoted) scalar while reading a flow sequence.
 *
 * Scanning starts at the remainder of the current line and continues
 * over the following lines until one of these is found:
 *  - ',' or ']';
 *  - '#' at the start of the scanned line remainder, or preceded by a
 *    space (or tab, with tab tokens);
 *  - ':' followed by a space or ',' (or tab, with tab tokens), which
 *    means that a map is starting;
 *  - the end of the file.
 * Finding '[', '{' or '}' raises an error.
 *
 * @param sc receives the scalar, with trailing spaces (and tabs, with
 *        tab tokens) trimmed, and the needs_filter flag, which is set
 *        when the scan went on to another line
 * @return false when there is no scalar at this position: the remainder
 *         is empty, does not start a plain scalar, starts with a
 *         terminating ',' or map-starting ':', or has a ':' as the last
 *         character of a scanned line. True otherwise. */
853 template<class EventHandler>
854 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
855 {
856  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
857  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
858  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
859  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
860  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
861 
862  substr s = m_evt_handler->m_curr->line_contents.rem;
863  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
864  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
865 
866  if(!s.len)
867  return false;
868 
869  if(!_is_valid_start_scalar_plain_flow(s))
870  return false;
871 
872  _c4dbgp("scanning seqflow scalar...");
873 
  // the scalar is taken from m_buf, between this offset and the offset
  // reached when the scan ends
874  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
875  bool needs_filter = false;
876  while(true)
877  {
878  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
879  for(size_t i = 0; i < s.len; ++i)
880  {
881  const char c = s.str[i];
882  switch(c)
883  {
884  case ',':
885  _c4dbgpf("found terminating character at {}: '{}'", i, c);
  // NOTE(review): the line is advanced before the comparison below, which
  // adds i once more; presumably the offset already includes i at that
  // point. The outcome seems to be the same either way (false only when
  // nothing was scanned) -- confirm against _line_progressed().
886  _line_progressed(i);
887  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
888  {
889  goto ended_scalar;
890  }
891  else
892  {
893  _c4dbgp("at the beginning. no scalar here.");
894  return false;
895  }
896  break;
897  case ']':
898  _c4dbgpf("found terminating character at {}: '{}'", i, c);
899  _line_progressed(i);
900  goto ended_scalar;
901  break;
902  case '#':
903  _c4dbgp("found suspicious '#'");
  // a '#' in the middle of a word is part of the scalar
904  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
905  {
906  _c4dbgpf("found terminating character at {}: '{}'", i, c);
907  _line_progressed(i);
908  goto ended_scalar;
909  }
910  break;
911  case ':':
912  _c4dbgp("found suspicious ':'");
913  if(s.len > i+1)
914  {
915  const char next = s.str[i+1];
916  _c4dbgpf("next char is '{}'", _c4prc(next));
917  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
918  {
919  _c4dbgp("map starting!");
920  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
921  {
922  _c4dbgp("scalar finished!");
923  _line_progressed(i);
924  goto ended_scalar;
925  }
926  else
927  {
928  _c4dbgp("at the beginning. no scalar here.");
929  return false;
930  }
931  }
932  else
933  {
934  _c4dbgp("it's a scalar indeed.");
935  ++i; // skip the next char
936  }
937  }
938  else if(s.len == i+1)
939  {
  // the line is not advanced in this case
940  _c4dbgp("':' at line end. map starting!");
941  return false;
942  }
943  break;
944  case '[':
945  case '{':
946  case '}':
947  _line_progressed(i);
948  _c4err("invalid character: '{}'", c); // noreturn
949  default:
950  ;
951  }
952  }
  // no terminator in this line: consume it and go on with the next one
953  _line_progressed(s.len);
954  if(!_finished_file())
955  {
956  _c4dbgp("next line!");
957  _line_ended();
958  _scan_line();
959  }
960  else
961  {
962  _c4dbgp("file finished!");
963  goto ended_scalar;
964  }
965  s = m_evt_handler->m_curr->line_contents.rem;
966  needs_filter = true;
967  }
968 
969 ended_scalar:
970 
971  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
972  sc->needs_filter = needs_filter;
973 
974  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
975 
976  return true;
977 }
978 
/** Scan a plain (unquoted) scalar while reading a flow map (or an
 * implicit map inside a flow sequence).
 *
 * Scanning starts at the remainder of the current line and continues
 * over the following lines until one of these is found:
 *  - ',' or '}';
 *  - ':' at the end of the scanned line, or followed by a space, ','
 *    or '}' (or tab, with tab tokens);
 *  - ']', which ends the scalar when the RSEQIMAP flag is set and
 *    raises an error otherwise;
 *  - '#' at the start of the scanned line remainder, or preceded by a
 *    space (or tab, with tab tokens);
 *  - the end of the file.
 * Finding '{' or '[' raises an error.
 *
 * @param sc receives the scalar, with trailing spaces and newline
 *        characters (and tabs, with tab tokens) trimmed, and the
 *        needs_filter flag, which is set when the scan went on to
 *        another line
 * @return false when the remainder is empty or does not start a plain
 *         scalar; otherwise, whether the scanned scalar is not empty */
979 template<class EventHandler>
980 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
981 {
982  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
983  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
984  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
985  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
986  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
987 
988  substr s = m_evt_handler->m_curr->line_contents.rem;
989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
990 
991  if(!s.len)
992  return false;
993 
994  if(!_is_valid_start_scalar_plain_flow(s))
995  return false;
996 
997  _c4dbgp("scanning scalar...");
998 
  // the scalar is taken from m_buf, between this offset and the offset
  // reached when the scan ends
999  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1000  bool needs_filter = false;
1001  while(true)
1002  {
1003  for(size_t i = 0; i < s.len; ++i)
1004  {
1005  const char c = s.str[i];
1006  switch(c)
1007  {
1008  case ',':
1009  case '}':
1010  _line_progressed(i);
1011  _c4dbgpf("found terminating character: '{}'", c);
1012  goto ended_scalar;
1013  case ':':
  // a ':' followed by anything else is part of the scalar
1014  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
1015  {
1016  _line_progressed(i);
1017  _c4dbgpf("found terminating character: '{}'", c);
1018  goto ended_scalar;
1019  }
1020  break;
1021  case '{':
1022  case '[':
1023  _line_progressed(i);
1024  _c4err("invalid character: '{}'", c); // noreturn
1025  break;
1026  case ']':
1027  _line_progressed(i);
1028  if(has_any(RSEQIMAP))
1029  goto ended_scalar;
1030  else
1031  _c4err("invalid character: '{}'", c); // noreturn
1032  break;
1033  case '#':
  // a '#' in the middle of a word is part of the scalar
1034  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
1035  {
1036  _line_progressed(i);
1037  _c4dbgpf("found terminating character: '{}'", c);
1038  goto ended_scalar;
1039  }
1040  break;
1041  default:
1042  ;
1043  }
1044  }
  // no terminator in this line: consume it and go on with the next one
  // NOTE(review): the "next line!" debug message is emitted here and again
  // a few lines below when the file is not finished.
1045  _c4dbgp("next line!");
1046  _line_progressed(s.len);
1047  if(!_finished_file())
1048  {
1049  _c4dbgp("next line!");
1050  _line_ended();
1051  _scan_line();
1052  }
1053  else
1054  {
1055  _c4dbgp("file finished!");
1056  goto ended_scalar;
1057  }
1058  s = m_evt_handler->m_curr->line_contents.rem;
1059  needs_filter = true;
1060  }
1061 
1062 ended_scalar:
1063 
1064  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1065  sc->needs_filter = needs_filter;
1066 
1067  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1068 
1069  return sc->scalar.len > 0u;
1070 }
1071 
1072 template<class EventHandler>
1073 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1074 {
1075  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1076  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1077  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1078  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1079 
1080  substr s = m_evt_handler->m_curr->line_contents.rem;
1081  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1082 
1083  if(!s.len)
1084  return false;
1085 
1086  _c4dbgp("scanning scalar...");
1087 
1088  switch(s.str[0])
1089  {
1090  case ']':
1091  case '{':
1092  case ',':
1093  _c4dbgp("not a scalar.");
1094  return false;
1095  }
1096 
1097  {
1098  const size_t len = _is_special_json_scalar(s);
1099  if(len)
1100  {
1101  sc->scalar = s.first(len);
1102  sc->needs_filter = false;
1103  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1104  _line_progressed(len);
1105  return true;
1106  }
1107  }
1108 
1109  // must be a number
1110  size_t i = 0;
1111  for( ; i < s.len; ++i)
1112  {
1113  const char c = s.str[i];
1114  switch(c)
1115  {
1116  case ',':
1117  case ']':
1118  case ' ':
1119  case '\t':
1120  _c4dbgpf("found terminating character: '{}'", c);
1121  goto ended_scalar;
1122  case '#':
1123  if(!i || s.str[i-1] == ' ')
1124  {
1125  _c4dbgpf("found terminating character: '{}'", c);
1126  goto ended_scalar;
1127  }
1128  break;
1129  default:
1130  ;
1131  }
1132  }
1133 
1134 ended_scalar:
1135 
1136  if(C4_LIKELY(i > 0))
1137  {
1138  _line_progressed(i);
1139  sc->scalar = s.first(i);
1140  sc->needs_filter = false;
1141  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1142  return true;
1143  }
1144 
1145  return false;
1146 }
1147 
1148 template<class EventHandler>
1149 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1150 {
1151  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1152  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1153  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1154  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1155  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1156 
1157  substr s = m_evt_handler->m_curr->line_contents.rem;
1158  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1159 
1160  if(!s.len)
1161  return false;
1162 
1163  _c4dbgp("scanning scalar...");
1164 
1165  {
1166  const size_t len = _is_special_json_scalar(s);
1167  if(len)
1168  {
1169  sc->scalar = s.first(len);
1170  sc->needs_filter = false;
1171  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1172  _line_progressed(len);
1173  return true;
1174  }
1175  }
1176 
1177  // must be a number
1178  size_t i = 0;
1179  for( ; i < s.len; ++i)
1180  {
1181  const char c = s.str[i];
1182  switch(c)
1183  {
1184  case ',':
1185  case '}':
1186  case ' ':
1187  case '\t':
1188  _c4dbgpf("found terminating character: '{}'", c);
1189  goto ended_scalar;
1190  case '#':
1191  if(!i || s.str[i-1] == ' ')
1192  {
1193  _c4dbgpf("found terminating character: '{}'", c);
1194  goto ended_scalar;
1195  }
1196  break;
1197  default:
1198  ;
1199  }
1200  }
1201 
1202 ended_scalar:
1203 
1204  if(C4_LIKELY(i > 0))
1205  {
1206  _line_progressed(i);
1207  sc->scalar = s.first(i);
1208  sc->needs_filter = false;
1209  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1210  return true;
1211  }
1212 
1213  return false;
1214 }
1215 
1216 template<class EventHandler>
1217 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1218 {
1219  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1220  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1221 }
1222 
1223 template<class EventHandler>
1224 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1225 {
1226  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1227  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1228 }
1229 
/** Scan a plain (unquoted) scalar in block style, or while the container
 * type is not yet known (RUNK|USTY).
 *
 * Nothing is scanned (and false is returned) when the line remainder is
 * empty or starts with something that is not a plain scalar:
 *   - '-' forming a block token or a document-begin token
 *   - ':' or '?' forming a block token
 *   - '[', '{', '&', '*' or '!' (or a tab when RYML_WITH_TAB_TOKENS is
 *     defined)
 *   - '.' forming a document-end token
 *
 * Otherwise the scan proceeds line by line, and ends at:
 *   - ':' which is the last character of the line or is followed by a
 *     space (or by a tab with RYML_WITH_TAB_TOKENS). This is accepted
 *     only on the line where the scalar started; on a later line it
 *     raises a parse error.
 *   - '#' which is the first character of the line remainder or is
 *     preceded by a space or a tab
 *   - the end of a line when the next line's first non-space character
 *     is at a column smaller than @p indentation
 *   - the end of a line when the next line is not indented and starts
 *     with a document-begin or document-end token
 *   - the end of the file
 *
 * @param sc receives the scanned range, right-trimmed of spaces, tabs and
 *        newline characters, and the needs_filter flag, which is set when
 *        the scan went past the line where it started.
 * @param indentation the minimum indentation required for the next line
 *        to be taken as a continuation of the scalar.
 * @return true when a scalar was scanned, false otherwise.
 */
template<class EventHandler>
bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
{
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));

    substr s = m_evt_handler->m_curr->line_contents.rem;
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));

    if(!s.len)
        return false;

    // reject anything that starts a token instead of a plain scalar
    switch(s.str[0])
    {
    case '-':
        if(_is_blck_token(s))
        {
            return false;
        }
        else if(_is_doc_begin(s))
        {
            _c4dbgp("token is doc start");
            return false;
        }
        break;
    case ':':
    case '?':
        if(_is_blck_token(s))
            return false;
        break;
    case '[':
    case '{':
    case '&':
    case '*':
    case '!':
    _RYML_WITH_TAB_TOKENS(case '\t':)
        return false;
    case '.':
        if(_is_doc_end(s))
        {
            _c4dbgp("token is doc end");
            return false;
        }
        break;
    }

    _c4dbgpf("plain scalar! indentation={}", indentation);

    // remember where the scalar starts: the final range is taken from
    // the buffer, and the starting line decides whether ': ' is valid
    const size_t start_offset = m_evt_handler->m_curr->pos.offset;
    const size_t start_line = m_evt_handler->m_curr->pos.line;

    bool needs_filter = false;
    while(true)
    {
        _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
        for(size_t i = 0; i < s.len; ++i)
        {
            const char curr = s.str[i];
            //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
            switch(curr)
            {
            case ':':
                _c4dbgpf("[{}]: got suspicious ':'", i);
                // are there more characters?
                if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
                {
                    _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
                    _line_progressed(i);
                    // ': ' is accepted only on the first line
                    if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
                    {
                        _c4dbgp("start line. scalar ends here");
                        goto ended_scalar;
                    }
                    else
                    {
                        _c4err("parse error");
                    }
                }
                else
                {
                    // the colon is part of the scalar: advance over a run
                    // of consecutive colons, leaving the last one to be
                    // examined by the next iteration
                    size_t j = i;
                    while(j + 1 < s.len && s.str[j+1] == ':')
                    {
                        _c4dbgp("skip colon");
                        ++j;
                    }
                    i = j > i ? j-1 : i;
                    _c4dbgp("nothing to see here");
                }
                break;
            case '#':
                _c4dbgp("got suspicious '#'");
                // a comment starts only at the beginning or after whitespace
                if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
                {
                    _c4dbgp("comment! scalar ends here");
                    _line_progressed(i);
                    goto ended_scalar;
                }
                else
                {
                    _c4dbgp("nothing to see here");
                }
                break;
            }
        }
        // the whole line belongs to the scalar; peek at the next line to
        // decide whether the scalar continues there
        _line_progressed(s.len);
        csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
        next_peeked = next_peeked.trimr("\n\r");
        const size_t next_indentation = next_peeked.first_not_of(' ');
        _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
        if(next_indentation < indentation)
        {
            _c4dbgp("smaller indentation! scalar ended");
            goto ended_scalar;
        }
        else if(next_indentation == 0 && next_peeked.len > 0)
        {
            // a non-indented next line may hold a document token
            const char first = next_peeked.str[0];
            switch(first)
            {
            case '-':
                next_peeked = next_peeked.trimr("\n\r");
                _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
                if(_is_doc_begin_token(next_peeked))
                {
                    _c4dbgp("doc begin! scalar ended");
                    goto ended_scalar;
                }
                break;
            case '.':
                next_peeked = next_peeked.trimr("\n\r");
                _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
                if(_is_doc_end_token(next_peeked))
                {
                    _c4dbgp("doc end! scalar ended");
                    goto ended_scalar;
                }
                break;
            }
        }
        // load with next line
        _c4dbgp("next line!");
        if(!_finished_file())
        {
            _c4dbgp("next line!");
            _line_ended();
            _scan_line();
        }
        else
        {
            _c4dbgp("file finished!");
            goto ended_scalar;
        }
        s = m_evt_handler->m_curr->line_contents.rem;
        needs_filter = true; // the scalar now spans more than one line
    }

ended_scalar:

    // take everything from the start offset up to the current position
    sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
    sc->needs_filter = needs_filter;

    _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);

    return true;
}
1398 
1399 template<class EventHandler>
1400 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1401 {
1402  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1403  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1404  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1405  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1406  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1407  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1408  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1409 }
1410 
1411 template<class EventHandler>
1412 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1413 {
1414  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1415  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1416  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1417  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1418  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1419  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1420 }
1421 
1422 template<class EventHandler>
1423 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1424 {
1425  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1426  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1427 }
1428 
1429 
1430 //-----------------------------------------------------------------------------
1431 
1432 template<class EventHandler>
1433 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1434 {
1435  substr rem{}; // declare here because of the goto
1436  size_t nlpos{}; // declare here because of the goto
1437  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1438  if(pos >= m_buf.len)
1439  goto next_is_empty;
1440 
1441  // look for the next newline chars, and jump to the right of those
1442  rem = from_next_line(m_buf.sub(pos));
1443  if(rem.empty())
1444  goto next_is_empty;
1445 
1446  // now get everything up to and including the following newline chars
1447  nlpos = rem.first_of("\r\n");
1448  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1449  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1450  rem = rem.left_of(nlpos, /*include_pos*/true);
1451 
1452  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1453  return rem;
1454 
1455 next_is_empty:
1456  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1457  return {};
1458 }
1459 
1460 //-----------------------------------------------------------------------------
1461 
1462 template<class EventHandler>
1463 void ParseEngine<EventHandler>::_scan_line()
1464 {
1465  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1466  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1467  else
1468  m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
1469 }
1470 
1471 template<class EventHandler>
1472 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1473 {
1474  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1475  m_evt_handler->m_curr->pos.offset += ahead;
1476  m_evt_handler->m_curr->pos.col += ahead;
1477  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1478  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1479 }
1480 
1481 template<class EventHandler>
1482 void ParseEngine<EventHandler>::_line_ended()
1483 {
1484  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1485  m_evt_handler->m_curr->pos.line,
1486  m_evt_handler->m_curr->line_contents.full.len,
1487  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1488  m_evt_handler->m_curr->pos.col, 1);
1489  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1490  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1491  ++m_evt_handler->m_curr->pos.line;
1492  m_evt_handler->m_curr->pos.col = 1;
1493 }
1494 
1495 template<class EventHandler>
1496 void ParseEngine<EventHandler>::_line_ended_undo()
1497 {
1498  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1499  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1500  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1501  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1502  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1503  m_evt_handler->m_curr->pos.offset -= delta;
1504  --m_evt_handler->m_curr->pos.line;
1505  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1506  // don't forget to undo also the changes to the remainder of the line
1507  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1508  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1509 }
1510 
1511 
1512 //-----------------------------------------------------------------------------
1513 template<class EventHandler>
1514 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1515 {
1516  m_evt_handler->m_curr->indref = indentation;
1517  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1518 }
1519 
1520 template<class EventHandler>
1521 void ParseEngine<EventHandler>::_save_indentation()
1522 {
1523  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1524  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1525  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1526 }
1527 
1528 
1529 //-----------------------------------------------------------------------------
1530 
1531 template<class EventHandler>
1532 void ParseEngine<EventHandler>::_end_map_blck()
1533 {
1534  _c4dbgp("mapblck: end");
1535  if(has_any(RKCL|RVAL))
1536  {
1537  _c4dbgp("mapblck: set missing val");
1538  _handle_annotations_before_blck_val_scalar();
1539  m_evt_handler->set_val_scalar_plain_empty();
1540  }
1541  else if(has_any(QMRK))
1542  {
1543  _c4dbgp("mapblck: set missing keyval");
1544  _handle_annotations_before_blck_key_scalar();
1545  m_evt_handler->set_key_scalar_plain_empty();
1546  _handle_annotations_before_blck_val_scalar();
1547  m_evt_handler->set_val_scalar_plain_empty();
1548  }
1549  m_evt_handler->end_map();
1550 }
1551 
1552 template<class EventHandler>
1553 void ParseEngine<EventHandler>::_end_seq_blck()
1554 {
1555  if(has_any(RVAL))
1556  {
1557  _c4dbgp("seqblck: set missing val");
1558  _handle_annotations_before_blck_val_scalar();
1559  m_evt_handler->set_val_scalar_plain_empty();
1560  }
1561  m_evt_handler->end_seq();
1562 }
1563 
1564 template<class EventHandler>
1565 void ParseEngine<EventHandler>::_end2_map()
1566 {
1567  _c4dbgp("map: end");
1568  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1569  if(has_any(BLCK))
1570  {
1571  _end_map_blck();
1572  }
1573  else
1574  {
1575  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1576  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1577  m_evt_handler->_pop();
1578  }
1579 }
1580 
1581 template<class EventHandler>
1582 void ParseEngine<EventHandler>::_end2_seq()
1583 {
1584  _c4dbgp("seq: end");
1585  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1586  if(has_any(BLCK))
1587  {
1588  _end_seq_blck();
1589  }
1590  else
1591  {
1592  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1594  m_evt_handler->_pop();
1595  }
1596 }
1597 
/** Begin a document through the handler's begin_doc(): the document is
 * marked as empty, the RDOC flag is added, and the reference
 * indentation of the current state is reset to 0. */
template<class EventHandler>
void ParseEngine<EventHandler>::_begin2_doc()
{
    m_doc_empty = true; // no content was found yet in this document
    add_flags(RDOC);
    m_evt_handler->begin_doc();
    m_evt_handler->m_curr->indref = 0; // NOTE(review): the original author marked this reset with '?' - confirm it is needed
}
1606 
/** Begin a document through the handler's begin_doc_expl(): the
 * document is marked as empty, the RDOC flag is added, and the
 * reference indentation of the current state is reset to 0. */
template<class EventHandler>
void ParseEngine<EventHandler>::_begin2_doc_expl()
{
    m_doc_empty = true; // no content was found yet in this document
    add_flags(RDOC);
    m_evt_handler->begin_doc_expl();
    m_evt_handler->m_curr->indref = 0; // NOTE(review): the original author marked this reset with '?' - confirm it is needed
}
1615 
1616 template<class EventHandler>
1617 void ParseEngine<EventHandler>::_end2_doc()
1618 {
1619  _c4dbgp("doc: end");
1620  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1621  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1622  {
1623  _c4dbgp("doc was empty; add empty val");
1624  _handle_annotations_before_blck_val_scalar();
1625  m_evt_handler->set_val_scalar_plain_empty();
1626  }
1627  m_evt_handler->end_doc();
1628 }
1629 
1630 template<class EventHandler>
1631 void ParseEngine<EventHandler>::_end2_doc_expl()
1632 {
1633  _c4dbgp("doc: end");
1634  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1635  {
1636  _c4dbgp("doc: no children; add empty val");
1637  _handle_annotations_before_blck_val_scalar();
1638  m_evt_handler->set_val_scalar_plain_empty();
1639  }
1640  m_evt_handler->end_doc_expl();
1641 }
1642 
1643 template<class EventHandler>
1644 void ParseEngine<EventHandler>::_maybe_begin_doc()
1645 {
1646  if(has_none(RDOC))
1647  {
1648  _c4dbgp("doc must be started");
1649  _begin2_doc();
1650  }
1651 }
1652 template<class EventHandler>
1653 void ParseEngine<EventHandler>::_maybe_end_doc()
1654 {
1655  if(has_any(RDOC))
1656  {
1657  _c4dbgp("doc must be finished");
1658  _end2_doc();
1659  }
1660  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1661  {
1662  _c4dbgp("no doc to finish, but pending annotations");
1663  m_evt_handler->begin_doc();
1664  _handle_annotations_before_blck_val_scalar();
1665  m_evt_handler->set_val_scalar_plain_empty();
1666  m_evt_handler->end_doc();
1667  }
1668 }
1669 
1670 template<class EventHandler>
1671 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1672 {
1673  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1674  if(m_evt_handler->m_stack[0].flags & RDOC)
1675  {
1676  _c4dbgp("root is RDOC");
1677  if(m_evt_handler->m_curr->level != 0)
1678  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1679  }
1680  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1681  {
1682  _c4dbgp("root is STREAM");
1683  if(m_evt_handler->m_curr->level != 1)
1684  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1685  }
1686  else
1687  {
1688  _c4err("internal error");
1689  }
1690  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1691 }
1692 
/** End the current document from within nested containers: pop the
 * stack down to the document state, end the document through
 * _end2_doc_expl(), and then set the flags RUNK|RTOP|NDOC while
 * clearing RMAP|RSEQ|RDOC. */
template<class EventHandler>
void ParseEngine<EventHandler>::_end_doc_suddenly()
{
    _c4dbgp("end doc suddenly");
    _end_doc_suddenly__pop(); // go down to the state holding the document
    _end2_doc_expl();
    addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
}
1701 
/** Start a new document from within nested containers: pop the stack
 * down to the document state, end that document through _end2_doc(),
 * and begin the new one through _begin2_doc_expl(). */
template<class EventHandler>
void ParseEngine<EventHandler>::_start_doc_suddenly()
{
    _c4dbgp("start doc suddenly");
    _end_doc_suddenly__pop(); // go down to the state holding the document
    _end2_doc();
    _begin2_doc_expl();
}
1710 
1711 template<class EventHandler>
1712 void ParseEngine<EventHandler>::_end_stream()
1713 {
1714  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1715  if(has_all(RSEQ|FLOW))
1716  _c4err("missing terminating ]");
1717  else if(has_all(RMAP|FLOW))
1718  _c4err("missing terminating }");
1719  if(m_evt_handler->m_stack.size() > 1)
1720  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1721  if(has_all(RDOC))
1722  {
1723  _end2_doc();
1724  }
1725  else if(has_all(RTOP|RUNK))
1726  {
1727  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1728  {
1729  if(m_doc_empty)
1730  {
1731  m_evt_handler->begin_doc();
1732  _handle_annotations_before_blck_val_scalar();
1733  m_evt_handler->set_val_scalar_plain_empty();
1734  m_evt_handler->end_doc();
1735  }
1736  }
1737  }
1738  m_evt_handler->end_stream();
1739 }
1740 
1741 
1742 template<class EventHandler>
1743 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1744 {
1745  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1746  while(m_evt_handler->m_curr != popto)
1747  {
1748  if(has_any(RSEQ))
1749  {
1750  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1751  _end2_seq();
1752  }
1753  else if(has_any(RMAP))
1754  {
1755  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1756  _end2_map();
1757  }
1758  else
1759  {
1760  break;
1761  }
1762  }
1763  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1764 }
1765 
1766 template<class EventHandler>
1767 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1768 {
1769  // search the stack frame to jump to based on its indentation
1770  using state_type = typename EventHandler::state;
1771  state_type const* popto = nullptr;
1772  auto &stack = m_evt_handler->m_stack;
1773  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1774  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1775  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1776  #ifdef RYML_DBG
1777  if(_dbg_enabled())
1778  {
1779  char flagbuf_[128];
1780  for(state_type const& s : stack)
1781  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1782  }
1783  #endif
1784  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1785  {
1786  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1787  if(s->indref == ind)
1788  {
1789  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1790  popto = s;
1791  break;
1792  }
1793  }
1794  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1795  {
1796  _c4err("parse error: incorrect indentation?");
1797  }
1798  _handle_indentation_pop(popto);
1799 }
1800 
/** Pop out of a block map after the indentation decreased.
 *
 * The stack is searched downwards, starting at the state right below
 * the current one and never reaching the stack bottom, for a state
 * whose reference indentation is equal to the indentation of the
 * current line. Several consecutive states may match; the search goes
 * on past a match, and ends when:
 *   - a state with smaller reference indentation is found
 *   - a match was already found, and the state being examined has RTOP
 *     but neither RMAP nor RSEQ (this state is not taken)
 *   - the matching state is a block sequence and the current line
 *     starts with a '-' block token (this state is taken)
 * The stack is then popped to the last state taken. A parse error is
 * raised when no state was taken.
 */
template<class EventHandler>
void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
{
    // search the stack frame to jump to based on its indentation
    using state_type = typename EventHandler::state;
    auto &stack = m_evt_handler->m_stack;
    _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
    _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
    const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
    state_type const* popto = nullptr;
    #ifdef RYML_DBG
    char flagbuf_[128];
    if(_dbg_enabled())
    {
        for(state_type const& s : stack)
            _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
    }
    #endif
    for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
    {
        _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
        if(s->indref < ind)
        {
            // went below the wanted indentation: stop searching
            break;
        }
        else if(s->indref == ind)
        {
            _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
            // keep the previous match when this state is a top-level
            // state which is not a container
            if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
            {
                break;
            }
            popto = s;
            if(has_all(RSEQ|BLCK, s))
            {
                // the match is a block sequence: when the current line
                // starts a sequence entry, it belongs to that sequence
                csubstr rem = m_evt_handler->m_curr->line_contents.rem;
                const size_t first = rem.first_not_of(' ');
                _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
                rem = rem.right_of(first, true);
                _c4dbgpf("indentless? rem='{}' first={}", rem, first);
                if(rem.begins_with('-') && _is_blck_token(rem))
                {
                    _c4dbgp("parent was indentless seq");
                    break;
                }
            }
        }
    }
    // the target must exist and be strictly below the current state
    if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
    {
        _c4err("parse error: incorrect indentation?");
    }
    _handle_indentation_pop(popto);
}
1855 
1856 
1857 //-----------------------------------------------------------------------------
/** Scan a single-quoted scalar, which may span several lines.
 *
 * Leading spaces are skipped, and the current position must then be at
 * the opening quote. The lines are scanned until a quote is found which
 * is not followed by another quote (a pair of quotes is an escaped
 * quote). The position is left right after the closing quote. An error
 * is raised when the file ends before the closing quote is found.
 *
 * @return the range between the quotes (not including them), and a flag
 *         telling whether it needs filtering. The flag is set when an
 *         escaped quote was found, when the scalar spans more than one
 *         line, when a scanned line has no characters other than spaces
 *         and quotes, or when the scan of a line starts at the line
 *         beginning and the line begins with a space.
 */
template<class EventHandler>
typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
{
    // quoted scalars can spread over multiple lines!
    // nice explanation here: http://yaml-multiline.info/

    // a span to the end of the file
    size_t b = m_evt_handler->m_curr->pos.offset;
    substr s = m_buf.sub(b);
    if(s.begins_with(' '))
    {
        // skip the leading spaces
        s = s.triml(' ');
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
    }
    b = m_evt_handler->m_curr->pos.offset; // take this into account
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));

    // skip the opening quote
    _line_progressed(1);
    s = s.sub(1);

    bool needs_filter = false;

    size_t numlines = 1; // we already have one line
    size_t pos = npos; // find the pos of the matching quote
    while( ! _finished_file())
    {
        const csubstr line = m_evt_handler->m_curr->line_contents.rem;
        bool line_is_blank = true;
        _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
        for(size_t i = 0; i < line.len; ++i)
        {
            const char curr = line.str[i];
            if(curr == '\'') // single quotes are escaped with two single quotes
            {
                // '~' stands for "no next character"; any non-quote would do
                const char next = i+1 < line.len ? line.str[i+1] : '~';
                if(next != '\'') // so just look for the first quote
                {                // without another after it
                    pos = i;
                    break;
                }
                else
                {
                    needs_filter = true; // needs filter to remove escaped quotes
                    ++i; // skip the escaped quote
                }
            }
            else if(curr != ' ')
            {
                line_is_blank = false;
            }
        }

        // leading whitespace also needs filtering
        needs_filter = needs_filter
            || (numlines > 1)
            || line_is_blank
            || (_at_line_begin() && line.begins_with(' '));

        if(pos == npos)
        {
            // no closing quote in this line: consume it whole
            _line_progressed(line.len);
            ++numlines;
        }
        else
        {
            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
            _line_progressed(pos + 1); // progress beyond the quote
            // from here on, pos is the distance from the opening quote to the closing quote
            pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
            break;
        }

        _line_ended();
        _scan_line();
    }

    if(pos == npos)
    {
        _c4err("reached end of file while looking for closing quote");
    }
    else
    {
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
        // s starts right after the opening quote, hence the -1
        s = s.sub(0, pos-1);
    }

    _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);

    return ScannedScalar { s, needs_filter };
}
1953 
1954 
1955 //-----------------------------------------------------------------------------
1956 template<class EventHandler>
1957 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1958 {
1959  // quoted scalars can spread over multiple lines!
1960  // nice explanation here: http://yaml-multiline.info/
1961 
1962  // a span to the end of the file
1963  size_t b = m_evt_handler->m_curr->pos.offset;
1964  substr s = m_buf.sub(b);
1965  if(s.begins_with(' '))
1966  {
1967  s = s.triml(' ');
1968  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1969  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1970  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1971  }
1972  b = m_evt_handler->m_curr->pos.offset; // take this into account
1973  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1974 
1975  // skip the opening quote
1976  _line_progressed(1);
1977  s = s.sub(1);
1978 
1979  bool needs_filter = false;
1980 
1981  size_t numlines = 1; // we already have one line
1982  size_t pos = npos; // find the pos of the matching quote
1983  while( ! _finished_file())
1984  {
1985  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1986  #if defined(__GNUC__) && __GNUC__ == 11
1987  C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
1988  #endif
1989  bool line_is_blank = true;
1990  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1991  for(size_t i = 0; i < line.len; ++i)
1992  {
1993  const char curr = line.str[i];
1994  if(curr != ' ')
1995  line_is_blank = false;
1996  // every \ is an escape
1997  if(curr == '\\')
1998  {
1999  const char next = i+1 < line.len ? line.str[i+1] : '~';
2000  needs_filter = true;
2001  if(next == '"' || next == '\\')
2002  ++i;
2003  }
2004  else if(curr == '"')
2005  {
2006  pos = i;
2007  break;
2008  }
2009  }
2010 
2011  // leading whitespace also needs filtering
2012  needs_filter = needs_filter
2013  || (numlines > 1)
2014  || line_is_blank
2015  || (_at_line_begin() && line.begins_with(' '));
2016 
2017  if(pos == npos)
2018  {
2019  _line_progressed(line.len);
2020  ++numlines;
2021  }
2022  else
2023  {
2024  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2025  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
2026  _line_progressed(pos + 1); // progress beyond the quote
2027  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
2028  break;
2029  }
2030 
2031  _line_ended();
2032  _scan_line();
2033  }
2034 
2035  if(pos == npos)
2036  {
2037  _c4err("reached end of file looking for closing quote");
2038  }
2039  else
2040  {
2041  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
2042  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
2043  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2044  s = s.sub(0, pos-1);
2045  }
2046 
2047  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2048 
2049  return ScannedScalar { s, needs_filter };
2050 }
2051 
2052 
2053 //-----------------------------------------------------------------------------
2054 template<class EventHandler>
2055 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2056 {
2057  _c4dbgpf("blck: indref={}", indref);
2058  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
2059 
2060  // nice explanation here: http://yaml-multiline.info/
2061  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2062  csubstr trimmed = s.triml(' ');
2063  if(trimmed.str > s.str)
2064  {
2065  _c4dbgp("skipping whitespace");
2066  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2067  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2068  s = trimmed;
2069  }
2070  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2071 
2072  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2073 
2074  // parse the spec
2075  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2076  size_t indentation = npos; // have to find out if no spec is given
2077  csubstr digits;
2078  if(s.len > 1)
2079  {
2080  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2081  csubstr t = s.sub(1);
2082  _c4dbgpf("blck: spec is multichar: '{}'", t);
2083  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2084  size_t pos = t.first_of("-+");
2085  _c4dbgpf("blck: spec chomp char at {}", pos);
2086  if(pos != npos)
2087  {
2088  if(t[pos] == '-')
2089  chomp = CHOMP_STRIP;
2090  else if(t[pos] == '+')
2091  chomp = CHOMP_KEEP;
2092  if(pos == 0)
2093  t = t.sub(1);
2094  else
2095  t = t.first(pos);
2096  }
2097  // from here to the end, only digits are considered
2098  digits = t.left_of(t.first_not_of("0123456789"));
2099  if( ! digits.empty())
2100  {
2101  if(C4_UNLIKELY(digits.len > 1))
2102  _c4err("parse error: invalid indentation");
2103  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2104  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2105  _c4err("parse error: could not read indentation as decimal");
2106  if(C4_UNLIKELY( ! indentation))
2107  _c4err("parse error: null indentation");
2108  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2109  indentation += m_evt_handler->m_curr->indref;
2110  }
2111  }
2112 
2113  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2114 
2115  // finish the current line
2116  _line_progressed(s.len);
2117  _line_ended();
2118  _scan_line();
2119 
2120  // start with a zero-length block, already pointing at the right place
2121  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2122  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2123 
2124  // read every full line into a raw block,
2125  // from which newlines are to be stripped as needed.
2126  //
2127  // If no explicit indentation was given, pick it from the first
2128  // non-empty line. See
2129  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2130  size_t num_lines = 0;
2131  size_t first = m_evt_handler->m_curr->pos.line;
2132  size_t provisional_indentation = npos;
2133  LineContents lc;
2134  while(( ! _finished_file()))
2135  {
2136  // peek next line, but do not advance immediately
2137  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2138  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2139  C4_DONT_OPTIMIZE(lc.rem);
2140  #endif
2141  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2142  // evaluate termination conditions
2143  if(indentation != npos)
2144  {
2145  _c4dbgpf("blck: indentation={}", indentation);
2146  // stop when the line is deindented and not empty
2147  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2148  {
2149  if(raw_block.len)
2150  {
2151  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2152  }
2153  else
2154  {
2155  _c4err("indentation decreased without any scalar");
2156  }
2157  break;
2158  }
2159  else if(indentation == 0)
2160  {
2161  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2162  if(_is_doc_token(lc.rem))
2163  {
2164  _c4dbgp("blck: stop. indentation=0 and doc ended");
2165  break;
2166  }
2167  }
2168  }
2169  else
2170  {
2171  const size_t fns = lc.stripped.first_not_of(' ');
2172  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2173  if(fns != npos) // non-empty line
2174  {
2176  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2177  _c4err("parse error");
2178  )
2179  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2180  if(provisional_indentation == npos)
2181  {
2182  if(lc.indentation < indref)
2183  {
2184  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2185  if(raw_block.len == 0)
2186  {
2187  _c4dbgp("blck: was empty, undo next line");
2188  _line_ended_undo();
2189  }
2190  break;
2191  }
2192  else if(lc.indentation == m_evt_handler->m_curr->indref)
2193  {
2194  if(has_any(RSEQ|RMAP))
2195  {
2196  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2197  break;
2198  }
2199  }
2200  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2201  indentation = lc.indentation;
2202  }
2203  else
2204  {
2205  if(lc.indentation >= provisional_indentation)
2206  {
2207  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2208  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2209  indentation = lc.indentation;
2210  }
2211  else
2212  {
2213  break;
2214  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2215  }
2216  }
2217  }
2218  else // empty line
2219  {
2220  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2221  if(provisional_indentation != npos)
2222  {
2223  if(lc.stripped.len >= provisional_indentation)
2224  {
2225  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2226  provisional_indentation = lc.stripped.len;
2227  }
2228  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2229  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2230  {
2231  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2232  provisional_indentation = lc.indentation;
2233  }
2234  #endif
2235  }
2236  else
2237  {
2238  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2239  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2240  if(provisional_indentation == npos)
2241  {
2242  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2243  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2244  }
2245  if(provisional_indentation < indref)
2246  {
2247  provisional_indentation = indref;
2248  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2249  }
2250  }
2251  }
2252  }
2253  // advance now that we know the folded scalar continues
2254  m_evt_handler->m_curr->line_contents = lc;
2255  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2256  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2257  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2258  _line_ended();
2259  ++num_lines;
2260  }
2261  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2262  C4_UNUSED(num_lines);
2263  C4_UNUSED(first);
2264 
2265  if(indentation == npos)
2266  {
2267  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2268  indentation = provisional_indentation;
2269  }
2270 
2271  if(num_lines)
2272  _line_ended_undo();
2273 
2274  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2275 
2276  sb->scalar = raw_block;
2277  sb->indentation = indentation;
2278  sb->chomp = chomp;
2279 }
2280 
2281 
2282 //-----------------------------------------------------------------------------
2283 //-----------------------------------------------------------------------------
2284 //-----------------------------------------------------------------------------
2285 /** @cond dev */
2286 
2287 // a debugging scaffold:
2288 #if 0
2289 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2290 #else
2291 #define _c4dbgfws(...)
2292 #endif
2293 
2294 template<class EventHandler>
2295 template<class FilterProcessor>
2296 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2297 {
2298  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2299  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2300 
2301  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2302  if(first_pos != npos)
2303  {
2304  const char first_char = proc.src[first_pos];
2305  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2306  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2307  {
2308  _c4dbgfws("whitespace is trailing on line", "");
2309  proc.skip(first_pos - proc.rpos);
2310  }
2311  else // a legit whitespace
2312  {
2313  proc.copy();
2314  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2315  }
2316  return true;
2317  }
2318  _c4dbgfws("whitespace is trailing on line", "");
2319  return false;
2320 }
2321 
2322 template<class EventHandler>
2323 template<class FilterProcessor>
2324 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2325 {
2326  if(!_filter_ws_handle_to_first_non_space(proc))
2327  {
2328  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2329  proc.copy(proc.src.len - proc.rpos);
2330  }
2331 }
2332 
2333 template<class EventHandler>
2334 template<class FilterProcessor>
2335 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2336 {
2337  if(!_filter_ws_handle_to_first_non_space(proc))
2338  {
2339  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2340  proc.skip(proc.src.len - proc.rpos);
2341  }
2342 }
2343 
2344 #undef _c4dbgfws
2345 
2346 
2347 //-----------------------------------------------------------------------------
2348 //-----------------------------------------------------------------------------
2349 //-----------------------------------------------------------------------------
2350 /* plain scalars */
2351 
2352 // a debugging scaffold:
2353 #if 0
2354 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2355 #else
2356 #define _c4dbgfps(fmt, ...)
2357 #endif
2358 
2359 template<class EventHandler>
2360 template<class FilterProcessor>
2361 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2362 {
2363  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2364 
2365  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2366  size_t ii = proc.rpos;
2367  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2368  if(numnl_following)
2369  {
2370  proc.set('\n', numnl_following);
2371  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2372  }
2373  else
2374  {
2375  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2376  if(ret != npos)
2377  {
2378  proc.set(' ');
2379  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2380  }
2381  else
2382  {
2383  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2384  ii = proc.src.len;
2385  }
2386  }
2387  proc.rpos = ii;
2388 }
2389 
2390 template<class EventHandler>
2391 template<class FilterProcessor>
2392 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2393 {
2394  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2395  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2396 
2397  while(proc.has_more_chars())
2398  {
2399  const char curr = proc.curr();
2400  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2401  switch(curr)
2402  {
2403  case ' ':
2404  _RYML_WITH_TAB_TOKENS(case '\t':)
2405  _c4dbgfps("whitespace", curr);
2406  _filter_ws_skip_trailing(proc);
2407  break;
2408  case '\n':
2409  _c4dbgfps("newline", curr);
2410  _filter_nl_plain(proc, /*indentation*/indentation);
2411  break;
2412  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2413  _c4dbgfps("carriage return, ignore", curr);
2414  proc.skip();
2415  break;
2416  default:
2417  proc.copy();
2418  break;
2419  }
2420  }
2421 
2422  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2423 
2424  return proc.result();
2425 }
2426 
2427 #undef _c4dbgfps
2428 
2429 
2430 template<class EventHandler>
2431 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2432 {
2433  FilterProcessorSrcDst proc(scalar, dst);
2434  return _filter_plain(proc, indentation);
2435 }
2436 
2437 template<class EventHandler>
2438 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2439 {
2440  FilterProcessorInplaceEndExtending proc(dst, cap);
2441  return _filter_plain(proc, indentation);
2442 }
2443 
2444 
2445 //-----------------------------------------------------------------------------
2446 //-----------------------------------------------------------------------------
2447 //-----------------------------------------------------------------------------
2448 /* single quoted */
2449 
2450 // a debugging scaffold:
2451 #if 0
2452 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2453 #else
2454 #define _c4dbgfsq(fmt, ...)
2455 #endif
2456 
2457 template<class EventHandler>
2458 template<class FilterProcessor>
2459 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2460 {
2461  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2462 
2463  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2464  size_t ii = proc.rpos;
2465  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2466  if(numnl_following)
2467  {
2468  proc.set('\n', numnl_following);
2469  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2470  }
2471  else
2472  {
2473  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2474  if(ret != npos)
2475  {
2476  proc.set(' ');
2477  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2478  }
2479  else
2480  {
2481  proc.set(' ');
2482  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2483  }
2484  }
2485  proc.rpos = ii;
2486 }
2487 
2488 template<class EventHandler>
2489 template<class FilterProcessor>
2490 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2491 {
2492  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2493 
2494  // from the YAML spec for double-quoted scalars:
2495  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2496  while(proc.has_more_chars())
2497  {
2498  const char curr = proc.curr();
2499  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2500  switch(curr)
2501  {
2502  case ' ':
2503  case '\t':
2504  _c4dbgfsq("whitespace", curr);
2505  _filter_ws_copy_trailing(proc);
2506  break;
2507  case '\n':
2508  _c4dbgfsq("newline", curr);
2509  _filter_nl_squoted(proc);
2510  break;
2511  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2512  _c4dbgfsq("skip cr", curr);
2513  proc.skip();
2514  break;
2515  case '\'':
2516  _c4dbgfsq("squote", curr);
2517  if(proc.next() == '\'')
2518  {
2519  _c4dbgfsq("two consecutive squotes", curr);
2520  proc.skip();
2521  proc.copy();
2522  }
2523  else
2524  {
2525  _c4err("filter error");
2526  }
2527  break;
2528  default:
2529  proc.copy();
2530  break;
2531  }
2532  }
2533 
2534  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2535 
2536  return proc.result();
2537 }
2538 
2539 #undef _c4dbgfsq
2540 
2541 template<class EventHandler>
2542 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2543 {
2544  FilterProcessorSrcDst proc(scalar, dst);
2545  return _filter_squoted(proc);
2546 }
2547 
2548 template<class EventHandler>
2549 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2550 {
2551  FilterProcessorInplaceEndExtending proc(dst, cap);
2552  return _filter_squoted(proc);
2553 }
2554 
2555 
2556 //-----------------------------------------------------------------------------
2557 //-----------------------------------------------------------------------------
2558 //-----------------------------------------------------------------------------
2559 /* double quoted */
2560 
2561 // a debugging scaffold:
2562 #if 0
2563 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2564 #else
2565 #define _c4dbgfdq(...)
2566 #endif
2567 
2568 template<class EventHandler>
2569 template<class FilterProcessor>
2570 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2571 {
2572  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2573 
2574  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2575  size_t ii = proc.rpos;
2576  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2577  if(numnl_following)
2578  {
2579  proc.set('\n', numnl_following);
2580  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2581  }
2582  else
2583  {
2584  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2585  if(ret != npos)
2586  {
2587  proc.set(' ');
2588  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2589  }
2590  else
2591  {
2592  proc.set(' ');
2593  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2594  }
2595  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2596  {
2597  _c4dbgfdq("backslash at [{}]", ii);
2598  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2599  if(next == ' ' || next == '\t')
2600  {
2601  _c4dbgfdq("extend skip to backslash", "");
2602  ++ii;
2603  }
2604  }
2605  }
2606  proc.rpos = ii;
2607 }
2608 
2609 template<class EventHandler>
2610 template<class FilterProcessor>
2611 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2612 {
2613  char next = proc.next();
2614  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2615  if(next == '\r')
2616  {
2617  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2618  {
2619  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2620  next = '\n';
2621  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2622  }
2623  }
2624 
2625  if(next == '\n')
2626  {
2627  size_t ii = proc.rpos + 2;
2628  for( ; ii < proc.src.len; ++ii)
2629  {
2630  // skip leading whitespace
2631  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2632  ;
2633  else
2634  break;
2635  }
2636  proc.skip(ii - proc.rpos);
2637  }
2638  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2639  {
2640  // escapes for json compatibility
2641  proc.translate_esc(next);
2642  _c4dbgfdq("here, used '{}'", _c4prc(next));
2643  }
2644  else if(next == '\r')
2645  {
2646  proc.skip();
2647  }
2648  else if(next == 'n')
2649  {
2650  proc.translate_esc('\n');
2651  }
2652  else if(next == 'r')
2653  {
2654  proc.translate_esc('\r');
2655  }
2656  else if(next == 't')
2657  {
2658  proc.translate_esc('\t');
2659  }
2660  else if(next == '\\')
2661  {
2662  proc.translate_esc('\\');
2663  }
2664  else if(next == 'x') // UTF8
2665  {
2666  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2667  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2668  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2669  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2670  uint8_t byteval = {};
2671  if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
2672  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2673  proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
2674  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2675  }
2676  else if(next == 'u') // UTF16
2677  {
2678  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2679  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2680  char readbuf[8];
2681  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2682  uint32_t codepoint_val = {};
2683  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2684  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2685  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2686  if(C4_UNLIKELY(numbytes == 0))
2687  _c4err("failed to decode code point={}", proc.rpos);
2688  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2689  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2690  }
2691  else if(next == 'U') // UTF32
2692  {
2693  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2694  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2695  char readbuf[8];
2696  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2697  uint32_t codepoint_val = {};
2698  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2699  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2700  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2701  if(C4_UNLIKELY(numbytes == 0))
2702  _c4err("failed to decode code point={}", proc.rpos);
2703  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2704  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2705  }
2706  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2707  else if(next == '0')
2708  {
2709  proc.translate_esc('\0');
2710  }
2711  else if(next == 'b') // backspace
2712  {
2713  proc.translate_esc('\b');
2714  }
2715  else if(next == 'f') // form feed
2716  {
2717  proc.translate_esc('\f');
2718  }
2719  else if(next == 'a') // bell character
2720  {
2721  proc.translate_esc('\a');
2722  }
2723  else if(next == 'v') // vertical tab
2724  {
2725  proc.translate_esc('\v');
2726  }
2727  else if(next == 'e') // escape character
2728  {
2729  proc.translate_esc('\x1b');
2730  }
2731  else if(next == '_') // unicode non breaking space \u00a0
2732  {
2733  // https://www.compart.com/en/unicode/U+00a0
2734  const char payload[] = {
2735  _RYML_CHCONST(-0x3e, 0xc2),
2736  _RYML_CHCONST(-0x60, 0xa0),
2737  };
2738  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2739  }
2740  else if(next == 'N') // unicode next line \u0085
2741  {
2742  // https://www.compart.com/en/unicode/U+0085
2743  const char payload[] = {
2744  _RYML_CHCONST(-0x3e, 0xc2),
2745  _RYML_CHCONST(-0x7b, 0x85),
2746  };
2747  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2748  }
2749  else if(next == 'L') // unicode line separator \u2028
2750  {
2751  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2752  const char payload[] = {
2753  _RYML_CHCONST(-0x1e, 0xe2),
2754  _RYML_CHCONST(-0x80, 0x80),
2755  _RYML_CHCONST(-0x58, 0xa8),
2756  };
2757  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2758  }
2759  else if(next == 'P') // unicode paragraph separator \u2029
2760  {
2761  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2762  const char payload[] = {
2763  _RYML_CHCONST(-0x1e, 0xe2),
2764  _RYML_CHCONST(-0x80, 0x80),
2765  _RYML_CHCONST(-0x57, 0xa9),
2766  };
2767  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2768  }
2769  else if(next == '\0')
2770  {
2771  proc.skip();
2772  }
2773  else
2774  {
2775  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2776  }
2777  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2778 }
2779 
2780 
2781 template<class EventHandler>
2782 template<class FilterProcessor>
2783 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2784 {
2785  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2786  // from the YAML spec for double-quoted scalars:
2787  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2788  while(proc.has_more_chars())
2789  {
2790  const char curr = proc.curr();
2791  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2792  switch(curr)
2793  {
2794  case ' ':
2795  case '\t':
2796  {
2797  _c4dbgfdq("whitespace", curr);
2798  _filter_ws_copy_trailing(proc);
2799  break;
2800  }
2801  case '\n':
2802  {
2803  _c4dbgfdq("newline", curr);
2804  _filter_nl_dquoted(proc);
2805  break;
2806  }
2807  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2808  {
2809  _c4dbgfdq("carriage return, ignore", curr);
2810  proc.skip();
2811  break;
2812  }
2813  case '\\':
2814  {
2815  _filter_dquoted_backslash(proc);
2816  break;
2817  }
2818  default:
2819  {
2820  proc.copy();
2821  break;
2822  }
2823  }
2824  }
2825  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2826  return proc.result();
2827 }
2828 
2829 #undef _c4dbgfdq
2830 
2831 
2832 template<class EventHandler>
2833 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2834 {
2835  FilterProcessorSrcDst proc(scalar, dst);
2836  return _filter_dquoted(proc);
2837 }
2838 
2839 template<class EventHandler>
2840 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2841 {
2842  FilterProcessorInplaceMidExtending proc(dst, cap);
2843  return _filter_dquoted(proc);
2844 }
2845 
2846 
2847 //-----------------------------------------------------------------------------
2848 //-----------------------------------------------------------------------------
2849 //-----------------------------------------------------------------------------
2850 // block filtering helpers
2851 
2852 template<class EventHandler>
2853 template<class FilterProcessor>
2854 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2855 {
2856  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2857  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2858 
2859  // a debugging scaffold:
2860  #if 0
2861  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2862  #else
2863  #define _c4dbgchomp(...)
2864  #endif
2865 
2866  // advance to the last line having spaces beyond the indentation
2867  {
2868  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2869  if(last != npos)
2870  {
2871  _c4dbgchomp("found newline and larger indentation. last={}", last);
2872  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2873  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2874  // remove indentation spaces, copy the rest
2875  while((proc.rpos < last) && proc.has_more_chars())
2876  {
2877  const char curr = proc.curr();
2878  _c4dbgchomp("curr='{}'", _c4prc(curr));
2879  switch(curr)
2880  {
2881  case '\n':
2882  {
2883  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2884  proc.copy();
2885  // are there spaces after the newline?
2886  csubstr at_next_line = proc.rem();
2887  if(at_next_line.begins_with(' '))
2888  {
2889  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2890  // there are spaces.
2891  size_t first_non_space = at_next_line.first_not_of(' ');
2892  _c4dbgchomp("first_non_space={}", first_non_space);
2893  if(first_non_space == npos)
2894  {
2895  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2896  first_non_space = at_next_line.len;
2897  }
2898  if(first_non_space <= indentation)
2899  {
2900  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2901  proc.skip(first_non_space);
2902  }
2903  else
2904  {
2905  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2906  proc.skip(indentation);
2907  // copy the spaces after the indentation
2908  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2909  proc.copy(first_non_space - indentation);
2910  }
2911  }
2912  break;
2913  }
2914  case '\r':
2915  proc.skip();
2916  break;
2917  default:
2918  _c4err("parse error");
2919  break;
2920  }
2921  }
2922  }
2923  }
2924 
2925  // from now on, we only have line ends (or indentation spaces)
2926  switch(chomp)
2927  {
2928  case CHOMP_CLIP:
2929  {
2930  bool had_one = false;
2931  while(proc.has_more_chars())
2932  {
2933  const char curr = proc.curr();
2934  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2935  switch(curr)
2936  {
2937  case '\n':
2938  {
2939  _c4dbgchomp("copy newline!", curr);
2940  proc.copy();
2941  proc.set_at_end();
2942  had_one = true;
2943  break;
2944  }
2945  case ' ':
2946  case '\r':
2947  _c4dbgchomp("skip!", curr);
2948  proc.skip();
2949  break;
2950  }
2951  }
2952  if(!had_one) // there were no newline characters. add one.
2953  {
2954  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2955  proc.set('\n');
2956  }
2957  break;
2958  }
2959  case CHOMP_KEEP:
2960  {
2961  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2962  while(proc.has_more_chars())
2963  {
2964  const char curr = proc.curr();
2965  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2966  switch(curr)
2967  {
2968  case '\n':
2969  _c4dbgchomp("copy newline!", curr);
2970  proc.copy();
2971  break;
2972  case ' ':
2973  case '\r':
2974  _c4dbgchomp("skip!", curr);
2975  proc.skip();
2976  break;
2977  }
2978  }
2979  break;
2980  }
2981  case CHOMP_STRIP:
2982  {
2983  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
2984  // nothing to do!
2985  break;
2986  }
2987  }
2988 
2989  #undef _c4dbgchomp
2990 }
2991 
2992 
2993 // a debugging scaffold:
2994 #if 0
2995 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2996 #else
2997 #define _c4dbgfb(...)
2998 #endif
2999 
3000 template<class EventHandler>
3001 template<class FilterProcessor>
3002 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3003 {
3004  csubstr rem = proc.rem(); // remaining
3005  if(rem.len)
3006  {
3007  size_t first = rem.first_not_of(' ');
3008  if(first != npos)
3009  {
3010  _c4dbgfb("{} spaces follow before next nonws character", first);
3011  if(first < indentation)
3012  {
3013  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3014  proc.skip(first);
3015  }
3016  else
3017  {
3018  _c4dbgfb("skip {} spaces from indentation", indentation);
3019  proc.skip(indentation);
3020  }
3021  }
3022  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3023  else
3024  {
3025  _c4dbgfb("all spaces to the end: {} spaces", first);
3026  first = rem.len;
3027  if(first)
3028  {
3029  if(first < indentation)
3030  {
3031  _c4dbgfb("skip everything", first);
3032  proc.skip(proc.src.len - proc.rpos);
3033  }
3034  else
3035  {
3036  _c4dbgfb("skip {} spaces from indentation", indentation);
3037  proc.skip(indentation);
3038  }
3039  }
3040  }
3041  #endif
3042  }
3043 }
3044 
3045 template<class EventHandler>
3046 template<class FilterProcessor>
3047 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3048 {
3049  csubstr contents = proc.src.trimr(" \n\r");
3050  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3051  if(!contents.len)
3052  {
3053  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3054  if(chomp == CHOMP_KEEP && proc.src.len)
3055  {
3056  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3057  while(proc.has_more_chars())
3058  {
3059  const char curr = proc.curr();
3060  if(curr == '\n')
3061  proc.copy();
3062  else
3063  proc.skip();
3064  }
3065  if(!proc.wpos)
3066  {
3067  proc.set('\n');
3068  }
3069  }
3070  }
3071  return contents.len;
3072 }
3073 
3074 template<class EventHandler>
3075 template<class FilterProcessor>
3076 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3077 {
3078  _c4dbgfb("contents_len={}", contents_len);
3079 
3080  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3081 
3082  // extend contents to just before the first newline at the end,
3083  // in case it is preceded by spaces
3084  size_t firstnewl = proc.src.first_of('\n', contents_len);
3085  if(firstnewl != npos)
3086  {
3087  contents_len = firstnewl;
3088  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3089  }
3090  else
3091  {
3092  contents_len = proc.src.len;
3093  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3094  }
3095 
3096  return contents_len;
3097 }
3098 
3099 #undef _c4dbgfb
3100 
3101 
3102 //-----------------------------------------------------------------------------
3103 //-----------------------------------------------------------------------------
3104 //-----------------------------------------------------------------------------
3105 
3106 // a debugging scaffold:
3107 #if 0
3108 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3109 #else
3110 #define _c4dbgfbl(...)
3111 #endif
3112 
3113 template<class EventHandler>
3114 template<class FilterProcessor>
3115 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3116 {
3117  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3118 
3119  size_t contents_len = _handle_all_whitespace(proc, chomp);
3120  if(!contents_len)
3121  return proc.result();
3122 
3123  contents_len = _extend_to_chomp(proc, contents_len);
3124 
3125  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3126 
3127  _filter_block_indentation(proc, indentation);
3128 
3129  // now filter the bulk
3130  while(proc.has_more_chars(/*maxpos*/contents_len))
3131  {
3132  const char curr = proc.curr();
3133  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3134  switch(curr)
3135  {
3136  case '\n':
3137  {
3138  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3139  proc.copy(); // copy the newline
3140  _filter_block_indentation(proc, indentation);
3141  break;
3142  }
3143  case '\r':
3144  proc.skip();
3145  break;
3146  default:
3147  proc.copy();
3148  break;
3149  }
3150  }
3151 
3152  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3153 
3154  _filter_chomp(proc, chomp, indentation);
3155 
3156  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3157 
3158  return proc.result();
3159 }
3160 
3161 #undef _c4dbgfbl
3162 
3163 template<class EventHandler>
3164 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3165 {
3166  FilterProcessorSrcDst proc(scalar, dst);
3167  return _filter_block_literal(proc, indentation, chomp);
3168 }
3169 
3170 template<class EventHandler>
3171 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3172 {
3173  FilterProcessorInplaceEndExtending proc(scalar, cap);
3174  return _filter_block_literal(proc, indentation, chomp);
3175 }
3176 
3177 
3178 //-----------------------------------------------------------------------------
3179 //-----------------------------------------------------------------------------
3180 //-----------------------------------------------------------------------------
3181 
3182 // a debugging scaffold:
3183 #if 0
3184 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3185 #else
3186 #define _c4dbgfbf(...)
3187 #endif
3188 
3189 
3190 template<class EventHandler>
3191 template<class FilterProcessor>
3192 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3193 {
3194  _filter_block_indentation(proc, indentation);
3195  while(proc.has_more_chars(len))
3196  {
3197  const char curr = proc.curr();
3198  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3199  switch(curr)
3200  {
3201  case '\n':
3202  _c4dbgfbf("newline.", curr);
3203  proc.copy();
3204  _filter_block_indentation(proc, indentation);
3205  break;
3206  case '\r':
3207  proc.skip();
3208  break;
3209  case ' ':
3210  case '\t':
3211  {
3212  size_t first = proc.rem().first_not_of(" \t");
3213  _c4dbgfbf("space. first={}", first);
3214  if(first == npos)
3215  first = proc.rem().len;
3216  _c4dbgfbf("... indentation increased to {}", first);
3217  _filter_block_folded_indented_block(proc, indentation, len, first);
3218  break;
3219  }
3220  default:
3221  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3222  return;
3223  }
3224  }
3225 }
3226 
/** Handle one newline of a run of consecutive newlines in a folded
 * block scalar.
 *
 * The action depends on the position of the newline in the run:
 *  - first newline (num_newl==1): the current write position is
 *    recorded, the newline is skipped and a space is written
 *  - second newline (num_newl==2): the newline is skipped, and the
 *    character at the recorded write position (asserted to be a
 *    space, and to be the last character written) is overwritten
 *    with a newline
 *  - subsequent newlines: the newline is copied
 *
 * @note this code lives in a dedicated function to work around
 * wrong results seen in vs2022 32bit release builds; see the note in
 * _filter_block_folded_newlines(). Be careful when changing its
 * shape.
 *
 * @param proc the filter processor being driven
 * @param num_newl the 1-based position of this newline in the run
 * @param wpos_at_first_newl the write position recorded at the first
 *        newline of the run; ignored (and overwritten) when
 *        num_newl==1
 * @return the (possibly updated) write position of the first newline
 */
3227 template<class EventHandler>
3228 template<class FilterProcessor>
3229 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3230 {
3231  switch(num_newl)
3232  {
3233  case 1u:
3234  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space is written: a following newline needs to
// come back and overwrite it
3235  wpos_at_first_newl = proc.wpos;
3236  proc.skip();
3237  proc.set(' ');
3238  break;
3239  case 2u:
3240  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3241  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3242  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3243  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3244  proc.skip();
// nothing new is written here: the earlier space is overwritten
3245  proc.set_at(wpos_at_first_newl, '\n');
3246  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3247  break;
3248  default:
3249  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3250  proc.copy();
3251  break;
3252  }
3253  return wpos_at_first_newl;
3254 }
3255 
/** Consume a run of newlines in a folded block scalar, starting at
 * the current read position (asserted to be at a newline).
 *
 * Loops while there are characters before @p len:
 *  - '\n' is handed to _filter_block_folded_newlines_compress(), and
 *    the indentation following it is skipped
 *  - a space or tab starts an indented block: the space written for
 *    the first newline of the run (if any) is turned into a newline,
 *    one more newline is written if the run had more than one
 *    newline, the block is handed to
 *    _filter_block_folded_indented_block(), and the run is reset
 *  - '\r' is skipped
 *  - any other character ends the function
 *
 * @param proc the filter processor being driven
 * @param indentation the indentation of the block scalar
 * @param len the maximum read position
 */
3256 template<class EventHandler>
3257 template<class FilterProcessor>
3258 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3259 {
3260  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
// number of newlines seen so far in the current run
3261  size_t num_newl = 0;
// write position of the space written in place of the first newline
// of the current run
3262  size_t wpos_at_first_newl = npos;
3263  while(proc.has_more_chars(len))
3264  {
3265  const char curr = proc.curr();
3266  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3267  switch(curr)
3268  {
3269  case '\n':
3270  {
3271  _c4dbgfbf("newline. sofar={}", num_newl);
3272  // NOTE: vs2022-32bit-release builds were giving wrong
3273  // results in this block, if it was written either as
3274  // a switch(num_newl) or as its equivalent if-form.
3275  //
3276  // For this reason, we're using a dedicated function
3277  // (**_compress), which seems to work around the issue.
3278  //
3279  // The manifested problem was that somewhere between the
3280  // assignment to curr and this point, proc.wpos (the
3281  // write-position of the processor) jumped to npos, which
3282  // made the write wrap-around! To make things worse,
3283  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3284  // problem go away!
3285  //
3286  // The only way to make the problem appear with prints
3287  // enabled was by disabling all prints in this function
3288  // (including in the block which was moved to the compress
3289  // function) and then selectively enabling only some of
3290  // those prints.
3291  //
3292  // This may be due to some bug in the cl-x86 optimizer; or
3293  // it may be triggered by some UB which may be
3294  // inadvertently present in this function or in the filter
3295  // processor. This is despite our best efforts to weed out
3296  // any such UB problem: neither clang-tidy, nor any of the
3297  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3298  // in this code.
3299  //
3300  // In the end, moving this block to a separate function
3301  // was the only way to bury the problem. But it may
3302  // resurface again, as The Undead, rising from the
3303  // grave to haunt us with his terrible presence.
3304  //
3305  // We may have to revisit this. With a stake, and lots of
3306  // garlic.
3307  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3308  _filter_block_indentation(proc, indentation);
3309  break;
3310  }
3311  case ' ':
3312  case '\t':
3313  {
// count the leading blanks; when only blanks remain, use all of them
3314  size_t first = proc.rem().first_not_of(" \t");
3315  _c4dbgfbf("space. first={}", first);
3316  if(first == npos)
3317  first = proc.rem().len;
3318  _c4dbgfbf("... indentation increased to {}", first);
// an indented block follows the newlines: the space written in place
// of the first newline goes back to being a newline
3319  if(num_newl)
3320  {
3321  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3322  proc.set_at(wpos_at_first_newl, '\n');
3323  }
3324  if(num_newl > 1u)
3325  {
3326  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3327  proc.set('\n');
3328  }
3329  _filter_block_folded_indented_block(proc, indentation, len, first);
// the run of newlines is over; start a fresh one
3330  num_newl = 0;
3331  wpos_at_first_newl = npos;
3332  break;
3333  }
3334  case '\r':
3335  proc.skip();
3336  break;
3337  default:
3338  _c4dbgfbf("not space, not newline. stop.", 0);
3339  return;
3340  }
3341  }
3342 }
3343 
3344 
3345 template<class EventHandler>
3346 template<class FilterProcessor>
3347 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3348 {
3349  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3350  if(curr_indentation)
3351  proc.copy(curr_indentation);
3352  while(proc.has_more_chars(len))
3353  {
3354  const char curr = proc.curr();
3355  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3356  switch(curr)
3357  {
3358  case '\n':
3359  {
3360  proc.copy();
3361  _filter_block_indentation(proc, indentation);
3362  csubstr rem = proc.rem();
3363  const size_t first = rem.first_not_of(' ');
3364  _c4dbgfbf("newline. firstns={}", first);
3365  if(first == 0)
3366  {
3367  const char c = rem[first];
3368  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3369  if(c == '\n' || c == '\r')
3370  {
3371  ;
3372  }
3373  else
3374  {
3375  _c4dbgfbf("done with indented block", first);
3376  goto endloop;
3377  }
3378  }
3379  else if(first != npos)
3380  {
3381  proc.copy(first);
3382  _c4dbgfbf("copy all {} spaces", first);
3383  }
3384  break;
3385  }
3386  break;
3387  case '\r':
3388  proc.skip();
3389  break;
3390  default:
3391  proc.copy();
3392  break;
3393  }
3394  }
3395  endloop:
3396  return;
3397 }
3398 
3399 
3400 template<class EventHandler>
3401 template<class FilterProcessor>
3402 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3403 {
3404  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3405 
3406  size_t contents_len = _handle_all_whitespace(proc, chomp);
3407  if(!contents_len)
3408  return proc.result();
3409 
3410  contents_len = _extend_to_chomp(proc, contents_len);
3411 
3412  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3413 
3414  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3415 
3416  // now filter the bulk
3417  while(proc.has_more_chars(/*maxpos*/contents_len))
3418  {
3419  const char curr = proc.curr();
3420  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3421  switch(curr)
3422  {
3423  case '\n':
3424  {
3425  _c4dbgfbf("found newline", curr);
3426  _filter_block_folded_newlines(proc, indentation, contents_len);
3427  break;
3428  }
3429  case '\r':
3430  proc.skip();
3431  break;
3432  default:
3433  proc.copy();
3434  break;
3435  }
3436  }
3437 
3438  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3439 
3440  _filter_chomp(proc, chomp, indentation);
3441 
3442  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3443 
3444  return proc.result();
3445 }
3446 
3447 #undef _c4dbgfbf
3448 
3449 template<class EventHandler>
3450 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3451 {
3452  FilterProcessorSrcDst proc(scalar, dst);
3453  return _filter_block_folded(proc, indentation, chomp);
3454 }
3455 
3456 template<class EventHandler>
3457 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3458 {
3459  FilterProcessorInplaceEndExtending proc(scalar, cap);
3460  return _filter_block_folded(proc, indentation, chomp);
3461 }
3462 
3463 
3464 //-----------------------------------------------------------------------------
3465 //-----------------------------------------------------------------------------
3466 //-----------------------------------------------------------------------------
3467 
3468 template<class EventHandler>
3469 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3470 {
3471  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3472  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3473  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3474  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3475  return r.get();
3476 }
3477 
3478 //-----------------------------------------------------------------------------
3479 
3480 template<class EventHandler>
3481 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3482 {
3483  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3484  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3485  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3486  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3487  return r.get();
3488 }
3489 
3490 
3491 //-----------------------------------------------------------------------------
3492 
3493 template<class EventHandler>
3494 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3495 {
3496  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3497  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3498  if(C4_LIKELY(r.valid()))
3499  {
3500  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3501  return r.get();
3502  }
3503  else
3504  {
3505  const size_t len = r.required_len();
3506  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3507  substr dst = m_evt_handler->alloc_arena(len, &s);
3508  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3509  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3510  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3511  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3512  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3513  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3514  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3515  return rsd.get();
3516  }
3517 }
3518 
3519 
3520 //-----------------------------------------------------------------------------
3521 template<class EventHandler>
3522 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3523 {
3524  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3525  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3526  if(C4_LIKELY(r.valid()))
3527  {
3528  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3529  return r.get();
3530  }
3531  else
3532  {
3533  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3534  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3535  FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3536  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3537  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3538  return rsd.get();
3539  }
3540 }
3541 
3542 
3543 //-----------------------------------------------------------------------------
3544 template<class EventHandler>
3545 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3546 {
3547  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3548  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3549  if(C4_LIKELY(r.valid()))
3550  {
3551  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3552  return r.get();
3553  }
3554  else
3555  {
3556  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3557  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3558  FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3559  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3560  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3561  return rsd.get();
3562  }
3563 }
3564 
3565 
3566 //-----------------------------------------------------------------------------
3567 
3568 template<class EventHandler>
3569 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3570 {
3571  if(sc.needs_filter)
3572  {
3573  if(m_options.scalar_filtering())
3574  {
3575  return _filter_scalar_plain(sc.scalar, indentation);
3576  }
3577  else
3578  {
3579  _c4dbgp("plain scalar left unfiltered");
3580  m_evt_handler->mark_key_scalar_unfiltered();
3581  }
3582  }
3583  else
3584  {
3585  _c4dbgp("plain scalar doesn't need filtering");
3586  }
3587  return sc.scalar;
3588 }
3589 
3590 template<class EventHandler>
3591 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3592 {
3593  if(sc.needs_filter)
3594  {
3595  if(m_options.scalar_filtering())
3596  {
3597  return _filter_scalar_plain(sc.scalar, indentation);
3598  }
3599  else
3600  {
3601  _c4dbgp("plain scalar left unfiltered");
3602  m_evt_handler->mark_val_scalar_unfiltered();
3603  }
3604  }
3605  else
3606  {
3607  _c4dbgp("plain scalar doesn't need filtering");
3608  }
3609  return sc.scalar;
3610 }
3611 
3612 
3613 //-----------------------------------------------------------------------------
3614 
3615 template<class EventHandler>
3616 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3617 {
3618  if(sc.needs_filter)
3619  {
3620  if(m_options.scalar_filtering())
3621  {
3622  return _filter_scalar_squot(sc.scalar);
3623  }
3624  else
3625  {
3626  _c4dbgp("squo key scalar left unfiltered");
3627  m_evt_handler->mark_key_scalar_unfiltered();
3628  }
3629  }
3630  else
3631  {
3632  _c4dbgp("squo key scalar doesn't need filtering");
3633  }
3634  return sc.scalar;
3635 }
3636 
3637 template<class EventHandler>
3638 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3639 {
3640  if(sc.needs_filter)
3641  {
3642  if(m_options.scalar_filtering())
3643  {
3644  return _filter_scalar_squot(sc.scalar);
3645  }
3646  else
3647  {
3648  _c4dbgp("squo val scalar left unfiltered");
3649  m_evt_handler->mark_val_scalar_unfiltered();
3650  }
3651  }
3652  else
3653  {
3654  _c4dbgp("squo val scalar doesn't need filtering");
3655  }
3656  return sc.scalar;
3657 }
3658 
3659 
3660 //-----------------------------------------------------------------------------
3661 
3662 template<class EventHandler>
3663 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3664 {
3665  if(sc.needs_filter)
3666  {
3667  if(m_options.scalar_filtering())
3668  {
3669  return _filter_scalar_dquot(sc.scalar);
3670  }
3671  else
3672  {
3673  _c4dbgp("dquo scalar left unfiltered");
3674  m_evt_handler->mark_key_scalar_unfiltered();
3675  }
3676  }
3677  else
3678  {
3679  _c4dbgp("dquo scalar doesn't need filtering");
3680  }
3681  return sc.scalar;
3682 }
3683 
3684 template<class EventHandler>
3685 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3686 {
3687  if(sc.needs_filter)
3688  {
3689  if(m_options.scalar_filtering())
3690  {
3691  return _filter_scalar_dquot(sc.scalar);
3692  }
3693  else
3694  {
3695  _c4dbgp("dquo scalar left unfiltered");
3696  m_evt_handler->mark_val_scalar_unfiltered();
3697  }
3698  }
3699  else
3700  {
3701  _c4dbgp("dquo scalar doesn't need filtering");
3702  }
3703  return sc.scalar;
3704 }
3705 
3706 
3707 //-----------------------------------------------------------------------------
3708 
3709 template<class EventHandler>
3710 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3711 {
3712  if(m_options.scalar_filtering())
3713  {
3714  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3715  }
3716  else
3717  {
3718  _c4dbgp("literal scalar left unfiltered");
3719  m_evt_handler->mark_key_scalar_unfiltered();
3720  }
3721  return sb.scalar;
3722 }
3723 
3724 template<class EventHandler>
3725 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3726 {
3727  if(m_options.scalar_filtering())
3728  {
3729  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3730  }
3731  else
3732  {
3733  _c4dbgp("literal scalar left unfiltered");
3734  m_evt_handler->mark_val_scalar_unfiltered();
3735  }
3736  return sb.scalar;
3737 }
3738 
3739 
3740 //-----------------------------------------------------------------------------
3741 
3742 template<class EventHandler>
3743 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3744 {
3745  if(m_options.scalar_filtering())
3746  {
3747  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3748  }
3749  else
3750  {
3751  _c4dbgp("folded scalar left unfiltered");
3752  m_evt_handler->mark_key_scalar_unfiltered();
3753  }
3754  return sb.scalar;
3755 }
3756 
3757 template<class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3759 {
3760  if(m_options.scalar_filtering())
3761  {
3762  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3763  }
3764  else
3765  {
3766  _c4dbgp("folded scalar left unfiltered");
3767  m_evt_handler->mark_val_scalar_unfiltered();
3768  }
3769  return sb.scalar;
3770 }
3771 
3772 
3773 //-----------------------------------------------------------------------------
3774 //-----------------------------------------------------------------------------
3775 //-----------------------------------------------------------------------------
3776 
3777 #ifdef RYML_DBG // !!! <----------------------------------
3778 
3779 template<class EventHandler>
3780 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3781 {
3782  char buf1_[64], buf2_[64], buf3_[64];
3783  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3784  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3785  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3786  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3787  s->flags |= on;
3788 }
3789 
3790 template<class EventHandler>
3791 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3792 {
3793  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3794  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3795  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3796  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3797  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3798  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3799  s->flags |= on;
3800  s->flags &= ~off;
3801 }
3802 
3803 template<class EventHandler>
3804 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3805 {
3806  char buf1_[64], buf2_[64], buf3_[64];
3807  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3808  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3809  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3810  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3811  s->flags &= ~off;
3812 }
3813 
3814 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3815 {
3816  size_t pos = 0;
3817  bool gotone = false;
3818 
3819  #define _prflag(fl) \
3820  if((flags & fl) == (fl)) \
3821  { \
3822  if(gotone) \
3823  { \
3824  if(pos + 1 < buf.len) \
3825  buf[pos] = '|'; \
3826  ++pos; \
3827  } \
3828  csubstr fltxt = #fl; \
3829  if(pos + fltxt.len <= buf.len) \
3830  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3831  pos += fltxt.len; \
3832  gotone = true; \
3833  }
3834 
3835  _prflag(RTOP);
3836  _prflag(RUNK);
3837  _prflag(RMAP);
3838  _prflag(RSEQ);
3839  _prflag(FLOW);
3840  _prflag(BLCK);
3841  _prflag(QMRK);
3842  _prflag(RKEY);
3843  _prflag(RVAL);
3844  _prflag(RKCL);
3845  _prflag(RNXT);
3846  _prflag(SSCL);
3847  _prflag(QSCL);
3848  _prflag(RSET);
3849  _prflag(RDOC);
3850  _prflag(NDOC);
3851  _prflag(USTY);
3852  _prflag(RSEQIMAP);
3853 
3854  #undef _prflag
3855 
3856  if(pos == 0)
3857  if(buf.len > 0)
3858  buf[pos++] = '0';
3859 
3860  RYML_CHECK(pos <= buf.len);
3861 
3862  return buf.first(pos);
3863 }
3864 
3865 #endif // RYML_DBG !!! <----------------------------------
3866 
3867 
3868 //-----------------------------------------------------------------------------
3869 //-----------------------------------------------------------------------------
3870 //-----------------------------------------------------------------------------
3871 
3872 template<class EventHandler>
3873 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
3874 {
3875  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3876  return m_buf.sub(loc.offset);
3877 }
3878 
3879 template<class EventHandler>
3880 Location ParseEngine<EventHandler>::location(ConstNodeRef node) const
3881 {
3882  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3883  return location(*node.tree(), node.id());
3884 }
3885 
3886 template<class EventHandler>
3887 Location ParseEngine<EventHandler>::location(Tree const& tree, id_type node) const
3888 {
3889  // try hard to avoid getting the location from a null string.
3890  Location loc;
3891  if(_location_from_node(tree, node, &loc, 0))
3892  return loc;
3893  return val_location(m_buf.str);
3894 }
3895 
3896 template<class EventHandler>
3897 bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
3898 {
3899  if(tree.has_key(node))
3900  {
3901  csubstr k = tree.key(node);
3902  if(C4_LIKELY(k.str != nullptr))
3903  {
3904  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3905  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3906  *loc = val_location(k.str);
3907  return true;
3908  }
3909  }
3910 
3911  if(tree.has_val(node))
3912  {
3913  csubstr v = tree.val(node);
3914  if(C4_LIKELY(v.str != nullptr))
3915  {
3916  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3917  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3918  *loc = val_location(v.str);
3919  return true;
3920  }
3921  }
3922 
3923  if(tree.is_container(node))
3924  {
3925  if(_location_from_cont(tree, node, loc))
3926  return true;
3927  }
3928 
3929  if(tree.type(node) != NOTYPE && level == 0)
3930  {
3931  // try the prev sibling
3932  {
3933  const id_type prev = tree.prev_sibling(node);
3934  if(prev != NONE)
3935  {
3936  if(_location_from_node(tree, prev, loc, level+1))
3937  return true;
3938  }
3939  }
3940  // try the next sibling
3941  {
3942  const id_type next = tree.next_sibling(node);
3943  if(next != NONE)
3944  {
3945  if(_location_from_node(tree, next, loc, level+1))
3946  return true;
3947  }
3948  }
3949  // try the parent
3950  {
3951  const id_type parent = tree.parent(node);
3952  if(parent != NONE)
3953  {
3954  if(_location_from_node(tree, parent, loc, level+1))
3955  return true;
3956  }
3957  }
3958  }
3959 
3960  return false;
3961 }
3962 
3963 template<class EventHandler>
3964 bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
3965 {
3966  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3967  if(!tree.is_stream(node))
3968  {
3969  const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
3970  if(tree.has_children(node))
3971  {
3972  id_type child = tree.first_child(node);
3973  if(tree.has_key(child))
3974  {
3975  // when a map starts, the container was set after the key
3976  csubstr k = tree.key(child);
3977  if(k.str && node_start > k.str)
3978  node_start = k.str;
3979  }
3980  }
3981  *loc = val_location(node_start);
3982  return true;
3983  }
3984  else // it's a stream
3985  {
3986  *loc = val_location(m_buf.str); // just return the front of the buffer
3987  }
3988  return true;
3989 }
3990 
3991 
3992 template<class EventHandler>
3993 Location ParseEngine<EventHandler>::val_location(const char *val) const
3994 {
3995  if(C4_UNLIKELY(val == nullptr))
3996  return {m_file, 0, 0, 0};
3997  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3998  // NOTE: if any of these checks fails, the parser needs to be
3999  // instantiated with locations enabled.
4000  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
4001  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
4002  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4003  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4004  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4005  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4006  // NOTE: the pointer needs to belong to the buffer that was used to parse.
4007  csubstr src = m_buf;
4008  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4009  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4010  // ok. search the first stored newline after the given ptr
4011  using lineptr_type = size_t const* C4_RESTRICT;
4012  lineptr_type lineptr = nullptr;
4013  size_t offset = (size_t)(val - src.begin());
4014  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4015  {
4016  // just do a linear search if the size is small.
4017  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4018  {
4019  if(*curr > offset)
4020  {
4021  lineptr = curr;
4022  break;
4023  }
4024  }
4025  }
4026  else
4027  {
4028  // do a bisection search if the size is not small.
4029  //
4030  // We could use std::lower_bound but this is simple enough and
4031  // spares the costly include of <algorithm>.
4032  size_t count = m_newline_offsets_size;
4033  size_t step;
4034  lineptr_type it;
4035  lineptr = m_newline_offsets;
4036  while(count)
4037  {
4038  step = count >> 1;
4039  it = lineptr + step;
4040  if(*it < offset)
4041  {
4042  lineptr = ++it;
4043  count -= step + 1;
4044  }
4045  else
4046  {
4047  count = step;
4048  }
4049  }
4050  }
4051  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4052  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4053  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4054  Location loc;
4055  loc.name = m_file;
4056  loc.offset = offset;
4057  loc.line = (size_t)(lineptr - m_newline_offsets);
4058  if(lineptr > m_newline_offsets)
4059  loc.col = (offset - *(lineptr-1) - 1u);
4060  else
4061  loc.col = offset;
4062  return loc;
4063 }
4064 
4065 template<class EventHandler>
4066 void ParseEngine<EventHandler>::_prepare_locations()
4067 {
4068  m_newline_offsets_buf = m_buf;
4069  size_t numnewlines = 1u + m_buf.count('\n');
4070  _resize_locations(numnewlines);
4071  m_newline_offsets_size = 0;
4072  for(size_t i = 0; i < m_buf.len; i++)
4073  if(m_buf[i] == '\n')
4074  m_newline_offsets[m_newline_offsets_size++] = i;
4075  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4076  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4077 }
4078 
4079 template<class EventHandler>
4080 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4081 {
4082  if(numnewlines > m_newline_offsets_capacity)
4083  {
4084  if(m_newline_offsets)
4085  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4086  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4087  m_newline_offsets_capacity = numnewlines;
4088  }
4089 }
4090 
4091 template<class EventHandler>
4092 bool ParseEngine<EventHandler>::_locations_dirty() const
4093 {
4094  return !m_newline_offsets_size;
4095 }
4096 
4097 
4098 //-----------------------------------------------------------------------------
4099 //-----------------------------------------------------------------------------
4100 //-----------------------------------------------------------------------------
4101 
4102 template<class EventHandler>
4103 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4104 {
4105  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4106  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4107  {
4108  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4109  {
4110  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4111  _skipchars(" \t");
4112  }
4113  // comments
4114  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4115  {
4116  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4117  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4118  }
4119  }
4120 }
4121 
4122 
4123 //-----------------------------------------------------------------------------
4124 
4125 
4126 template<class EventHandler>
4127 void ParseEngine<EventHandler>::_handle_colon()
4128 {
4129  size_t curr = m_evt_handler->m_curr->pos.line;
4130  if(m_prev_colon != npos)
4131  {
4132  if(curr == m_prev_colon)
4133  _c4err("two colons on same line");
4134  }
4135  m_prev_colon = curr;
4136 }
4137 
4138 template<class EventHandler>
4139 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4140 {
4141  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4142  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
4143  _c4err("too many annotations");
4144  dst->annotations[dst->num_entries].str = str;
4145  dst->annotations[dst->num_entries].indentation = indentation;
4146  dst->annotations[dst->num_entries].line = line;
4147  ++dst->num_entries;
4148 }
4149 
4150 template<class EventHandler>
4151 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4152 {
4153  dst->num_entries = 0;
4154 }
4155 
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
/** When exactly one anchor and/or one tag is pending, skip spaces up
 * to the largest of the current reference indentation and the
 * indentation of the pending annotation(s).
 *
 * @return true if annotations were pending and the skip was
 *         attempted, false otherwise */
template<class EventHandler>
bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
{
    if(m_pending_anchors.num_entries != 1u && m_pending_tags.num_entries != 1u)
        return false;
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
    // each list has at most one entry here, so only entry 0 may be
    // inspected, and only when the list is not empty
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_anchors.num_entries || m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_tags.num_entries || m_pending_tags.annotations[0].line < m_evt_handler->m_curr->pos.line);
    size_t to_skip = m_evt_handler->m_curr->indref;
    if(m_pending_anchors.num_entries)
        to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
    if(m_pending_tags.num_entries)
        to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
    _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
    _maybe_skipchars_up_to(' ', to_skip);
    return true;
}
#endif
4177 
4178 template<class EventHandler>
4179 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4180 {
4181  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4182 }
4183 
4184 template<class EventHandler>
4185 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4186 {
4187  if(!tag.begins_with("!<"))
4188  {
4189  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4190  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4191  }
4192  else
4193  {
4194  if(C4_UNLIKELY(!tag.ends_with('>')))
4195  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
4196  }
4197 }
4198 
4199 template<class EventHandler>
4200 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4201 {
4202  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4203  if(m_pending_tags.num_entries)
4204  {
4205  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4206  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4207  {
4208  _check_tag(m_pending_tags.annotations[0].str);
4209  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4210  _clear_annotations(&m_pending_tags);
4211  }
4212  else
4213  {
4214  _c4err("too many tags");
4215  }
4216  }
4217  if(m_pending_anchors.num_entries)
4218  {
4219  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4220  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4221  {
4222  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4223  _clear_annotations(&m_pending_anchors);
4224  }
4225  else
4226  {
4227  _c4err("too many anchors");
4228  }
4229  }
4230 }
4231 
4232 template<class EventHandler>
4233 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4234 {
4235  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4236  if(m_pending_tags.num_entries)
4237  {
4238  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4239  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4240  {
4241  _check_tag(m_pending_tags.annotations[0].str);
4242  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4243  _clear_annotations(&m_pending_tags);
4244  }
4245  else
4246  {
4247  _c4err("too many tags");
4248  }
4249  }
4250  if(m_pending_anchors.num_entries)
4251  {
4252  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4253  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4254  {
4255  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4256  _clear_annotations(&m_pending_anchors);
4257  }
4258  else
4259  {
4260  _c4err("too many anchors");
4261  }
4262  }
4263 }
4264 
4265 template<class EventHandler>
4266 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4267 {
4268  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4269  if(m_pending_tags.num_entries == 2)
4270  {
4271  _c4dbgp("2 tags, setting entry 0");
4272  _check_tag(m_pending_tags.annotations[0].str);
4273  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4274  }
4275  else if(m_pending_tags.num_entries == 1)
4276  {
4277  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4278  if(m_pending_tags.annotations[0].line < current_line)
4279  {
4280  _c4dbgp("...tag is for the map. setting it.");
4281  _check_tag(m_pending_tags.annotations[0].str);
4282  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4283  _clear_annotations(&m_pending_tags);
4284  }
4285  }
4286  //
4287  if(m_pending_anchors.num_entries == 2)
4288  {
4289  _c4dbgp("2 anchors, setting entry 0");
4290  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4291  }
4292  else if(m_pending_anchors.num_entries == 1)
4293  {
4294  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4295  if(m_pending_anchors.annotations[0].line < current_line)
4296  {
4297  _c4dbgp("...anchor is for the map. setting it.");
4298  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4299  _clear_annotations(&m_pending_anchors);
4300  }
4301  }
4302 }
4303 
4304 template<class EventHandler>
4305 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4306 {
4307  _c4dbgp("annotations_before_start_mapblck_as_key");
4308  if(m_pending_tags.num_entries == 2)
4309  {
4310  _check_tag(m_pending_tags.annotations[0].str);
4311  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4312  }
4313  if(m_pending_anchors.num_entries == 2)
4314  {
4315  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4316  }
4317 }
4318 
4319 template<class EventHandler>
4320 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4321 {
4322  _c4dbgp("annotations_after_start_mapblck");
4323  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4324  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4325  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4326  {
4327  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4328  switch(m_pending_tags.num_entries)
4329  {
4330  case 1u:
4331  _check_tag(m_pending_tags.annotations[0].str);
4332  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4333  _clear_annotations(&m_pending_tags);
4334  break;
4335  case 2u:
4336  _check_tag(m_pending_tags.annotations[1].str);
4337  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4338  _clear_annotations(&m_pending_tags);
4339  break;
4340  }
4341  switch(m_pending_anchors.num_entries)
4342  {
4343  case 1u:
4344  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4345  _clear_annotations(&m_pending_anchors);
4346  break;
4347  case 2u:
4348  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4349  _clear_annotations(&m_pending_anchors);
4350  break;
4351  }
4352  }
4353  _set_indentation(key_indentation);
4354 }
4355 
4356 template<class EventHandler>
4357 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4358 {
4359  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4360  // select the left-most annotation on the max line
4361  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4362  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4363  {
4364  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4365  if(ann.line > curr->line)
4366  curr = &ann;
4367  else if(ann.indentation < curr->indentation)
4368  curr = &ann;
4369  }
4370  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4371  {
4372  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4373  if(ann.line > curr->line)
4374  curr = &ann;
4375  else if(ann.indentation < curr->indentation)
4376  curr = &ann;
4377  }
4378  return curr->line < val_line ? val_indentation : curr->indentation;
4379 }
4380 
4381 template<class EventHandler>
4382 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4383 {
4384  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4385  const size_t pos = rem.find('#');
4386  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4387  if(pos == npos) // no comments
4388  {
4389  m_evt_handler->add_directive(rem);
4390  _line_progressed(rem.len);
4391  }
4392  else
4393  {
4394  csubstr to_comment = rem.first(pos);
4395  csubstr trimmed = to_comment.trimr(" \t");
4396  m_evt_handler->add_directive(trimmed);
4397  _line_progressed(pos);
4398  _skip_comment();
4399  }
4400 }
4401 
4402 template<class EventHandler>
4403 bool ParseEngine<EventHandler>::_handle_bom()
4404 {
4405  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4406  if(rem.len)
4407  {
4408  const csubstr rest = rem.sub(1);
4409  // https://yaml.org/spec/1.2.2/#52-character-encodings
4410  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4411  if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4412  {
4413  _c4dbgp("byte order mark: UTF32BE");
4414  _handle_bom(UTF32BE);
4415  _line_progressed(4);
4416  return true;
4417  }
4418  else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4419  {
4420  _c4dbgp("byte order mark: UTF32LE");
4421  _handle_bom(UTF32LE);
4422  _line_progressed(4);
4423  return true;
4424  }
4425  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4426  {
4427  _c4dbgp("byte order mark: UTF16BE");
4428  _handle_bom(UTF16BE);
4429  _line_progressed(2);
4430  return true;
4431  }
4432  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4433  {
4434  _c4dbgp("byte order mark: UTF16LE");
4435  _handle_bom(UTF16LE);
4436  _line_progressed(2);
4437  return true;
4438  }
4439  else if(rem.begins_with("\xef\xbb\xbf"))
4440  {
4441  _c4dbgp("byte order mark: UTF8");
4442  _handle_bom(UTF8);
4443  _line_progressed(3);
4444  return true;
4445  }
4446  #undef _rymlisascii
4447  }
4448  return false;
4449 }
4450 
4451 template<class EventHandler>
4452 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4453 {
4454  if(m_encoding == NOBOM)
4455  {
4456  const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4457  if(enc == UTF8 || is_beginning_of_file)
4458  m_encoding = enc;
4459  else
4460  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4461  }
4462  else if(enc != m_encoding)
4463  {
4464  _c4err("byte order mark can only be set once");
4465  }
4466 }
4467 
4468 
4469 //-----------------------------------------------------------------------------
4470 
4471 template<class EventHandler>
4472 void ParseEngine<EventHandler>::_handle_seq_json()
4473 {
4474 seqjson_start:
4475  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4476 
4477  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4478  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4479  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4480  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4481  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4482 
4483  _handle_flow_skip_whitespace();
4484  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4485  if(!rem.len)
4486  goto seqjson_again;
4487 
4488  if(has_any(RVAL))
4489  {
4490  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4491  const char first = rem.str[0];
4492  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4493  switch(first)
4494  {
4495  case '"':
4496  {
4497  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4498  ScannedScalar sc = _scan_scalar_dquot();
4499  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4500  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4501  addrem_flags(RNXT, RVAL);
4502  break;
4503  }
4504  case '[':
4505  {
4506  _c4dbgp("seqjson[RVAL]: start child seqjson");
4507  addrem_flags(RNXT, RVAL);
4508  m_evt_handler->begin_seq_val_flow();
4509  addrem_flags(RVAL, RNXT);
4510  _line_progressed(1);
4511  break;
4512  }
4513  case '{':
4514  {
4515  _c4dbgp("seqjson[RVAL]: start child mapjson");
4516  addrem_flags(RNXT, RVAL);
4517  m_evt_handler->begin_map_val_flow();
4518  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4519  _line_progressed(1);
4520  goto seqjson_finish;
4521  }
4522  case ']': // this happens on a trailing comma like ", ]"
4523  {
4524  _c4dbgp("seqjson[RVAL]: end!");
4525  rem_flags(RSEQ);
4526  m_evt_handler->end_seq();
4527  _line_progressed(1);
4528  if(!has_all(RSEQ|FLOW))
4529  goto seqjson_finish;
4530  break;
4531  }
4532  default:
4533  {
4534  ScannedScalar sc;
4535  if(_scan_scalar_seq_json(&sc))
4536  {
4537  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4538  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4539  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4540  addrem_flags(RNXT, RVAL);
4541  }
4542  else
4543  {
4544  _c4err("parse error");
4545  }
4546  }
4547  }
4548  }
4549  else // RNXT
4550  {
4551  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4552  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4553  const char first = rem.str[0];
4554  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4555  switch(first)
4556  {
4557  case ',':
4558  {
4559  _c4dbgp("seqjson[RNXT]: expect next val");
4560  addrem_flags(RVAL, RNXT);
4561  m_evt_handler->add_sibling();
4562  _line_progressed(1);
4563  break;
4564  }
4565  case ']':
4566  {
4567  _c4dbgp("seqjson[RNXT]: end!");
4568  m_evt_handler->end_seq();
4569  _line_progressed(1);
4570  goto seqjson_finish;
4571  }
4572  default:
4573  _c4err("parse error");
4574  }
4575  }
4576 
4577  seqjson_again:
4578  _c4dbgt("seqjson: go again", 0);
4579  if(_finished_line())
4580  {
4581  if(C4_LIKELY(!_finished_file()))
4582  {
4583  _line_ended();
4584  _scan_line();
4585  _c4dbgnextline();
4586  }
4587  else
4588  {
4589  _c4err("missing terminating ]");
4590  }
4591  }
4592  goto seqjson_start;
4593 
4594  seqjson_finish:
4595  _c4dbgp("seqjson: finish");
4596 }
4597 
4598 
4599 //-----------------------------------------------------------------------------
4600 
4601 template<class EventHandler>
4602 void ParseEngine<EventHandler>::_handle_map_json()
4603 {
4604 mapjson_start:
4605  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4606 
4607  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4608  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4609  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4610  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4611  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4612 
4613  _handle_flow_skip_whitespace();
4614  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4615  if(!rem.len)
4616  goto mapjson_again;
4617 
4618  if(has_any(RKEY))
4619  {
4620  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4621  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4622  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4623  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4624  const char first = rem.str[0];
4625  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4626  switch(first)
4627  {
4628  case '"':
4629  {
4630  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4631  ScannedScalar sc = _scan_scalar_dquot();
4632  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4633  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4634  addrem_flags(RKCL, RKEY);
4635  break;
4636  }
4637  case '}': // this happens on a trailing comma like ", }"
4638  {
4639  _c4dbgp("mapjson[RKEY]: end!");
4640  m_evt_handler->end_map();
4641  _line_progressed(1);
4642  goto mapjson_finish;
4643  }
4644  default:
4645  _c4err("parse error");
4646  }
4647  }
4648  else if(has_any(RVAL))
4649  {
4650  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4651  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4652  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4653  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4654  const char first = rem.str[0];
4655  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4656  switch(first)
4657  {
4658  case '"':
4659  {
4660  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4661  ScannedScalar sc = _scan_scalar_dquot();
4662  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4663  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4664  addrem_flags(RNXT, RVAL);
4665  break;
4666  }
4667  case '[':
4668  {
4669  _c4dbgp("mapjson[RVAL]: start val seqjson");
4670  addrem_flags(RNXT, RVAL);
4671  m_evt_handler->begin_seq_val_flow();
4672  _set_indentation(m_evt_handler->m_parent->indref);
4673  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4674  _line_progressed(1);
4675  goto mapjson_finish;
4676  }
4677  case '{':
4678  {
4679  _c4dbgp("mapjson[RVAL]: start val mapjson");
4680  addrem_flags(RNXT, RVAL);
4681  m_evt_handler->begin_map_val_flow();
4682  _set_indentation(m_evt_handler->m_parent->indref);
4683  addrem_flags(RKEY, RNXT);
4684  _line_progressed(1);
4685  // keep going in this function
4686  break;
4687  }
4688  default:
4689  {
4690  ScannedScalar sc;
4691  if(_scan_scalar_map_json(&sc))
4692  {
4693  _c4dbgp("mapjson[RVAL]: plain scalar.");
4694  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4695  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4696  addrem_flags(RNXT, RVAL);
4697  }
4698  else
4699  {
4700  _c4err("parse error");
4701  }
4702  break;
4703  }
4704  }
4705  }
4706  else if(has_any(RKCL)) // read the key colon
4707  {
4708  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4709  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4710  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4711  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4712  const char first = rem.str[0];
4713  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4714  if(first == ':')
4715  {
4716  _c4dbgp("mapjson[RKCL]: found the colon");
4717  addrem_flags(RVAL, RKCL);
4718  _line_progressed(1);
4719  }
4720  else
4721  {
4722  _c4err("parse error");
4723  }
4724  }
4725  else if(has_any(RNXT))
4726  {
4727  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4728  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4729  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4730  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4731  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4732  if(rem.begins_with(','))
4733  {
4734  _c4dbgp("mapjson[RNXT]: expect next keyval");
4735  m_evt_handler->add_sibling();
4736  addrem_flags(RKEY, RNXT);
4737  _line_progressed(1);
4738  }
4739  else if(rem.begins_with('}'))
4740  {
4741  _c4dbgp("mapjson[RNXT]: end!");
4742  m_evt_handler->end_map();
4743  _line_progressed(1);
4744  goto mapjson_finish;
4745  }
4746  else
4747  {
4748  _c4err("parse error");
4749  }
4750  }
4751 
4752  mapjson_again:
4753  _c4dbgt("mapjson: go again", 0);
4754  if(_finished_line())
4755  {
4756  if(C4_LIKELY(!_finished_file()))
4757  {
4758  _line_ended();
4759  _scan_line();
4760  _c4dbgnextline();
4761  }
4762  else
4763  {
4764  _c4err("missing terminating }");
4765  }
4766  }
4767  goto mapjson_start;
4768 
4769  mapjson_finish:
4770  _c4dbgp("mapjson: finish");
4771 }
4772 
4773 
4774 //-----------------------------------------------------------------------------
4775 
/** Handle the RSEQIMAP state: a map opened from inside a flow sequence
 * (see the "actually seqimap" transitions in _handle_seq_flow()).
 *
 * Exactly one of RVAL, RNXT, QMRK or RKCL must be set on entry (asserted
 * below); the function dispatches on that flag and on the first remaining
 * character of the current line:
 *  - RVAL: expecting the value of the single key/value pair
 *  - RNXT: a child container was read as value; expecting ',' or ']'
 *  - QMRK: expecting the key following an explicit '?' marker
 *  - RKCL: key was read; expecting the ':' (or ','/']' for a missing value)
 *
 * The function loops through the seqimap_start label until it either ends
 * the map (end_map()) or starts a child flow container, and then returns.
 * Any unexpected character is reported through _c4err("parse error").
 */
4776 template<class EventHandler>
4777 void ParseEngine<EventHandler>::_handle_seq_imap()
4778 {
4779 seqimap_start:
4780  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4781 
      // state invariants: RSEQIMAP set, RKEY clear, exactly one of
      // RVAL/RNXT/QMRK/RKCL set, and at least three states on the stack
4782  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4783  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4784  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4785  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4786  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4787 
4788  _handle_flow_skip_whitespace();
      // snapshot of the remainder of the current line, taken after the
      // whitespace skip above
4789  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: try the next one
4790  if(!rem.len)
4791  goto seqimap_again;
4792 
4793  if(has_any(RVAL))
4794  {
4795  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4796  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4797  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4798  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4799  const char first = rem.str[0];
4800  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4801  ScannedScalar sc;
      // any scalar value completes the pair: the map is ended right away
4802  if(first == '\'')
4803  {
4804  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4805  sc = _scan_scalar_squot();
4806  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4807  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4808  m_evt_handler->end_map();
4809  goto seqimap_finish;
4810  }
4811  else if(first == '"')
4812  {
4813  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4814  sc = _scan_scalar_dquot();
4815  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4816  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4817  m_evt_handler->end_map();
4818  goto seqimap_finish;
4819  }
4820  // block scalars (ie | and >) cannot appear in flow containers
4821  else if(_scan_scalar_plain_map_flow(&sc))
4822  {
4823  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4824  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4825  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4826  m_evt_handler->end_map();
4827  goto seqimap_finish;
4828  }
4829  else if(first == '[')
4830  {
4831  _c4dbgp("seqimap[RVAL]: start child seqflow");
      // flags are changed both before and after begin_seq_val_flow();
      // presumably the first call marks this state as RNXT for when the
      // child returns and the second configures the new child state
      // -- TODO confirm against the event handler
4832  addrem_flags(RNXT, RVAL);
4833  m_evt_handler->begin_seq_val_flow();
4834  addrem_flags(RVAL, RNXT|RSEQIMAP);
4835  _set_indentation(m_evt_handler->m_parent->indref);
4836  _line_progressed(1);
4837  goto seqimap_finish;
4838  }
4839  else if(first == '{')
4840  {
4841  _c4dbgp("seqimap[RVAL]: start child mapflow");
4842  addrem_flags(RNXT, RVAL);
4843  m_evt_handler->begin_map_val_flow();
4844  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4845  _set_indentation(m_evt_handler->m_parent->indref);
4846  _line_progressed(1);
4847  goto seqimap_finish;
4848  }
4849  else if(first == ',' || first == ']')
4850  {
      // missing value: an empty plain scalar is set. Note the ','/']'
      // is not consumed here (no _line_progressed() call).
4851  _c4dbgp("seqimap[RVAL]: finish without val.");
4852  m_evt_handler->set_val_scalar_plain_empty();
4853  m_evt_handler->end_map();
4854  goto seqimap_finish;
4855  }
4856  else if(first == '&')
4857  {
      // anchor: flags stay in RVAL, so the value proper is read on the
      // next pass through seqimap_start
4858  csubstr anchor = _scan_anchor();
4859  _c4dbgp("seqimap[RVAL]: anchor!");
4860  m_evt_handler->set_val_anchor(anchor);
4861  }
4862  else if(first == '*')
4863  {
4864  csubstr ref = _scan_ref_seq();
4865  _c4dbgp("seqimap[RVAL]: ref!");
4866  m_evt_handler->set_val_ref(ref);
4867  addrem_flags(RNXT, RVAL);
4868  }
4869  else
4870  {
4871  _c4err("parse error");
4872  }
4873  }
4874  else if(has_any(RNXT))
4875  {
4876  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4877  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4878  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4879  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4880  const char first = rem.str[0];
4881  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4882  if(first == ',' || first == ']')
4883  {
4884  // we may get here because a map or a seq started and we
4885  // return later
4886  _c4dbgp("seqimap: done");
4887  m_evt_handler->end_map();
4888  goto seqimap_finish;
4889  }
4890  else
4891  {
4892  _c4err("parse error");
4893  }
4894  }
4895  else if(has_any(QMRK))
4896  {
4897  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4898  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4899  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4900  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4901  const char first = rem.str[0];
4902  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4903  ScannedScalar sc;
      // a scalar key moves the state from QMRK to RKCL (expect ':')
4904  if(first == '\'')
4905  {
4906  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4907  sc = _scan_scalar_squot();
4908  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4909  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4910  addrem_flags(RKCL, QMRK);
4911  goto seqimap_again;
4912  }
4913  else if(first == '"')
4914  {
4915  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4916  sc = _scan_scalar_dquot();
4917  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4918  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4919  addrem_flags(RKCL, QMRK);
4920  goto seqimap_again;
4921  }
4922  // block scalars (ie | and >) cannot appear in flow containers
4923  else if(_scan_scalar_plain_map_flow(&sc))
4924  {
4925  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4926  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4927  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4928  addrem_flags(RKCL, QMRK);
4929  goto seqimap_again;
4930  }
4931  else if(first == '[')
4932  {
      // container used as key: this state is left in RKCL and a child
      // flow sequence is started
4933  _c4dbgp("seqimap[QMRK]: start child seqflow");
4934  addrem_flags(RKCL, QMRK);
4935  m_evt_handler->begin_seq_key_flow();
4936  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4937  _set_indentation(m_evt_handler->m_parent->indref);
4938  _line_progressed(1);
4939  goto seqimap_finish;
4940  }
4941  else if(first == '{')
4942  {
4943  _c4dbgp("seqimap[QMRK]: start child mapflow");
4944  addrem_flags(RKCL, QMRK);
4945  m_evt_handler->begin_map_key_flow();
4946  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4947  _set_indentation(m_evt_handler->m_parent->indref);
4948  _line_progressed(1);
4949  goto seqimap_finish;
4950  }
4951  else if(first == ',' || first == ']')
4952  {
      // neither key nor value present: both are set to empty plain
      // scalars; the ','/']' is not consumed here
4953  _c4dbgp("seqimap[QMRK]: finish without key.");
4954  m_evt_handler->set_key_scalar_plain_empty();
4955  m_evt_handler->set_val_scalar_plain_empty();
4956  m_evt_handler->end_map();
4957  goto seqimap_finish;
4958  }
4959  else if(first == '&')
4960  {
4961  csubstr anchor = _scan_anchor();
4962  _c4dbgp("seqimap[QMRK]: anchor!");
4963  m_evt_handler->set_key_anchor(anchor);
4964  }
4965  else if(first == '*')
4966  {
4967  csubstr ref = _scan_ref_seq();
4968  _c4dbgp("seqimap[QMRK]: ref!");
4969  m_evt_handler->set_key_ref(ref);
4970  addrem_flags(RKCL, QMRK);
4971  }
4972  else
4973  {
4974  _c4err("parse error");
4975  }
4976  }
4977  else if(has_any(RKCL))
4978  {
4979  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4980  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4981  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4982  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4983  const char first = rem.str[0];
4984  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4985  if(first == ':')
4986  {
      // consume the ':' and switch to reading the value
4987  _c4dbgp("seqimap[RKCL]: found ':'");
4988  addrem_flags(RVAL, RKCL);
4989  _line_progressed(1);
4990  goto seqimap_again;
4991  }
4992  else if(first == ',' || first == ']')
4993  {
4994  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4995  m_evt_handler->set_val_scalar_plain_empty();
4996  m_evt_handler->end_map();
4997  goto seqimap_finish;
4998  }
4999  else
5000  {
5001  _c4err("parse error");
5002  }
5003  }
5004 
      // loop tail: when the current line is exhausted, load the next one
      // (or fail if the file has ended), then dispatch again
5005  seqimap_again:
5006  _c4dbgt("seqimap: go again", 0);
5007  if(_finished_line())
5008  {
5009  if(C4_LIKELY(!_finished_file()))
5010  {
5011  _line_ended();
5012  _scan_line();
5013  _c4dbgnextline();
5014  }
5015  else
5016  {
      // NOTE(review): the sibling flow handlers report
      // "missing terminating ]" / "missing terminating }" at this
      // point; here the message is the generic one
5017  _c4err("parse error");
5018  }
5019  }
5020  goto seqimap_start;
5021 
5022  seqimap_finish:
5023  _c4dbgp("seqimap: finish");
5024 }
5025 
5026 
5027 //-----------------------------------------------------------------------------
5028 
/** Handle the contents of a flow sequence (state flags RSEQ|FLOW).
 *
 * Exactly one of RVAL or RNXT must be set on entry (asserted below):
 *  - RVAL: expecting a value (scalar, child container, ref, anchor, tag),
 *          a closing ']' or the start of a key/value pair (':' or '?')
 *  - RNXT: a value was read; expecting ',', ']' or ':'
 *
 * The function loops through the seqflow_start label and returns when the
 * sequence is ended (end_seq()), when a child flow map is started, or when
 * the state is switched to RSEQIMAP. A child flow sequence does not cause
 * a return: its state also satisfies the assertions of this function, so
 * the loop simply continues. Unexpected characters are reported through
 * _c4err("parse error"); reaching the end of the file raises
 * "missing terminating ]".
 */
5029 template<class EventHandler>
5030 void ParseEngine<EventHandler>::_handle_seq_flow()
5031 {
5032 seqflow_start:
5033  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5034 
      // state invariants: flow sequence, not reading a key, exactly one
      // of RVAL/RNXT set, and a valid reference indentation
5035  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5036  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5037  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5038  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5039  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
5040  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
5041 
5042  _handle_flow_skip_whitespace();
5043  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
5044  if(!m_evt_handler->m_curr->line_contents.rem.len)
5045  goto seqflow_again;
5046 
5047  if(has_any(RVAL))
5048  {
5049  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5050  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5051  ScannedScalar sc;
      // scalar values: set the value and switch to RNXT
5052  if(first == '\'')
5053  {
5054  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5055  sc = _scan_scalar_squot();
5056  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5057  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5058  addrem_flags(RNXT, RVAL);
5059  }
5060  else if(first == '"')
5061  {
5062  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5063  sc = _scan_scalar_dquot();
5064  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5065  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5066  addrem_flags(RNXT, RVAL);
5067  }
5068  // block scalars (ie | and >) cannot appear in flow containers
5069  else if(_scan_scalar_plain_seq_flow(&sc))
5070  {
5071  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5072  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5073  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5074  addrem_flags(RNXT, RVAL);
5075  }
5076  else if(first == '[')
5077  {
      // nested flow sequence. Flags are set to RNXT before the begin
      // call and back to RVAL after it; presumably the first applies
      // to this (parent) state and the second to the new child state
      // -- TODO confirm against the event handler. No return here:
      // the loop continues with the child sequence.
5078  _c4dbgp("seqflow[RVAL]: start child seqflow");
5079  addrem_flags(RNXT, RVAL);
5080  m_evt_handler->begin_seq_val_flow();
5081  _set_indentation(m_evt_handler->m_parent->indref);
5082  addrem_flags(RVAL, RNXT);
5083  _line_progressed(1);
5084  }
5085  else if(first == '{')
5086  {
      // nested flow map: flags become RMAP|RKEY, which this function
      // does not handle, so return
5087  _c4dbgp("seqflow[RVAL]: start child mapflow");
5088  addrem_flags(RNXT, RVAL);
5089  m_evt_handler->begin_map_val_flow();
5090  _set_indentation(m_evt_handler->m_parent->indref);
5091  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5092  _line_progressed(1);
5093  goto seqflow_finish;
5094  }
5095  else if(first == ']') // this happens on a trailing comma like ", ]"
5096  {
5097  _c4dbgp("seqflow[RVAL]: end!");
5098  _line_progressed(1);
5099  m_evt_handler->end_seq();
5100  goto seqflow_finish;
5101  }
5102  else if(first == '*')
5103  {
5104  csubstr ref = _scan_ref_seq();
5105  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5106  m_evt_handler->set_val_ref(ref);
5107  addrem_flags(RNXT, RVAL);
5108  }
5109  else if(first == '&')
5110  {
      // anchor: stays in RVAL so the anchored value is read next. If
      // _maybe_scan_following_comma() reports a comma, the anchored
      // value is an empty scalar and a sibling is added (presumably
      // the helper also consumes the comma -- TODO confirm).
5111  csubstr anchor = _scan_anchor();
5112  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5113  m_evt_handler->set_val_anchor(anchor);
5114  if(_maybe_scan_following_comma())
5115  {
5116  _c4dbgp("seqflow[RVAL]: empty scalar!");
5117  m_evt_handler->set_val_scalar_plain_empty();
5118  m_evt_handler->add_sibling();
5119  }
5120  }
5121  else if(first == '!')
5122  {
      // tag: same handling as the anchor branch above, after
      // validating the tag with _check_tag()
5123  csubstr tag = _scan_tag();
5124  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5125  _check_tag(tag);
5126  m_evt_handler->set_val_tag(tag);
5127  if(_maybe_scan_following_comma())
5128  {
5129  _c4dbgp("seqflow[RVAL]: empty scalar!");
5130  m_evt_handler->set_val_scalar_plain_empty();
5131  m_evt_handler->add_sibling();
5132  }
5133  }
5134  else if(first == ':')
5135  {
      // ':' with no preceding key: open a map with an empty key and
      // hand over to the RSEQIMAP state, expecting the value
5136  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5137  addrem_flags(RNXT, RVAL);
5138  m_evt_handler->begin_map_val_flow();
5139  _set_indentation(m_evt_handler->m_parent->indref);
5140  m_evt_handler->set_key_scalar_plain_empty();
5141  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5142  _line_progressed(1);
5143  goto seqflow_finish;
5144  }
5145  else if(first == '?')
5146  {
      // explicit key marker: open a map and hand over to the RSEQIMAP
      // state in QMRK mode; m_was_inside_qmrk records that a '?' was seen
5147  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5148  addrem_flags(RNXT, RVAL);
5149  m_was_inside_qmrk = true;
5150  m_evt_handler->begin_map_val_flow();
5151  _set_indentation(m_evt_handler->m_parent->indref);
5152  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5153  _line_progressed(1);
5154  _maybe_skip_whitespace_tokens();
5155  goto seqflow_finish;
5156  }
5157  else
5158  {
5159  _c4err("parse error");
5160  }
5161  }
5162  else // RNXT
5163  {
5164  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5165  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5166  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5167  if(first == ',')
5168  {
5169  _c4dbgp("seqflow[RNXT]: expect next val");
5170  addrem_flags(RVAL, RNXT);
5171  m_evt_handler->add_sibling();
5172  _line_progressed(1);
5173  }
5174  else if(first == ']')
5175  {
5176  _c4dbgp("seqflow[RNXT]: end!");
5177  m_evt_handler->end_seq();
5178  _line_progressed(1);
5179  goto seqflow_finish;
5180  }
5181  else if(first == ':')
5182  {
      // the value just read turns out to be the key of a map: the
      // handler is asked to convert it, and the state becomes
      // RSEQIMAP expecting the value
5183  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5184  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5185  _set_indentation(m_evt_handler->m_parent->indref);
5186  _line_progressed(1);
5187  addrem_flags(RSEQIMAP|RVAL, RNXT);
5188  goto seqflow_finish;
5189  }
5190  else
5191  {
5192  _c4err("parse error");
5193  }
5194  }
5195 
      // loop tail: when the current line is exhausted, load the next one
      // (or fail if the file ended before the closing ']'), then dispatch
      // again
5196  seqflow_again:
5197  _c4dbgt("seqflow: go again", 0);
5198  if(_finished_line())
5199  {
5200  if(C4_LIKELY(!_finished_file()))
5201  {
5202  _line_ended();
5203  _scan_line();
5204  _c4dbgnextline();
5205  }
5206  else
5207  {
5208  _c4err("missing terminating ]");
5209  }
5210  }
5211  goto seqflow_start;
5212 
5213  seqflow_finish:
5214  _c4dbgp("seqflow: finish");
5215 }
5216 
5217 
5218 //-----------------------------------------------------------------------------
5219 
/** Handle the contents of a flow map (state flags RMAP|FLOW).
 *
 * Exactly one of RKEY, RKCL, RVAL, RNXT or QMRK must be set on entry
 * (asserted below); the function dispatches on that flag and on the first
 * remaining character of the current line:
 *  - RKEY: expecting a key (or '?', ':', ',', '}')
 *  - RKCL: key was read; expecting ':' (or ','/'}' for a missing value)
 *  - RVAL: expecting a value
 *  - RNXT: value was read; expecting ',' or '}'
 *  - QMRK: a '?' was read; expecting the explicit key
 *
 * The function loops through the mapflow_start label and returns when the
 * map is ended (end_map()) or when a child flow sequence is started. A
 * child flow map does not cause a return: the loop continues with the
 * child map ("keep going in this function"). Unexpected characters are
 * reported through _c4err("parse error"); reaching the end of the file
 * raises "missing terminating }".
 */
5220 template<class EventHandler>
5221 void ParseEngine<EventHandler>::_handle_map_flow()
5222 {
5223 mapflow_start:
5224  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5225 
      // state invariants: flow map, exactly one sub-state flag set
5226  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5227  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5228  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5229  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5230 
5231  _handle_flow_skip_whitespace();
      // snapshot of the remainder of the current line, taken after the
      // whitespace skip above
5232  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: try the next one
5233  if(!rem.len)
5234  goto mapflow_again;
5235 
5236  if(has_any(RKEY))
5237  {
5238  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5239  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5240  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5241  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5242  const char first = rem.str[0];
5243  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5244  ScannedScalar sc;
      // scalar keys: set the key and switch to RKCL (expect ':')
5245  if(first == '\'')
5246  {
5247  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5248  sc = _scan_scalar_squot();
5249  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5250  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5251  addrem_flags(RKCL, RKEY|QMRK);
5252  }
5253  else if(first == '"')
5254  {
5255  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5256  sc = _scan_scalar_dquot();
5257  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5258  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5259  addrem_flags(RKCL, RKEY|QMRK);
5260  }
5261  // block scalars (ie | and >) cannot appear in flow containers
5262  else if(_scan_scalar_plain_map_flow(&sc))
5263  {
5264  _c4dbgp("mapflow[RKEY]: plain scalar");
5265  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5266  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5267  addrem_flags(RKCL, RKEY|QMRK);
5268  }
5269  else if(first == '?')
5270  {
      // explicit key marker: consume it and read the key in QMRK mode
5271  _c4dbgp("mapflow[RKEY]: explicit key");
5272  _line_progressed(1);
5273  addrem_flags(QMRK, RKEY);
5274  _maybe_skip_whitespace_tokens();
5275  }
5276  else if(first == ':')
5277  {
      // ':' with no key: the key is an empty plain scalar
5278  _c4dbgp("mapflow[RKEY]: setting empty key");
5279  m_evt_handler->set_key_scalar_plain_empty();
5280  addrem_flags(RVAL, RKEY|QMRK);
5281  _line_progressed(1);
5282  _maybe_skip_whitespace_tokens();
5283  }
5284  else if(first == ',')
5285  {
      // the ',' is not consumed here: the state switches to RNXT and
      // the RNXT branch consumes it on the next pass
5286  _c4dbgp("mapflow[RKEY]: empty key+val!");
5287  m_evt_handler->set_key_scalar_plain_empty();
5288  m_evt_handler->set_val_scalar_plain_empty();
5289  addrem_flags(RNXT, RKEY|QMRK);
5290  // keep going in this function
5291  }
5292  else if(first == '}') // this happens on a trailing comma like ", }"
5293  {
5294  _c4dbgp("mapflow[RKEY]: end!");
5295  m_evt_handler->end_map();
5296  _line_progressed(1);
5297  goto mapflow_finish;
5298  }
5299  else if(first == '&')
5300  {
      // anchor: stays in RKEY so the anchored key is read next
5301  csubstr anchor = _scan_anchor();
5302  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5303  m_evt_handler->set_key_anchor(anchor);
5304  }
5305  else if(first == '*')
5306  {
5307  csubstr ref = _scan_ref_map();
5308  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5309  m_evt_handler->set_key_ref(ref);
5310  addrem_flags(RKCL, RKEY);
5311  }
5312  else if(first == '[')
5313  {
5314  // RYML's tree cannot store container keys, but that's
5315  // handled inside the tree sink. Other sink types may be
5316  // able to handle it.
5317  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5318  addrem_flags(RKCL, RKEY);
5319  m_evt_handler->begin_seq_key_flow();
5320  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5321  _set_indentation(m_evt_handler->m_parent->indref);
5322  _line_progressed(1);
5323  goto mapflow_finish;
5324  }
5325  else if(first == '{')
5326  {
5327  // RYML's tree cannot store container keys, but that's
5328  // handled inside the tree sink. Other sink types may be
5329  // able to handle it.
5330  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5331  addrem_flags(RKCL, RKEY);
5332  m_evt_handler->begin_map_key_flow();
5333  addrem_flags(RKEY, RVAL|RKCL);
5334  _set_indentation(m_evt_handler->m_parent->indref);
5335  _line_progressed(1);
5336  // keep going in this function
5337  }
5338  else if(first == '!')
5339  {
      // tag: validated with _check_tag(); stays in RKEY so the tagged
      // key is read next
5340  csubstr tag = _scan_tag();
5341  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5342  _check_tag(tag);
5343  m_evt_handler->set_key_tag(tag);
5344  }
5345  else
5346  {
5347  _c4err("parse error");
5348  }
5349  }
5350  else if(has_any(RKCL)) // read the key colon
5351  {
5352  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5353  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5354  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5355  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5356  const char first = rem.str[0];
5357  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5358  if(first == ':')
5359  {
5360  _c4dbgp("mapflow[RKCL]: found the colon");
5361  addrem_flags(RVAL, RKCL);
5362  _line_progressed(1);
5363  }
5364  else if(first == '}')
5365  {
      // key without ':' and value at the end of the map: the value is
      // an empty plain scalar
5366  _c4dbgp("mapflow[RKCL]: end with missing val!");
5367  addrem_flags(RVAL, RKCL);
5368  m_evt_handler->set_val_scalar_plain_empty();
5369  m_evt_handler->end_map();
5370  _line_progressed(1);
5371  goto mapflow_finish;
5372  }
5373  else if(first == ',')
5374  {
      // key without ':' and value: set an empty value, consume the ','
      // and go straight to reading the next key
5375  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5376  m_evt_handler->set_val_scalar_plain_empty();
5377  m_evt_handler->add_sibling();
5378  addrem_flags(RKEY, RKCL);
5379  _line_progressed(1);
5380  }
5381  else
5382  {
5383  _c4err("parse error");
5384  }
5385  }
5386  else if(has_any(RVAL))
5387  {
5388  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5389  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5390  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5391  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5392  const char first = rem.str[0];
5393  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5394  ScannedScalar sc;
      // scalar values: set the value and switch to RNXT
5395  if(first == '\'')
5396  {
5397  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5398  sc = _scan_scalar_squot();
5399  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5400  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5401  addrem_flags(RNXT, RVAL);
5402  }
5403  else if(first == '"')
5404  {
5405  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5406  sc = _scan_scalar_dquot();
5407  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5408  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5409  addrem_flags(RNXT, RVAL);
5410  }
5411  // block scalars (ie | and >) cannot appear in flow containers
5412  else if(_scan_scalar_plain_map_flow(&sc))
5413  {
5414  _c4dbgp("mapflow[RVAL]: plain scalar.");
5415  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5416  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5417  addrem_flags(RNXT, RVAL);
5418  }
5419  else if(first == '[')
5420  {
      // child flow sequence as value: flags become RSEQ|RVAL, which
      // this function does not handle, so return
5421  _c4dbgp("mapflow[RVAL]: start val seqflow");
5422  addrem_flags(RNXT, RVAL);
5423  m_evt_handler->begin_seq_val_flow();
5424  _set_indentation(m_evt_handler->m_parent->indref);
5425  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5426  _line_progressed(1);
5427  goto mapflow_finish;
5428  }
5429  else if(first == '{')
5430  {
5431  _c4dbgp("mapflow[RVAL]: start val mapflow");
5432  addrem_flags(RNXT, RVAL);
5433  m_evt_handler->begin_map_val_flow();
5434  _set_indentation(m_evt_handler->m_parent->indref);
5435  addrem_flags(RKEY, RNXT);
5436  _line_progressed(1);
5437  // keep going in this function
5438  }
5439  else if(first == '}')
5440  {
      // map closed right after the ':': the value is an empty plain
      // scalar
5441  _c4dbgp("mapflow[RVAL]: end!");
5442  m_evt_handler->set_val_scalar_plain_empty();
5443  m_evt_handler->end_map();
5444  _line_progressed(1);
5445  goto mapflow_finish;
5446  }
5447  else if(first == ',')
5448  {
      // the ',' is not consumed here: the RNXT branch consumes it on
      // the next pass
5449  _c4dbgp("mapflow[RVAL]: empty val!");
5450  m_evt_handler->set_val_scalar_plain_empty();
5451  addrem_flags(RNXT, RVAL);
5452  // keep going in this function
5453  }
5454  else if(first == '*')
5455  {
      // NOTE(review): the debug text says "key ref" but this branch
      // sets a value ref (set_val_ref) -- looks like a copy/paste
      // leftover in the message
5456  csubstr ref = _scan_ref_map();
5457  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5458  m_evt_handler->set_val_ref(ref);
5459  addrem_flags(RNXT, RVAL);
5460  }
5461  else if(first == '&')
5462  {
      // NOTE(review): the debug text says "key anchor" but this branch
      // sets a value anchor (set_val_anchor) -- same leftover as above
5463  csubstr anchor = _scan_anchor();
5464  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5465  m_evt_handler->set_val_anchor(anchor);
5466  }
5467  else if(first == '!')
5468  {
5469  csubstr tag = _scan_tag();
5470  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5471  _check_tag(tag);
5472  m_evt_handler->set_val_tag(tag);
5473  }
5474  else
5475  {
5476  _c4err("parse error");
5477  }
5478  }
5479  else if(has_any(RNXT))
5480  {
5481  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5482  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5484  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5485  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5486  if(rem.begins_with(','))
5487  {
5488  _c4dbgp("mapflow[RNXT]: expect next keyval");
5489  m_evt_handler->add_sibling();
5490  addrem_flags(RKEY, RNXT);
5491  _line_progressed(1);
5492  }
5493  else if(rem.begins_with('}'))
5494  {
5495  _c4dbgp("mapflow[RNXT]: end!");
5496  m_evt_handler->end_map();
5497  _line_progressed(1);
5498  goto mapflow_finish;
5499  }
5500  else
5501  {
5502  _c4err("parse error");
5503  }
5504  }
5505  else if(has_any(QMRK))
5506  {
      // explicit key after '?': the branches parallel the RKEY ones
      // above, except that '?' itself is not accepted here
5507  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5508  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5509  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5510  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5511  const char first = rem.str[0];
5512  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5513  ScannedScalar sc;
5514  if(first == '\'')
5515  {
5516  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5517  sc = _scan_scalar_squot();
5518  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5519  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5520  addrem_flags(RKCL, QMRK);
5521  }
5522  else if(first == '"')
5523  {
5524  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5525  sc = _scan_scalar_dquot();
5526  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5527  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5528  addrem_flags(RKCL, QMRK);
5529  }
5530  // block scalars (ie | and >) cannot appear in flow containers
5531  else if(_scan_scalar_plain_map_flow(&sc))
5532  {
5533  _c4dbgp("mapflow[QMRK]: plain scalar");
5534  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5535  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5536  addrem_flags(RKCL, QMRK);
5537  }
5538  else if(first == ':')
5539  {
5540  _c4dbgp("mapflow[QMRK]: setting empty key");
5541  m_evt_handler->set_key_scalar_plain_empty();
5542  addrem_flags(RVAL, QMRK);
5543  _line_progressed(1);
5544  _maybe_skip_whitespace_tokens();
5545  }
5546  else if(first == '}') // this happens on a trailing comma like ", }"
5547  {
      // unlike the RKEY '}' branch, an empty key and an empty value
      // are set before ending the map
5548  _c4dbgp("mapflow[QMRK]: end!");
5549  m_evt_handler->set_key_scalar_plain_empty();
5550  m_evt_handler->set_val_scalar_plain_empty();
5551  m_evt_handler->end_map();
5552  _line_progressed(1);
5553  goto mapflow_finish;
5554  }
5555  else if(first == ',')
5556  {
      // the ',' is not consumed here: the RNXT branch consumes it on
      // the next pass
5557  _c4dbgp("mapflow[QMRK]: empty key+val!");
5558  m_evt_handler->set_key_scalar_plain_empty();
5559  m_evt_handler->set_val_scalar_plain_empty();
5560  addrem_flags(RNXT, QMRK);
5561  }
5562  else if(first == '&')
5563  {
5564  csubstr anchor = _scan_anchor();
5565  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5566  m_evt_handler->set_key_anchor(anchor);
5567  }
5568  else if(first == '*')
5569  {
5570  csubstr ref = _scan_ref_map();
5571  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5572  m_evt_handler->set_key_ref(ref);
5573  addrem_flags(RKCL, QMRK);
5574  }
5575  else if(first == '[')
5576  {
5577  // RYML's tree cannot store container keys, but that's
5578  // handled inside the tree sink. Other sink types may be
5579  // able to handle it.
5580  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5581  addrem_flags(RKCL, QMRK);
5582  m_evt_handler->begin_seq_key_flow();
5583  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5584  _set_indentation(m_evt_handler->m_parent->indref);
5585  _line_progressed(1);
5586  goto mapflow_finish;
5587  }
5588  else if(first == '{')
5589  {
5590  // RYML's tree cannot store container keys, but that's
5591  // handled inside the tree sink. Other sink types may be
5592  // able to handle it.
5593  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5594  addrem_flags(RKCL, QMRK);
5595  m_evt_handler->begin_map_key_flow();
5596  _set_indentation(m_evt_handler->m_parent->indref);
5597  addrem_flags(RKEY, RKCL);
5598  _line_progressed(1);
5599  // keep going in this function
5600  }
5601  else if(first == '!')
5602  {
5603  csubstr tag = _scan_tag();
5604  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5605  _check_tag(tag);
5606  m_evt_handler->set_key_tag(tag);
5607  }
5608  else
5609  {
5610  _c4err("parse error");
5611  }
5612  }
5613 
      // loop tail: when the current line is exhausted, load the next one
      // (or fail if the file ended before the closing '}'), then dispatch
      // again
5614  mapflow_again:
5615  _c4dbgt("mapflow: go again", 0);
5616  if(_finished_line())
5617  {
5618  if(C4_LIKELY(!_finished_file()))
5619  {
5620  _line_ended();
5621  _scan_line();
5622  _c4dbgnextline();
5623  }
5624  else
5625  {
5626  _c4err("missing terminating }");
5627  }
5628  }
5629  goto mapflow_start;
5630 
5631  mapflow_finish:
5632  _c4dbgp("mapflow: finish");
5633 }
5634 
5635 
5636 //-----------------------------------------------------------------------------
5637 
5638 template<class EventHandler>
5639 void ParseEngine<EventHandler>::_handle_seq_block()
5640 {
5641 seqblck_start:
5642  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5643 
5644  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5645  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5646  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5647  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5648 
5649  _maybe_skip_comment();
5650  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5651  if(!rem.len)
5652  goto seqblck_again;
5653 
5654  if(has_any(RVAL))
5655  {
5656  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5657  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5658  if(m_evt_handler->m_curr->at_line_beginning())
5659  {
5660  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5661  if(m_evt_handler->m_curr->indentation_ge())
5662  {
5663  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5664  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5665  rem = m_evt_handler->m_curr->line_contents.rem;
5666  if(!rem.len)
5667  goto seqblck_again;
5668  }
5669  else if(m_evt_handler->m_curr->indentation_lt())
5670  {
5671  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5672  _handle_indentation_pop_from_block_seq();
5673  goto seqblck_finish;
5674  }
5675  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5676  {
5677  _c4dbgp("seqblck[RVAL]: empty line!");
5678  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5679  goto seqblck_again;
5680  }
5681  }
5682  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5683  else
5684  {
5685  // accommodate annotation on the previous line. eg:
5686  // - &elm
5687  // foo # <-- on this line
5688  // - &elm
5689  // &foo foo: bar # <-- on this line
5690  if(rem.str[0] == ' ')
5691  {
5692  if(_handle_indentation_from_annotations())
5693  {
5694  _c4dbgp("seqblck[RVAL]: annotations!");
5695  rem = m_evt_handler->m_curr->line_contents.rem;
5696  if(!rem.len)
5697  goto seqblck_again;
5698  }
5699  }
5700  }
5701  #endif
5702  _RYML_CB_ASSERT(callbacks(), rem.len);
5703  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5704  const char first = rem.str[0];
5705  const size_t startline = m_evt_handler->m_curr->pos.line;
5706  // warning: the gcc optimizer on x86 builds is brittle with
5707  // this function:
5708  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
5709  ScannedScalar sc;
5710  if(first == '\'')
5711  {
5712  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5713  sc = _scan_scalar_squot();
5714  if(!_maybe_scan_following_colon())
5715  {
5716  _c4dbgp("seqblck[RVAL]: set as val");
5717  _handle_annotations_before_blck_val_scalar();
5718  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5719  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5720  addrem_flags(RNXT, RVAL);
5721  }
5722  else
5723  {
5724  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5725  addrem_flags(RNXT, RVAL);
5726  _handle_annotations_before_start_mapblck(startline);
5727  _handle_colon();
5728  m_evt_handler->begin_map_val_block();
5729  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5730  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5731  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5732  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5733  _maybe_skip_whitespace_tokens();
5734  goto seqblck_finish;
5735  }
5736  }
5737  else if(first == '"')
5738  {
5739  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5740  sc = _scan_scalar_dquot();
5741  if(!_maybe_scan_following_colon())
5742  {
5743  _c4dbgp("seqblck[RVAL]: set as val");
5744  _handle_annotations_before_blck_val_scalar();
5745  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5746  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5747  addrem_flags(RNXT, RVAL);
5748  }
5749  else
5750  {
5751  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5752  addrem_flags(RNXT, RVAL);
5753  _handle_annotations_before_start_mapblck(startline);
5754  _handle_colon();
5755  m_evt_handler->begin_map_val_block();
5756  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5757  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5758  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5759  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5760  _maybe_skip_whitespace_tokens();
5761  goto seqblck_finish;
5762  }
5763  }
5764  // block scalars can only appear as keys when in QMRK scope
5765  // (ie, after ? tokens), so no need to scan following colon in
5766  // here.
5767  else if(first == '|')
5768  {
5769  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5770  ScannedBlock sb;
5771  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5772  _handle_annotations_before_blck_val_scalar();
5773  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5774  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5775  addrem_flags(RNXT, RVAL);
5776  }
5777  else if(first == '>')
5778  {
5779  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5780  ScannedBlock sb;
5781  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5782  _handle_annotations_before_blck_val_scalar();
5783  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5784  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5785  addrem_flags(RNXT, RVAL);
5786  }
5787  else if(_scan_scalar_plain_seq_blck(&sc))
5788  {
5789  _c4dbgp("seqblck[RVAL]: plain scalar.");
5790  if(!_maybe_scan_following_colon())
5791  {
5792  _c4dbgp("seqblck[RVAL]: set as val");
5793  _handle_annotations_before_blck_val_scalar();
5794  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5795  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5796  addrem_flags(RNXT, RVAL);
5797  }
5798  else
5799  {
5800  if(startindent > m_evt_handler->m_curr->indref)
5801  {
5802  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5803  addrem_flags(RNXT, RVAL);
5804  _handle_annotations_before_start_mapblck(startline);
5805  _handle_colon();
5806  m_evt_handler->begin_map_val_block();
5807  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5808  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5809  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5810  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5811  _maybe_skip_whitespace_tokens();
5812  goto seqblck_finish;
5813  }
5814  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5815  {
5816  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5817  m_evt_handler->set_val_scalar_plain_empty();
5818  m_evt_handler->end_seq();
5819  m_evt_handler->add_sibling();
5820  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5821  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5822  addrem_flags(RVAL, RNXT|RKEY);
5823  _maybe_skip_whitespace_tokens();
5824  goto seqblck_finish;
5825  }
5826  else
5827  {
5828  _c4err("parse error");
5829  }
5830  }
5831  }
5832  else if(first == '[')
5833  {
5834  _c4dbgp("seqblck[RVAL]: start child seqflow");
5835  addrem_flags(RNXT, RVAL);
5836  _handle_annotations_before_blck_val_scalar();
5837  m_evt_handler->begin_seq_val_flow();
5838  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5839  _line_progressed(1);
5840  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5841  goto seqblck_finish;
5842  }
5843  else if(first == '{')
5844  {
5845  _c4dbgp("seqblck[RVAL]: start child mapflow");
5846  addrem_flags(RNXT, RVAL);
5847  _handle_annotations_before_blck_val_scalar();
5848  m_evt_handler->begin_map_val_flow();
5849  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5850  _line_progressed(1);
5851  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5852  goto seqblck_finish;
5853  }
5854  else if(first == '-')
5855  {
5856  if(startindent == m_evt_handler->m_curr->indref)
5857  {
5858  _c4dbgp("seqblck[RVAL]: prev val was empty");
5859  _handle_annotations_before_blck_val_scalar();
5860  m_evt_handler->set_val_scalar_plain_empty();
5861  // keep in RVAL, but for the next sibling
5862  m_evt_handler->add_sibling();
5863  }
5864  else
5865  {
5866  _c4dbgp("seqblck[RVAL]: start child seqblck");
5867  _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5868  addrem_flags(RNXT, RVAL);
5869  _handle_annotations_before_blck_val_scalar();
5870  m_evt_handler->begin_seq_val_block();
5871  addrem_flags(RVAL, RNXT);
5872  _save_indentation();
5873  // keep going on inside this function
5874  }
5875  _line_progressed(1);
5876  _maybe_skip_whitespace_tokens();
5877  }
5878  else if(first == ':')
5879  {
5880  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5881  addrem_flags(RNXT, RVAL);
5882  _handle_annotations_before_start_mapblck(startline);
5883  _handle_colon();
5884  m_evt_handler->begin_map_val_block();
5885  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5886  m_evt_handler->set_key_scalar_plain_empty();
5887  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5888  _line_progressed(1);
5889  _maybe_skip_whitespace_tokens();
5890  goto seqblck_finish;
5891  }
5892  else if(first == '&')
5893  {
5894  const csubstr anchor = _scan_anchor();
5895  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5896  // we need to buffer the anchors, as there may be two
5897  // consecutive anchors in here
5898  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5899  }
5900  else if(first == '*')
5901  {
5902  csubstr ref = _scan_ref_seq();
5903  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5904  if(!_maybe_scan_following_colon())
5905  {
5906  _c4dbgp("seqblck[RVAL]: set ref as val!");
5907  _handle_annotations_before_blck_val_scalar();
5908  m_evt_handler->set_val_ref(ref);
5909  addrem_flags(RNXT, RVAL);
5910  }
5911  else
5912  {
5913  _c4dbgp("seqblck[RVAL]: ref is key of map");
5914  addrem_flags(RNXT, RVAL);
5915  _handle_annotations_before_start_mapblck(startline);
5916  m_evt_handler->begin_map_val_block();
5917  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5918  m_evt_handler->set_key_ref(ref);
5919  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5920  _set_indentation(startindent);
5921  _maybe_skip_whitespace_tokens();
5922  goto seqblck_finish;
5923  }
5924  }
5925  else if(first == '!')
5926  {
5927  csubstr tag = _scan_tag();
5928  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5929  // we need to buffer the tags, as there may be two
5930  // consecutive tags in here
5931  _add_annotation(&m_pending_tags, tag, startindent, startline);
5932  }
5933  else if(first == '?')
5934  {
5935  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5936  addrem_flags(RNXT, RVAL);
5937  m_was_inside_qmrk = true;
5938  m_evt_handler->begin_map_val_block();
5939  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5940  _save_indentation();
5941  _line_progressed(1);
5942  _maybe_skip_whitespace_tokens();
5943  goto seqblck_finish;
5944  }
5945  else
5946  {
5947  _c4err("parse error");
5948  }
5949  }
5950  else // RNXT
5951  {
5952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5953  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5954  //
5955  // handle indentation
5956  //
5957  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5958  if(C4_LIKELY(_at_line_begin()))
5959  {
5960  _c4dbgp("seqblck[RNXT]: at line begin");
5961  if(m_evt_handler->m_curr->indentation_ge())
5962  {
5963  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5964  _line_progressed(m_evt_handler->m_curr->indref);
5965  _maybe_skip_whitespace_tokens();
5966  rem = m_evt_handler->m_curr->line_contents.rem;
5967  if(!rem.len)
5968  goto seqblck_again;
5969  }
5970  else if(m_evt_handler->m_curr->indentation_lt())
5971  {
5972  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5973  _handle_indentation_pop_from_block_seq();
5974  if(has_all(RSEQ|BLCK))
5975  {
5976  _c4dbgp("seqblck[RNXT]: still seqblck!");
5977  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5978  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5979  rem = m_evt_handler->m_curr->line_contents.rem;
5980  if(!rem.len)
5981  goto seqblck_again;
5982  }
5983  else
5984  {
5985  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5986  goto seqblck_finish;
5987  }
5988  }
5989  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5990  {
5991  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5992  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5993  rem = m_evt_handler->m_curr->line_contents.rem;
5994  if(!rem.len)
5995  goto seqblck_again;
5996  }
5997  }
5998  else
5999  {
6000  _c4dbgp("seqblck[RNXT]: NOT at line begin");
6001  if(!rem.begins_with_any(" \t"))
6002  {
6003  _c4err("parse error");
6004  }
6005  else
6006  {
6007  _skipchars(" \t");
6008  rem = m_evt_handler->m_curr->line_contents.rem;
6009  if(!rem.len)
6010  {
6011  _c4dbgp("seqblck[RNXT]: again");
6012  goto seqblck_again;
6013  }
6014  }
6015  }
6016  //
6017  // now handle the tokens
6018  //
6019  const char first = rem.str[0];
6020  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
6021  if(first == '-')
6022  {
6023  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6024  {
6025  _c4dbgp("seqblck[RNXT]: expect next val");
6026  addrem_flags(RVAL, RNXT);
6027  m_evt_handler->add_sibling();
6028  _line_progressed(1);
6029  _maybe_skip_whitespace_tokens();
6030  }
6031  else
6032  {
6033  _c4dbgp("seqblck[RNXT]: start doc");
6034  _start_doc_suddenly();
6035  _line_progressed(3);
6036  _maybe_skip_whitespace_tokens();
6037  goto seqblck_finish;
6038  }
6039  }
6040  else if(first == ':')
6041  {
6042  // This happens for example in `- [a: b]: c` (after
6043  // terminating the seq, ie, after `]`). All other cases
6044  // (ie colon after scalars) are caught elsewhere (ie, in
6045  // RVAL state).
6046  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6047  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
6048  {
6049  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6050  m_evt_handler->end_seq();
6051  goto seqblck_finish;
6052  }
6053  else
6054  {
6055  _c4err("parse error");
6056  }
6057  }
6058  else if(first == '.')
6059  {
6060  _c4dbgp("seqblck[RNXT]: maybe doc?");
6061  csubstr rs = rem.sub(1);
6062  if(rs == ".." || rs.begins_with(".. "))
6063  {
6064  _c4dbgp("seqblck[RNXT]: end+start doc");
6065  _end_doc_suddenly();
6066  _line_progressed(3);
6067  _maybe_skip_whitespace_tokens();
6068  goto seqblck_finish;
6069  }
6070  else
6071  {
6072  _c4err("parse error");
6073  }
6074  }
6075  else
6076  {
6077  // may be an indentless sequence nested in a map...
6078  //if(m_evt_handler->m_stack.size() >= 2)
6079  #ifdef RYML_DBG
6080  char flagbuf_[128];
6081  for(auto const& s : m_evt_handler->m_stack)
6082  {
6083  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
6084  }
6085  #endif
6086  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6087  {
6088  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6089  _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6090  _handle_indentation_pop(m_evt_handler->m_parent);
6091  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
6092  m_evt_handler->add_sibling();
6093  addrem_flags(RKEY, RNXT);
6094  goto seqblck_finish;
6095  }
6096  else //if(first != '*')
6097  {
6098  _c4err("parse error");
6099  }
6100  }
6101  }
6102 
6103  seqblck_again:
6104  _c4dbgt("seqblck: go again", 0);
6105  if(_finished_line())
6106  {
6107  _line_ended();
6108  _scan_line();
6109  if(_finished_file())
6110  {
6111  _c4dbgp("seqblck: finish!");
6112  _end_seq_blck();
6113  goto seqblck_finish;
6114  }
6115  _c4dbgnextline();
6116  }
6117  goto seqblck_start;
6118 
6119  seqblck_finish:
6120  _c4dbgp("seqblck: finish");
6121 }
6122 
6123 
6124 //-----------------------------------------------------------------------------
6125 
6126 template<class EventHandler>
6127 void ParseEngine<EventHandler>::_handle_map_block()
6128 {
6129 mapblck_start:
6130  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6131 
6132  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
6133  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
6134  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
6135  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
6136  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
6137 
6138  _maybe_skip_comment();
6139  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6140  if(!rem.len)
6141  goto mapblck_again;
6142 
6143  if(has_any(RKEY))
6144  {
6145  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6146  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6147  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6148  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6149  //
6150  // handle indentation
6151  //
6152  if(m_evt_handler->m_curr->at_line_beginning())
6153  {
6154  if(m_evt_handler->m_curr->indentation_eq())
6155  {
6156  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6157  _line_progressed(m_evt_handler->m_curr->indref);
6158  rem = m_evt_handler->m_curr->line_contents.rem;
6159  if(!rem.len)
6160  goto mapblck_again;
6161  }
6162  else if(m_evt_handler->m_curr->indentation_lt())
6163  {
6164  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6165  _handle_indentation_pop_from_block_map();
6166  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6167  if(has_all(RMAP|BLCK))
6168  {
6169  _c4dbgp("mapblck[RKEY]: still mapblck!");
6170  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6171  rem = m_evt_handler->m_curr->line_contents.rem;
6172  if(!rem.len)
6173  goto mapblck_again;
6174  }
6175  else
6176  {
6177  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6178  goto mapblck_finish;
6179  }
6180  }
6181  else
6182  {
6183  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6184  _c4err("invalid indentation");
6185  }
6186  }
6187  //
6188  // now handle the tokens
6189  //
6190  const char first = rem.str[0];
6191  const size_t startline = m_evt_handler->m_curr->pos.line;
6192  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6193  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6194  ScannedScalar sc;
6195  if(first == '\'')
6196  {
6197  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6198  sc = _scan_scalar_squot();
6199  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6200  _handle_annotations_before_blck_key_scalar();
6201  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6202  addrem_flags(RVAL, RKEY);
6203  if(!_maybe_scan_following_colon())
6204  _c4err("could not find ':' colon after key");
6205  _maybe_skip_whitespace_tokens();
6206  }
6207  else if(first == '"')
6208  {
6209  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6210  sc = _scan_scalar_dquot();
6211  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6212  _handle_annotations_before_blck_key_scalar();
6213  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6214  addrem_flags(RVAL, RKEY);
6215  if(!_maybe_scan_following_colon())
6216  _c4err("could not find ':' colon after key");
6217  _maybe_skip_whitespace_tokens();
6218  }
6219  // block scalars (| and >) cannot be used as keys unless they
6220  // appear in an explicit QMRK scope (ie, after the ? token).
6221  else if(C4_UNLIKELY(first == '|'))
6222  {
6223  _c4err("block literal keys must be enclosed in '?'");
6224  }
6225  else if(C4_UNLIKELY(first == '>'))
6226  {
6227  _c4err("block literal keys must be enclosed in '?'");
6228  }
6229  else if(_scan_scalar_plain_map_blck(&sc))
6230  {
6231  _c4dbgp("mapblck[RKEY]: plain scalar");
6232  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6233  _handle_annotations_before_blck_key_scalar();
6234  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6235  addrem_flags(RVAL, RKEY);
6236  if(!_maybe_scan_following_colon())
6237  _c4err("could not find ':' colon after key");
6238  _maybe_skip_whitespace_tokens();
6239  }
6240  else if(first == '?')
6241  {
6242  _c4dbgp("mapblck[RKEY]: key token!");
6243  addrem_flags(QMRK, RKEY);
6244  _line_progressed(1);
6245  _maybe_skip_whitespace_tokens();
6246  m_was_inside_qmrk = true;
6247  goto mapblck_again;
6248  }
6249  else if(first == ':')
6250  {
6251  _c4dbgp("mapblck[RKEY]: setting empty key");
6252  _handle_annotations_before_blck_key_scalar();
6253  m_evt_handler->set_key_scalar_plain_empty();
6254  addrem_flags(RVAL, RKEY);
6255  _line_progressed(1);
6256  _maybe_skip_whitespace_tokens();
6257  }
6258  else if(first == '*')
6259  {
6260  csubstr ref = _scan_ref_map();
6261  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6262  _handle_annotations_before_blck_key_scalar();
6263  m_evt_handler->set_key_ref(ref);
6264  addrem_flags(RVAL, RKEY);
6265  if(!_maybe_scan_following_colon())
6266  _c4err("could not find ':' colon after key");
6267  _maybe_skip_whitespace_tokens();
6268  }
6269  else if(first == '&')
6270  {
6271  csubstr anchor = _scan_anchor();
6272  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6273  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6274  }
6275  else if(first == '!')
6276  {
6277  csubstr tag = _scan_tag();
6278  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6279  _add_annotation(&m_pending_tags, tag, startindent, startline);
6280  }
6281  else if(first == '[')
6282  {
6283  // RYML's tree cannot store container keys, but that's
6284  // handled inside the tree handler. Other handlers may be
6285  // able to handle it.
6286  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6287  addrem_flags(RKCL, RKEY);
6288  _handle_annotations_before_blck_key_scalar();
6289  m_evt_handler->begin_seq_key_flow();
6290  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6291  _line_progressed(1);
6292  _set_indentation(startindent);
6293  goto mapblck_finish;
6294  }
6295  else if(first == '{')
6296  {
6297  // RYML's tree cannot store container keys, but that's
6298  // handled inside the tree handler. Other handlers may be
6299  // able to handle it.
6300  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6301  addrem_flags(RKCL, RKEY);
6302  _handle_annotations_before_blck_key_scalar();
6303  m_evt_handler->begin_map_key_flow();
6304  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6305  _line_progressed(1);
6306  _set_indentation(startindent);
6307  goto mapblck_finish;
6308  }
6309  else if(first == '-')
6310  {
6311  _c4dbgp("mapblck[RKEY]: maybe doc?");
6312  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6313  {
6314  _c4dbgp("mapblck[RKEY]: end+start doc");
6315  _start_doc_suddenly();
6316  _line_progressed(3);
6317  _maybe_skip_whitespace_tokens();
6318  goto mapblck_finish;
6319  }
6320  else
6321  {
6322  _c4err("parse error");
6323  }
6324  }
6325  else if(first == '.')
6326  {
6327  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6328  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6329  {
6330  _c4dbgp("mapblck[RKEY]: end doc");
6331  _end_doc_suddenly();
6332  _line_progressed(3);
6333  _maybe_skip_whitespace_tokens();
6334  goto mapblck_finish;
6335  }
6336  else
6337  {
6338  _c4err("parse error");
6339  }
6340  }
6342  else if(first == '\t')
6343  {
6344  _c4dbgp("mapblck[RKEY]: skip tabs");
6345  _maybe_skipchars('\t');
6346  })
6347  else
6348  {
6349  _c4err("parse error");
6350  }
6351  }
6352  else if(has_any(RKCL)) // read the key colon
6353  {
6354  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6355  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6356  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6357  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6358  //
6359  // handle indentation
6360  //
6361  if(m_evt_handler->m_curr->at_line_beginning())
6362  {
6363  if(m_evt_handler->m_curr->indentation_eq())
6364  {
6365  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6366  _line_progressed(m_evt_handler->m_curr->indref);
6367  rem = m_evt_handler->m_curr->line_contents.rem;
6368  if(!rem.len)
6369  goto mapblck_again;
6370  }
6371  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6372  {
6373  _c4err("invalid indentation");
6374  }
6375  }
6376  const char first = rem.str[0];
6377  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6378  if(first == ':')
6379  {
6380  _c4dbgp("mapblck[RKCL]: found the colon");
6381  addrem_flags(RVAL, RKCL);
6382  _line_progressed(1);
6383  _maybe_skip_whitespace_tokens();
6384  }
6385  else if(first == '?')
6386  {
6387  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6388  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6389  m_evt_handler->set_val_scalar_plain_empty();
6390  m_evt_handler->add_sibling();
6391  addrem_flags(QMRK, RKCL);
6392  _line_progressed(1);
6393  _maybe_skip_whitespace_tokens();
6394  }
6395  else if(first == '-')
6396  {
6397  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6398  {
6399  _c4dbgp("mapblck[RKCL]: end+start doc");
6400  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6401  _start_doc_suddenly();
6402  _line_progressed(3);
6403  _maybe_skip_whitespace_tokens();
6404  goto mapblck_finish;
6405  }
6406  else
6407  {
6408  _c4err("parse error");
6409  }
6410  }
6411  else if(first == '.')
6412  {
6413  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6414  csubstr rs = rem.sub(1);
6415  if(rs == ".." || rs.begins_with(".. "))
6416  {
6417  _c4dbgp("mapblck[RKCL]: end+start doc");
6418  _end_doc_suddenly();
6419  _line_progressed(3);
6420  goto mapblck_finish;
6421  }
6422  else
6423  {
6424  _c4err("parse error");
6425  }
6426  }
6427  else if(m_was_inside_qmrk)
6428  {
6429  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6430  _c4dbgp("mapblck[RKCL]: missing :");
6431  m_evt_handler->set_val_scalar_plain_empty();
6432  m_evt_handler->add_sibling();
6433  m_was_inside_qmrk = false;
6434  addrem_flags(RKEY, RKCL);
6435  }
6436  else
6437  {
6438  _c4err("parse error");
6439  }
6440  }
6441  else if(has_any(RVAL))
6442  {
6443  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6444  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6445  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6446  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6447  //
6448  // handle indentation
6449  //
6450  if(m_evt_handler->m_curr->at_line_beginning())
6451  {
6452  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6453  m_evt_handler->m_curr->more_indented = false;
6454  if(m_evt_handler->m_curr->indref == npos)
6455  {
6456  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6457  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6458  _line_progressed(m_evt_handler->m_curr->indref);
6459  rem = m_evt_handler->m_curr->line_contents.rem;
6460  if(!rem.len)
6461  goto mapblck_again;
6462  }
6463  else if(m_evt_handler->m_curr->indentation_eq())
6464  {
6465  _c4dbgp("mapblck[RVAL]: skip indentation!");
6466  _line_progressed(m_evt_handler->m_curr->indref);
6467  rem = m_evt_handler->m_curr->line_contents.rem;
6468  if(!rem.len)
6469  goto mapblck_again;
6470  // TODO: this is valid:
6471  //
6472  // ```yaml
6473  // a:
6474  // b:
6475  // ---
6476  // a:
6477  // b
6478  // ---
6479  // a:
6480  // b: c
6481  // ```
6482  //
6483  // ... but this is not:
6484  //
6485  // ```yaml
6486  // a:
6487  // v
6488  // ---
6489  // a: b: c
6490  // ```
6491  //
6492  // here, we probably need to set a boolean on the state
6493  // to disambiguate between these cases.
6494  }
6495  else if(m_evt_handler->m_curr->indentation_gt())
6496  {
6497  _c4dbgp("mapblck[RVAL]: more indented!");
6498  m_evt_handler->m_curr->more_indented = true;
6499  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6500  rem = m_evt_handler->m_curr->line_contents.rem;
6501  if(!rem.len)
6502  goto mapblck_again;
6503  }
6504  else if(m_evt_handler->m_curr->indentation_lt())
6505  {
6506  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6507  _handle_indentation_pop_from_block_map();
6508  if(has_all(RMAP|BLCK))
6509  {
6510  _c4dbgp("mapblck[RVAL]: still mapblck!");
6511  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6512  if(has_any(RNXT))
6513  {
6514  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6515  m_evt_handler->add_sibling();
6516  addrem_flags(RKEY, RNXT);
6517  }
6518  goto mapblck_again;
6519  }
6520  else
6521  {
6522  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6523  goto mapblck_finish;
6524  }
6525  }
6526  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6527  {
6528  _c4dbgp("mapblck[RVAL]: empty line!");
6529  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6530  goto mapblck_again;
6531  }
6532  }
6533  //
6534  // now handle the tokens
6535  //
6536  const char first = rem.str[0];
6537  const size_t startline = m_evt_handler->m_curr->pos.line;
6538  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6539  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6540  ScannedScalar sc;
6541  if(first == '\'')
6542  {
6543  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6544  sc = _scan_scalar_squot();
6545  if(!_maybe_scan_following_colon())
6546  {
6547  _c4dbgp("mapblck[RVAL]: set as val");
6548  _handle_annotations_before_blck_val_scalar();
6549  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6550  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6551  addrem_flags(RNXT, RVAL);
6552  }
6553  else
6554  {
6555  if(startindent != m_evt_handler->m_curr->indref)
6556  {
6557  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6558  _handle_annotations_before_start_mapblck(startline);
6559  addrem_flags(RNXT, RVAL);
6560  _handle_colon();
6561  m_evt_handler->begin_map_val_block();
6562  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6563  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6564  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6565  _maybe_skip_whitespace_tokens();
6566  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6567  // keep the child state on RVAL
6568  addrem_flags(RVAL, RNXT);
6569  }
6570  else
6571  {
6572  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6573  m_evt_handler->set_val_scalar_plain_empty();
6574  m_evt_handler->add_sibling();
6575  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6576  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6577  // keep going on RVAL
6578  _maybe_skip_whitespace_tokens();
6579  }
6580  }
6581  }
6582  else if(first == '"')
6583  {
6584  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6585  sc = _scan_scalar_dquot();
6586  if(!_maybe_scan_following_colon())
6587  {
6588  _c4dbgp("mapblck[RVAL]: set as val");
6589  _handle_annotations_before_blck_val_scalar();
6590  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6591  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6592  addrem_flags(RNXT, RVAL);
6593  }
6594  else
6595  {
6596  if(startindent != m_evt_handler->m_curr->indref)
6597  {
6598  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6599  _handle_annotations_before_start_mapblck(startline);
6600  addrem_flags(RNXT, RVAL);
6601  _handle_colon();
6602  m_evt_handler->begin_map_val_block();
6603  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6604  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6605  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6606  _maybe_skip_whitespace_tokens();
6607  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6608  // keep the child state on RVAL
6609  addrem_flags(RVAL, RNXT);
6610  }
6611  else
6612  {
6613  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6614  m_evt_handler->set_val_scalar_plain_empty();
6615  m_evt_handler->add_sibling();
6616  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6617  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6618  // keep going on RVAL
6619  _maybe_skip_whitespace_tokens();
6620  }
6621  }
6622  }
6623  // block scalars can only appear as keys when in QMRK scope
6624  // (ie, after ? tokens), so no need to scan following colon
6625  else if(first == '|')
6626  {
6627  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6628  ScannedBlock sb;
6629  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6630  _handle_annotations_before_blck_val_scalar();
6631  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6632  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6633  addrem_flags(RNXT, RVAL);
6634  }
6635  else if(first == '>')
6636  {
6637  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6638  ScannedBlock sb;
6639  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6640  _handle_annotations_before_blck_val_scalar();
6641  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6642  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6643  addrem_flags(RNXT, RVAL);
6644  }
6645  else if(_scan_scalar_plain_map_blck(&sc))
6646  {
6647  _c4dbgp("mapblck[RVAL]: plain scalar.");
6648  if(!_maybe_scan_following_colon())
6649  {
6650  _c4dbgp("mapblck[RVAL]: set as val");
6651  _handle_annotations_before_blck_val_scalar();
6652  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6653  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6654  addrem_flags(RNXT, RVAL);
6655  }
6656  else
6657  {
6658  if(startindent != m_evt_handler->m_curr->indref)
6659  {
6660  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6661  addrem_flags(RNXT, RVAL);
6662  _handle_annotations_before_start_mapblck(startline);
6663  _handle_colon();
6664  m_evt_handler->begin_map_val_block();
6665  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6666  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6667  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6668  _maybe_skip_whitespace_tokens();
6669  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6670  // keep the child state on RVAL
6671  addrem_flags(RVAL, RNXT);
6672  }
6673  else
6674  {
6675  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6676  _handle_annotations_before_blck_val_scalar();
6677  m_evt_handler->set_val_scalar_plain_empty();
6678  m_evt_handler->add_sibling();
6679  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6680  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6681  // keep going on RVAL
6682  _maybe_skip_whitespace_tokens();
6683  }
6684  }
6685  }
6686  else if(first == '-')
6687  {
6688  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6689  {
6690  _c4dbgp("mapblck[RVAL]: start val seqblck");
6691  addrem_flags(RNXT, RVAL);
6692  _handle_annotations_before_blck_val_scalar();
6693  m_evt_handler->begin_seq_val_block();
6694  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6695  _set_indentation(startindent);
6696  _line_progressed(1);
6697  _maybe_skip_whitespace_tokens();
6698  goto mapblck_finish;
6699  }
6700  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6701  {
6702  _c4dbgp("mapblck[RVAL]: end+start doc");
6703  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6704  _start_doc_suddenly();
6705  _line_progressed(3);
6706  _maybe_skip_whitespace_tokens();
6707  goto mapblck_finish;
6708  }
6709  else
6710  {
6711  _c4err("parse error");
6712  }
6713  }
6714  else if(first == '[')
6715  {
6716  _c4dbgp("mapblck[RVAL]: start val seqflow");
6717  addrem_flags(RNXT, RVAL);
6718  _handle_annotations_before_blck_val_scalar();
6719  m_evt_handler->begin_seq_val_flow();
6720  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT);
6721  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6722  _line_progressed(1);
6723  goto mapblck_finish;
6724  }
6725  else if(first == '{')
6726  {
6727  _c4dbgp("mapblck[RVAL]: start val mapflow");
6728  addrem_flags(RNXT, RVAL);
6729  _handle_annotations_before_blck_val_scalar();
6730  m_evt_handler->begin_map_val_flow();
6731  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6732  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6733  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6734  _line_progressed(1);
6735  goto mapblck_finish;
6736  }
6737  else if(first == '*')
6738  {
6739  csubstr ref = _scan_ref_map();
6740  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6741  if(startindent == m_evt_handler->m_curr->indref)
6742  {
6743  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6744  m_evt_handler->set_val_ref(ref);
6745  addrem_flags(RNXT, RVAL);
6746  }
6747  else
6748  {
6749  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6750  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6751  if(_maybe_scan_following_colon())
6752  {
6753  _c4dbgp("mapblck[RVAL]: start child map, block");
6754  addrem_flags(RNXT, RVAL);
6755  _handle_annotations_before_blck_val_scalar();
6756  m_evt_handler->begin_map_val_block();
6757  m_evt_handler->set_key_ref(ref);
6758  _set_indentation(startindent);
6759  // keep going in RVAL
6760  addrem_flags(RVAL, RNXT);
6761  }
6762  else
6763  {
6764  _c4dbgp("mapblck[RVAL]: was val ref");
6765  _handle_annotations_before_blck_val_scalar();
6766  m_evt_handler->set_val_ref(ref);
6767  addrem_flags(RNXT, RVAL);
6768  }
6769  }
6770  _maybe_skip_whitespace_tokens();
6771  }
6772  else if(first == '&')
6773  {
6774  csubstr anchor = _scan_anchor();
6775  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6776  if(startindent == m_evt_handler->m_curr->indref)
6777  {
6778  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6779  m_evt_handler->set_val_scalar_plain_empty();
6780  m_evt_handler->add_sibling();
6781  addrem_flags(RKEY, RVAL);
6782  }
6783  // we need to buffer the anchors, as there may be two
6784  // consecutive anchors in here
6785  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6786  }
6787  else if(first == '!')
6788  {
6789  csubstr tag = _scan_tag();
6790  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6791  if(startindent == m_evt_handler->m_curr->indref)
6792  {
6793  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6794  _handle_annotations_before_blck_val_scalar();
6795  m_evt_handler->set_val_scalar_plain_empty();
6796  m_evt_handler->add_sibling();
6797  addrem_flags(RKEY, RVAL);
6798  }
6799  // we need to buffer the tags, as there may be two
6800  // consecutive tags in here
6801  _add_annotation(&m_pending_tags, tag, startindent, startline);
6802  }
6803  else if(first == '?')
6804  {
6805  if(startindent == m_evt_handler->m_curr->indref)
6806  {
6807  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6808  _handle_annotations_before_blck_val_scalar();
6809  m_evt_handler->set_val_scalar_plain_empty();
6810  m_evt_handler->add_sibling();
6811  addrem_flags(QMRK, RVAL);
6812  }
6813  else if(startindent > m_evt_handler->m_curr->indref)
6814  {
6815  _c4dbgp("mapblck[RVAL]: start val mapblck");
6816  addrem_flags(RNXT, RVAL);
6817  _handle_annotations_before_blck_val_scalar();
6818  m_evt_handler->begin_map_val_block();
6819  addrem_flags(QMRK|BLCK, RNXT);
6820  _set_indentation(startindent);
6821  }
6822  else
6823  {
6824  _c4err("parse error");
6825  }
6826  m_was_inside_qmrk = true;
6827  _line_progressed(1);
6828  _maybe_skip_whitespace_tokens();
6829  goto mapblck_again;
6830  }
6831  else if(first == ':')
6832  {
6833  if(startindent == m_evt_handler->m_curr->indref)
6834  {
6835  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6836  m_evt_handler->set_val_scalar_plain_empty();
6837  m_evt_handler->add_sibling();
6838  m_evt_handler->set_key_scalar_plain_empty();
6839  }
6840  else if(startindent > m_evt_handler->m_curr->indref)
6841  {
6842  _c4dbgp("mapblck[RVAL]: start val mapblck");
6843  addrem_flags(RNXT, RVAL);
6844  _handle_annotations_before_start_mapblck(startline);
6845  _handle_colon();
6846  m_evt_handler->begin_map_val_block();
6847  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6848  m_evt_handler->set_key_scalar_plain_empty();
6849  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6850  // keep the child state on RVAL
6851  addrem_flags(RVAL, RNXT);
6852  }
6853  else
6854  {
6855  _c4err("parse error");
6856  }
6857  _line_progressed(1);
6858  _maybe_skip_whitespace_tokens();
6859  goto mapblck_again;
6860  }
6861  else if(first == '.')
6862  {
6863  _c4dbgp("mapblck[RVAL]: maybe doc?");
6864  csubstr rs = rem.sub(1);
6865  if(rs == ".." || rs.begins_with(".. "))
6866  {
6867  _c4dbgp("seqblck[RVAL]: end doc expl");
6868  _end_doc_suddenly();
6869  _line_progressed(3);
6870  _maybe_skip_whitespace_tokens();
6871  goto mapblck_finish;
6872  }
6873  else
6874  {
6875  _c4err("parse error");
6876  }
6877  }
6879  else if(first == '\t')
6880  {
6881  _c4dbgp("mapblck[RVAL]: skip tabs");
6882  _maybe_skipchars('\t');
6883  })
6884  else
6885  {
6886  _c4err("parse error");
6887  }
6888  }
6889  else if(has_any(RNXT))
6890  {
6891  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6892  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6893  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6894  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6895  //
6896  // handle indentation
6897  //
6898  if(m_evt_handler->m_curr->at_line_beginning())
6899  {
6900  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6901  if(m_evt_handler->m_curr->indentation_eq())
6902  {
6903  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6904  _line_progressed(m_evt_handler->m_curr->indref);
6905  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6906  m_evt_handler->add_sibling();
6907  addrem_flags(RKEY, RNXT);
6908  goto mapblck_again;
6909  }
6910  else if(m_evt_handler->m_curr->indentation_lt())
6911  {
6912  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6913  _handle_indentation_pop_from_block_map();
6914  if(has_all(RMAP|BLCK))
6915  {
6916  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6917  if(!has_any(RKCL))
6918  {
6919  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6920  m_evt_handler->add_sibling();
6921  addrem_flags(RKEY, RNXT);
6922  }
6923  goto mapblck_again;
6924  }
6925  else
6926  {
6927  goto mapblck_finish;
6928  }
6929  }
6930  }
6931  else
6932  {
6933  _c4dbgp("mapblck[RNXT]: NOT at line begin");
6934  if(!rem.begins_with_any(" \t"))
6935  {
6936  _c4err("parse error");
6937  }
6938  else
6939  {
6940  _skipchars(" \t");
6941  rem = m_evt_handler->m_curr->line_contents.rem;
6942  if(!rem.len)
6943  {
6944  _c4dbgp("seqblck[RNXT]: again");
6945  goto mapblck_again;
6946  }
6947  }
6948  }
6949  //
6950  // handle tokens
6951  //
6952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6953  const char first = rem.str[0];
6954  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6955  if(first == ':')
6956  {
6957  if(m_evt_handler->m_curr->more_indented)
6958  {
6959  _c4dbgp("mapblck[RNXT]: start child block map");
6960  C4_NOT_IMPLEMENTED();
6961  //m_evt_handler->actually_as_block_map();
6962  _line_progressed(1);
6963  _set_indentation(m_evt_handler->m_curr->scalar_col);
6964  m_evt_handler->m_curr->more_indented = false;
6965  goto mapblck_again;
6966  }
6967  else
6968  {
6969  _c4err("parse error");
6970  }
6971  }
6972  else if(first == ' ')
6973  {
6974  _c4dbgp("mapblck[RNXT]: skip spaces");
6975  _maybe_skip_whitespace_tokens();
6976  }
6977  else
6978  {
6979  _c4err("parse error");
6980  }
6981  }
6982  else if(has_any(QMRK))
6983  {
6984  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6985  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6986  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6987  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6988  //
6989  // handle indentation
6990  //
6991  if(m_evt_handler->m_curr->at_line_beginning())
6992  {
6993  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6994  if(m_evt_handler->m_curr->indentation_eq())
6995  {
6996  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6997  _line_progressed(m_evt_handler->m_curr->indref);
6998  rem = m_evt_handler->m_curr->line_contents.rem;
6999  if(!rem.len)
7000  goto mapblck_again;
7001  }
7002  else if(m_evt_handler->m_curr->indentation_lt())
7003  {
7004  _c4dbgp("mapblck[QMRK]: smaller indentation!");
7005  _handle_indentation_pop_from_block_map();
7006  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7007  if(has_all(RMAP|BLCK))
7008  {
7009  _c4dbgp("mapblck[QMRK]: still mapblck!");
7010  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
7011  rem = m_evt_handler->m_curr->line_contents.rem;
7012  if(!rem.len)
7013  goto mapblck_again;
7014  }
7015  else
7016  {
7017  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7018  goto mapblck_finish;
7019  }
7020  }
7021  // indentation can be larger in QMRK state
7022  else
7023  {
7024  _c4dbgp("mapblck[QMRK]: larger indentation !");
7025  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7026  rem = m_evt_handler->m_curr->line_contents.rem;
7027  if(!rem.len)
7028  goto mapblck_again;
7029  }
7030  }
7031  //
7032  // now handle the tokens
7033  //
7034  const char first = rem.str[0];
7035  const size_t startline = m_evt_handler->m_curr->pos.line;
7036  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7037  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7038  ScannedScalar sc;
7039  if(first == '\'')
7040  {
7041  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7042  sc = _scan_scalar_squot();
7043  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7044  if(!_maybe_scan_following_colon())
7045  {
7046  _c4dbgp("mapblck[QMRK]: set as key");
7047  _handle_annotations_before_blck_key_scalar();
7048  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7049  addrem_flags(RKCL, QMRK);
7050  }
7051  else
7052  {
7053  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7054  addrem_flags(RKCL, QMRK);
7055  _handle_annotations_before_start_mapblck_as_key();
7056  m_evt_handler->begin_map_key_block();
7057  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7058  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7059  _maybe_skip_whitespace_tokens();
7060  _set_indentation(startindent);
7061  // keep the child state on RVAL
7062  addrem_flags(RVAL, RKCL|QMRK);
7063  }
7064  }
7065  else if(first == '"')
7066  {
7067  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7068  sc = _scan_scalar_dquot();
7069  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7070  if(!_maybe_scan_following_colon())
7071  {
7072  _c4dbgp("mapblck[QMRK]: set as key");
7073  _handle_annotations_before_blck_key_scalar();
7074  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7075  addrem_flags(RKCL, QMRK);
7076  }
7077  else
7078  {
7079  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7080  addrem_flags(RKCL, QMRK);
7081  _handle_annotations_before_start_mapblck_as_key();
7082  m_evt_handler->begin_map_key_block();
7083  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7084  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7085  _maybe_skip_whitespace_tokens();
7086  _set_indentation(startindent);
7087  // keep the child state on RVAL
7088  addrem_flags(RVAL, RKCL|QMRK);
7089  }
7090  }
7091  else if(first == '|')
7092  {
7093  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7094  ScannedBlock sb;
7095  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7096  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7097  _handle_annotations_before_blck_key_scalar();
7098  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7099  addrem_flags(RKCL, QMRK);
7100  }
7101  else if(first == '>')
7102  {
7103  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7104  ScannedBlock sb;
7105  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7106  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7107  _handle_annotations_before_blck_key_scalar();
7108  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7109  addrem_flags(RKCL, QMRK);
7110  }
7111  else if(_scan_scalar_plain_map_blck(&sc))
7112  {
7113  _c4dbgp("mapblck[QMRK]: plain scalar");
7114  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7115  if(!_maybe_scan_following_colon())
7116  {
7117  _c4dbgp("mapblck[QMRK]: set as key");
7118  _handle_annotations_before_blck_key_scalar();
7119  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7120  addrem_flags(RKCL, QMRK);
7121  }
7122  else
7123  {
7124  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7125  addrem_flags(RKCL, QMRK);
7126  _handle_annotations_before_start_mapblck_as_key();
7127  m_evt_handler->begin_map_key_block();
7128  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7129  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7130  _maybe_skip_whitespace_tokens();
7131  _set_indentation(startindent);
7132  // keep the child state on RVAL
7133  addrem_flags(RVAL, RKCL|QMRK);
7134  }
7135  }
7136  else if(first == ':')
7137  {
7138  if(startindent == m_evt_handler->m_curr->indref)
7139  {
7140  _c4dbgp("mapblck[QMRK]: empty key");
7141  addrem_flags(RVAL, QMRK);
7142  _handle_annotations_before_blck_key_scalar();
7143  m_evt_handler->set_key_scalar_plain_empty();
7144  _line_progressed(1);
7145  _maybe_skip_whitespace_tokens();
7146  }
7147  else
7148  {
7149  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7150  addrem_flags(RKCL, QMRK);
7151  _handle_annotations_before_start_mapblck_as_key();
7152  m_evt_handler->begin_map_key_block();
7153  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7154  m_evt_handler->set_key_scalar_plain_empty();
7155  _line_progressed(1);
7156  _maybe_skip_whitespace_tokens();
7157  _set_indentation(startindent);
7158  // keep the child state on RVAL
7159  addrem_flags(RVAL, RKCL|QMRK);
7160  }
7161  }
7162  else if(first == '*')
7163  {
7164  csubstr ref = _scan_ref_map();
7165  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7166  if(!_maybe_scan_following_colon())
7167  {
7168  _c4dbgp("mapblck[QMRK]: set ref as key");
7169  _handle_annotations_before_blck_key_scalar();
7170  m_evt_handler->set_key_ref(ref);
7171  addrem_flags(RKCL, QMRK);
7172  }
7173  else
7174  {
7175  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7176  addrem_flags(RKCL, QMRK);
7177  _handle_annotations_before_blck_key_scalar();
7178  m_evt_handler->begin_map_key_block();
7179  m_evt_handler->set_key_ref(ref);
7180  _set_indentation(startindent);
7181  // keep the child state on RVAL
7182  addrem_flags(RVAL, RKCL|QMRK);
7183  }
7184  _maybe_skip_whitespace_tokens();
7185  }
7186  else if(first == '&')
7187  {
7188  csubstr anchor = _scan_anchor();
7189  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7190  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7191  }
7192  else if(first == '!')
7193  {
7194  csubstr tag = _scan_tag();
7195  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7196  _add_annotation(&m_pending_tags, tag, startindent, startline);
7197  }
7198  else if(first == '-')
7199  {
7200  _c4dbgp("mapblck[QMRK]: maybe doc?");
7201  csubstr rs = rem.sub(1);
7202  if(rs == "--" || rs.begins_with("-- "))
7203  {
7204  _c4dbgp("mapblck[QMRK]: end+start doc");
7205  _start_doc_suddenly();
7206  _line_progressed(3);
7207  }
7208  else
7209  {
7210  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7211  addrem_flags(RKCL, RKEY|QMRK);
7212  _handle_annotations_before_blck_key_scalar();
7213  m_evt_handler->begin_seq_key_block();
7214  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7215  _set_indentation(startindent);
7216  _line_progressed(1);
7217  }
7218  _maybe_skip_whitespace_tokens();
7219  goto mapblck_finish;
7220  }
7221  else if(first == '[')
7222  {
7223  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7224  addrem_flags(RKCL, RKEY|QMRK);
7225  m_evt_handler->begin_seq_key_flow();
7226  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
7227  _set_indentation(m_evt_handler->m_parent->indref);
7228  _line_progressed(1);
7229  goto mapblck_finish;
7230  }
7231  else if(first == '{')
7232  {
7233  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7234  addrem_flags(RKCL, RKEY|QMRK);
7235  m_evt_handler->begin_map_key_flow();
7236  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7237  _set_indentation(m_evt_handler->m_parent->indref);
7238  _line_progressed(1);
7239  goto mapblck_finish;
7240  }
7241  else if(first == '?')
7242  {
7243  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7244  m_evt_handler->set_key_scalar_plain_empty();
7245  m_evt_handler->set_val_scalar_plain_empty();
7246  m_evt_handler->add_sibling();
7247  _line_progressed(1);
7248  }
7249  else if(first == '.')
7250  {
7251  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7252  csubstr rs = rem.sub(1);
7253  if(rs == ".." || rs.begins_with(".. "))
7254  {
7255  _c4dbgp("mapblck[QMRK]: end+start doc");
7256  _end_doc_suddenly();
7257  _line_progressed(3);
7258  goto mapblck_finish;
7259  }
7260  else
7261  {
7262  _c4err("parse error");
7263  }
7264  }
7265  else
7266  {
7267  _c4err("parse error");
7268  }
7269  }
7270 
7271  mapblck_again:
7272  _c4dbgt("mapblck: again", 0);
7273  if(_finished_line())
7274  {
7275  _line_ended();
7276  _scan_line();
7277  if(_finished_file())
7278  {
7279  _c4dbgp("mapblck: file finished!");
7280  _end_map_blck();
7281  goto mapblck_finish;
7282  }
7283  _c4dbgnextline();
7284  }
7285  goto mapblck_start;
7286 
7287  mapblck_finish:
7288  _c4dbgp("mapblck: finish");
7289 }
7290 
7291 
7292 //-----------------------------------------------------------------------------
7293 
7294 template<class EventHandler>
7295 void ParseEngine<EventHandler>::_handle_unk_json()
7296 {
7297  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7298 
7299  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7300  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7301 
7302  _maybe_skip_comment();
7303  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7304  if(!rem.len)
7305  return;
7306 
7307  size_t pos = rem.first_not_of(" \t");
7308  if(pos)
7309  {
7310  pos = pos != npos ? pos : rem.len;
7311  _c4dbgpf("skipping indentation of {}", pos);
7312  _line_progressed(pos);
7313  rem = m_evt_handler->m_curr->line_contents.rem;
7314  if(!rem.len)
7315  return;
7316  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7317  }
7318 
7319  if(rem.begins_with('['))
7320  {
7321  _c4dbgp("it's a seq");
7322  m_evt_handler->check_trailing_doc_token();
7323  _maybe_begin_doc();
7324  m_evt_handler->begin_seq_val_flow();
7325  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7326  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7327  m_doc_empty = false;
7328  _line_progressed(1);
7329  }
7330  else if(rem.begins_with('{'))
7331  {
7332  _c4dbgp("it's a map");
7333  m_evt_handler->check_trailing_doc_token();
7334  _maybe_begin_doc();
7335  m_evt_handler->begin_map_val_flow();
7336  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7337  m_doc_empty = false;
7338  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7339  _line_progressed(1);
7340  }
7341  else if(_handle_bom())
7342  {
7343  _c4dbgp("byte order mark");
7344  }
7345  else
7346  {
7347  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7348  _maybe_skip_whitespace_tokens();
7349  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7350  if(!s.len)
7351  return;
7352  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7353  const char first = s.str[0];
7354  ScannedScalar sc;
7355  if(first == '"')
7356  {
7357  _c4dbgp("runk_json: scanning double-quoted scalar");
7358  m_evt_handler->check_trailing_doc_token();
7359  _maybe_begin_doc();
7360  add_flags(RDOC);
7361  m_doc_empty = false;
7362  sc = _scan_scalar_dquot();
7363  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7364  if(!_maybe_scan_following_colon())
7365  {
7366  _c4dbgp("runk_json: set as val");
7367  _handle_annotations_before_blck_val_scalar();
7368  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7369  }
7370  else
7371  {
7372  _c4err("parse error");
7373  }
7374  }
7375  else if(_scan_scalar_plain_unk(&sc))
7376  {
7377  _c4dbgp("runk_json: got a plain scalar");
7378  m_evt_handler->check_trailing_doc_token();
7379  _maybe_begin_doc();
7380  add_flags(RDOC);
7381  m_doc_empty = false;
7382  if(!_maybe_scan_following_colon())
7383  {
7384  _c4dbgp("runk_json: set as val");
7385  _handle_annotations_before_blck_val_scalar();
7386  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7387  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7388  }
7389  else
7390  {
7391  _c4err("parse error");
7392  }
7393  }
7394  else
7395  {
7396  _c4err("parse error");
7397  }
7398  }
7399 }
7400 
7401 
7402 //-----------------------------------------------------------------------------
7403 
7404 template<class EventHandler>
7405 void ParseEngine<EventHandler>::_handle_unk()
7406 {
7407  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7408 
7409  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7410  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7411 
7412  _maybe_skip_comment();
7413  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7414  if(!rem.len)
7415  return;
7416 
7417  size_t pos = rem.first_not_of(" \t");
7418  if(pos)
7419  {
7420  pos = pos != npos ? pos : rem.len;
7421  _c4dbgpf("skipping {} whitespace characters", pos);
7422  _line_progressed(pos);
7423  rem = m_evt_handler->m_curr->line_contents.rem;
7424  if(!rem.len)
7425  return;
7426  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7427  }
7428 
7429  if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7430  {
7431  _c4dbgp("rtop: zero indent + at line begin");
7432  if(_handle_bom())
7433  {
7434  _c4dbgp("byte order mark!");
7435  rem = m_evt_handler->m_curr->line_contents.rem;
7436  if(!rem.len)
7437  return;
7438  }
7439  const char first = rem.str[0];
7440  if(first == '-')
7441  {
7442  _c4dbgp("rtop: suspecting doc");
7443  if(_is_doc_begin_token(rem))
7444  {
7445  _c4dbgp("rtop: begin doc");
7446  _maybe_end_doc();
7447  _begin2_doc_expl();
7448  _set_indentation(0);
7449  addrem_flags(RDOC|RUNK, NDOC);
7450  _line_progressed(3u);
7451  _maybe_skip_whitespace_tokens();
7452  return;
7453  }
7454  }
7455  else if(first == '.')
7456  {
7457  _c4dbgp("rtop: suspecting doc end");
7458  if(_is_doc_end_token(rem))
7459  {
7460  _c4dbgp("rtop: end doc");
7461  if(has_any(RDOC))
7462  {
7463  _end2_doc_expl();
7464  }
7465  else
7466  {
7467  _c4dbgp("rtop: ignore end doc");
7468  }
7469  addrem_flags(NDOC|RUNK, RDOC);
7470  _line_progressed(3u);
7471  _maybe_skip_whitespace_tokens();
7472  return;
7473  }
7474  }
7475  else if(first == '%')
7476  {
7477  _c4dbgpf("directive: {}", rem);
7478  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7479  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7480  _handle_directive(rem);
7481  return;
7482  }
7483  }
7484 
7485  /* no else-if! */
7486  char first = rem.str[0];
7487 
7488  if(first == '[')
7489  {
7490  m_evt_handler->check_trailing_doc_token();
7491  _maybe_begin_doc();
7492  m_doc_empty = false;
7493  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7494  if(C4_LIKELY( ! _annotations_require_key_container()))
7495  {
7496  _c4dbgp("it's a seq, flow");
7497  _handle_annotations_before_blck_val_scalar();
7498  m_evt_handler->begin_seq_val_flow();
7499  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7500  _set_indentation(startindent);
7501  }
7502  else
7503  {
7504  _c4dbgp("start new block map, set flow seq as key (!)");
7505  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7506  m_evt_handler->begin_map_val_block();
7507  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7508  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7509  m_evt_handler->begin_seq_key_flow();
7510  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7511  _set_indentation(startindent);
7512  }
7513  _line_progressed(1);
7514  }
7515  else if(first == '{')
7516  {
7517  m_evt_handler->check_trailing_doc_token();
7518  _maybe_begin_doc();
7519  m_doc_empty = false;
7520  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7521  if(C4_LIKELY( ! _annotations_require_key_container()))
7522  {
7523  _c4dbgp("it's a map, flow");
7524  _handle_annotations_before_blck_val_scalar();
7525  m_evt_handler->begin_map_val_flow();
7526  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7527  _set_indentation(startindent);
7528  }
7529  else
7530  {
7531  _c4dbgp("start new block map, set flow map as key (!)");
7532  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7533  m_evt_handler->begin_map_val_block();
7534  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7535  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7536  m_evt_handler->begin_map_key_flow();
7537  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7538  _set_indentation(startindent);
7539  }
7540  _line_progressed(1);
7541  }
7542  else if(first == '-' && _is_blck_token(rem))
7543  {
7544  _c4dbgp("it's a seq, block");
7545  m_evt_handler->check_trailing_doc_token();
7546  _maybe_begin_doc();
7547  _handle_annotations_before_blck_val_scalar();
7548  m_evt_handler->begin_seq_val_block();
7549  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7550  m_doc_empty = false;
7551  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7552  _line_progressed(1);
7553  _maybe_skip_whitespace_tokens();
7554  }
7555  else if(first == '?' && _is_blck_token(rem))
7556  {
7557  _c4dbgp("it's a map + this key is complex");
7558  m_evt_handler->check_trailing_doc_token();
7559  _maybe_begin_doc();
7560  _handle_annotations_before_blck_val_scalar();
7561  m_evt_handler->begin_map_val_block();
7562  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7563  m_doc_empty = false;
7564  m_was_inside_qmrk = true;
7565  _save_indentation();
7566  _line_progressed(1);
7567  _maybe_skip_whitespace_tokens();
7568  }
7569  else if(first == ':' && _is_blck_token(rem))
7570  {
7571  if(m_doc_empty)
7572  {
7573  _c4dbgp("it's a map with an empty key");
7574  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7575  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7576  m_evt_handler->check_trailing_doc_token();
7577  _maybe_begin_doc();
7578  _handle_annotations_before_start_mapblck(startline);
7579  _handle_colon();
7580  m_evt_handler->begin_map_val_block();
7581  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7582  m_evt_handler->set_key_scalar_plain_empty();
7583  m_doc_empty = false;
7584  _set_indentation(startindent);
7585  }
7586  else
7587  {
7588  _c4dbgp("actually prev val is a key!");
7589  size_t prev_indentation = m_evt_handler->m_curr->indref;
7590  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7591  _set_indentation(prev_indentation);
7592  }
7593  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7594  _line_progressed(1);
7595  _maybe_skip_whitespace_tokens();
7596  }
7597  else if(first == '&')
7598  {
7599  csubstr anchor = _scan_anchor();
7600  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7601  m_evt_handler->check_trailing_doc_token();
7602  _maybe_begin_doc();
7603  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7604  const size_t line = m_evt_handler->m_curr->pos.line;
7605  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7606  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7607  m_doc_empty = false;
7608  }
7609  else if(first == '*')
7610  {
7611  csubstr ref = _scan_ref_map();
7612  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7613  m_evt_handler->check_trailing_doc_token();
7614  _maybe_begin_doc();
7615  m_doc_empty = false;
7616  if(!_maybe_scan_following_colon())
7617  {
7618  _c4dbgp("runk: set val ref");
7619  _handle_annotations_before_blck_val_scalar();
7620  m_evt_handler->set_val_ref(ref);
7621  }
7622  else
7623  {
7624  _c4dbgp("runk: start new block map, set ref as key");
7625  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7626  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7627  _handle_annotations_before_start_mapblck(startline);
7628  m_evt_handler->begin_map_val_block();
7629  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7630  m_evt_handler->set_key_ref(ref);
7631  _maybe_skip_whitespace_tokens();
7632  _set_indentation(startindent);
7633  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7634  }
7635  }
7636  else if(first == '!')
7637  {
7638  csubstr tag = _scan_tag();
7639  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7640  // we need to buffer the tags, as there may be two
7641  // consecutive tags in here
7642  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7643  const size_t line = m_evt_handler->m_curr->pos.line;
7644  _add_annotation(&m_pending_tags, tag, indentation, line);
7645  }
7646  else
7647  {
7648  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7649  _maybe_skip_whitespace_tokens();
7650  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7651  if(!s.len)
7652  return;
7653  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7654  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7655  first = s.str[0];
7656  ScannedScalar sc;
7657  if(first == '\'')
7658  {
7659  _c4dbgp("runk: scanning single-quoted scalar");
7660  m_evt_handler->check_trailing_doc_token();
7661  _maybe_begin_doc();
7662  add_flags(RDOC);
7663  m_doc_empty = false;
7664  sc = _scan_scalar_squot();
7665  if(!_maybe_scan_following_colon())
7666  {
7667  _c4dbgp("runk: set as val");
7668  _handle_annotations_before_blck_val_scalar();
7669  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7670  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7671  }
7672  else
7673  {
7674  _c4dbgp("runk: start new block map, set scalar as key");
7675  _handle_annotations_before_start_mapblck(startline);
7676  _handle_colon();
7677  m_evt_handler->begin_map_val_block();
7678  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7679  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7680  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7681  _maybe_skip_whitespace_tokens();
7682  _set_indentation(startindent);
7683  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7684  }
7685  }
7686  else if(first == '"')
7687  {
7688  _c4dbgp("runk: scanning double-quoted scalar");
7689  m_evt_handler->check_trailing_doc_token();
7690  _maybe_begin_doc();
7691  add_flags(RDOC);
7692  m_doc_empty = false;
7693  sc = _scan_scalar_dquot();
7694  if(!_maybe_scan_following_colon())
7695  {
7696  _c4dbgp("runk: set as val");
7697  _handle_annotations_before_blck_val_scalar();
7698  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7699  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7700  }
7701  else
7702  {
7703  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7704  _handle_annotations_before_start_mapblck(startline);
7705  m_evt_handler->begin_map_val_block();
7706  _handle_colon();
7707  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7708  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7709  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7710  _maybe_skip_whitespace_tokens();
7711  _set_indentation(startindent);
7712  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7713  }
7714  }
7715  else if(first == '|')
7716  {
7717  _c4dbgp("runk: scanning block-literal scalar");
7718  m_evt_handler->check_trailing_doc_token();
7719  _maybe_begin_doc();
7720  add_flags(RDOC);
7721  m_doc_empty = false;
7722  ScannedBlock sb;
7723  _scan_block(&sb, startindent);
7724  if(C4_LIKELY(!_maybe_scan_following_colon()))
7725  {
7726  _c4dbgp("runk: set as val");
7727  _handle_annotations_before_blck_val_scalar();
7728  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7729  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7730  }
7731  else
7732  {
7733  _c4err("block literal keys must be enclosed in '?'");
7734  }
7735  }
7736  else if(first == '>')
7737  {
7738  _c4dbgp("runk: scanning block-folded scalar");
7739  m_evt_handler->check_trailing_doc_token();
7740  _maybe_begin_doc();
7741  add_flags(RDOC);
7742  m_doc_empty = false;
7743  ScannedBlock sb;
7744  _scan_block(&sb, startindent);
7745  if(C4_LIKELY(!_maybe_scan_following_colon()))
7746  {
7747  _c4dbgp("runk: set as val");
7748  _handle_annotations_before_blck_val_scalar();
7749  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7750  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7751  }
7752  else
7753  {
7754  _c4err("block folded keys must be enclosed in '?'");
7755  }
7756  }
7757  else if(_scan_scalar_plain_unk(&sc))
7758  {
7759  _c4dbgp("runk: got a plain scalar");
7760  m_evt_handler->check_trailing_doc_token();
7761  _maybe_begin_doc();
7762  add_flags(RDOC);
7763  m_doc_empty = false;
7764  if(!_maybe_scan_following_colon())
7765  {
7766  _c4dbgp("runk: set as val");
7767  _handle_annotations_before_blck_val_scalar();
7768  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7769  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7770  }
7771  else
7772  {
7773  _c4dbgp("runk: start new block map, set scalar as key");
7774  _handle_annotations_before_start_mapblck(startline);
7775  _handle_colon();
7776  m_evt_handler->begin_map_val_block();
7777  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7778  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7779  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7780  _maybe_skip_whitespace_tokens();
7781  _set_indentation(startindent);
7782  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7783  }
7784  }
7785  }
7786 }
7787 
7788 
7789 //-----------------------------------------------------------------------------
7790 
7791 template<class EventHandler>
7792 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7793 {
7794  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7795 
7796  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7797 
7798  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7799  if(has_any(RNXT))
7800  {
7801  _c4dbgp("usty[RNXT]: finishing!");
7802  _end_stream();
7803  }
7804  #endif
7805 
7806  _maybe_skip_comment();
7807  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7808  if(!rem.len)
7809  return;
7810 
7811  size_t pos = rem.first_not_of(" \t");
7812  if(pos)
7813  {
7814  pos = pos != npos ? pos : rem.len;
7815  _c4dbgpf("skipping indentation of {}", pos);
7816  _line_progressed(pos);
7817  rem = m_evt_handler->m_curr->line_contents.rem;
7818  if(!rem.len)
7819  return;
7820  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7821  }
7822 
7823  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7824  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7825  char first = rem.str[0];
7826  if(has_any(RSEQ)) // destination is a sequence
7827  {
7828  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7829  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7830  if(first == '[')
7831  {
7832  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7833  add_flags(RNXT);
7834  m_evt_handler->_push();
7835  addrem_flags(FLOW|RVAL, RNXT|USTY);
7836  _set_indentation(startindent);
7837  _line_progressed(1);
7838  _maybe_skip_whitespace_tokens();
7839  }
7840  else if(first == '-' && _is_blck_token(rem))
7841  {
7842  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7843  add_flags(RNXT);
7844  m_evt_handler->_push();
7845  addrem_flags(BLCK|RVAL, RNXT|USTY);
7846  _set_indentation(startindent);
7847  _line_progressed(1);
7848  _maybe_skip_whitespace_tokens();
7849  }
7850  else
7851  {
7852  _c4err("can only parse a seq into an existing seq");
7853  }
7854  }
7855  else if(has_any(RMAP)) // destination is a map
7856  {
7857  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7858  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7859  if(first == '{')
7860  {
7861  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7862  add_flags(RNXT);
7863  _handle_annotations_before_blck_val_scalar();
7864  m_evt_handler->_push();
7865  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7866  _set_indentation(startindent);
7867  _line_progressed(1);
7868  _maybe_skip_whitespace_tokens();
7869  }
7870  else if(first == '?' && _is_blck_token(rem))
7871  {
7872  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7873  add_flags(RNXT);
7874  _handle_annotations_before_blck_val_scalar();
7875  m_evt_handler->_push();
7876  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7877  m_was_inside_qmrk = true;
7878  _save_indentation();
7879  _line_progressed(1);
7880  _maybe_skip_whitespace_tokens();
7881  }
7882  else if(first == ':' && _is_blck_token(rem))
7883  {
7884  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7885  add_flags(RNXT);
7886  _handle_annotations_before_blck_val_scalar();
7887  m_evt_handler->_push();
7888  m_evt_handler->set_key_scalar_plain_empty();
7889  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7890  _save_indentation();
7891  _line_progressed(1);
7892  _maybe_skip_whitespace_tokens();
7893  }
7894  else if(rem.begins_with('&'))
7895  {
7896  csubstr anchor = _scan_anchor();
7897  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7898  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7899  const size_t line = m_evt_handler->m_curr->pos.line;
7900  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7901  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7902  }
7903  else if(first == '*')
7904  {
7905  csubstr ref = _scan_ref_map();
7906  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7907  if(!_maybe_scan_following_colon())
7908  {
7909  _c4err("cannot read a VAL to a map");
7910  }
7911  else
7912  {
7913  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7914  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7915  add_flags(RNXT);
7916  _handle_annotations_before_start_mapblck(startline);
7917  m_evt_handler->_push();
7918  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7919  m_evt_handler->set_key_ref(ref);
7920  _maybe_skip_whitespace_tokens();
7921  _set_indentation(startindent);
7922  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7923  }
7924  }
7925  else if(first == '!')
7926  {
7927  csubstr tag = _scan_tag();
7928  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7929  // we need to buffer the tags, as there may be two
7930  // consecutive tags in here
7931  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7932  const size_t line = m_evt_handler->m_curr->pos.line;
7933  _add_annotation(&m_pending_tags, tag, indentation, line);
7934  }
7935  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7936  {
7937  _c4err("cannot parse a seq into an existing map");
7938  }
7939  else
7940  {
7941  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7942  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7943  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7944  ScannedScalar sc;
7945  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7946  if(first == '\'')
7947  {
7948  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7949  sc = _scan_scalar_squot();
7950  if(!_maybe_scan_following_colon())
7951  {
7952  _c4err("cannot read a VAL to a map");
7953  }
7954  else
7955  {
7956  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7957  add_flags(RNXT);
7958  _handle_annotations_before_start_mapblck(startline);
7959  m_evt_handler->_push();
7960  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7961  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7962  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7963  _set_indentation(startindent);
7964  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7965  _maybe_skip_whitespace_tokens();
7966  }
7967  }
7968  else if(first == '"')
7969  {
7970  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7971  sc = _scan_scalar_dquot();
7972  if(!_maybe_scan_following_colon())
7973  {
7974  _c4err("cannot read a VAL to a map");
7975  }
7976  else
7977  {
7978  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7979  add_flags(RNXT);
7980  _handle_annotations_before_start_mapblck(startline);
7981  m_evt_handler->_push();
7982  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7983  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7984  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7985  _set_indentation(startindent);
7986  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7987  _maybe_skip_whitespace_tokens();
7988  }
7989  }
7990  else if(first == '|')
7991  {
7992  _c4err("block literal keys must be enclosed in '?'");
7993  }
7994  else if(first == '>')
7995  {
7996  _c4err("block literal keys must be enclosed in '?'");
7997  }
7998  else if(_scan_scalar_plain_unk(&sc))
7999  {
8000  _c4dbgp("usty[RMAP]: got a plain scalar");
8001  if(!_maybe_scan_following_colon())
8002  {
8003  _c4err("cannot read a VAL to a map");
8004  }
8005  else
8006  {
8007  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8008  add_flags(RNXT);
8009  _handle_annotations_before_start_mapblck(startline);
8010  m_evt_handler->_push();
8011  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8012  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8013  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8014  _set_indentation(startindent);
8015  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8016  _maybe_skip_whitespace_tokens();
8017  }
8018  }
8019  else
8020  {
8021  _c4err("parse error");
8022  }
8023  }
8024  }
8025  else // destination is unknown
8026  {
8027  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
8028  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8029  if(first == '[')
8030  {
8031  _c4dbgp("usty[UNK]: it's a flow seq");
8032  add_flags(RNXT);
8033  _handle_annotations_before_blck_val_scalar();
8034  m_evt_handler->begin_seq_val_flow();
8035  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
8036  _set_indentation(startindent);
8037  _line_progressed(1);
8038  _maybe_skip_whitespace_tokens();
8039  }
8040  else if(first == '-' && _is_blck_token(rem))
8041  {
8042  _c4dbgp("usty[UNK]: it's a block seq");
8043  add_flags(RNXT);
8044  _handle_annotations_before_blck_val_scalar();
8045  m_evt_handler->begin_seq_val_block();
8046  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
8047  _set_indentation(startindent);
8048  _line_progressed(1);
8049  _maybe_skip_whitespace_tokens();
8050  }
8051  else if(first == '{')
8052  {
8053  _c4dbgp("usty[UNK]: it's a flow map");
8054  add_flags(RNXT);
8055  _handle_annotations_before_blck_val_scalar();
8056  m_evt_handler->begin_map_val_flow();
8057  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
8058  _set_indentation(startindent);
8059  _line_progressed(1);
8060  _maybe_skip_whitespace_tokens();
8061  }
8062  else if(first == '?' && _is_blck_token(rem))
8063  {
8064  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8065  add_flags(RNXT);
8066  _handle_annotations_before_blck_val_scalar();
8067  m_evt_handler->begin_map_val_block();
8068  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
8069  m_was_inside_qmrk = true;
8070  _save_indentation();
8071  _line_progressed(1);
8072  _maybe_skip_whitespace_tokens();
8073  }
8074  else if(first == ':' && _is_blck_token(rem))
8075  {
8076  _c4dbgp("usty[UNK]: it's a map with an empty key");
8077  add_flags(RNXT);
8078  _handle_annotations_before_blck_val_scalar();
8079  m_evt_handler->begin_map_val_block();
8080  m_evt_handler->set_key_scalar_plain_empty();
8081  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8082  _save_indentation();
8083  _line_progressed(1);
8084  _maybe_skip_whitespace_tokens();
8085  }
8086  else if(first == '&')
8087  {
8088  csubstr anchor = _scan_anchor();
8089  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8090  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8091  const size_t line = m_evt_handler->m_curr->pos.line;
8092  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8093  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8094  }
8095  else if(first == '*')
8096  {
8097  csubstr ref = _scan_ref_map();
8098  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8099  if(!_maybe_scan_following_colon())
8100  {
8101  _c4dbgp("usty[UNK]: set val ref");
8102  _handle_annotations_before_blck_val_scalar();
8103  m_evt_handler->set_val_ref(ref);
8104  }
8105  else
8106  {
8107  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8108  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8109  add_flags(RNXT);
8110  _handle_annotations_before_start_mapblck(startline);
8111  m_evt_handler->begin_map_val_block();
8112  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8113  m_evt_handler->set_key_ref(ref);
8114  _maybe_skip_whitespace_tokens();
8115  _set_indentation(startindent);
8116  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8117  }
8118  }
8119  else if(first == '!')
8120  {
8121  csubstr tag = _scan_tag();
8122  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8123  // we need to buffer the tags, as there may be two
8124  // consecutive tags in here
8125  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8126  const size_t line = m_evt_handler->m_curr->pos.line;
8127  _add_annotation(&m_pending_tags, tag, indentation, line);
8128  }
8129  else
8130  {
8131  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
8132  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8133  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8134  first = rem.str[0];
8135  ScannedScalar sc;
8136  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8137  if(first == '\'')
8138  {
8139  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8140  sc = _scan_scalar_squot();
8141  if(!_maybe_scan_following_colon())
8142  {
8143  _c4dbgp("usty[UNK]: set as val");
8144  _handle_annotations_before_blck_val_scalar();
8145  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8146  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8147  _end_stream();
8148  }
8149  else
8150  {
8151  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8152  add_flags(RNXT);
8153  _handle_annotations_before_start_mapblck(startline);
8154  m_evt_handler->begin_map_val_block();
8155  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8156  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8157  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8158  _set_indentation(startindent);
8159  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8160  _maybe_skip_whitespace_tokens();
8161  }
8162  }
8163  else if(first == '"')
8164  {
8165  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8166  sc = _scan_scalar_dquot();
8167  if(!_maybe_scan_following_colon())
8168  {
8169  _c4dbgp("usty[UNK]: set as val");
8170  _handle_annotations_before_blck_val_scalar();
8171  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8172  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8173  _end_stream();
8174  }
8175  else
8176  {
8177  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8178  add_flags(RNXT);
8179  _handle_annotations_before_start_mapblck(startline);
8180  m_evt_handler->begin_map_val_block();
8181  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8182  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8183  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8184  _set_indentation(startindent);
8185  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8186  _maybe_skip_whitespace_tokens();
8187  }
8188  }
8189  else if(first == '|')
8190  {
8191  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8192  ScannedBlock sb;
8193  _scan_block(&sb, startindent);
8194  _c4dbgp("usty[UNK]: set as val");
8195  _handle_annotations_before_blck_val_scalar();
8196  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8197  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8198  _end_stream();
8199  }
8200  else if(first == '>')
8201  {
8202  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8203  ScannedBlock sb;
8204  _scan_block(&sb, startindent);
8205  _c4dbgp("usty[UNK]: set as val");
8206  _handle_annotations_before_blck_val_scalar();
8207  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8208  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8209  _end_stream();
8210  }
8211  else if(_scan_scalar_plain_unk(&sc))
8212  {
8213  _c4dbgp("usty[UNK]: got a plain scalar");
8214  if(!_maybe_scan_following_colon())
8215  {
8216  _c4dbgp("usty[UNK]: set as val");
8217  _handle_annotations_before_blck_val_scalar();
8218  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8219  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8220  _end_stream();
8221  }
8222  else
8223  {
8224  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8225  add_flags(RNXT);
8226  _handle_annotations_before_start_mapblck(startline);
8227  m_evt_handler->begin_map_val_block();
8228  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8229  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8230  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8231  _set_indentation(startindent);
8232  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8233  _maybe_skip_whitespace_tokens();
8234  }
8235  }
8236  else
8237  {
8238  _c4err("parse error");
8239  }
8240  }
8241  }
8242 }
8243 
8244 
8245 //-----------------------------------------------------------------------------
8246 
8247 template<class EventHandler>
8248 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8249 {
8250  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8251  m_file = filename;
8252  m_buf = src;
8253  _reset();
8254  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8255  m_evt_handler->begin_stream();
8256  while( ! _finished_file())
8257  {
8258  _scan_line();
8259  while( ! _finished_line())
8260  {
8261  _c4dbgnextline();
8262  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8263  if(has_any(RSEQ))
8264  {
8265  _handle_seq_json();
8266  }
8267  else if(has_any(RMAP))
8268  {
8269  _handle_map_json();
8270  }
8271  else if(has_any(RUNK))
8272  {
8273  _handle_unk_json();
8274  }
8275  else
8276  {
8277  _c4err("internal error");
8278  }
8279  }
8280  if(_finished_file())
8281  break; // it may have finished because of multiline blocks
8282  _line_ended();
8283  }
8284  _end_stream();
8285  m_evt_handler->finish_parse();
8286 }
8287 
8288 
8289 //-----------------------------------------------------------------------------
8290 
8291 template<class EventHandler>
8292 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8293 {
8294  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8295  m_file = filename;
8296  m_buf = src;
8297  _reset();
8298  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8299  m_evt_handler->begin_stream();
8300  while( ! _finished_file())
8301  {
8302  _scan_line();
8303  while( ! _finished_line())
8304  {
8305  _c4dbgnextline();
8306  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8307  if(has_any(FLOW))
8308  {
8309  if(has_none(RSEQIMAP))
8310  {
8311  if(has_any(RSEQ))
8312  {
8313  _handle_seq_flow();
8314  }
8315  else
8316  {
8317  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8318  _handle_map_flow();
8319  }
8320  }
8321  else
8322  {
8323  _handle_seq_imap();
8324  }
8325  }
8326  else if(has_any(BLCK))
8327  {
8328  if(has_any(RSEQ))
8329  {
8330  _handle_seq_block();
8331  }
8332  else
8333  {
8334  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8335  _handle_map_block();
8336  }
8337  }
8338  else if(has_any(RUNK))
8339  {
8340  _handle_unk();
8341  }
8342  else if(has_any(USTY))
8343  {
8344  _handle_usty();
8345  }
8346  else
8347  {
8348  _c4err("internal error");
8349  }
8350  }
8351  if(_finished_file())
8352  break; // it may have finished because of multiline blocks
8353  _line_ended();
8354  }
8355  _end_stream();
8356  m_evt_handler->finish_parse();
8357 }
8358 /** @endcond */
8359 
8360 } // namespace yml
8361 } // namespace c4
8362 
8363 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
8364 
8365 #undef _c4dbgnextline
8366 
8367 #if defined(_MSC_VER)
8368 # pragma warning(pop)
8369 #elif defined(__clang__)
8370 # pragma clang diagnostic pop
8371 #elif defined(__GNUC__)
8372 # pragma GCC diagnostic pop
8373 #endif
8374 
8375 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:24
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:49
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1547
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read an hexadecimal integer from a string.
Definition: charconv.hpp:890
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ npos
a null string position
Definition: common.hpp:267
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
Definition: parse.cpp:132
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
Encoding_e
Definition: common.hpp:427
@ UTF16BE
Definition: common.hpp:431
@ UTF8
Definition: common.hpp:429
@ UTF16LE
Definition: common.hpp:430
@ NOBOM
Definition: common.hpp:428
@ UTF32BE
Definition: common.hpp:433
@ UTF32LE
Definition: common.hpp:432
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark