rapidyaml  0.11.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 
9 #include <ctype.h>
10 
11 #include "c4/yml/detail/dbgprint.hpp"
// _c4err(...): error-reporting shorthand for use inside member functions.
// It forwards the current C++ source location (RYML_LOC_HERE()) together
// with the caller's arguments to this->_err(). When RYML_DBG is defined,
// the macro additionally invokes RYML_DEBUG_BREAK() before reporting, and
// the debug-only dump/print headers are included.
13 #ifdef RYML_DBG
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
16 #define _c4err(...) \
17  do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
18 #else
19 #define _c4err(...) \
20  this->_err(RYML_LOC_HERE(), __VA_ARGS__)
21 #endif
22 
23 
// Compile-time selection helpers keyed on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(x) expands to x only when the option is defined,
//  - _RYML_WITHOUT_TAB_TOKENS(x) expands to x only when it is not defined,
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to the first
//    argument when the option is defined, and to the second otherwise.
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
28 #else
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
32 #endif
33 
34 
// _c4dbgnextline(): debug trace helper. Emits a separator through _c4dbgq()
// and then, through _c4dbgt(), the line number and byte offset stored in
// m_evt_handler->m_curr->pos. Both _c4dbgq and _c4dbgt are defined outside
// of this file.
35 // scaffold:
36 #define _c4dbgnextline() \
37  do { \
38  _c4dbgq("\n-----------"); \
39  _c4dbgt("handling line={}, offset={}B", \
40  m_evt_handler->m_curr->pos.line, \
41  m_evt_handler->m_curr->pos.offset); \
42  } while(0)
43 
44 
45 #if defined(_MSC_VER)
46 # pragma warning(push)
47 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
48 # pragma warning(disable: 4702/*unreachable code*/)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
59 # if __GNUC__ >= 7
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
61 # endif
62 #endif
63 
64 // NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
65 
66 namespace c4 {
67 namespace yml {
68 
69 namespace { // NOLINT
70 
71 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
72 {
73  _RYML_ASSERT_BASIC(s.len > 0);
74  _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
75  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
76 }
77 
78 inline bool _is_doc_begin_token(csubstr s)
79 {
80  _RYML_ASSERT_BASIC(s.begins_with('-'));
81  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
82  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
83  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
84  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
85 }
86 
87 inline bool _is_doc_end_token(csubstr s)
88 {
89  _RYML_ASSERT_BASIC(s.begins_with('.'));
90  _RYML_ASSERT_BASIC(!s.ends_with("\n"));
91  _RYML_ASSERT_BASIC(!s.ends_with("\r"));
92  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
93  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
94 }
95 
// Tell whether s starts with a document marker: either "---" or "...",
// which must either end the string or be followed by a space (or by a tab
// when tab tokens are enabled). Strings shorter than 3 characters, or
// starting with any other character, yield false.
// The marker checks are written out inline on purpose; see the note below
// before refactoring this function.
96 inline bool _is_doc_token(csubstr s) noexcept
97 {
98  //
99  // NOTE: this function was failing under some scenarios when
100  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
101  // related to optimizer assumptions on the input string and
102  // possibly caused from UB around assignment to that string (the
103  // call site was in _scan_block()). For more details see:
104  //
105  // https://github.com/biojppm/rapidyaml/issues/440
106  //
107  // The current version does not suffer this problem, but it may
108  // appear again.
109  //
110  //
111  // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
112  // (but not any other optimization level, nor any other compiler
113  // or version), because the assignment to s is being hoisted out
114  // of the loop which calls this function. Then the length doesn't
115  // enter the s.len >= 3 when it should. Adding a
116  // C4_DONT_OPTIMIZE(var) makes the problem go away.
117  //
118  if(s.len >= 3)
119  {
120  switch(s.str[0])
121  {
122  case '-':
123  //return _is_doc_begin_token(s); // this was failing with gcc -O2
124  return (s.str[1] == '-' && s.str[2] == '-')
125  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
126  case '.':
127  //return _is_doc_end_token(s); // this was failing with gcc -O2
128  return (s.str[1] == '.' && s.str[2] == '.')
129  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
130  }
131  }
132  return false;
133 }
134 
135 inline size_t _is_special_json_scalar(csubstr s)
136 {
137  _RYML_ASSERT_BASIC(s.len);
138  switch(s.str[0])
139  {
140  case 'f':
141  if(s.len >= 5 && s.begins_with("false"))
142  return 5u;
143  break;
144  case 't':
145  if(s.len >= 4 && s.begins_with("true"))
146  return 4u;
147  break;
148  case 'n':
149  if(s.len >= 4 && s.begins_with("null"))
150  return 4u;
151  break;
152  }
153  return 0u;
154 }
155 
156 
157 //-----------------------------------------------------------------------------
158 
159 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
160 {
161  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
162 }
163 
164 //! look for the next newline chars, and jump to the right of those
165 inline substr from_next_line(substr rem)
166 {
167  size_t nlpos = rem.first_of("\r\n");
168  if(nlpos == csubstr::npos)
169  return {};
170  const char nl = rem[nlpos];
171  rem = rem.right_of(nlpos);
172  if(rem.empty())
173  return {};
174  if(_extend_from_combined_newline(nl, rem.front()))
175  rem = rem.sub(1);
176  return rem;
177 }
178 
179 
180 //-----------------------------------------------------------------------------
181 
182 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
183 {
184  _RYML_ASSERT_BASIC(r[*i] == '\n');
185  size_t numnl_following = 0;
186  ++(*i);
187  for( ; *i < r.len; ++(*i))
188  {
189  if(r.str[*i] == '\n')
190  ++numnl_following;
191  // skip leading whitespace
192  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
193  ;
194  else
195  break;
196  }
197  return numnl_following;
198 }
199 
// Overload taking the current indentation. The caller must pass *i at a
// '\n' (asserted). Starting from the next character, '\n' characters are
// counted while ' ', '\t' and '\r' are skipped; scanning stops at the first
// other character or at the end of r.
200 /** @p i is set to the first non whitespace character after the line
201  * @return the number of empty lines after the initial position */
202 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
203 {
204  _RYML_ASSERT_BASIC(r[*i] == '\n');
205  size_t numnl_following = 0;
206  ++(*i);
207  if(indentation == 0)
208  {
209  for( ; *i < r.len; ++(*i))
210  {
211  if(r.str[*i] == '\n')
212  ++numnl_following;
213  // skip leading whitespace
214  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
215  ;
216  else
217  break;
218  }
219  }
220  else
221  {
222  for( ; *i < r.len; ++(*i))
223  {
224  if(r.str[*i] == '\n')
225  {
226  ++numnl_following;
227  // skip the indentation after the newline
// NOTE(review): when the loop below is entered, *i still indexes the '\n'
// matched just above, which is neither ' ' nor '\r'; the loop therefore
// breaks on its first iteration and skips nothing. The indentation is in
// practice consumed by the whitespace branch of the outer loop. Confirm
// whether skipping from *i + 1 was intended before changing this.
228  size_t stop = *i + indentation;
229  for( ; *i < r.len; ++(*i))
230  {
231  if(r.str[*i] != ' ' && r.str[*i] != '\r')
232  break;
233  _RYML_ASSERT_BASIC(*i < stop);
234  }
235  C4_UNUSED(stop);
236  }
237  // skip leading whitespace
238  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
239  ;
240  else
241  break;
242  }
243  }
244  return numnl_following;
245 }
246 
247 } // anon namespace
248 
249 
250 //-----------------------------------------------------------------------------
251 //-----------------------------------------------------------------------------
252 //-----------------------------------------------------------------------------
253 
// NOTE(review): the declarator line (listing line 255) is missing from this
// extract; presumably this is the ParseEngine destructor -- confirm against
// the original file.
// The body calls _free(), which releases the newline-offsets array, and
// then _clr().
254 template<class EventHandler>
256 {
257  _free();
258  _clr();
259 }
260 
// NOTE(review): the declarator line (listing line 262) is missing from this
// extract; presumably this is the constructor receiving `evt_handler` and
// `opts` -- confirm against the original file.
// Stores the options and the event handler, starts every other member
// listed here empty/zero (no pending anchors or tags, no newline offsets),
// and checks via _RYML_CHECK_BASIC that evt_handler is not null.
261 template<class EventHandler>
263  : m_options(opts)
264  , m_file()
265  , m_buf()
266  , m_evt_handler(evt_handler)
267  , m_pending_anchors()
268  , m_pending_tags()
269  , m_was_inside_qmrk(false)
270  , m_doc_empty(false)
271  , m_prev_colon(npos)
272  , m_encoding(NOBOM)
273  , m_newline_offsets()
274  , m_newline_offsets_size(0)
275  , m_newline_offsets_capacity(0)
276  , m_newline_offsets_buf()
277 {
278  _RYML_CHECK_BASIC(evt_handler);
279 }
280 
// NOTE(review): the declarator line (listing line 282) is missing from this
// extract; presumably this is the move constructor taking `that` -- confirm
// against the original file.
// Takes over the options, file name, buffer, event handler, pending
// anchors/tags and the newline-offsets array (pointer, size, capacity,
// buffer) of `that`, then calls that._clr() on the source object.
// NOTE(review): m_was_inside_qmrk, m_doc_empty, m_prev_colon and m_encoding
// are set to fixed values here instead of being taken from `that`, unlike
// the assignment bodies elsewhere in this file which copy them from `that`.
// Confirm this difference is intended.
281 template<class EventHandler>
283  : m_options(that.m_options)
284  , m_file(that.m_file)
285  , m_buf(that.m_buf)
286  , m_evt_handler(that.m_evt_handler)
287  , m_pending_anchors(that.m_pending_anchors)
288  , m_pending_tags(that.m_pending_tags)
289  , m_was_inside_qmrk(false)
290  , m_doc_empty(false)
291  , m_prev_colon(npos)
292  , m_encoding(NOBOM)
293  , m_newline_offsets(that.m_newline_offsets)
294  , m_newline_offsets_size(that.m_newline_offsets_size)
295  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296  , m_newline_offsets_buf(that.m_newline_offsets_buf)
297 {
298  that._clr();
299 }
300 
// NOTE(review): the declarator line (listing line 302) is missing from this
// extract; presumably this is the copy constructor taking `that` -- confirm
// against the original file.
// Copies the options, file name, buffer, event handler pointer and pending
// anchors/tags from `that`. The newline-offsets array is not shared: when
// `that` has capacity, a new array of the same capacity is obtained through
// _resize_locations() and the first that.m_newline_offsets_size entries are
// copied into it.
// NOTE(review): m_newline_offsets_buf is left default-initialized here,
// whereas the copy-style assignment elsewhere in this file copies it from
// `that`. Confirm which is intended.
301 template<class EventHandler>
303  : m_options(that.m_options)
304  , m_file(that.m_file)
305  , m_buf(that.m_buf)
306  , m_evt_handler(that.m_evt_handler)
307  , m_pending_anchors(that.m_pending_anchors)
308  , m_pending_tags(that.m_pending_tags)
309  , m_was_inside_qmrk(false)
310  , m_doc_empty(false)
311  , m_prev_colon(npos)
312  , m_encoding(NOBOM)
313  , m_newline_offsets()
314  , m_newline_offsets_size()
315  , m_newline_offsets_capacity()
316  , m_newline_offsets_buf()
317 {
318  if(that.m_newline_offsets_capacity)
319  {
320  _resize_locations(that.m_newline_offsets_capacity);
321  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
322  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
323  m_newline_offsets_size = that.m_newline_offsets_size;
324  }
325 }
326 
// NOTE(review): the declarator line (listing line 328) is missing from this
// extract; presumably this is the move assignment operator taking `that`
// -- confirm against the original file.
// Releases this object's newline-offsets array with _free(), takes over
// every member of `that` (including the newline-offsets pointer), calls
// that._clr() on the source object and returns *this.
// NOTE(review): there is no `&that != this` guard here, unlike the other
// assignment body in this file; on self-assignment the array is freed and
// the members are then cleared.
327 template<class EventHandler>
329 {
330  _free();
331  m_options = (that.m_options);
332  m_file = (that.m_file);
333  m_buf = (that.m_buf);
334  m_evt_handler = that.m_evt_handler;
335  m_pending_anchors = that.m_pending_anchors;
336  m_pending_tags = that.m_pending_tags;
337  m_was_inside_qmrk = that.m_was_inside_qmrk;
338  m_doc_empty = that.m_doc_empty;
339  m_prev_colon = that.m_prev_colon;
340  m_encoding = that.m_encoding;
341  m_newline_offsets = (that.m_newline_offsets);
342  m_newline_offsets_size = (that.m_newline_offsets_size);
343  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344  m_newline_offsets_buf = (that.m_newline_offsets_buf);
345  that._clr();
346  return *this;
347 }
348 
// NOTE(review): the declarator line (listing line 350) is missing from this
// extract; presumably this is the copy assignment operator taking `that`
// -- confirm against the original file.
// Does nothing on self-assignment. Otherwise releases this object's
// newline-offsets array with _free() (using the callbacks of the current
// event handler, before it is replaced), copies the scalar members from
// `that`, grows the newline-offsets array through _resize_locations() when
// `that` has more capacity, copies the first that.m_newline_offsets_size
// entries, and returns *this.
349 template<class EventHandler>
351 {
352  if(&that != this)
353  {
354  _free();
355  m_options = (that.m_options);
356  m_file = (that.m_file);
357  m_buf = (that.m_buf);
358  m_evt_handler = that.m_evt_handler;
359  m_pending_anchors = that.m_pending_anchors;
360  m_pending_tags = that.m_pending_tags;
361  m_was_inside_qmrk = that.m_was_inside_qmrk;
362  m_doc_empty = that.m_doc_empty;
363  m_prev_colon = that.m_prev_colon;
364  m_encoding = that.m_encoding;
365  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366  _resize_locations(that.m_newline_offsets_capacity);
367  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
369  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
370  m_newline_offsets_size = that.m_newline_offsets_size;
371  m_newline_offsets_buf = that.m_newline_offsets_buf;
372  }
373  return *this;
374 }
375 
// NOTE(review): the declarator line (listing line 377) is missing from this
// extract; presumably this is ParseEngine<EventHandler>::_clr() -- confirm
// against the original file.
// Resets the members listed below to their empty state. Nothing is
// deallocated here: the newline-offsets pointer is simply overwritten, so
// any array it refers to must have been released or handed over beforehand.
// Note that m_doc_empty is set to true here.
376 template<class EventHandler>
378 {
379  m_options = {};
380  m_file = {};
381  m_buf = {};
382  m_evt_handler = {};
383  m_pending_anchors = {};
384  m_pending_tags = {};
385  m_was_inside_qmrk = false;
386  m_doc_empty = true;
387  m_prev_colon = npos;
388  m_encoding = NOBOM;
389  m_newline_offsets = {};
390  m_newline_offsets_size = {};
391  m_newline_offsets_capacity = {};
392  m_newline_offsets_buf = {};
393 }
394 
395 template<class EventHandler>
396 void ParseEngine<EventHandler>::_free()
397 {
398  if(m_newline_offsets)
399  {
400  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
401  m_newline_offsets = nullptr;
402  m_newline_offsets_size = 0u;
403  m_newline_offsets_capacity = 0u;
404  m_newline_offsets_buf = nullptr;
405  }
406 }
407 
408 
409 //-----------------------------------------------------------------------------
410 
411 template<class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
413 {
414  m_pending_anchors = {};
415  m_pending_tags = {};
416  m_doc_empty = true;
417  m_was_inside_qmrk = false;
418  m_prev_colon = npos;
419  m_bom_len = 0;
420  m_encoding = NOBOM;
421  m_bom_line = 0;
422  if(m_options.locations())
423  {
424  _prepare_locations();
425  }
426 }
427 
428 
429 //-----------------------------------------------------------------------------
430 
431 template<class EventHandler>
432 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
433 {
434  #define _ryml_relocate(s) \
435  if((s).is_sub(prev_arena)) \
436  { \
437  (s).str = next_arena.str + ((s).str - prev_arena.str); \
438  }
439  _ryml_relocate(m_buf);
440  _ryml_relocate(m_newline_offsets_buf);
441  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
442  _ryml_relocate(m_pending_tags.annotations[i].str);
443  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
444  _ryml_relocate(m_pending_anchors.annotations[i].str);
445  #undef _ryml_relocate
446 }
447 
448 template<class EventHandler>
449 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
450 {
451  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
452 }
453 
454 
455 //-----------------------------------------------------------------------------
456 
// Debug-only (RYML_DBG) helper which dumps the parser's current position
// through dumpfn:
//  1. when the current line is not empty: an optional "file:" prefix, the
//     "line:col: " position and the first 80 characters of the line (with
//     "..." appended when the line is 80 characters or longer), followed
//     by its size;
//  2. a marker line with '^' under the first column of the unparsed
//     remainder (lc.rem) and '~' under the following columns, capped at 80
//     characters, followed by the 1-based column range;
//  3. the flags of the current state, formatted into a local 128-byte
//     buffer by detail::_parser_flags_to_str().
// When the current line is empty, only a newline and the state flags are
// dumped.
457 #ifdef RYML_DBG
458 template<class EventHandler>
459 template<class DumpFn>
460 C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
461 {
462  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
463  auto const& lc = st->line_contents;
464  csubstr contents = lc.full.first(lc.num_cols);
465  if(contents.len)
466  {
467  // print the yaml src line
// offs: width of the "line:col: " prefix (3 separator characters plus the
// digits of both numbers); to_chars() on an empty substr is used here only
// for the length it returns
468  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
469  if(m_file.len)
470  {
471  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
472  offs += m_file.len + 1;
473  }
474  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
475  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
476  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
477  _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
478  // highlight the remaining portion of the previous line
479  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
480  size_t lastcol = firstcol + lc.rem.len;
// pad up to the column where the remainder starts
481  for(size_t i = 0; i < offs + firstcol; ++i)
482  std::forward<DumpFn>(dumpfn)(" ");
483  std::forward<DumpFn>(dumpfn)("^");
484  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
485  std::forward<DumpFn>(dumpfn)("~");
486  _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
487  }
488  else
489  {
490  std::forward<DumpFn>(dumpfn)("\n");
491  }
492  // next line: print the state flags
493  {
494  char flagbuf_[128];
495  _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
496  }
497 }
498 #endif
499 
500 
501 //-----------------------------------------------------------------------------
502 
503 template<class EventHandler>
504 template<class ...Args>
505 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
506 {
507  m_evt_handler->cancel_parse();
508  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
509 }
510 
511 template<class EventHandler>
512 template<class ...Args>
513 C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
514 {
515  m_evt_handler->cancel_parse();
516  err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
517 }
518 
519 
520 //-----------------------------------------------------------------------------
#ifdef RYML_DBG
/** Debug-only: when debug output is enabled, print the formatted
 * message followed by a newline and by the current parser position
 * (see _fmt_msg()). */
template<class EventHandler>
template<class ...Args>
void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
{
    if(!_dbg_enabled())
        return;
    _dbg_printf(fmt, args...);
    _dbg_dumper("\n");
    _fmt_msg(_dbg_dumper);
}
#endif
534 
535 
536 //-----------------------------------------------------------------------------
537 template<class EventHandler>
538 bool ParseEngine<EventHandler>::_finished_file() const
539 {
540  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
541  if(ret)
542  {
543  _c4dbgp("finished file!!!");
544  }
545  return ret;
546 }
547 
548 template<class EventHandler>
549 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
550 {
551  return m_evt_handler->m_curr->line_contents.rem.empty();
552 }
553 
554 
555 //-----------------------------------------------------------------------------
556 
557 template<class EventHandler>
558 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
559 {
560  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
561  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
562  {
563  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
564  if(pos == npos)
565  pos = rem.len; // maybe the line is just all whitespace
566  _c4dbgpf("skip {} whitespace characters", pos);
567  _line_progressed(pos);
568  }
569 }
570 
571 template<class EventHandler>
572 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
573 {
574  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575  if(rem.len && rem.str[0] == c)
576  {
577  size_t pos = rem.first_not_of(c);
578  if(pos == npos)
579  pos = rem.len; // maybe the line is just all c
580  _c4dbgpf("skip {}x'{}'", pos, c);
581  _line_progressed(pos);
582  }
583 }
584 
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
/** When the remainder of the current line starts with @p c, advance
 * past the run of consecutive @p c characters, but by no more than
 * @p max_to_skip characters. Does nothing otherwise. */
template<class EventHandler>
void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
{
    const csubstr remaining = m_evt_handler->m_curr->line_contents.rem;
    if(remaining.len == 0 || remaining.str[0] != c)
        return;
    size_t run = remaining.first_not_of(c);
    if(run == npos)
        run = remaining.len; // the rest of the line is made only of c
    if(run > max_to_skip)
        run = max_to_skip;
    _c4dbgpf("skip {}x'{}'", run, c);
    _line_progressed(run);
}
#endif
602 
603 template<class EventHandler>
604 template<size_t N>
605 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
606 {
607  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
608  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
609  if(pos == npos)
610  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
611  _c4dbgpf("skip {} characters", pos);
612  _line_progressed(pos);
613 }
614 
615 template<class EventHandler>
616 void ParseEngine<EventHandler>::_skip_comment()
617 {
618  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
619  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
620  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
621  csubstr line = m_evt_handler->m_curr->line_contents.full;
622  // raise an error if the comment is not preceded by whitespace
623  if(!line.begins_with('#'))
624  {
625  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str > line.str);
626  const char c = line[(size_t)(rem.str - line.str - 1)];
627  if(C4_UNLIKELY(c != ' ' && c != '\t'))
628  _c4err("comment not preceded by whitespace");
629  }
630  else
631  {
632  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str == line.str);
633  }
634  _c4dbgpf("comment was '{}'", rem);
635  _line_progressed(rem.len);
636 }
637 
638 template<class EventHandler>
639 void ParseEngine<EventHandler>::_maybe_skip_comment()
640 {
641  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
642  if(s.begins_with('#'))
643  {
644  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
645  _skip_comment();
646  }
647 }
648 
649 template<class EventHandler>
650 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
651 {
652  if(m_evt_handler->m_curr->line_contents.rem.len)
653  {
654  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
655  {
656  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
657  if(pos == npos)
658  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
659  _c4dbgpf("skip {}x'{}'", pos, ' ');
660  _line_progressed(pos);
661  }
662  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
663  {
664  _c4dbgp("found ':' colon next");
665  _line_progressed(1);
666  return true;
667  }
668  }
669  return false;
670 }
671 
672 template<class EventHandler>
673 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
674 {
675  if(m_evt_handler->m_curr->line_contents.rem.len)
676  {
677  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
678  {
679  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
680  if(pos == npos)
681  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
682  _c4dbgpf("skip {}x'{}'", pos, ' ');
683  _line_progressed(pos);
684  }
685  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
686  {
687  _c4dbgp("found ',' comma next");
688  _line_progressed(1);
689  return true;
690  }
691  }
692  return false;
693 }
694 
695 
696 //-----------------------------------------------------------------------------
697 
698 template<class EventHandler>
699 csubstr ParseEngine<EventHandler>::_scan_anchor()
700 {
701  csubstr s = m_evt_handler->m_curr->line_contents.rem;
702  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
703  csubstr anchor = s.range(1, s.first_of(' '));
704  _line_progressed(1u + anchor.len);
705  _maybe_skipchars(' ');
706  return anchor;
707 }
708 
709 template<class EventHandler>
710 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
711 {
712  csubstr s = m_evt_handler->m_curr->line_contents.rem;
713  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
714  csubstr ref = s.first(s.first_of(",] :"));
715  _line_progressed(ref.len);
716  return ref;
717 }
718 
719 template<class EventHandler>
720 csubstr ParseEngine<EventHandler>::_scan_ref_map()
721 {
722  csubstr s = m_evt_handler->m_curr->line_contents.rem;
723  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
724  csubstr ref = s.first(s.first_of(",} "));
725  _line_progressed(ref.len);
726  return ref;
727 }
728 
729 template<class EventHandler>
730 csubstr ParseEngine<EventHandler>::_scan_tag()
731 {
732  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
733  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
734  csubstr t;
735  if(rem.begins_with("!!"))
736  {
737  _c4dbgp("begins with '!!'");
738  if(has_any(RFLOW))
739  t = rem.left_of(rem.first_of(" ,"));
740  else
741  t = rem.left_of(rem.first_of(' '));
742  }
743  else if(rem.begins_with("!<"))
744  {
745  _c4dbgp("begins with '!<'");
746  t = rem.left_of(rem.first_of('>'), true);
747  }
748  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
749  else if(rem.begins_with("!h!"))
750  {
751  _c4dbgp("begins with '!h!'");
752  t = rem.left_of(rem.first_of(' '));
753  }
754  #endif
755  else
756  {
757  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
758  _c4dbgp("begins with '!'");
759  if(has_any(RFLOW))
760  t = rem.left_of(rem.first_of(" ,"));
761  else
762  t = rem.left_of(rem.first_of(' '));
763  }
764  _line_progressed(t.len);
765  _maybe_skip_whitespace_tokens();
766  return t;
767 }
768 
769 
770 //-----------------------------------------------------------------------------
771 
772 template<class EventHandler>
773 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
774 {
775  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.empty());
776 
777  // it's not a scalar if it starts with any of these characters:
778  switch(s.str[0])
779  {
780  // these are all legal tokens which mean no scalar is starting:
781  case '[':
782  case ']':
783  case '{':
784  case '}':
785  case '!':
786  case '&':
787  case '*':
788  case '|':
789  case '>':
790  case '#':
791  case ',':
792  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
793  return false;
794  // '-' and ':' are illegal at the beginning if not followed by a scalar character
795  case '-':
796  case ':':
797  if(s.len > 1)
798  {
799  switch(s.str[1])
800  {
801  case ' ':
802  case '\n':
803  case '}':
804  case ']':
805  case '\r':
806  _RYML_WITH_TAB_TOKENS(case '\t':)
807  if(s.str[0] == ':')
808  {
809  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
810  return false;
811  }
812  break;
813  case '{':
814  case '[':
815  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
816  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
817  break;
818  default:
819  break;
820  }
821  }
822  else
823  {
824  return false;
825  }
826  break;
827  case '?':
828  if(s.len > 1)
829  {
830  switch(s.str[1])
831  {
832  case ' ':
833  case '\n':
834  case '\r':
835  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
836  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
837  return false;
838  case '{':
839  case '}':
840  case '[':
841  case ']':
842  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
843  break;
844  default:
845  break;
846  }
847  }
848  else
849  {
850  return false;
851  }
852  break;
853  // everything else is a legal starting character
854  default:
855  break;
856  }
857 
858  return true;
859 }
860 
// Scan a plain (unquoted) scalar inside a flow sequence, starting at the
// current offset in m_buf. The scalar may span several lines.
//
// Returns false, leaving *sc untouched, when nothing is left in the buffer
// or when _is_valid_start_scalar_plain_flow() rejects the starting
// characters. Otherwise returns true after setting:
//  - sc->scalar: the scanned range, with trailing spaces (and tabs, when
//    tab tokens are enabled) trimmed
//  - sc->needs_filter: true when a '\n' or '\r' was crossed while scanning
//
// The scalar ends at ',' or ']', at a ':' followed by whitespace / ',' /
// newline / ']', at a '#' preceded by whitespace, at a newline whose next
// line starts (after whitespace) with ',', ']' or '#', or at the end of the
// buffer. An error is raised on '[', '{' or '}' inside the scalar, and on a
// ':' which is the last character of the buffer.
861 template<class EventHandler>
862 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
863 {
864  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
865  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
866  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
867  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
868  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
869 
// s covers the buffer from the current offset up to its end (not only the
// current line), so offs indexes into the buffer while col tracks the
// column within the line being scanned
870  substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset);
871  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
872  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
873 
874  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem));
875 
876  if(!s.len || !_is_valid_start_scalar_plain_flow(s))
877  return false;
878 
879  _c4dbgp("scanning seqflow scalar...");
880 
881  bool needs_filter = false;
882  size_t col = 0; // zero-based column
883  size_t offs = 0;
884  size_t offsp1;
885  for( ; offs < s.len; ++offs, ++col)
886  {
887  const char c = s.str[offs];
888  switch(c)
889  {
890  case ',':
891  case ']':
892  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
893  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
894  goto ended_scalar;
895  case '\n':
896  _c4dbgpf("found newline. offs={} col={}", offs, col);
897  offsp1 = offs + 1;
898  if(s.len > offsp1)
899  {
900  csubstr next_line = s.sub(offsp1).triml(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
901  if(next_line.begins_with_any(",]#")) // any of the characters we're interested in
902  {
903  _c4dbgpf("found terminating character beginning next line: '{}'", next_line.str[0]);
904  goto ended_scalar;
905  }
906  }
// the scalar continues on the next line: finish the current line and load
// the next one before going on
907  col = (size_t)-1; // so that col is 0 in the next loop iteration
908  needs_filter = true;
909  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
910  _line_ended();
911  _scan_line();
912  break;
913  case '\r':
// col is unsigned: when it is 0 this wraps around, and the loop's ++col
// brings it back to 0
914  --col; // don't count \r when calling _line_progressed()
915  needs_filter = true;
916  break;
917  case ':':
918  _c4dbgp("found suspicious ':'");
919  offsp1 = offs + 1;
920  if(s.len > offsp1)
921  {
922  char next = s.str[offsp1];
923  _c4dbgpf("next char is '{}'", _c4prc(next));
924  if(next == '\r')
925  {
926  csubstr after = s.sub(offsp1).triml('\r');
927  if(after.len)
928  {
929  next = after.str[0];
930  _c4dbgpf("skip \\r to '{}'", _c4prc(next));
931  }
932  }
933  // no else here.
934  if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
935  {
936  _c4dbgp("map starting!");
937  goto ended_scalar;
938  }
939  else
940  {
941  _c4dbgp("':' nothing to see here");
942  }
943  }
944  else
945  {
946  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.len == offsp1);
947  _line_progressed(col);
948  _c4err("missing termination: '{}'", c); // noreturn
949  }
950  break;
951  case '#':
952  {
953  _c4dbgp("found suspicious '#'");
954  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
955  char prev = s.str[offs - 1];
956  if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
957  {
958  _c4dbgpf("found terminating character at {}: '{}'", offs, c);
959  goto ended_scalar;
960  }
961  }
962  break;
963  case '[':
964  case '{':
965  case '}':
966  _line_progressed(col);
967  _c4err("invalid character: '{}'", c); // noreturn
968  default:
969  ;
970  }
971  }
972 
973 ended_scalar:
974 
// advance within the current line by the columns consumed on it; the scalar
// itself is the first offs characters of s
975  _line_progressed(col);
976  s = s.first(offs);
977  sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
978  sc->needs_filter = needs_filter;
979 
980  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
981 
982  return true;
983 }
984 
// Scan a plain (unquoted) scalar inside a flow map (or inside a map implied
// within a flow sequence, RSEQIMAP), starting at the remainder of the
// current line. The scalar may span several lines; scanning proceeds one
// line at a time.
//
// Returns false, leaving *sc untouched, when the remainder of the line is
// empty or when _is_valid_start_scalar_plain_flow() rejects the starting
// characters. Otherwise sets:
//  - sc->scalar: the buffer range from the starting offset to the offset
//    reached, with trailing spaces, newlines and carriage returns (and
//    tabs, when tab tokens are enabled) trimmed
//  - sc->needs_filter: true when the scan went past the first line
// and returns whether the resulting scalar is non-empty.
//
// The scalar ends at ',' or '}', at a ':' which is last in the line or is
// followed by ' ' / ',' / '}' (or tab, when enabled), at a '#' which is
// first in the line or is preceded by whitespace, at ']' when in RSEQIMAP,
// or at the end of the file. An error is raised on '{' or '[', and on ']'
// outside of RSEQIMAP.
985 template<class EventHandler>
986 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
987 {
988  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
989  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
990  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
991  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
992  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
993 
994  substr s = m_evt_handler->m_curr->line_contents.rem;
995  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
996 
997  if(!s.len)
998  return false;
999 
1000  if(!_is_valid_start_scalar_plain_flow(s))
1001  return false;
1002 
1003  _c4dbgp("scanning scalar...");
1004 
// the scalar is recovered at the end from the buffer offsets, so remember
// where it started
1005  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1006  bool needs_filter = false;
// each iteration of the outer loop scans one line
1007  while(true)
1008  {
1009  for(size_t i = 0; i < s.len; ++i)
1010  {
1011  const char c = s.str[i];
1012  switch(c)
1013  {
1014  case ',':
1015  case '}':
1016  _line_progressed(i);
1017  _c4dbgpf("found terminating character: '{}'", c);
1018  goto ended_scalar;
1019  case ':':
1020  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
1021  {
1022  _line_progressed(i);
1023  _c4dbgpf("found terminating character: '{}'", c);
1024  goto ended_scalar;
1025  }
1026  break;
1027  case '{':
1028  case '[':
1029  _line_progressed(i);
1030  _c4err("invalid character: '{}'", c); // noreturn
1031  break;
1032  case ']':
1033  _line_progressed(i);
1034  if(has_any(RSEQIMAP))
1035  goto ended_scalar;
1036  else
1037  _c4err("invalid character: '{}'", c); // noreturn
1038  break;
1039  case '#':
1040  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
1041  {
1042  _line_progressed(i);
1043  _c4dbgpf("found terminating character: '{}'", c);
1044  goto ended_scalar;
1045  }
1046  break;
1047  default:
1048  ;
1049  }
1050  }
// no terminator on this line: consume it entirely and move on to the next
// line, unless the file is finished
1051  _c4dbgp("next line!");
1052  _line_progressed(s.len);
1053  if(!_finished_file())
1054  {
1055  _c4dbgp("next line!");
1056  _line_ended();
1057  _scan_line();
1058  }
1059  else
1060  {
1061  _c4dbgp("file finished!");
1062  goto ended_scalar;
1063  }
1064  s = m_evt_handler->m_curr->line_contents.rem;
1065  needs_filter = true;
1066  }
1067 
1068 ended_scalar:
1069 
1070  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1071  sc->needs_filter = needs_filter;
1072 
1073  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1074 
1075  return sc->scalar.len > 0u;
1076 }
1077 
1078 template<class EventHandler>
1079 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1080 {
1081  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1082  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
1083  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1084  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
1085 
1086  substr s = m_evt_handler->m_curr->line_contents.rem;
1087  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1088 
1089  if(!s.len)
1090  return false;
1091 
1092  _c4dbgp("scanning scalar...");
1093 
1094  switch(s.str[0])
1095  {
1096  case ']':
1097  case '{':
1098  case ',':
1099  _c4dbgp("not a scalar.");
1100  return false;
1101  }
1102 
1103  {
1104  const size_t len = _is_special_json_scalar(s);
1105  if(len)
1106  {
1107  sc->scalar = s.first(len);
1108  sc->needs_filter = false;
1109  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1110  _line_progressed(len);
1111  return true;
1112  }
1113  }
1114 
1115  // must be a number
1116  size_t i = 0;
1117  for( ; i < s.len; ++i)
1118  {
1119  const char c = s.str[i];
1120  switch(c)
1121  {
1122  case ',':
1123  case ']':
1124  case ' ':
1125  case '\t':
1126  _c4dbgpf("found terminating character: '{}'", c);
1127  goto ended_scalar;
1128  case '#':
1129  if(!i || s.str[i-1] == ' ')
1130  {
1131  _c4dbgpf("found terminating character: '{}'", c);
1132  goto ended_scalar;
1133  }
1134  break;
1135  default:
1136  ;
1137  }
1138  }
1139 
1140 ended_scalar:
1141 
1142  if(C4_LIKELY(i > 0))
1143  {
1144  _line_progressed(i);
1145  sc->scalar = s.first(i);
1146  sc->needs_filter = false;
1147  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1148  return true;
1149  }
1150 
1151  return false;
1152 }
1153 
1154 template<class EventHandler>
1155 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1156 {
1157  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1158  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK));
1159  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1160  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW));
1161  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1162 
1163  substr s = m_evt_handler->m_curr->line_contents.rem;
1164  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1165 
1166  if(!s.len)
1167  return false;
1168 
1169  _c4dbgp("scanning scalar...");
1170 
1171  {
1172  const size_t len = _is_special_json_scalar(s);
1173  if(len)
1174  {
1175  sc->scalar = s.first(len);
1176  sc->needs_filter = false;
1177  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1178  _line_progressed(len);
1179  return true;
1180  }
1181  }
1182 
1183  // must be a number
1184  size_t i = 0;
1185  for( ; i < s.len; ++i)
1186  {
1187  const char c = s.str[i];
1188  switch(c)
1189  {
1190  case ',':
1191  case '}':
1192  case ' ':
1193  case '\t':
1194  _c4dbgpf("found terminating character: '{}'", c);
1195  goto ended_scalar;
1196  case '#':
1197  if(!i || s.str[i-1] == ' ')
1198  {
1199  _c4dbgpf("found terminating character: '{}'", c);
1200  goto ended_scalar;
1201  }
1202  break;
1203  default:
1204  ;
1205  }
1206  }
1207 
1208 ended_scalar:
1209 
1210  if(C4_LIKELY(i > 0))
1211  {
1212  _line_progressed(i);
1213  sc->scalar = s.first(i);
1214  sc->needs_filter = false;
1215  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1216  return true;
1217  }
1218 
1219  return false;
1220 }
1221 
1222 template<class EventHandler>
1223 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1224 {
1225  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1226  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1227 }
1228 
1229 template<class EventHandler>
1230 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1231 {
1232  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1233  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1234 }
1235 
// Scan a plain (unquoted) scalar in block context, starting at the
// remainder of the current line and possibly continuing over following
// lines.
//
// sc:          receives the scanned range of the source buffer (trimmed on
//              the right of spaces, tabs and newline characters) and the
//              needs_filter flag, which is set when the scan moved past the
//              first line.
// indentation: a following line with indentation smaller than this ends
//              the scalar.
//
// Returns false, without consuming anything, when the remainder is empty or
// starts with something that the leading switch rejects as a plain scalar
// start. Otherwise returns true.
1236 template<class EventHandler>
1237 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1238 {
1239  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1240  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1241  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY));
1242 
1243  substr s = m_evt_handler->m_curr->line_contents.rem;
1244  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1245 
1246  if(!s.len)
1247  return false;
1248 
// Reject starts that are not a plain scalar: block tokens, document
// markers, and the characters '[', '{', '&', '*', '!' (plus tab when tab
// tokens are enabled). Any other start falls through to the scan below.
1249  switch(s.str[0])
1250  {
1251  case '-':
1252  if(_is_blck_token(s))
1253  {
1254  return false;
1255  }
1256  else if(_is_doc_begin(s))
1257  {
1258  _c4dbgp("token is doc start");
1259  return false;
1260  }
1261  break;
1262  case ':':
1263  case '?':
1264  if(_is_blck_token(s))
1265  return false;
1266  break;
1267  case '[':
1268  case '{':
1269  case '&':
1270  case '*':
1271  case '!':
1272  _RYML_WITH_TAB_TOKENS(case '\t':)
1273  return false;
1274  case '.':
1275  if(_is_doc_end(s))
1276  {
1277  _c4dbgp("token is doc end");
1278  return false;
1279  }
1280  break;
1281  }
1282 
1283  _c4dbgpf("plain scalar! indentation={}", indentation);
1284 
// Remember where the scalar starts: the offset delimits the final range,
// and the line is used to tell the first line from continuation lines.
1285  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1286  const size_t start_line = m_evt_handler->m_curr->pos.line;
1287 
1288  bool needs_filter = false;
// One iteration per line of the scalar.
1289  while(true)
1290  {
1291  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1292  for(size_t i = 0; i < s.len; ++i)
1293  {
1294  const char curr = s.str[i];
1295  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1296  switch(curr)
1297  {
1298  case ':':
1299  _c4dbgpf("[{}]: got suspicious ':'", i);
1300  // are there more characters?
// The colon terminates the scalar when it is the last character of the
// line or is followed by a space (or a tab, when tab tokens are enabled).
1301  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1302  {
1303  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1304  _line_progressed(i);
1305  // ': ' is accepted only on the first line
1306  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1307  {
1308  _c4dbgp("start line. scalar ends here");
1309  goto ended_scalar;
1310  }
1311  else
1312  {
1313  _c4err("parse error");
1314  }
1315  }
1316  else
1317  {
// Run of consecutive colons: move i so that, after the loop increment, the
// last colon of the run is the next character examined. That colon then
// goes through the terminator check above.
1318  size_t j = i;
1319  while(j + 1 < s.len && s.str[j+1] == ':')
1320  {
1321  _c4dbgp("skip colon");
1322  ++j;
1323  }
1324  i = j > i ? j-1 : i;
1325  _c4dbgp("nothing to see here");
1326  }
1327  break;
1328  case '#':
1329  _c4dbgp("got suspicious '#'");
// A '#' starts a comment only at the start of the remainder or right after
// a space or tab; otherwise it is part of the scalar.
1330  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1331  {
1332  _c4dbgp("comment! scalar ends here");
1333  _line_progressed(i);
1334  goto ended_scalar;
1335  }
1336  else
1337  {
1338  _c4dbgp("nothing to see here");
1339  }
1340  break;
1341  }
1342  }
// No terminator on this line: consume it entirely, then peek at the next
// line (without advancing) to decide whether the scalar continues there.
1343  _line_progressed(s.len);
1344  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1345  next_peeked = next_peeked.trimr("\n\r");
// first_not_of() presumably yields npos for an empty or all-space line,
// which then compares as larger than any indentation -- TODO confirm.
1346  const size_t next_indentation = next_peeked.first_not_of(' ');
1347  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1348  if(next_indentation < indentation)
1349  {
1350  _c4dbgp("smaller indentation! scalar ended");
1351  goto ended_scalar;
1352  }
1353  else if(next_indentation == 0 && next_peeked.len > 0)
1354  {
// An unindented next line ends the scalar when it is a document-begin or
// document-end token.
1355  const char first = next_peeked.str[0];
1356  switch(first)
1357  {
1358  case '-':
1359  next_peeked = next_peeked.trimr("\n\r");
1360  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1361  if(_is_doc_begin_token(next_peeked))
1362  {
1363  _c4dbgp("doc begin! scalar ended");
1364  goto ended_scalar;
1365  }
1366  break;
1367  case '.':
1368  next_peeked = next_peeked.trimr("\n\r");
1369  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1370  if(_is_doc_end_token(next_peeked))
1371  {
1372  _c4dbgp("doc end! scalar ended");
1373  goto ended_scalar;
1374  }
1375  break;
1376  }
1377  }
1378  // load with next line
1379  _c4dbgp("next line!");
1380  if(!_finished_file())
1381  {
1382  _c4dbgp("next line!");
1383  _line_ended();
1384  _scan_line();
1385  }
1386  else
1387  {
1388  _c4dbgp("file finished!");
1389  goto ended_scalar;
1390  }
// Continue with the newly loaded line; from here on the scalar spans more
// than one line and is flagged as needing filtering.
1391  s = m_evt_handler->m_curr->line_contents.rem;
1392  needs_filter = true;
1393  }
1394 
1395 ended_scalar:
1396 
// The scalar is everything between the starting offset and the current
// offset, without trailing spaces, tabs and newline characters.
1397  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1398  sc->needs_filter = needs_filter;
1399 
1400  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1401 
1402  return true;
1403 }
1404 
1405 template<class EventHandler>
1406 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1407 {
1408  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1409  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1410  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1411  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1412  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK));
1413  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1414  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1415 }
1416 
1417 template<class EventHandler>
1418 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1419 {
1420  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1421  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1422  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1423  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK));
1424  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1425  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1426 }
1427 
1428 template<class EventHandler>
1429 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1430 {
1431  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1432  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1433 }
1434 
1435 
1436 //-----------------------------------------------------------------------------
1437 
1438 template<class EventHandler>
1439 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1440 {
1441  substr rem{}; // declare here because of the goto
1442  size_t nlpos{}; // declare here because of the goto
1443  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1444  if(pos >= m_buf.len)
1445  goto next_is_empty;
1446 
1447  // look for the next newline chars, and jump to the right of those
1448  rem = from_next_line(m_buf.sub(pos));
1449  if(rem.empty())
1450  goto next_is_empty;
1451 
1452  // now get everything up to and including the following newline chars
1453  nlpos = rem.first_of("\r\n");
1454  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1455  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1456  rem = rem.left_of(nlpos, /*include_pos*/true);
1457 
1458  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1459  return rem;
1460 
1461 next_is_empty:
1462  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1463  return {};
1464 }
1465 
1466 //-----------------------------------------------------------------------------
1467 
1468 template<class EventHandler>
1469 void ParseEngine<EventHandler>::_scan_line()
1470 {
1471  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1472  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1473  else
1474  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf.last(0), 0);
1475 }
1476 
1477 template<class EventHandler>
1478 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1479 {
1480  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1481  m_evt_handler->m_curr->pos.line,
1482  m_evt_handler->m_curr->line_contents.full.len,
1483  ahead, m_evt_handler->m_curr->pos.col,
1484  m_evt_handler->m_curr->pos.col+ahead,
1485  m_evt_handler->m_curr->pos.offset,
1486  m_evt_handler->m_curr->pos.offset+ahead);
1487  m_evt_handler->m_curr->pos.offset += ahead;
1488  m_evt_handler->m_curr->pos.col += ahead;
1489  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1);
1490  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1491 }
1492 
1493 template<class EventHandler>
1494 void ParseEngine<EventHandler>::_line_ended()
1495 {
1496  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1497  m_evt_handler->m_curr->pos.line,
1498  m_evt_handler->m_curr->line_contents.full.len,
1499  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1500  m_evt_handler->m_curr->pos.col, 1);
1501  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1);
1502  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1503  ++m_evt_handler->m_curr->pos.line;
1504  m_evt_handler->m_curr->pos.col = 1;
1505 }
1506 
1507 template<class EventHandler>
1508 void ParseEngine<EventHandler>::_line_ended_undo()
1509 {
1510  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1511  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1512  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols);
1513  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1514  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1515  m_evt_handler->m_curr->pos.offset -= delta;
1516  --m_evt_handler->m_curr->pos.line;
1517  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1518  // don't forget to undo also the changes to the remainder of the line
1519  //_RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1520  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1521 }
1522 
1523 
1524 //-----------------------------------------------------------------------------
1525 template<class EventHandler>
1526 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1527 {
1528  m_evt_handler->m_curr->indref = indentation;
1529  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1530 }
1531 
1532 template<class EventHandler>
1533 void ParseEngine<EventHandler>::_save_indentation()
1534 {
1535  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
1536  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1537  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1538 }
1539 
1540 
1541 //-----------------------------------------------------------------------------
1542 
1543 template<class EventHandler>
1544 void ParseEngine<EventHandler>::_end_map_flow()
1545 {
1546  bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1547  _c4dbgpf("mapflow: end, multiline={}", multiline);
1548  m_evt_handler->end_map_flow(multiline);
1549 }
1550 
1551 template<class EventHandler>
1552 void ParseEngine<EventHandler>::_end_seq_flow()
1553 {
1554  bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1555  _c4dbgpf("seqflow: end, multiline={}", multiline);
1556  m_evt_handler->end_seq_flow(multiline);
1557 }
1558 
1559 template<class EventHandler>
1560 void ParseEngine<EventHandler>::_end_map_blck()
1561 {
1562  _c4dbgp("mapblck: end");
1563  if(has_any(RKCL|RVAL))
1564  {
1565  _c4dbgp("mapblck: set missing val");
1566  _handle_annotations_before_blck_val_scalar();
1567  m_evt_handler->set_val_scalar_plain_empty();
1568  }
1569  else if(has_any(QMRK))
1570  {
1571  _c4dbgp("mapblck: set missing keyval");
1572  _handle_annotations_before_blck_key_scalar();
1573  m_evt_handler->set_key_scalar_plain_empty();
1574  _handle_annotations_before_blck_val_scalar();
1575  m_evt_handler->set_val_scalar_plain_empty();
1576  }
1577  m_evt_handler->end_map_block();
1578 }
1579 
1580 template<class EventHandler>
1581 void ParseEngine<EventHandler>::_end_seq_blck()
1582 {
1583  if(has_any(RVAL))
1584  {
1585  _c4dbgp("seqblck: set missing val");
1586  _handle_annotations_before_blck_val_scalar();
1587  m_evt_handler->set_val_scalar_plain_empty();
1588  }
1589  m_evt_handler->end_seq_block();
1590 }
1591 
1592 template<class EventHandler>
1593 void ParseEngine<EventHandler>::_end2_map()
1594 {
1595  _c4dbgp("map: end");
1596  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1597  if(has_any(RBLCK))
1598  {
1599  _end_map_blck();
1600  }
1601  else
1602  {
1603  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1604  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1605  m_evt_handler->_pop();
1606  }
1607 }
1608 
1609 template<class EventHandler>
1610 void ParseEngine<EventHandler>::_end2_seq()
1611 {
1612  _c4dbgp("seq: end");
1613  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1614  if(has_any(RBLCK))
1615  {
1616  _end_seq_blck();
1617  }
1618  else
1619  {
1620  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW));
1621  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1622  m_evt_handler->_pop();
1623  }
1624 }
1625 
1626 template<class EventHandler>
1627 void ParseEngine<EventHandler>::_begin2_doc()
1628 {
1629  _c4dbgp("begin_doc");
1630  m_doc_empty = true;
1631  add_flags(RDOC);
1632  m_evt_handler->begin_doc();
1633  m_evt_handler->m_curr->indref = 0; // ?
1634 }
1635 
1636 template<class EventHandler>
1637 void ParseEngine<EventHandler>::_begin2_doc_expl()
1638 {
1639  _c4dbgp("begin_doc_expl");
1640  m_doc_empty = true;
1641  add_flags(RDOC);
1642  m_evt_handler->begin_doc_expl();
1643  m_evt_handler->m_curr->indref = 0; // ?
1644 }
1645 
1646 template<class EventHandler>
1647 void ParseEngine<EventHandler>::_end2_doc()
1648 {
1649  _c4dbgp("doc: end");
1650  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1651  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1652  {
1653  _c4dbgp("doc was empty; add empty val");
1654  _handle_annotations_before_blck_val_scalar();
1655  m_evt_handler->set_val_scalar_plain_empty();
1656  }
1657  m_evt_handler->end_doc();
1658  m_bom_len = 0;
1659 }
1660 
1661 template<class EventHandler>
1662 void ParseEngine<EventHandler>::_end2_doc_expl()
1663 {
1664  _c4dbgp("doc: end");
1665  if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1666  {
1667  _c4dbgp("doc: no children; add empty val");
1668  _handle_annotations_before_blck_val_scalar();
1669  m_evt_handler->set_val_scalar_plain_empty();
1670  }
1671  m_evt_handler->end_doc_expl();
1672  m_bom_len = 0;
1673 }
1674 
1675 template<class EventHandler>
1676 void ParseEngine<EventHandler>::_maybe_begin_doc()
1677 {
1678  if(has_none(RDOC))
1679  {
1680  _c4dbgp("doc must be started");
1681  _begin2_doc();
1682  }
1683 }
1684 template<class EventHandler>
1685 void ParseEngine<EventHandler>::_maybe_end_doc()
1686 {
1687  if(has_any(RDOC))
1688  {
1689  _c4dbgp("doc must be finished");
1690  _end2_doc();
1691  }
1692  else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1693  {
1694  _c4dbgp("no doc to finish, but pending annotations");
1695  m_evt_handler->begin_doc();
1696  _handle_annotations_before_blck_val_scalar();
1697  m_evt_handler->set_val_scalar_plain_empty();
1698  m_evt_handler->end_doc();
1699  }
1700 }
1701 
1702 template<class EventHandler>
1703 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1704 {
1705  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1706  if(m_evt_handler->m_stack[0].flags & RDOC)
1707  {
1708  _c4dbgp("root is RDOC");
1709  if(m_evt_handler->m_curr->level != 0)
1710  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1711  }
1712  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1713  {
1714  _c4dbgp("root is STREAM");
1715  if(m_evt_handler->m_curr->level != 1)
1716  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1717  }
1718  else
1719  {
1720  _c4err("internal error");
1721  }
1722  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1723 }
1724 
1725 template<class EventHandler>
1726 void ParseEngine<EventHandler>::_end_doc_suddenly()
1727 {
1728  _c4dbgp("end doc suddenly");
1729  _end_doc_suddenly__pop();
1730  _end2_doc_expl();
1731  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1732 }
1733 
1734 template<class EventHandler>
1735 void ParseEngine<EventHandler>::_start_doc_suddenly()
1736 {
1737  _c4dbgp("start doc suddenly");
1738  _end_doc_suddenly__pop();
1739  _end2_doc();
1740  _begin2_doc_expl();
1741 }
1742 
1743 template<class EventHandler>
1744 void ParseEngine<EventHandler>::_end_stream()
1745 {
1746  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1747  if(has_all(RSEQ|RFLOW))
1748  _c4err("missing terminating ]");
1749  else if(has_all(RMAP|RFLOW))
1750  _c4err("missing terminating }");
1751  if(m_evt_handler->m_stack.size() > 1)
1752  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1753  if(has_all(RDOC))
1754  {
1755  _end2_doc();
1756  }
1757  else if(has_all(RTOP|RUNK))
1758  {
1759  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1760  {
1761  if(m_doc_empty)
1762  {
1763  m_evt_handler->begin_doc();
1764  _handle_annotations_before_blck_val_scalar();
1765  m_evt_handler->set_val_scalar_plain_empty();
1766  m_evt_handler->end_doc();
1767  }
1768  }
1769  }
1770  m_evt_handler->end_stream();
1771 }
1772 
1773 
1774 template<class EventHandler>
1775 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1776 {
1777  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1778  while(m_evt_handler->m_curr != popto)
1779  {
1780  if(has_any(RSEQ))
1781  {
1782  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1783  _end2_seq();
1784  }
1785  else if(has_any(RMAP))
1786  {
1787  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1788  _end2_map();
1789  }
1790  else
1791  {
1792  break;
1793  }
1794  }
1795  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1796 }
1797 
1798 template<class EventHandler>
1799 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1800 {
1801  // search the stack frame to jump to based on its indentation
1802  using state_type = typename EventHandler::state;
1803  state_type const* popto = nullptr;
1804  auto &stack = m_evt_handler->m_stack;
1805  _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1806  _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1807  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1808  #ifdef RYML_DBG
1809  if(_dbg_enabled())
1810  {
1811  char flagbuf_[128];
1812  for(state_type const& s : stack)
1813  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1814  }
1815  #endif
1816  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1817  {
1818  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1819  if(s->indref == ind)
1820  {
1821  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1822  popto = s;
1823  break;
1824  }
1825  }
1826  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1827  {
1828  _c4err("parse error: incorrect indentation?");
1829  }
1830  _handle_indentation_pop(popto);
1831 }
1832 
// Leave the current block map after a decrease in indentation.
//
// Searches the state stack below the current frame (never reaching the
// bottom frame) for the frame to pop to, based on the indentation of the
// current line, and then pops down to it. It is a parse error if no frame
// is selected.
1833 template<class EventHandler>
1834 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1835 {
1836  // search the stack frame to jump to based on its indentation
1837  using state_type = typename EventHandler::state;
1838  auto &stack = m_evt_handler->m_stack;
1839  _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1840  _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1841  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1842  state_type const* popto = nullptr;
// flagbuf_ is declared at function scope because the debug print inside
// the search loop below uses it as well.
1843  #ifdef RYML_DBG
1844  char flagbuf_[128];
1845  if(_dbg_enabled())
1846  {
1847  for(state_type const& s : stack)
1848  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1849  }
1850  #endif
1851  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1852  {
1853  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
// A frame less indented than the current line ends the search; frames
// that are more indented are skipped.
1854  if(s->indref < ind)
1855  {
1856  break;
1857  }
1858  else if(s->indref == ind)
1859  {
1860  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
// Once a candidate was found, do not replace it with a frame that has
// RTOP set and is neither a map nor a sequence.
1861  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1862  {
1863  break;
1864  }
// Otherwise the deepest-in-stack frame with the same indentation seen so
// far becomes the candidate, and the search goes on.
1865  popto = s;
1866  if(has_all(RSEQ|RBLCK, s))
1867  {
// The candidate is a block sequence: if what remains of the current line
// (after any leading spaces) is a '-' block token, stop at this frame.
1868  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1869  const size_t first = rem.first_not_of(' ');
1870  _RYML_ASSERT_BASIC_(stack.m_callbacks, first == ind || first == npos);
1871  rem = rem.right_of(first, true);
1872  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1873  if(rem.begins_with('-') && _is_blck_token(rem))
1874  {
1875  _c4dbgp("parent was indentless seq");
1876  break;
1877  }
1878  }
1879  }
1880  }
// No frame selected, or the selected frame is not strictly below the
// current one: the indentation of the current line is reported as an error.
1881  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1882  {
1883  _c4err("parse error: incorrect indentation?");
1884  }
1885  _handle_indentation_pop(popto);
1886 }
1887 
1888 
1889 //-----------------------------------------------------------------------------
// Scan a single-quoted scalar starting at the current position (leading
// spaces before the opening quote are skipped), possibly spanning several
// lines.
//
// Returns the range between the quotes, together with a flag telling
// whether it needs filtering. Reaching the end of the file before the
// closing quote is reported as an error. On success the position is left
// right after the closing quote.
1890 template<class EventHandler>
1891 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1892 {
1893  // quoted scalars can spread over multiple lines!
1894  // nice explanation here: http://yaml-multiline.info/
1895 
1896  // a span to the end of the file
1897  size_t b = m_evt_handler->m_curr->pos.offset;
1898  substr s = m_buf.sub(b);
1899  if(s.begins_with(' '))
1900  {
1901  s = s.triml(' ');
1902  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1903  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1904  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1905  }
// From here on, b is the offset of the opening quote.
1906  b = m_evt_handler->m_curr->pos.offset; // take this into account
1907  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1908 
1909  // skip the opening quote
1910  _line_progressed(1);
1911  s = s.sub(1);
1912 
1913  bool needs_filter = false;
1914 
1915  size_t numlines = 1; // we already have one line
1916  size_t pos = npos; // find the pos of the matching quote
// One iteration per line, until the closing quote is found or the file
// is finished.
1917  while( ! _finished_file())
1918  {
1919  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1920  bool line_is_blank = true;
1921  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1922  for(size_t i = 0; i < line.len; ++i)
1923  {
1924  const char curr = line.str[i];
1925  if(curr == '\'') // single quotes are escaped with two single quotes
1926  {
// '~' is only a placeholder meaning "not a quote" when the quote is the
// last character of the line.
1927  const char next = i+1 < line.len ? line.str[i+1] : '~';
1928  if(next != '\'') // so just look for the first quote
1929  { // without another after it
1930  pos = i;
1931  break;
1932  }
1933  else
1934  {
1935  needs_filter = true; // needs filter to remove escaped quotes
1936  ++i; // skip the escaped quote
1937  }
1938  }
1939  else if(curr != ' ')
1940  {
1941  line_is_blank = false;
1942  }
1943  }
1944 
1945  // leading whitespace also needs filtering
1946  needs_filter = needs_filter
1947  || (numlines > 1)
1948  || line_is_blank
1949  || (_at_line_begin() && line.begins_with(' '));
1950 
1951  if(pos == npos)
1952  {
// No closing quote on this line: consume it and go on to the next one.
1953  _line_progressed(line.len);
1954  ++numlines;
1955  }
1956  else
1957  {
// Here pos is still relative to the remainder of the current line. Note
// that pos is unsigned, so the `pos >= 0` part of the assertion always
// holds.
1958  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1959  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1960  _line_progressed(pos + 1); // progress beyond the quote
// Now pos becomes the distance from the opening quote to the closing one.
1961  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1962  break;
1963  }
1964 
1965  _line_ended();
1966  _scan_line();
1967  }
1968 
1969  if(pos == npos)
1970  {
1971  _c4err("reached end of file while looking for closing quote");
1972  }
1973  else
1974  {
1975  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
1976  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1977  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
// s starts right after the opening quote, so the contents between the
// quotes are its first pos-1 characters.
1978  s = s.sub(0, pos-1);
1979  }
1980 
1981  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1982 
1983  return ScannedScalar { s, needs_filter };
1984 }
1985 
1986 
1987 //-----------------------------------------------------------------------------
/** Scan a double-quoted scalar starting at the current parse position.
 *
 * Leading spaces and the opening `"` are consumed, then the input is
 * walked line by line until a `"` is found that is not preceded by an
 * escaping backslash. The returned span is the raw (unfiltered) text
 * between the quotes. The accompanying flag is set when the scalar
 * contains a backslash, spans more than one line, has a blank line, or
 * has a line that begins with a space while at the line begin.
 *
 * Calls _c4err() if the file ends before the closing quote is found. */
1988 template<class EventHandler>
1989 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1990 {
1991  // quoted scalars can spread over multiple lines!
1992  // nice explanation here: http://yaml-multiline.info/
1993 
1994  // a span to the end of the file
1995  size_t b = m_evt_handler->m_curr->pos.offset;
1996  substr s = m_buf.sub(b);
1997  if(s.begins_with(' '))
1998  {
       // consume the leading spaces and report them as line progress
1999  s = s.triml(' ');
2000  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
2001  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
2002  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
2003  }
      // re-read the offset: it is now the offset of the opening quote
2004  b = m_evt_handler->m_curr->pos.offset; // take this into account
2005  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
2006 
2007  // skip the opening quote
2008  _line_progressed(1);
2009  s = s.sub(1);
2010 
2011  bool needs_filter = false;
2012 
2013  size_t numlines = 1; // we already have one line
2014  size_t pos = npos; // find the pos of the matching quote
      // NOTE: the state pointer is kept in a local as a compiler workaround
      // (see the comments here and on C4_DONT_OPTIMIZE below); do not
      // restructure these reads without re-testing on the affected compilers
2015  auto *st = m_evt_handler->m_curr; // prevent erroneous hoist of the assignment out of the loop
2016  while( ! _finished_file())
2017  {
2018  const csubstr line = st->line_contents.rem;
2019  #if defined(__GNUC__) && (__GNUC__ == 11 || __GNUC__ == 8)
2020  C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
2021  #endif
2022  bool line_is_blank = true;
2023  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", st->pos.line, line);
2024  for(size_t i = 0; i < line.len; ++i)
2025  {
2026  const char curr = line.str[i];
2027  if(curr != ' ')
2028  line_is_blank = false;
2029  // every \ is an escape
2030  if(curr == '\\')
2031  {
       // '~' is just a placeholder for "no next char on this line"
2032  const char next = i+1 < line.len ? line.str[i+1] : '~';
2033  needs_filter = true;
       // step over an escaped quote or backslash so that it is
       // not mistaken for the closing quote / a new escape
2034  if(next == '"' || next == '\\')
2035  ++i;
2036  }
2037  else if(curr == '"')
2038  {
       // unescaped quote: this closes the scalar
2039  pos = i;
2040  break;
2041  }
2042  }
2043 
2044  // leading whitespace also needs filtering
2045  needs_filter = needs_filter
2046  || (numlines > 1)
2047  || line_is_blank
2048  || (_at_line_begin() && line.begins_with(' '));
2049 
2050  if(pos == npos)
2051  {
       // no closing quote on this line: consume the whole line
2052  _line_progressed(line.len);
2053  ++numlines;
2054  }
2055  else
2056  {
2057  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2058  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[st->pos.offset + pos] == '"');
2059  _line_progressed(pos + 1); // progress beyond the quote
       // from here on pos is a distance from the opening quote (at
       // offset b) instead of a position within the current line
2060  pos = st->pos.offset - b - 1; // but we stop before it
2061  break;
2062  }
2063 
       // move on to the next line
2064  _line_ended();
2065  _scan_line();
2066  }
2067 
2068  if(pos == npos)
2069  {
2070  _c4err("reached end of file looking for closing quote");
2071  }
2072  else
2073  {
2074  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
2075  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
2076  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
       // s starts right after the opening quote, hence the -1
2077  s = s.sub(0, pos-1);
2078  }
2079 
2080  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2081 
2082  return ScannedScalar{s, needs_filter};
2083 }
2084 
2085 
2086 //-----------------------------------------------------------------------------
/** Scan a block scalar (introduced by `|` or `>`), filling in @p sb.
 *
 * After optional leading spaces, the current line must start with the
 * block indicator. The rest of that line is read as the block spec: an
 * optional chomping indicator (`-` strip, `+` keep; clip otherwise) and
 * an optional single nonzero digit giving an explicit indentation, to
 * which the current state's indref is added. The following lines are
 * then appended to a raw (unfiltered) span until one of the termination
 * conditions below is met. When no explicit indentation is given, it is
 * taken from the first non-empty line, or failing that from a
 * provisional value derived from the empty lines that were seen.
 *
 * @param sb [out] receives the raw block span, the indentation and the
 *        chomp mode
 * @param indref the reference indentation; must not be npos */
2087 template<class EventHandler>
2088 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2089 {
2090  _c4dbgpf("blck: indref={}", indref);
2091  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, indref != npos);
2092 
2093  // nice explanation here: http://yaml-multiline.info/
2094  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2095  csubstr trimmed = s.triml(' ');
2096  if(trimmed.str > s.str)
2097  {
2098  _c4dbgp("skipping whitespace");
2099  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2100  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2101  s = trimmed;
2102  }
2103  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2104 
2105  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2106 
2107  // parse the spec
2108  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2109  size_t indentation = npos; // have to find out if no spec is given
2110  csubstr digits;
2111  if(s.len > 1)
2112  {
2113  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
       // t is what follows the block indicator
2114  csubstr t = s.sub(1);
2115  _c4dbgpf("blck: spec is multichar: '{}'", t);
2116  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2117  size_t pos = t.first_of("-+");
2118  _c4dbgpf("blck: spec chomp char at {}", pos);
2119  if(pos != npos)
2120  {
2121  if(t[pos] == '-')
2122  chomp = CHOMP_STRIP;
2123  else if(t[pos] == '+')
2124  chomp = CHOMP_KEEP;
       // the chomp char may come before or after the digit:
       // keep whichever side of it can hold the digit
2125  if(pos == 0)
2126  t = t.sub(1);
2127  else
2128  t = t.first(pos);
2129  }
2130  // from here to the end, only digits are considered
2131  digits = t.left_of(t.first_not_of("0123456789"));
2132  if( ! digits.empty())
2133  {
       // only a single digit is accepted, and it cannot be zero
2134  if(C4_UNLIKELY(digits.len > 1))
2135  _c4err("parse error: invalid indentation");
2136  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2137  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2138  _c4err("parse error: could not read indentation as decimal");
2139  if(C4_UNLIKELY( ! indentation))
2140  _c4err("parse error: null indentation");
       // NOTE(review): the debug message below prints indentation+indref
       // (the parameter), whereas the code adds m_curr->indref;
       // presumably both hold the same value here -- confirm
2141  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2142  indentation += m_evt_handler->m_curr->indref;
2143  }
2144  }
2145 
2146  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2147 
2148  // finish the current line
2149  _line_progressed(s.len);
2150  _line_ended();
2151  _scan_line();
2152 
2153  // start with a zero-length block, already pointing at the right place
2154  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2155  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str);
2156 
2157  // read every full line into a raw block,
2158  // from which newlines are to be stripped as needed.
2159  //
2160  // If no explicit indentation was given, pick it from the first
2161  // non-empty line. See
2162  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2163  size_t num_lines = 0;
2164  size_t first = m_evt_handler->m_curr->pos.line;
2165  size_t provisional_indentation = npos;
2166  LineContents lc;
2167  while(( ! _finished_file()))
2168  {
2169  // peek next line, but do not advance immediately
2170  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2171  #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2172  C4_DONT_OPTIMIZE(lc.rem);
2173  #endif
2174  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.rem.trimr("\r\n").len, lc.rem.trimr("\r\n"));
2175  // evaluate termination conditions
2176  if(indentation != npos)
2177  {
       // the indentation is already known (given explicitly, or
       // set by an earlier line)
2178  _c4dbgpf("blck: indentation={}", indentation);
2179  // stop when the line is deindented and not empty
2180  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2181  {
2182  if(raw_block.len)
2183  {
2184  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2185  }
2186  else
2187  {
2188  _c4err("indentation decreased without any scalar");
2189  }
2190  break;
2191  }
2192  else if(indentation == 0)
2193  {
       // with zero indentation only a document token ends the block
2194  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2195  if(_is_doc_token(lc.rem))
2196  {
2197  _c4dbgp("blck: stop. indentation=0 and doc ended");
2198  break;
2199  }
2200  }
2201  }
2202  else
2203  {
       // the indentation is not yet known: try to infer it from this line
2204  const size_t fns = lc.rem.first_not_of(' ');
2205  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2206  if(fns != npos) // non-empty line
2207  {
       // NOTE(review): the ')' closing this tab check has no visible
       // opener in this listing (two listing lines are missing just
       // before the check); presumably a macro invocation such as
       // _RYML_WITH_TAB_TOKENS( was dropped -- confirm against the
       // original source
2209  if(C4_UNLIKELY(lc.full.begins_with('\t')))
2210  _c4err("parse error");
2211  )
2212  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2213  if(provisional_indentation == npos)
2214  {
       // no empty line was seen before this first non-empty line
2215  if(lc.indentation < indref)
2216  {
2217  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2218  if(raw_block.len == 0)
2219  {
2220  _c4dbgp("blck: was empty, undo next line");
2221  _line_ended_undo();
2222  }
2223  break;
2224  }
2225  else if(lc.indentation == m_evt_handler->m_curr->indref)
2226  {
2227  if(has_any(RSEQ|RMAP))
2228  {
2229  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2230  break;
2231  }
2232  }
2233  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2234  indentation = lc.indentation;
2235  }
2236  else
2237  {
       // empty lines came first: accept this line's indentation
       // only if it is not below the provisional one
2238  if(lc.indentation >= provisional_indentation)
2239  {
2240  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2241  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2242  indentation = lc.indentation;
2243  }
2244  else
2245  {
2246  break;
2247  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2248  }
2249  }
2250  }
2251  else // empty line
2252  {
2253  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2254  if(provisional_indentation != npos)
2255  {
       // grow the provisional indentation to the longest
       // spaces-only line seen so far
2256  if(lc.rem.len >= provisional_indentation)
2257  {
2258  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2259  provisional_indentation = lc.rem.len;
2260  }
2261  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2262  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2263  {
2264  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2265  provisional_indentation = lc.indentation;
2266  }
2267  #endif
2268  }
2269  else
2270  {
       // first empty line: initialize the provisional indentation.
       // NOTE(review): when the line indentation is zero, the result
       // of has_any() is converted to size_t (presumably 0 or 1)
2271  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2272  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2273  if(provisional_indentation == npos)
2274  {
2275  provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2276  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2277  }
       // never go below the reference indentation
2278  if(provisional_indentation < indref)
2279  {
2280  provisional_indentation = indref;
2281  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2282  }
2283  }
2284  }
2285  }
2286  // advance now that we know the folded scalar continues
2287  m_evt_handler->m_curr->line_contents = lc;
2288  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
       // the block grows by the full line
2289  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2290  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2291  _line_ended();
2292  ++num_lines;
2293  }
2294  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2295  C4_UNUSED(num_lines);
2296  C4_UNUSED(first);
2297 
      // no non-empty line settled the indentation: use the provisional value
2298  if(indentation == npos)
2299  {
2300  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2301  indentation = provisional_indentation;
2302  }
2303 
      // undo the last _line_ended() issued by the loop above
2304  if(num_lines)
2305  _line_ended_undo();
2306 
2307  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2308 
      // hand the results over to the caller
2309  sb->scalar = raw_block;
2310  sb->indentation = indentation;
2311  sb->chomp = chomp;
2312 }
2313 
2314 
2315 //-----------------------------------------------------------------------------
2316 //-----------------------------------------------------------------------------
2317 //-----------------------------------------------------------------------------
2318 /** @cond dev */
2319 
// Trace macro used by the whitespace filter helpers below. It is
// compiled out by default; change the `#if 0` to `#if 1` to have each
// message printed through _c4dbgpf(), prefixed with the processor's
// read and write positions. The enabled form expects a variable named
// `proc` in scope and at least one argument after the format string.
2320 // a debugging scaffold:
2321 #if 0
2322 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2323 #else
2324 #define _c4dbgfws(...)
2325 #endif
2326 
2327 template<class EventHandler>
2328 template<class FilterProcessor>
2329 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2330 {
2331  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2332  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2333 
2334  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2335  if(first_pos != npos)
2336  {
2337  const char first_char = proc.src[first_pos];
2338  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2339  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2340  {
2341  _c4dbgfws("whitespace is trailing on line", "");
2342  proc.skip(first_pos - proc.rpos);
2343  }
2344  else // a legit whitespace
2345  {
2346  proc.copy();
2347  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2348  }
2349  return true;
2350  }
2351  _c4dbgfws("whitespace is trailing on line", "");
2352  return false;
2353 }
2354 
2355 template<class EventHandler>
2356 template<class FilterProcessor>
2357 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2358 {
2359  if(!_filter_ws_handle_to_first_non_space(proc))
2360  {
2361  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2362  proc.copy(proc.src.len - proc.rpos);
2363  }
2364 }
2365 
2366 template<class EventHandler>
2367 template<class FilterProcessor>
2368 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2369 {
2370  if(!_filter_ws_handle_to_first_non_space(proc))
2371  {
2372  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2373  proc.skip(proc.src.len - proc.rpos);
2374  }
2375 }
2376 
// the whitespace-filter trace macro is not used past this point
2377 #undef _c4dbgfws
2378 
2379 
2380 //-----------------------------------------------------------------------------
2381 //-----------------------------------------------------------------------------
2382 //-----------------------------------------------------------------------------
2383 /* plain scalars */
2384 
// Trace macro used by the plain scalar filter functions below. It is
// compiled out by default; change the `#if 0` to `#if 1` to have each
// message printed through _c4dbgpf(), prefixed with the processor's
// read and write positions. The enabled form expects a variable named
// `proc` in scope.
2385 // a debugging scaffold:
2386 #if 0
2387 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2388 #else
2389 #define _c4dbgfps(fmt, ...)
2390 #endif
2391 
2392 template<class EventHandler>
2393 template<class FilterProcessor>
2394 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2395 {
2396  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2397 
2398  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2399  size_t ii = proc.rpos;
2400  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2401  if(numnl_following)
2402  {
2403  proc.set('\n', numnl_following);
2404  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2405  }
2406  else
2407  {
2408  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2409  if(ret != npos)
2410  {
2411  proc.set(' ');
2412  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2413  }
2414  else
2415  {
2416  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2417  ii = proc.src.len;
2418  }
2419  }
2420  proc.rpos = ii;
2421 }
2422 
2423 template<class EventHandler>
2424 template<class FilterProcessor>
2425 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2426 {
2427  _RYML_ASSERT_BASIC_(this->callbacks(), indentation != npos);
2428  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2429 
2430  while(proc.has_more_chars())
2431  {
2432  const char curr = proc.curr();
2433  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2434  switch(curr)
2435  {
2436  case ' ':
2437  _RYML_WITH_TAB_TOKENS(case '\t':)
2438  _c4dbgfps("whitespace", curr);
2439  _filter_ws_skip_trailing(proc);
2440  break;
2441  case '\n':
2442  _c4dbgfps("newline", curr);
2443  _filter_nl_plain(proc, /*indentation*/indentation);
2444  break;
2445  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2446  _c4dbgfps("carriage return, ignore", curr);
2447  proc.skip();
2448  break;
2449  default:
2450  proc.copy();
2451  break;
2452  }
2453  }
2454 
2455  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2456 
2457  return proc.result();
2458 }
2459 
// the plain-scalar trace macro is not used past this point
2460 #undef _c4dbgfps
2461 
2462 
2463 template<class EventHandler>
2464 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2465 {
2466  FilterProcessorSrcDst proc(scalar, dst);
2467  return _filter_plain(proc, indentation);
2468 }
2469 
2470 template<class EventHandler>
2471 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2472 {
2473  FilterProcessorInplaceEndExtending proc(dst, cap);
2474  return _filter_plain(proc, indentation);
2475 }
2476 
2477 
2478 //-----------------------------------------------------------------------------
2479 //-----------------------------------------------------------------------------
2480 //-----------------------------------------------------------------------------
2481 /* single quoted */
2482 
// Trace macro used by the single-quoted scalar filter functions below.
// It is compiled out by default; change the `#if 0` to `#if 1` to have
// each message printed through _c4dbgpf(), prefixed with the
// processor's read and write positions. The enabled form expects a
// variable named `proc` in scope.
2483 // a debugging scaffold:
2484 #if 0
2485 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2486 #else
2487 #define _c4dbgfsq(fmt, ...)
2488 #endif
2489 
2490 template<class EventHandler>
2491 template<class FilterProcessor>
2492 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2493 {
2494  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2495 
2496  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2497  size_t ii = proc.rpos;
2498  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2499  if(numnl_following)
2500  {
2501  proc.set('\n', numnl_following);
2502  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2503  }
2504  else
2505  {
2506  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2507  if(ret != npos)
2508  {
2509  proc.set(' ');
2510  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2511  }
2512  else
2513  {
2514  proc.set(' ');
2515  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2516  }
2517  }
2518  proc.rpos = ii;
2519 }
2520 
2521 template<class EventHandler>
2522 template<class FilterProcessor>
2523 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2524 {
2525  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2526 
2527  // from the YAML spec for double-quoted scalars:
2528  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2529  while(proc.has_more_chars())
2530  {
2531  const char curr = proc.curr();
2532  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2533  switch(curr)
2534  {
2535  case ' ':
2536  case '\t':
2537  _c4dbgfsq("whitespace", curr);
2538  _filter_ws_copy_trailing(proc);
2539  break;
2540  case '\n':
2541  _c4dbgfsq("newline", curr);
2542  _filter_nl_squoted(proc);
2543  break;
2544  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2545  _c4dbgfsq("skip cr", curr);
2546  proc.skip();
2547  break;
2548  case '\'':
2549  _c4dbgfsq("squote", curr);
2550  if(proc.next() == '\'')
2551  {
2552  _c4dbgfsq("two consecutive squotes", curr);
2553  proc.skip();
2554  proc.copy();
2555  }
2556  else
2557  {
2558  _c4err("filter error");
2559  }
2560  break;
2561  default:
2562  proc.copy();
2563  break;
2564  }
2565  }
2566 
2567  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2568 
2569  return proc.result();
2570 }
2571 
// the single-quoted trace macro is not used past this point
2572 #undef _c4dbgfsq
2573 
2574 template<class EventHandler>
2575 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2576 {
2577  FilterProcessorSrcDst proc(scalar, dst);
2578  return _filter_squoted(proc);
2579 }
2580 
2581 template<class EventHandler>
2582 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
2583 {
2584  FilterProcessorInplaceEndExtending proc(dst, cap);
2585  return _filter_squoted(proc);
2586 }
2587 
2588 
2589 //-----------------------------------------------------------------------------
2590 //-----------------------------------------------------------------------------
2591 //-----------------------------------------------------------------------------
2592 /* double quoted */
2593 
// Trace macro used by the double-quoted scalar filter functions below.
// It is compiled out by default; change the `#if 0` to `#if 1` to have
// each message printed through _c4dbgpf(), prefixed with the
// processor's read and write positions. The enabled form expects a
// variable named `proc` in scope and at least one argument after the
// format string.
2594 // a debugging scaffold:
2595 #if 0
2596 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2597 #else
2598 #define _c4dbgfdq(...)
2599 #endif
2600 
2601 template<class EventHandler>
2602 template<class FilterProcessor>
2603 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2604 {
2605  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
2606 
2607  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2608  size_t ii = proc.rpos;
2609  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2610  if(numnl_following)
2611  {
2612  proc.set('\n', numnl_following);
2613  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2614  }
2615  else
2616  {
2617  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2618  if(ret != npos)
2619  {
2620  proc.set(' ');
2621  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2622  }
2623  else
2624  {
2625  proc.set(' ');
2626  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2627  }
2628  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2629  {
2630  _c4dbgfdq("backslash at [{}]", ii);
2631  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2632  if(next == ' ' || next == '\t')
2633  {
2634  _c4dbgfdq("extend skip to backslash", "");
2635  ++ii;
2636  }
2637  }
2638  }
2639  proc.rpos = ii;
2640 }
2641 
2642 template<class EventHandler>
2643 template<class FilterProcessor>
2644 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2645 {
2646  char next = proc.next();
2647  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2648  if(next == '\r')
2649  {
2650  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2651  {
2652  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2653  next = '\n';
2654  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2655  }
2656  }
2657 
2658  if(next == '\n')
2659  {
2660  size_t ii = proc.rpos + 2;
2661  for( ; ii < proc.src.len; ++ii)
2662  {
2663  // skip leading whitespace
2664  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2665  ;
2666  else
2667  break;
2668  }
2669  proc.skip(ii - proc.rpos);
2670  }
2671  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2672  {
2673  // escapes for json compatibility
2674  proc.translate_esc(next);
2675  _c4dbgfdq("here, used '{}'", _c4prc(next));
2676  }
2677  else if(next == '\r')
2678  {
2679  proc.skip();
2680  }
2681  else if(next == 'n')
2682  {
2683  proc.translate_esc('\n');
2684  }
2685  else if(next == 'r')
2686  {
2687  proc.translate_esc('\r');
2688  }
2689  else if(next == 't')
2690  {
2691  proc.translate_esc('\t');
2692  }
2693  else if(next == '\\')
2694  {
2695  proc.translate_esc('\\');
2696  }
2697  else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2698  {
2699  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2700  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2701  char readbuf[8];
2702  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2703  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2704  uint32_t codepoint_val = {};
2705  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2706  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2707  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2708  if(C4_UNLIKELY(numbytes == 0))
2709  _c4err("failed to decode code point={}", proc.rpos);
2710  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2711  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/3u);
2712  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2713  }
2714  else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2715  {
2716  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2717  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2718  char readbuf[8];
2719  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2720  uint32_t codepoint_val = {};
2721  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2722  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2723  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2724  if(C4_UNLIKELY(numbytes == 0))
2725  _c4err("failed to decode code point={}", proc.rpos);
2726  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2727  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2728  }
2729  else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2730  {
2731  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2732  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2733  char readbuf[8];
2734  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2735  uint32_t codepoint_val = {};
2736  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2737  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2738  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2739  if(C4_UNLIKELY(numbytes == 0))
2740  _c4err("failed to decode code point={}", proc.rpos);
2741  _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2742  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2743  }
2744  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2745  else if(next == '0')
2746  {
2747  proc.translate_esc('\0');
2748  }
2749  else if(next == 'b') // backspace
2750  {
2751  proc.translate_esc('\b');
2752  }
2753  else if(next == 'f') // form feed
2754  {
2755  proc.translate_esc('\f');
2756  }
2757  else if(next == 'a') // bell character
2758  {
2759  proc.translate_esc('\a');
2760  }
2761  else if(next == 'v') // vertical tab
2762  {
2763  proc.translate_esc('\v');
2764  }
2765  else if(next == 'e') // escape character
2766  {
2767  proc.translate_esc('\x1b');
2768  }
2769  else if(next == '_') // unicode non breaking space \u00a0
2770  {
2771  // https://www.compart.com/en/unicode/U+00a0
2772  const char payload[] = {
2773  _RYML_CHCONST(-0x3e, 0xc2),
2774  _RYML_CHCONST(-0x60, 0xa0),
2775  };
2776  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2777  }
2778  else if(next == 'N') // unicode next line \u0085
2779  {
2780  // https://www.compart.com/en/unicode/U+0085
2781  const char payload[] = {
2782  _RYML_CHCONST(-0x3e, 0xc2),
2783  _RYML_CHCONST(-0x7b, 0x85),
2784  };
2785  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2786  }
2787  else if(next == 'L') // unicode line separator \u2028
2788  {
2789  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2790  const char payload[] = {
2791  _RYML_CHCONST(-0x1e, 0xe2),
2792  _RYML_CHCONST(-0x80, 0x80),
2793  _RYML_CHCONST(-0x58, 0xa8),
2794  };
2795  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2796  }
2797  else if(next == 'P') // unicode paragraph separator \u2029
2798  {
2799  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2800  const char payload[] = {
2801  _RYML_CHCONST(-0x1e, 0xe2),
2802  _RYML_CHCONST(-0x80, 0x80),
2803  _RYML_CHCONST(-0x57, 0xa9),
2804  };
2805  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2806  }
2807  else if(next == '\0')
2808  {
2809  proc.skip();
2810  }
2811  else
2812  {
2813  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2814  }
2815  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2816 }
2817 
2818 
2819 template<class EventHandler>
2820 template<class FilterProcessor>
2821 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2822 {
2823  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2824  // from the YAML spec for double-quoted scalars:
2825  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2826  while(proc.has_more_chars())
2827  {
2828  const char curr = proc.curr();
2829  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2830  switch(curr)
2831  {
2832  case ' ':
2833  case '\t':
2834  {
2835  _c4dbgfdq("whitespace", curr);
2836  _filter_ws_copy_trailing(proc);
2837  break;
2838  }
2839  case '\n':
2840  {
2841  _c4dbgfdq("newline", curr);
2842  _filter_nl_dquoted(proc);
2843  break;
2844  }
2845  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2846  {
2847  _c4dbgfdq("carriage return, ignore", curr);
2848  proc.skip();
2849  break;
2850  }
2851  case '\\':
2852  {
2853  _filter_dquoted_backslash(proc);
2854  break;
2855  }
2856  default:
2857  {
2858  proc.copy();
2859  break;
2860  }
2861  }
2862  }
2863  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2864  return proc.result();
2865 }
2866 
2867 #undef _c4dbgfdq
2868 
2869 
2870 template<class EventHandler>
2871 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2872 {
2873  FilterProcessorSrcDst proc(scalar, dst);
2874  return _filter_dquoted(proc);
2875 }
2876 
2877 template<class EventHandler>
2878 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2879 {
2880  FilterProcessorInplaceMidExtending proc(dst, cap);
2881  return _filter_dquoted(proc);
2882 }
2883 
2884 
2885 //-----------------------------------------------------------------------------
2886 //-----------------------------------------------------------------------------
2887 //-----------------------------------------------------------------------------
2888 // block filtering helpers
2889 
2890 C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
2891 {
2892  if(indentation + 1 > s.len)
2893  return npos;
2894  for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
2895  {
2896  if(s.str[i] == '\n')
2897  {
2898  csubstr rem = s.sub(i + 1);
2899  size_t first = rem.first_not_of(' ');
2900  first = (first != npos) ? first : rem.len;
2901  if(first > indentation)
2902  return i;
2903  }
2904  }
2905  return npos;
2906 }
2907 
2908 template<class EventHandler>
2909 template<class FilterProcessor>
2910 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2911 {
2912  _RYML_ASSERT_BASIC_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2913  _RYML_ASSERT_BASIC_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2914 
2915  // a debugging scaffold:
2916  #if 0
2917  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2918  #else
2919  #define _c4dbgchomp(...)
2920  #endif
2921 
2922  // advance to the last line having spaces beyond the indentation
2923  {
2924  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2925  if(last != npos)
2926  {
2927  _c4dbgchomp("found newline and larger indentation. last={}", last);
2928  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2929  _RYML_ASSERT_BASIC_(this->callbacks(), last <= proc.src.len);
2930  // remove indentation spaces, copy the rest
2931  while((proc.rpos < last) && proc.has_more_chars())
2932  {
2933  const char curr = proc.curr();
2934  _c4dbgchomp("curr='{}'", _c4prc(curr));
2935  switch(curr)
2936  {
2937  case '\n':
2938  {
2939  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2940  proc.copy();
2941  // are there spaces after the newline?
2942  csubstr at_next_line = proc.rem();
2943  if(at_next_line.begins_with(' '))
2944  {
2945  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2946  // there are spaces.
2947  size_t first_non_space = at_next_line.first_not_of(' ');
2948  _c4dbgchomp("first_non_space={}", first_non_space);
2949  if(first_non_space == npos)
2950  {
2951  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2952  first_non_space = at_next_line.len;
2953  }
2954  if(first_non_space <= indentation)
2955  {
2956  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2957  proc.skip(first_non_space);
2958  }
2959  else
2960  {
2961  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2962  proc.skip(indentation);
2963  // copy the spaces after the indentation
2964  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2965  proc.copy(first_non_space - indentation);
2966  }
2967  }
2968  break;
2969  }
2970  case '\r':
2971  proc.skip();
2972  break;
2973  default:
2974  _c4err("parse error");
2975  break;
2976  }
2977  }
2978  }
2979  }
2980 
2981  // from now on, we only have line ends (or indentation spaces)
2982  switch(chomp)
2983  {
2984  case CHOMP_CLIP:
2985  {
2986  bool had_one = false;
2987  while(proc.has_more_chars())
2988  {
2989  const char curr = proc.curr();
2990  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2991  switch(curr)
2992  {
2993  case '\n':
2994  {
2995  _c4dbgchomp("copy newline!", curr);
2996  proc.copy();
2997  proc.set_at_end();
2998  had_one = true;
2999  break;
3000  }
3001  case ' ':
3002  case '\r':
3003  _c4dbgchomp("skip!", curr);
3004  proc.skip();
3005  break;
3006  }
3007  }
3008  if(!had_one) // there were no newline characters. add one.
3009  {
3010  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3011  proc.set('\n');
3012  }
3013  break;
3014  }
3015  case CHOMP_KEEP:
3016  {
3017  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3018  while(proc.has_more_chars())
3019  {
3020  const char curr = proc.curr();
3021  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3022  switch(curr)
3023  {
3024  case '\n':
3025  _c4dbgchomp("copy newline!", curr);
3026  proc.copy();
3027  break;
3028  case ' ':
3029  case '\r':
3030  _c4dbgchomp("skip!", curr);
3031  proc.skip();
3032  break;
3033  }
3034  }
3035  break;
3036  }
3037  case CHOMP_STRIP:
3038  {
3039  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3040  // nothing to do!
3041  break;
3042  }
3043  }
3044 
3045  #undef _c4dbgchomp
3046 }
3047 
3048 
3049 // a debugging scaffold:
3050 #if 0
3051 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3052 #else
3053 #define _c4dbgfb(...)
3054 #endif
3055 
3056 template<class EventHandler>
3057 template<class FilterProcessor>
3058 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3059 {
3060  csubstr rem = proc.rem(); // remaining
3061  if(rem.len)
3062  {
3063  size_t first = rem.first_not_of(' ');
3064  if(first != npos)
3065  {
3066  _c4dbgfb("{} spaces follow before next nonws character", first);
3067  if(first < indentation)
3068  {
3069  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3070  proc.skip(first);
3071  }
3072  else
3073  {
3074  _c4dbgfb("skip {} spaces from indentation", indentation);
3075  proc.skip(indentation);
3076  }
3077  }
3078  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3079  else
3080  {
3081  _c4dbgfb("all spaces to the end: {} spaces", first);
3082  first = rem.len;
3083  if(first)
3084  {
3085  if(first < indentation)
3086  {
3087  _c4dbgfb("skip everything", first);
3088  proc.skip(proc.src.len - proc.rpos);
3089  }
3090  else
3091  {
3092  _c4dbgfb("skip {} spaces from indentation", indentation);
3093  proc.skip(indentation);
3094  }
3095  }
3096  }
3097  #endif
3098  }
3099 }
3100 
3101 template<class EventHandler>
3102 template<class FilterProcessor>
3103 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3104 {
3105  csubstr contents = proc.src.trimr(" \n\r");
3106  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3107  if(!contents.len)
3108  {
3109  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3110  if(chomp == CHOMP_KEEP && proc.src.len)
3111  {
3112  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3113  while(proc.has_more_chars())
3114  {
3115  const char curr = proc.curr();
3116  if(curr == '\n')
3117  proc.copy();
3118  else
3119  proc.skip();
3120  }
3121  if(!proc.wpos)
3122  {
3123  proc.set('\n');
3124  }
3125  }
3126  }
3127  return contents.len;
3128 }
3129 
3130 template<class EventHandler>
3131 template<class FilterProcessor>
3132 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3133 {
3134  _c4dbgfb("contents_len={}", contents_len);
3135 
3136  _RYML_ASSERT_BASIC_(this->callbacks(), contents_len > 0u);
3137 
3138  // extend contents to just before the first newline at the end,
3139  // in case it is preceded by spaces
3140  size_t firstnewl = proc.src.first_of('\n', contents_len);
3141  if(firstnewl != npos)
3142  {
3143  contents_len = firstnewl;
3144  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3145  }
3146  else
3147  {
3148  contents_len = proc.src.len;
3149  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3150  }
3151 
3152  return contents_len;
3153 }
3154 
3155 #undef _c4dbgfb
3156 
3157 
3158 //-----------------------------------------------------------------------------
3159 //-----------------------------------------------------------------------------
3160 //-----------------------------------------------------------------------------
3161 
3162 // a debugging scaffold:
3163 #if 0
3164 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3165 #else
3166 #define _c4dbgfbl(...)
3167 #endif
3168 
3169 template<class EventHandler>
3170 template<class FilterProcessor>
3171 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3172 {
3173  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3174 
3175  size_t contents_len = _handle_all_whitespace(proc, chomp);
3176  if(!contents_len)
3177  return proc.result();
3178 
3179  contents_len = _extend_to_chomp(proc, contents_len);
3180 
3181  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3182 
3183  _filter_block_indentation(proc, indentation);
3184 
3185  // now filter the bulk
3186  while(proc.has_more_chars(/*maxpos*/contents_len))
3187  {
3188  const char curr = proc.curr();
3189  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3190  switch(curr)
3191  {
3192  case '\n':
3193  {
3194  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3195  proc.copy(); // copy the newline
3196  _filter_block_indentation(proc, indentation);
3197  break;
3198  }
3199  case '\r':
3200  proc.skip();
3201  break;
3202  default:
3203  proc.copy();
3204  break;
3205  }
3206  }
3207 
3208  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3209 
3210  _filter_chomp(proc, chomp, indentation);
3211 
3212  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3213 
3214  return proc.result();
3215 }
3216 
3217 #undef _c4dbgfbl
3218 
3219 template<class EventHandler>
3220 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3221 {
3222  FilterProcessorSrcDst proc(scalar, dst);
3223  return _filter_block_literal(proc, indentation, chomp);
3224 }
3225 
3226 template<class EventHandler>
3227 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3228 {
3229  FilterProcessorInplaceEndExtending proc(scalar, cap);
3230  return _filter_block_literal(proc, indentation, chomp);
3231 }
3232 
3233 
3234 //-----------------------------------------------------------------------------
3235 //-----------------------------------------------------------------------------
3236 //-----------------------------------------------------------------------------
3237 
3238 // a debugging scaffold:
3239 #if 0
3240 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3241 #else
3242 #define _c4dbgfbf(...)
3243 #endif
3244 
3245 
3246 template<class EventHandler>
3247 template<class FilterProcessor>
3248 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3249 {
3250  _filter_block_indentation(proc, indentation);
3251  while(proc.has_more_chars(len))
3252  {
3253  const char curr = proc.curr();
3254  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3255  switch(curr)
3256  {
3257  case '\n':
3258  _c4dbgfbf("newline.", curr);
3259  proc.copy();
3260  _filter_block_indentation(proc, indentation);
3261  break;
3262  case '\r':
3263  proc.skip();
3264  break;
3265  case ' ':
3266  case '\t':
3267  {
3268  size_t first = proc.rem().first_not_of(" \t");
3269  _c4dbgfbf("space. first={}", first);
3270  if(first == npos)
3271  first = proc.rem().len;
3272  _c4dbgfbf("... indentation increased to {}", first);
3273  _filter_block_folded_indented_block(proc, indentation, len, first);
3274  break;
3275  }
3276  default:
3277  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3278  return;
3279  }
3280  }
3281 }
3282 
3283 template<class EventHandler>
3284 template<class FilterProcessor>
3285 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3286 {
3287  switch(num_newl)
3288  {
3289  case 1u:
3290  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3291  wpos_at_first_newl = proc.wpos;
3292  proc.skip();
3293  proc.set(' ');
3294  break;
3295  case 2u:
3296  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3297  _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl != npos);
3298  _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3299  _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3300  proc.skip();
3301  proc.set_at(wpos_at_first_newl, '\n');
3302  _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3303  break;
3304  default:
3305  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3306  proc.copy();
3307  break;
3308  }
3309  return wpos_at_first_newl;
3310 }
3311 
3312 template<class EventHandler>
3313 template<class FilterProcessor>
3314 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3315 {
3316  _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() == '\n');
3317  size_t num_newl = 0;
3318  size_t wpos_at_first_newl = npos;
3319  while(proc.has_more_chars(len))
3320  {
3321  const char curr = proc.curr();
3322  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3323  switch(curr)
3324  {
3325  case '\n':
3326  {
3327  _c4dbgfbf("newline. sofar={}", num_newl);
3328  // NOTE: vs2022-32bit-release builds were giving wrong
3329  // results in this block, if it was written as either
3330  // as a switch(num_newl) or its equivalent if-form.
3331  //
3332  // For this reason, we're using a dedicated function
3333  // (**_compress), which seems to work around the issue.
3334  //
3335  // The manifested problem was that somewhere between the
3336  // assignment to curr and this point, proc.wpos (the
3337  // write-position of the processor) jumped to npos, which
3338  // made the write wrap-around! To make things worse,
3339  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3340  // problem go away!
3341  //
3342  // The only way to make the problem appear with prints
3343  // enabled was by disabling all prints in this function
3344  // (including in the block which was moved to the compress
3345  // function) and then selectively enabling only some of
3346  // those prints.
3347  //
3348  // This may be due to some bug in the cl-x86 optimizer; or
3349  // it may be triggered by some UB which may be
3350  // inadvertedly present in this function or in the filter
3351  // processor. This is despite our best efforts to weed out
3352  // any such UB problem: neither clang-tidy nor none of the
3353  // sanitizers, or gcc's -fanalyzer pointed to any problems
3354  // in this code.
3355  //
3356  // In the end, moving this block to a separate function
3357  // was the only way to bury the problem. But it may
3358  // resurface again, as The Undead, rising to from the
3359  // grave to haunt us with his terrible presence.
3360  //
3361  // We may have to revisit this. With a stake, and lots of
3362  // garlic.
3363  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3364  _filter_block_indentation(proc, indentation);
3365  break;
3366  }
3367  case ' ':
3368  case '\t':
3369  {
3370  size_t first = proc.rem().first_not_of(" \t");
3371  _c4dbgfbf("space. first={}", first);
3372  if(first == npos)
3373  first = proc.rem().len;
3374  _c4dbgfbf("... indentation increased to {}", first);
3375  if(num_newl)
3376  {
3377  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3378  proc.set_at(wpos_at_first_newl, '\n');
3379  }
3380  if(num_newl > 1u)
3381  {
3382  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3383  proc.set('\n');
3384  }
3385  _filter_block_folded_indented_block(proc, indentation, len, first);
3386  num_newl = 0;
3387  wpos_at_first_newl = npos;
3388  break;
3389  }
3390  case '\r':
3391  proc.skip();
3392  break;
3393  default:
3394  _c4dbgfbf("not space, not newline. stop.", 0);
3395  return;
3396  }
3397  }
3398 }
3399 
3400 
3401 template<class EventHandler>
3402 template<class FilterProcessor>
3403 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3404 {
3405  _RYML_ASSERT_BASIC_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3406  if(curr_indentation)
3407  proc.copy(curr_indentation);
3408  while(proc.has_more_chars(len))
3409  {
3410  const char curr = proc.curr();
3411  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3412  switch(curr)
3413  {
3414  case '\n':
3415  {
3416  proc.copy();
3417  _filter_block_indentation(proc, indentation);
3418  csubstr rem = proc.rem();
3419  const size_t first = rem.first_not_of(' ');
3420  _c4dbgfbf("newline. firstns={}", first);
3421  if(first == 0)
3422  {
3423  const char c = rem[first];
3424  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3425  if(c == '\n' || c == '\r')
3426  {
3427  ;
3428  }
3429  else
3430  {
3431  _c4dbgfbf("done with indented block", first);
3432  goto endloop;
3433  }
3434  }
3435  else if(first != npos)
3436  {
3437  proc.copy(first);
3438  _c4dbgfbf("copy all {} spaces", first);
3439  }
3440  break;
3441  }
3442  break;
3443  case '\r':
3444  proc.skip();
3445  break;
3446  default:
3447  proc.copy();
3448  break;
3449  }
3450  }
3451  endloop:
3452  return;
3453 }
3454 
3455 
3456 template<class EventHandler>
3457 template<class FilterProcessor>
3458 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3459 {
3460  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3461 
3462  size_t contents_len = _handle_all_whitespace(proc, chomp);
3463  if(!contents_len)
3464  return proc.result();
3465 
3466  contents_len = _extend_to_chomp(proc, contents_len);
3467 
3468  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3469 
3470  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3471 
3472  // now filter the bulk
3473  while(proc.has_more_chars(/*maxpos*/contents_len))
3474  {
3475  const char curr = proc.curr();
3476  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3477  switch(curr)
3478  {
3479  case '\n':
3480  {
3481  _c4dbgfbf("found newline", curr);
3482  _filter_block_folded_newlines(proc, indentation, contents_len);
3483  break;
3484  }
3485  case '\r':
3486  proc.skip();
3487  break;
3488  default:
3489  proc.copy();
3490  break;
3491  }
3492  }
3493 
3494  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3495 
3496  _filter_chomp(proc, chomp, indentation);
3497 
3498  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3499 
3500  return proc.result();
3501 }
3502 
3503 #undef _c4dbgfbf
3504 
3505 template<class EventHandler>
3506 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3507 {
3508  FilterProcessorSrcDst proc(scalar, dst);
3509  return _filter_block_folded(proc, indentation, chomp);
3510 }
3511 
3512 template<class EventHandler>
3513 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3514 {
3515  FilterProcessorInplaceEndExtending proc(scalar, cap);
3516  return _filter_block_folded(proc, indentation, chomp);
3517 }
3518 
3519 
3520 //-----------------------------------------------------------------------------
3521 //-----------------------------------------------------------------------------
3522 //-----------------------------------------------------------------------------
3523 
3524 template<class EventHandler>
3525 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3526 {
3527  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3528  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3529  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.valid());
3530  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3531  return r.get();
3532 }
3533 
3534 //-----------------------------------------------------------------------------
3535 
3536 template<class EventHandler>
3537 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3538 {
3539  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3540  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3541  _RYML_ASSERT_BASIC_(this->callbacks(), r.valid());
3542  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3543  return r.get();
3544 }
3545 
3546 
3547 //-----------------------------------------------------------------------------
3548 
3549 template<class EventHandler>
3550 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3551 {
3552  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3553  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3554  if(C4_LIKELY(r.valid()))
3555  {
3556  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3557  return r.get();
3558  }
3559  else
3560  {
3561  const size_t len = r.required_len();
3562  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3563  substr dst = m_evt_handler->alloc_arena(len, &s);
3564  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3565  if(dst.str)
3566  {
3567  _RYML_ASSERT_BASIC_(this->callbacks(), dst.len == len);
3568  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3569  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3570  _RYML_ASSERT_BASIC_(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3571  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3572  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3573  return rsd.get();
3574  }
3575  return dst;
3576  }
3577 }
3578 
3579 
3580 //-----------------------------------------------------------------------------
3581 
3582 template<class EventHandler>
3583 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3584 {
3585  if(s.is_sub(m_buf))
3586  {
3587  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3588  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
3589  if(s.len)
3590  memmove(s.str - 1, s.str, s.len);
3591  --s.str;
3592  s.str[s.len] = '\n';
3593  ++s.len;
3594  return s;
3595  }
3596  else
3597  {
3598  substr dst = m_evt_handler->alloc_arena(s.len + 1);
3599  if(s.len)
3600  memcpy(dst.str, s.str, s.len);
3601  dst[s.len] = '\n';
3602  return dst;
3603  }
3604 }
3605 
3606 template<class EventHandler>
3607 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3608 {
3609  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3610  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3611  csubstr result;
3612  if(C4_LIKELY(r.valid()))
3613  {
3614  result = r.get();
3615  }
3616  else
3617  {
3618  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3619  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3620  // this can only happen when adding a single newline in clip mode.
3621  // so we shift left the scalar by one place
3622  result = _move_scalar_left_and_add_newline(s);
3623  }
3624  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
3625  return result;
3626 }
3627 
3628 
3629 //-----------------------------------------------------------------------------
3630 template<class EventHandler>
3631 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3632 {
3633  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3634  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3635  csubstr result;
3636  if(C4_LIKELY(r.valid()))
3637  {
3638  result = r.get();
3639  }
3640  else
3641  {
3642  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3643  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3644  // this can only happen when adding a single newline in clip mode.
3645  // so we shift left the scalar by one place
3646  result = _move_scalar_left_and_add_newline(s);
3647  }
3648  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
3649  return result;
3650 }
3651 
3652 
3653 //-----------------------------------------------------------------------------
3654 
3655 template<class EventHandler>
3656 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3657 {
3658  if(sc.needs_filter)
3659  {
3660  if(m_options.scalar_filtering())
3661  {
3662  return _filter_scalar_plain(sc.scalar, indentation);
3663  }
3664  else
3665  {
3666  _c4dbgp("plain scalar left unfiltered");
3667  m_evt_handler->mark_key_scalar_unfiltered();
3668  }
3669  }
3670  else
3671  {
3672  _c4dbgp("plain scalar doesn't need filtering");
3673  }
3674  return sc.scalar;
3675 }
3676 
3677 template<class EventHandler>
3678 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3679 {
3680  if(sc.needs_filter)
3681  {
3682  if(m_options.scalar_filtering())
3683  {
3684  return _filter_scalar_plain(sc.scalar, indentation);
3685  }
3686  else
3687  {
3688  _c4dbgp("plain scalar left unfiltered");
3689  m_evt_handler->mark_val_scalar_unfiltered();
3690  }
3691  }
3692  else
3693  {
3694  _c4dbgp("plain scalar doesn't need filtering");
3695  }
3696  return sc.scalar;
3697 }
3698 
3699 
3700 //-----------------------------------------------------------------------------
3701 
3702 template<class EventHandler>
3703 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3704 {
3705  if(sc.needs_filter)
3706  {
3707  if(m_options.scalar_filtering())
3708  {
3709  return _filter_scalar_squot(sc.scalar);
3710  }
3711  else
3712  {
3713  _c4dbgp("squo key scalar left unfiltered");
3714  m_evt_handler->mark_key_scalar_unfiltered();
3715  }
3716  }
3717  else
3718  {
3719  _c4dbgp("squo key scalar doesn't need filtering");
3720  }
3721  return sc.scalar;
3722 }
3723 
3724 template<class EventHandler>
3725 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3726 {
3727  if(sc.needs_filter)
3728  {
3729  if(m_options.scalar_filtering())
3730  {
3731  return _filter_scalar_squot(sc.scalar);
3732  }
3733  else
3734  {
3735  _c4dbgp("squo val scalar left unfiltered");
3736  m_evt_handler->mark_val_scalar_unfiltered();
3737  }
3738  }
3739  else
3740  {
3741  _c4dbgp("squo val scalar doesn't need filtering");
3742  }
3743  return sc.scalar;
3744 }
3745 
3746 
3747 //-----------------------------------------------------------------------------
3748 
3749 template<class EventHandler>
3750 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3751 {
3752  if(sc.needs_filter)
3753  {
3754  if(m_options.scalar_filtering())
3755  {
3756  return _filter_scalar_dquot(sc.scalar);
3757  }
3758  else
3759  {
3760  _c4dbgp("dquo scalar left unfiltered");
3761  m_evt_handler->mark_key_scalar_unfiltered();
3762  }
3763  }
3764  else
3765  {
3766  _c4dbgp("dquo scalar doesn't need filtering");
3767  }
3768  return sc.scalar;
3769 }
3770 
3771 template<class EventHandler>
3772 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3773 {
3774  if(sc.needs_filter)
3775  {
3776  if(m_options.scalar_filtering())
3777  {
3778  return _filter_scalar_dquot(sc.scalar);
3779  }
3780  else
3781  {
3782  _c4dbgp("dquo scalar left unfiltered");
3783  m_evt_handler->mark_val_scalar_unfiltered();
3784  }
3785  }
3786  else
3787  {
3788  _c4dbgp("dquo scalar doesn't need filtering");
3789  }
3790  return sc.scalar;
3791 }
3792 
3793 
3794 //-----------------------------------------------------------------------------
3795 
3796 template<class EventHandler>
3797 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3798 {
3799  if(m_options.scalar_filtering())
3800  {
3801  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3802  }
3803  else
3804  {
3805  _c4dbgp("literal scalar left unfiltered");
3806  m_evt_handler->mark_key_scalar_unfiltered();
3807  }
3808  return sb.scalar;
3809 }
3810 
3811 template<class EventHandler>
3812 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3813 {
3814  if(m_options.scalar_filtering())
3815  {
3816  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3817  }
3818  else
3819  {
3820  _c4dbgp("literal scalar left unfiltered");
3821  m_evt_handler->mark_val_scalar_unfiltered();
3822  }
3823  return sb.scalar;
3824 }
3825 
3826 
3827 //-----------------------------------------------------------------------------
3828 
3829 template<class EventHandler>
3830 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3831 {
3832  if(m_options.scalar_filtering())
3833  {
3834  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3835  }
3836  else
3837  {
3838  _c4dbgp("folded scalar left unfiltered");
3839  m_evt_handler->mark_key_scalar_unfiltered();
3840  }
3841  return sb.scalar;
3842 }
3843 
3844 template<class EventHandler>
3845 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3846 {
3847  if(m_options.scalar_filtering())
3848  {
3849  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3850  }
3851  else
3852  {
3853  _c4dbgp("folded scalar left unfiltered");
3854  m_evt_handler->mark_val_scalar_unfiltered();
3855  }
3856  return sb.scalar;
3857 }
3858 
3859 
3860 //-----------------------------------------------------------------------------
3861 //-----------------------------------------------------------------------------
3862 //-----------------------------------------------------------------------------
3863 
3864 #ifdef RYML_DBG // !!! <----------------------------------
3865 
3866 template<class EventHandler>
3867 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3868 {
3869  char buf1_[64], buf2_[64], buf3_[64];
3870  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3871  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3872  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3873  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3874  s->flags |= on;
3875 }
3876 
3877 template<class EventHandler>
3878 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3879 {
3880  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3881  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3882  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3883  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3884  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3885  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3886  s->flags |= on;
3887  s->flags &= ~off;
3888 }
3889 
3890 template<class EventHandler>
3891 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3892 {
3893  char buf1_[64], buf2_[64], buf3_[64];
3894  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3895  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3896  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3897  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3898  s->flags &= ~off;
3899 }
3900 
3901 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3902 {
3903  size_t pos = 0;
3904  bool gotone = false;
3905 
3906  #define _prflag(fl) \
3907  if((flags & fl) == (fl)) \
3908  { \
3909  if(gotone) \
3910  { \
3911  if(pos + 1 < buf.len) \
3912  buf[pos] = '|'; \
3913  ++pos; \
3914  } \
3915  csubstr fltxt = #fl; \
3916  if(pos + fltxt.len <= buf.len) \
3917  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3918  pos += fltxt.len; \
3919  gotone = true; \
3920  }
3921 
3922  _prflag(RTOP);
3923  _prflag(RUNK);
3924  _prflag(RMAP);
3925  _prflag(RSEQ);
3926  _prflag(RFLOW);
3927  _prflag(RBLCK);
3928  _prflag(QMRK);
3929  _prflag(RKEY);
3930  _prflag(RVAL);
3931  _prflag(RKCL);
3932  _prflag(RNXT);
3933  _prflag(SSCL);
3934  _prflag(QSCL);
3935  _prflag(RSET);
3936  _prflag(RDOC);
3937  _prflag(NDOC);
3938  _prflag(USTY);
3939  _prflag(RSEQIMAP);
3940 
3941  #undef _prflag
3942 
3943  if(pos == 0)
3944  if(buf.len > 0)
3945  buf[pos++] = '0';
3946 
3947  _RYML_CHECK_BASIC(pos <= buf.len);
3948 
3949  return buf.first(pos);
3950 }
3951 
3952 #endif // RYML_DBG !!! <----------------------------------
3953 
3954 
3955 //-----------------------------------------------------------------------------
3956 //-----------------------------------------------------------------------------
3957 //-----------------------------------------------------------------------------
3958 
3959 template<class EventHandler>
3960 csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
3961 {
3962  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3963  return m_buf.sub(loc.offset);
3964 }
3965 
3966 template<class EventHandler>
3967 Location ParseEngine<EventHandler>::val_location(const char *val) const
3968 {
3969  if(C4_UNLIKELY(val == nullptr))
3970  return {m_file, 0, 0, 0};
3971  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3972  // NOTE: if any of these checks fails, the parser needs to be
3973  // instantiated with locations enabled.
3974  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3975  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3976  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3977  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3978  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3979  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3980  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3981  csubstr src = m_buf;
3982  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3983  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3984  // ok. search the first stored newline after the given ptr
3985  using lineptr_type = size_t const* C4_RESTRICT;
3986  lineptr_type lineptr = nullptr;
3987  size_t offset = (size_t)(val - src.begin());
3988  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3989  {
3990  // just do a linear search if the size is small.
3991  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3992  {
3993  if(*curr > offset)
3994  {
3995  lineptr = curr;
3996  break;
3997  }
3998  }
3999  }
4000  else
4001  {
4002  // do a bisection search if the size is not small.
4003  //
4004  // We could use std::lower_bound but this is simple enough and
4005  // spares the costly include of <algorithm>.
4006  size_t count = m_newline_offsets_size;
4007  size_t step;
4008  lineptr_type it;
4009  lineptr = m_newline_offsets;
4010  while(count)
4011  {
4012  step = count >> 1;
4013  it = lineptr + step;
4014  if(*it < offset)
4015  {
4016  lineptr = ++it;
4017  count -= step + 1;
4018  }
4019  else
4020  {
4021  count = step;
4022  }
4023  }
4024  }
4025  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4026  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4027  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4028  Location loc;
4029  loc.name = m_file;
4030  loc.offset = offset;
4031  loc.line = (size_t)(lineptr - m_newline_offsets);
4032  if(lineptr > m_newline_offsets)
4033  loc.col = (offset - *(lineptr-1) - 1u);
4034  else
4035  loc.col = offset;
4036  return loc;
4037 }
4038 
4039 template<class EventHandler>
4040 void ParseEngine<EventHandler>::_prepare_locations()
4041 {
4042  m_newline_offsets_buf = m_buf;
4043  size_t numnewlines = 1u + m_buf.count('\n');
4044  _resize_locations(numnewlines);
4045  m_newline_offsets_size = 0;
4046  for(size_t i = 0; i < m_buf.len; i++)
4047  if(m_buf[i] == '\n')
4048  m_newline_offsets[m_newline_offsets_size++] = i;
4049  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4050  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4051 }
4052 
4053 template<class EventHandler>
4054 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4055 {
4056  if(numnewlines > m_newline_offsets_capacity)
4057  {
4058  if(m_newline_offsets)
4059  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4060  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4061  m_newline_offsets_capacity = numnewlines;
4062  }
4063 }
4064 
4065 template<class EventHandler>
4066 bool ParseEngine<EventHandler>::_locations_dirty() const
4067 {
4068  return !m_newline_offsets_size;
4069 }
4070 
4071 
4072 //-----------------------------------------------------------------------------
4073 //-----------------------------------------------------------------------------
4074 //-----------------------------------------------------------------------------
4075 
4076 template<class EventHandler>
4077 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4078 {
4079  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4080  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4081  {
4082  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4083  {
4084  _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4085  _skipchars(" \t");
4086  }
4087  // comments
4088  if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4089  {
4090  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4091  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4092  }
4093  }
4094 }
4095 
4096 
4097 //-----------------------------------------------------------------------------
4098 
4099 
4100 template<class EventHandler>
4101 void ParseEngine<EventHandler>::_handle_colon()
4102 {
4103  size_t curr = m_evt_handler->m_curr->pos.line;
4104  if(m_prev_colon != npos)
4105  {
4106  if(curr == m_prev_colon)
4107  _c4err("two colons on same line");
4108  }
4109  m_prev_colon = curr;
4110 }
4111 
4112 template<class EventHandler>
4113 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4114 {
4115  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4116  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
4117  _c4err("too many annotations");
4118  dst->annotations[dst->num_entries].str = str;
4119  dst->annotations[dst->num_entries].indentation = indentation;
4120  dst->annotations[dst->num_entries].line = line;
4121  ++dst->num_entries;
4122 }
4123 
4124 template<class EventHandler>
4125 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4126 {
4127  dst->num_entries = 0;
4128 }
4129 
4130 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4131 template<class EventHandler>
4132 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4133 {
4134  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4135  {
4136  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4137  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4138  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4139  size_t to_skip = m_evt_handler->m_curr->indref;
4140  if(m_pending_anchors.num_entries)
4141  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4142  if(m_pending_tags.num_entries)
4143  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4144  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4145  _maybe_skipchars_up_to(' ', to_skip);
4146  return true;
4147  }
4148  return false;
4149 }
4150 #endif
4151 
4152 template<class EventHandler>
4153 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4154 {
4155  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4156 }
4157 
4158 template<class EventHandler>
4159 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4160 {
4161  if(!tag.begins_with("!<"))
4162  {
4163  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4164  _c4err("tags must not contain any of '[]{},'");
4165  }
4166  else
4167  {
4168  if(C4_UNLIKELY(!tag.ends_with('>')))
4169  _c4err("malformed tag");
4170  }
4171 }
4172 
4173 template<class EventHandler>
4174 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4175 {
4176  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4177  if(m_pending_tags.num_entries)
4178  {
4179  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4180  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4181  {
4182  _check_tag(m_pending_tags.annotations[0].str);
4183  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4184  _clear_annotations(&m_pending_tags);
4185  }
4186  else
4187  {
4188  _c4err("too many tags");
4189  }
4190  }
4191  if(m_pending_anchors.num_entries)
4192  {
4193  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4194  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4195  {
4196  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4197  _clear_annotations(&m_pending_anchors);
4198  }
4199  else
4200  {
4201  _c4err("too many anchors");
4202  }
4203  }
4204 }
4205 
4206 template<class EventHandler>
4207 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4208 {
4209  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4210  if(m_pending_tags.num_entries)
4211  {
4212  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4213  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4214  {
4215  _check_tag(m_pending_tags.annotations[0].str);
4216  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4217  _clear_annotations(&m_pending_tags);
4218  }
4219  else
4220  {
4221  _c4err("too many tags");
4222  }
4223  }
4224  if(m_pending_anchors.num_entries)
4225  {
4226  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4227  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4228  {
4229  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4230  _clear_annotations(&m_pending_anchors);
4231  }
4232  else
4233  {
4234  _c4err("too many anchors");
4235  }
4236  }
4237 }
4238 
4239 template<class EventHandler>
4240 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4241 {
4242  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4243  if(m_pending_tags.num_entries == 2)
4244  {
4245  _c4dbgp("2 tags, setting entry 0");
4246  _check_tag(m_pending_tags.annotations[0].str);
4247  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4248  }
4249  else if(m_pending_tags.num_entries == 1)
4250  {
4251  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4252  if(m_pending_tags.annotations[0].line < current_line)
4253  {
4254  _c4dbgp("...tag is for the map. setting it.");
4255  _check_tag(m_pending_tags.annotations[0].str);
4256  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4257  _clear_annotations(&m_pending_tags);
4258  }
4259  }
4260  //
4261  if(m_pending_anchors.num_entries == 2)
4262  {
4263  _c4dbgp("2 anchors, setting entry 0");
4264  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4265  }
4266  else if(m_pending_anchors.num_entries == 1)
4267  {
4268  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4269  if(m_pending_anchors.annotations[0].line < current_line)
4270  {
4271  _c4dbgp("...anchor is for the map. setting it.");
4272  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4273  _clear_annotations(&m_pending_anchors);
4274  }
4275  }
4276 }
4277 
4278 template<class EventHandler>
4279 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4280 {
4281  _c4dbgp("annotations_before_start_mapblck_as_key");
4282  if(m_pending_tags.num_entries == 2)
4283  {
4284  _check_tag(m_pending_tags.annotations[0].str);
4285  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4286  }
4287  if(m_pending_anchors.num_entries == 2)
4288  {
4289  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4290  }
4291 }
4292 
4293 template<class EventHandler>
4294 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4295 {
4296  _c4dbgp("annotations_after_start_mapblck");
4297  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4298  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4299  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4300  {
4301  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4302  switch(m_pending_tags.num_entries)
4303  {
4304  case 1u:
4305  _check_tag(m_pending_tags.annotations[0].str);
4306  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4307  _clear_annotations(&m_pending_tags);
4308  break;
4309  case 2u:
4310  _check_tag(m_pending_tags.annotations[1].str);
4311  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4312  _clear_annotations(&m_pending_tags);
4313  break;
4314  }
4315  switch(m_pending_anchors.num_entries)
4316  {
4317  case 1u:
4318  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4319  _clear_annotations(&m_pending_anchors);
4320  break;
4321  case 2u:
4322  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4323  _clear_annotations(&m_pending_anchors);
4324  break;
4325  }
4326  }
4327  _set_indentation(key_indentation);
4328 }
4329 
4330 template<class EventHandler>
4331 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4332 {
4333  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4334  // select the left-most annotation on the max line
4335  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4336  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4337  {
4338  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4339  if(ann.line > curr->line)
4340  curr = &ann;
4341  else if(ann.indentation < curr->indentation)
4342  curr = &ann;
4343  }
4344  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4345  {
4346  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4347  if(ann.line > curr->line)
4348  curr = &ann;
4349  else if(ann.indentation < curr->indentation)
4350  curr = &ann;
4351  }
4352  return curr->line < val_line ? val_indentation : curr->indentation;
4353 }
4354 
4355 template<class EventHandler>
4356 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4357 {
4358  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4359  const size_t pos = rem.find('#');
4360  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4361  if(pos == npos) // no comments
4362  {
4363  m_evt_handler->add_directive(rem);
4364  _line_progressed(rem.len);
4365  }
4366  else
4367  {
4368  csubstr to_comment = rem.first(pos);
4369  csubstr trimmed = to_comment.trimr(" \t");
4370  m_evt_handler->add_directive(trimmed);
4371  _line_progressed(pos);
4372  _skip_comment();
4373  }
4374 }
4375 
4376 template<class EventHandler>
4377 bool ParseEngine<EventHandler>::_handle_bom()
4378 {
4379  const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4380  if(rem.len)
4381  {
4382  const csubstr rest = rem.sub(1);
4383  // https://yaml.org/spec/1.2.2/#52-character-encodings
4384  #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4385  if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4386  {
4387  _c4dbgp("byte order mark: UTF32BE");
4388  _handle_bom(UTF32BE);
4389  _line_progressed(4);
4390  m_bom_len = 4;
4391  return true;
4392  }
4393  else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4394  {
4395  _c4dbgp("byte order mark: UTF32LE");
4396  _handle_bom(UTF32LE);
4397  _line_progressed(4);
4398  m_bom_len = 4;
4399  return true;
4400  }
4401  else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4402  {
4403  _c4dbgp("byte order mark: UTF16BE");
4404  _handle_bom(UTF16BE);
4405  _line_progressed(2);
4406  m_bom_len = 2;
4407  return true;
4408  }
4409  else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4410  {
4411  _c4dbgp("byte order mark: UTF16LE");
4412  _handle_bom(UTF16LE);
4413  _line_progressed(2);
4414  m_bom_len = 2;
4415  return true;
4416  }
4417  else if(rem.begins_with("\xef\xbb\xbf"))
4418  {
4419  _c4dbgp("byte order mark: UTF8");
4420  _handle_bom(UTF8);
4421  _line_progressed(3);
4422  m_bom_len = 3;
4423  return true;
4424  }
4425  #undef _rymlisascii
4426  }
4427  return false;
4428 }
4429 
4430 template<class EventHandler>
4431 void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4432 {
4433  if(m_encoding == NOBOM)
4434  {
4435  if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == m_buf.str))
4436  m_encoding = enc;
4437  else
4438  _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4439  }
4440  else if(enc != m_encoding)
4441  {
4442  _c4err("byte order mark can only be set once");
4443  }
4444 }
4445 
4446 
4447 //-----------------------------------------------------------------------------
4448 
4449 template<class EventHandler>
4450 void ParseEngine<EventHandler>::_handle_seq_json()
4451 {
4452 seqjson_start:
4453  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4454 
4455  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4456  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4457  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
4458  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4459  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4460 
4461  _handle_flow_skip_whitespace();
4462  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4463  if(!rem.len)
4464  goto seqjson_again;
4465 
4466  if(has_any(RVAL))
4467  {
4468  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4469  const char first = rem.str[0];
4470  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4471  switch(first)
4472  {
4473  case '"':
4474  {
4475  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4476  ScannedScalar sc = _scan_scalar_dquot();
4477  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4478  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4479  addrem_flags(RNXT, RVAL);
4480  break;
4481  }
4482  case '[':
4483  {
4484  _c4dbgp("seqjson[RVAL]: start child seqjson");
4485  addrem_flags(RNXT, RVAL);
4486  m_evt_handler->begin_seq_val_flow();
4487  addrem_flags(RVAL, RNXT);
4488  _line_progressed(1);
4489  break;
4490  }
4491  case '{':
4492  {
4493  _c4dbgp("seqjson[RVAL]: start child mapjson");
4494  addrem_flags(RNXT, RVAL);
4495  m_evt_handler->begin_map_val_flow();
4496  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4497  _line_progressed(1);
4498  goto seqjson_finish;
4499  }
4500  case ']': // this happens on a trailing comma like ", ]"
4501  {
4502  _c4dbgp("seqjson[RVAL]: end!");
4503  rem_flags(RSEQ);
4504  _end_seq_flow();
4505  _line_progressed(1);
4506  if(!has_all(RSEQ|RFLOW))
4507  goto seqjson_finish;
4508  break;
4509  }
4510  default:
4511  {
4512  ScannedScalar sc;
4513  if(_scan_scalar_seq_json(&sc))
4514  {
4515  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4516  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4517  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4518  addrem_flags(RNXT, RVAL);
4519  }
4520  else
4521  {
4522  _c4err("parse error");
4523  }
4524  }
4525  }
4526  }
4527  else // RNXT
4528  {
4529  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4530  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4531  const char first = rem.str[0];
4532  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4533  switch(first)
4534  {
4535  case ',':
4536  {
4537  _c4dbgp("seqjson[RNXT]: expect next val");
4538  addrem_flags(RVAL, RNXT);
4539  m_evt_handler->add_sibling();
4540  _line_progressed(1);
4541  break;
4542  }
4543  case ']':
4544  {
4545  _c4dbgp("seqjson[RNXT]: end!");
4546  _end_seq_flow();
4547  _line_progressed(1);
4548  goto seqjson_finish;
4549  }
4550  default:
4551  _c4err("parse error");
4552  }
4553  }
4554 
4555  seqjson_again:
4556  _c4dbgt("seqjson: go again", 0);
4557  if(_finished_line())
4558  {
4559  if(C4_LIKELY(!_finished_file()))
4560  {
4561  _line_ended();
4562  _scan_line();
4563  _c4dbgnextline();
4564  }
4565  else
4566  {
4567  _c4err("missing terminating ]");
4568  }
4569  }
4570  goto seqjson_start;
4571 
4572  seqjson_finish:
4573  _c4dbgp("seqjson: finish");
4574 }
4575 
4576 
4577 //-----------------------------------------------------------------------------
4578 
4579 template<class EventHandler>
4580 void ParseEngine<EventHandler>::_handle_map_json()
4581 {
4582 mapjson_start:
4583  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4584 
4585  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4586  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
4587  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4588  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4589  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4590 
4591  _handle_flow_skip_whitespace();
4592  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4593  if(!rem.len)
4594  goto mapjson_again;
4595 
4596  if(has_any(RKEY))
4597  {
4598  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4599  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4600  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4601  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4602  const char first = rem.str[0];
4603  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4604  switch(first)
4605  {
4606  case '"':
4607  {
4608  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4609  ScannedScalar sc = _scan_scalar_dquot();
4610  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4611  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4612  addrem_flags(RKCL, RKEY);
4613  break;
4614  }
4615  case '}': // this happens on a trailing comma like ", }"
4616  {
4617  _c4dbgp("mapjson[RKEY]: end!");
4618  _end_map_flow();
4619  _line_progressed(1);
4620  goto mapjson_finish;
4621  }
4622  default:
4623  _c4err("parse error");
4624  }
4625  }
4626  else if(has_any(RVAL))
4627  {
4628  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4629  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4630  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4631  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4632  const char first = rem.str[0];
4633  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4634  switch(first)
4635  {
4636  case '"':
4637  {
4638  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4639  ScannedScalar sc = _scan_scalar_dquot();
4640  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4641  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4642  addrem_flags(RNXT, RVAL);
4643  break;
4644  }
4645  case '[':
4646  {
4647  _c4dbgp("mapjson[RVAL]: start val seqjson");
4648  addrem_flags(RNXT, RVAL);
4649  m_evt_handler->begin_seq_val_flow();
4650  _set_indentation(m_evt_handler->m_parent->indref);
4651  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4652  _line_progressed(1);
4653  goto mapjson_finish;
4654  }
4655  case '{':
4656  {
4657  _c4dbgp("mapjson[RVAL]: start val mapjson");
4658  addrem_flags(RNXT, RVAL);
4659  m_evt_handler->begin_map_val_flow();
4660  _set_indentation(m_evt_handler->m_parent->indref);
4661  addrem_flags(RKEY, RNXT);
4662  _line_progressed(1);
4663  // keep going in this function
4664  break;
4665  }
4666  default:
4667  {
4668  ScannedScalar sc;
4669  if(_scan_scalar_map_json(&sc))
4670  {
4671  _c4dbgp("mapjson[RVAL]: plain scalar.");
4672  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4673  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4674  addrem_flags(RNXT, RVAL);
4675  }
4676  else
4677  {
4678  _c4err("parse error");
4679  }
4680  break;
4681  }
4682  }
4683  }
4684  else if(has_any(RKCL)) // read the key colon
4685  {
4686  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4687  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4688  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4689  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4690  const char first = rem.str[0];
4691  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4692  if(first == ':')
4693  {
4694  _c4dbgp("mapjson[RKCL]: found the colon");
4695  addrem_flags(RVAL, RKCL);
4696  _line_progressed(1);
4697  }
4698  else
4699  {
4700  _c4err("parse error");
4701  }
4702  }
4703  else if(has_any(RNXT))
4704  {
4705  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4706  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4707  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4708  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4709  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4710  if(rem.begins_with(','))
4711  {
4712  _c4dbgp("mapjson[RNXT]: expect next keyval");
4713  m_evt_handler->add_sibling();
4714  addrem_flags(RKEY, RNXT);
4715  _line_progressed(1);
4716  }
4717  else if(rem.begins_with('}'))
4718  {
4719  _c4dbgp("mapjson[RNXT]: end!");
4720  _end_map_flow();
4721  _line_progressed(1);
4722  goto mapjson_finish;
4723  }
4724  else
4725  {
4726  _c4err("parse error");
4727  }
4728  }
4729 
4730  mapjson_again:
4731  _c4dbgt("mapjson: go again", 0);
4732  if(_finished_line())
4733  {
4734  if(C4_LIKELY(!_finished_file()))
4735  {
4736  _line_ended();
4737  _scan_line();
4738  _c4dbgnextline();
4739  }
4740  else
4741  {
4742  _c4err("missing terminating }");
4743  }
4744  }
4745  goto mapjson_start;
4746 
4747  mapjson_finish:
4748  _c4dbgp("mapjson: finish");
4749 }
4750 
4751 
4752 //-----------------------------------------------------------------------------
4753 
4754 template<class EventHandler>
4755 void ParseEngine<EventHandler>::_handle_seq_imap()
4756 {
4757 seqimap_start:
4758  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4759 
4760  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4761  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4762  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4763  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4764  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4765 
4766  _handle_flow_skip_whitespace();
4767  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4768  if(!rem.len)
4769  goto seqimap_again;
4770 
4771  if(has_any(RVAL))
4772  {
4773  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4774  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4775  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4776  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4777  const char first = rem.str[0];
4778  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4779  ScannedScalar sc;
4780  if(first == '\'')
4781  {
4782  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4783  sc = _scan_scalar_squot();
4784  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4785  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4786  _end_map_flow();
4787  goto seqimap_finish;
4788  }
4789  else if(first == '"')
4790  {
4791  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4792  sc = _scan_scalar_dquot();
4793  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4794  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4795  _end_map_flow();
4796  goto seqimap_finish;
4797  }
4798  // block scalars (ie | and >) cannot appear in flow containers
4799  else if(_scan_scalar_plain_map_flow(&sc))
4800  {
4801  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4802  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4803  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4804  _end_map_flow();
4805  goto seqimap_finish;
4806  }
4807  else if(first == '[')
4808  {
4809  _c4dbgp("seqimap[RVAL]: start child seqflow");
4810  addrem_flags(RNXT, RVAL);
4811  m_evt_handler->begin_seq_val_flow();
4812  addrem_flags(RVAL, RNXT|RSEQIMAP);
4813  _set_indentation(m_evt_handler->m_parent->indref);
4814  _line_progressed(1);
4815  goto seqimap_finish;
4816  }
4817  else if(first == '{')
4818  {
4819  _c4dbgp("seqimap[RVAL]: start child mapflow");
4820  addrem_flags(RNXT, RVAL);
4821  m_evt_handler->begin_map_val_flow();
4822  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4823  _set_indentation(m_evt_handler->m_parent->indref);
4824  _line_progressed(1);
4825  goto seqimap_finish;
4826  }
4827  else if(first == ',' || first == ']')
4828  {
4829  _c4dbgp("seqimap[RVAL]: finish without val.");
4830  m_evt_handler->set_val_scalar_plain_empty();
4831  _end_map_flow();
4832  goto seqimap_finish;
4833  }
4834  else if(first == '&')
4835  {
4836  csubstr anchor = _scan_anchor();
4837  _c4dbgp("seqimap[RVAL]: anchor!");
4838  m_evt_handler->set_val_anchor(anchor);
4839  }
4840  else if(first == '*')
4841  {
4842  csubstr ref = _scan_ref_seq();
4843  _c4dbgp("seqimap[RVAL]: ref!");
4844  m_evt_handler->set_val_ref(ref);
4845  addrem_flags(RNXT, RVAL);
4846  }
4847  else
4848  {
4849  _c4err("parse error");
4850  }
4851  }
4852  else if(has_any(RNXT))
4853  {
4854  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4855  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4856  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4857  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4858  const char first = rem.str[0];
4859  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4860  if(first == ',' || first == ']')
4861  {
4862  // we may get here because a map or a seq started and we
4863  // return later
4864  _c4dbgp("seqimap: done");
4865  _end_map_flow();
4866  goto seqimap_finish;
4867  }
4868  else
4869  {
4870  _c4err("parse error");
4871  }
4872  }
4873  else if(has_any(QMRK))
4874  {
4875  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4876  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4877  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4878  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4879  const char first = rem.str[0];
4880  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4881  ScannedScalar sc;
4882  if(first == '\'')
4883  {
4884  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4885  sc = _scan_scalar_squot();
4886  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4887  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4888  addrem_flags(RKCL, QMRK);
4889  goto seqimap_again;
4890  }
4891  else if(first == '"')
4892  {
4893  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4894  sc = _scan_scalar_dquot();
4895  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4896  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4897  addrem_flags(RKCL, QMRK);
4898  goto seqimap_again;
4899  }
4900  // block scalars (ie | and >) cannot appear in flow containers
4901  else if(_scan_scalar_plain_map_flow(&sc))
4902  {
4903  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4904  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4905  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4906  addrem_flags(RKCL, QMRK);
4907  goto seqimap_again;
4908  }
4909  else if(first == '[')
4910  {
4911  _c4dbgp("seqimap[QMRK]: start child seqflow");
4912  addrem_flags(RKCL, QMRK);
4913  m_evt_handler->begin_seq_key_flow();
4914  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4915  _set_indentation(m_evt_handler->m_parent->indref);
4916  _line_progressed(1);
4917  goto seqimap_finish;
4918  }
4919  else if(first == '{')
4920  {
4921  _c4dbgp("seqimap[QMRK]: start child mapflow");
4922  addrem_flags(RKCL, QMRK);
4923  m_evt_handler->begin_map_key_flow();
4924  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4925  _set_indentation(m_evt_handler->m_parent->indref);
4926  _line_progressed(1);
4927  goto seqimap_finish;
4928  }
4929  else if(first == ',' || first == ']')
4930  {
4931  _c4dbgp("seqimap[QMRK]: finish without key.");
4932  m_evt_handler->set_key_scalar_plain_empty();
4933  m_evt_handler->set_val_scalar_plain_empty();
4934  _end_map_flow();
4935  goto seqimap_finish;
4936  }
4937  else if(first == '&')
4938  {
4939  csubstr anchor = _scan_anchor();
4940  _c4dbgp("seqimap[QMRK]: anchor!");
4941  m_evt_handler->set_key_anchor(anchor);
4942  }
4943  else if(first == '*')
4944  {
4945  csubstr ref = _scan_ref_seq();
4946  _c4dbgp("seqimap[QMRK]: ref!");
4947  m_evt_handler->set_key_ref(ref);
4948  addrem_flags(RKCL, QMRK);
4949  }
4950  else
4951  {
4952  _c4err("parse error");
4953  }
4954  }
4955  else if(has_any(RKCL))
4956  {
4957  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4958  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4959  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4960  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4961  const char first = rem.str[0];
4962  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4963  if(first == ':')
4964  {
4965  _c4dbgp("seqimap[RKCL]: found ':'");
4966  addrem_flags(RVAL, RKCL);
4967  _line_progressed(1);
4968  goto seqimap_again;
4969  }
4970  else if(first == ',' || first == ']')
4971  {
4972  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4973  m_evt_handler->set_val_scalar_plain_empty();
4974  _end_map_flow();
4975  goto seqimap_finish;
4976  }
4977  else
4978  {
4979  _c4err("parse error");
4980  }
4981  }
4982 
4983  seqimap_again:
4984  _c4dbgt("seqimap: go again", 0);
4985  if(_finished_line())
4986  {
4987  if(C4_LIKELY(!_finished_file()))
4988  {
4989  _line_ended();
4990  _scan_line();
4991  _c4dbgnextline();
4992  }
4993  else
4994  {
4995  _c4err("parse error");
4996  }
4997  }
4998  goto seqimap_start;
4999 
5000  seqimap_finish:
5001  _c4dbgp("seqimap: finish");
5002 }
5003 
5004 
5005 //-----------------------------------------------------------------------------
5006 
// Handle the contents of a flow sequence, ie `[...]` (flags RSEQ|RFLOW).
//
// This is a two-state machine over the remainder of the current line;
// exactly one of the flags is on for each pass (asserted on entry):
//   - RVAL: a value is expected
//   - RNXT: a value was read; `,`, `]` or `:` is expected
//
// The function loops (via the seqflow_again/seqflow_start labels) until
// the sequence is closed with `]`, or until a child flow map or a
// single-pair implicit map (RSEQIMAP) is opened, in which case the flags
// of the new state are set up here and the function returns. A nested
// `[` does not return: its state is also RSEQ|RFLOW|RVAL, and the loop
// simply goes on.
//
// Errors are reported through _c4err(): on an unexpected character, and
// when the input ends before the closing `]`.
5007 template<class EventHandler>
5008 void ParseEngine<EventHandler>::_handle_seq_flow()
5009 {
5010 seqflow_start:
5011  _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5012 
5013  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5014  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5015  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
5016  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5017  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
5018  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
5019 
5020  _handle_flow_skip_whitespace();
5021  // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
      // (this is why the remainder is re-read from m_curr everywhere below)
5022  if(!m_evt_handler->m_curr->line_contents.rem.len)
5023  goto seqflow_again;
5024 
5025  if(has_any(RVAL))
5026  {
5027  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5028  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5029  ScannedScalar sc;
      // scalars: set the value, then expect a separator (RNXT)
5030  if(first == '\'')
5031  {
5032  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5033  sc = _scan_scalar_squot();
5034  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5035  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5036  addrem_flags(RNXT, RVAL);
5037  }
5038  else if(first == '"')
5039  {
5040  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5041  sc = _scan_scalar_dquot();
5042  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5043  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5044  addrem_flags(RNXT, RVAL);
5045  }
5046  // block scalars (ie | and >) cannot appear in flow containers
5047  else if(_scan_scalar_plain_seq_flow(&sc))
5048  {
5049  _c4dbgp("seqflow[RVAL]: it's a scalar.");
5050  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5051  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5052  addrem_flags(RNXT, RVAL);
5053  }
5054  else if(first == '[')
5055  {
      // nested flow sequence: this state is left at RNXT, the child
      // starts at RVAL, and the loop continues in this function
5056  _c4dbgp("seqflow[RVAL]: start child seqflow");
5057  addrem_flags(RNXT, RVAL);
5058  m_evt_handler->begin_seq_val_flow();
5059  _set_indentation(m_evt_handler->m_parent->indref);
5060  addrem_flags(RVAL, RNXT);
5061  _line_progressed(1);
5062  }
5063  else if(first == '{')
5064  {
      // nested flow map: the child state is switched from RSEQ to
      // RMAP|RKEY, and the function returns
5065  _c4dbgp("seqflow[RVAL]: start child mapflow");
5066  addrem_flags(RNXT, RVAL);
5067  m_evt_handler->begin_map_val_flow();
5068  _set_indentation(m_evt_handler->m_parent->indref);
5069  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5070  _line_progressed(1);
5071  goto seqflow_finish;
5072  }
5073  else if(first == ']') // this happens on a trailing comma like ", ]"
5074  {
5075  _c4dbgp("seqflow[RVAL]: end!");
5076  _line_progressed(1);
5077  _end_seq_flow();
5078  goto seqflow_finish;
5079  }
5080  else if(first == '*')
5081  {
      // a reference stands for the whole value
5082  csubstr ref = _scan_ref_seq();
5083  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5084  m_evt_handler->set_val_ref(ref);
5085  addrem_flags(RNXT, RVAL);
5086  }
5087  else if(first == '&')
5088  {
      // the state stays at RVAL, since the anchored value comes next.
      // If _maybe_scan_following_comma() succeeds, the anchored value
      // is set to an empty scalar and the next sibling is started.
      // NOTE(review): presumably the helper also consumes the comma --
      // confirm.
5089  csubstr anchor = _scan_anchor();
5090  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5091  m_evt_handler->set_val_anchor(anchor);
5092  if(_maybe_scan_following_comma())
5093  {
5094  _c4dbgp("seqflow[RVAL]: empty scalar!");
5095  m_evt_handler->set_val_scalar_plain_empty();
5096  m_evt_handler->add_sibling();
5097  }
5098  }
5099  else if(first == '!')
5100  {
      // tags are handled like anchors (see the '&' branch)
5101  csubstr tag = _scan_tag();
5102  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5103  _check_tag(tag);
5104  m_evt_handler->set_val_tag(tag);
5105  if(_maybe_scan_following_comma())
5106  {
5107  _c4dbgp("seqflow[RVAL]: empty scalar!");
5108  m_evt_handler->set_val_scalar_plain_empty();
5109  m_evt_handler->add_sibling();
5110  }
5111  }
5112  else if(first == ':')
5113  {
      // ':' where a value was expected: a map is started as the value,
      // with an empty key, and its state is put in RSEQIMAP|RVAL
      // (RSEQ is removed)
5114  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5115  addrem_flags(RNXT, RVAL);
5116  m_evt_handler->begin_map_val_flow();
5117  _set_indentation(m_evt_handler->m_parent->indref);
5118  m_evt_handler->set_key_scalar_plain_empty();
5119  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5120  _line_progressed(1);
5121  goto seqflow_finish;
5122  }
5123  else if(first == '?')
5124  {
      // explicit key: a map is started as the value, and its state is
      // put in RSEQIMAP|QMRK (RSEQ is removed)
5125  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5126  addrem_flags(RNXT, RVAL);
5127  m_was_inside_qmrk = true;
5128  m_evt_handler->begin_map_val_flow();
5129  _set_indentation(m_evt_handler->m_parent->indref);
5130  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5131  _line_progressed(1);
5132  _maybe_skip_whitespace_tokens();
5133  goto seqflow_finish;
5134  }
5135  else
5136  {
5137  _c4err("parse error");
5138  }
5139  }
5140  else // RNXT
5141  {
5142  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5143  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5144  const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5145  if(first == ',')
5146  {
5147  _c4dbgp("seqflow[RNXT]: expect next val");
5148  addrem_flags(RVAL, RNXT);
5149  m_evt_handler->add_sibling();
5150  _line_progressed(1);
5151  }
5152  else if(first == ']')
5153  {
5154  _c4dbgp("seqflow[RNXT]: end!");
5155  _end_seq_flow();
5156  _line_progressed(1);
5157  goto seqflow_finish;
5158  }
5159  else if(first == ':')
5160  {
      // ':' after a value: the handler is told that the value just
      // read is actually the first key of a new map, and the state
      // goes to RSEQIMAP|RVAL to read the val of that pair
5161  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5162  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5163  _set_indentation(m_evt_handler->m_parent->indref);
5164  _line_progressed(1);
5165  addrem_flags(RSEQIMAP|RVAL, RNXT);
5166  goto seqflow_finish;
5167  }
5168  else
5169  {
5170  _c4err("parse error");
5171  }
5172  }
5173 
      // the current line may be exhausted: read the next line, or fail
      // if the input ended with the sequence still open
5174  seqflow_again:
5175  _c4dbgt("seqflow: go again", 0);
5176  if(_finished_line())
5177  {
5178  if(C4_LIKELY(!_finished_file()))
5179  {
5180  _line_ended();
5181  _scan_line();
5182  _c4dbgnextline();
5183  }
5184  else
5185  {
5186  _c4err("missing terminating ]");
5187  }
5188  }
5189  goto seqflow_start;
5190 
5191  seqflow_finish:
5192  _c4dbgp("seqflow: finish");
5193 }
5194 
5195 
5196 //-----------------------------------------------------------------------------
5197 
5198 template<class EventHandler>
5199 void ParseEngine<EventHandler>::_handle_map_flow()
5200 {
5201 mapflow_start:
5202  _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5203 
5204  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5205  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW));
5206  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5207  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5208 
5209  _handle_flow_skip_whitespace();
5210  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5211  if(!rem.len)
5212  goto mapflow_again;
5213 
5214  if(has_any(RKEY))
5215  {
5216  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5217  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5218  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5219  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5220  const char first = rem.str[0];
5221  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5222  ScannedScalar sc;
5223  if(first == '\'')
5224  {
5225  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5226  sc = _scan_scalar_squot();
5227  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5228  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5229  addrem_flags(RKCL, RKEY|QMRK);
5230  }
5231  else if(first == '"')
5232  {
5233  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5234  sc = _scan_scalar_dquot();
5235  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5236  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5237  addrem_flags(RKCL, RKEY|QMRK);
5238  }
5239  // block scalars (ie | and >) cannot appear in flow containers
5240  else if(_scan_scalar_plain_map_flow(&sc))
5241  {
5242  _c4dbgp("mapflow[RKEY]: plain scalar");
5243  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5244  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5245  addrem_flags(RKCL, RKEY|QMRK);
5246  }
5247  else if(first == '?')
5248  {
5249  _c4dbgp("mapflow[RKEY]: explicit key");
5250  _line_progressed(1);
5251  addrem_flags(QMRK, RKEY);
5252  _maybe_skip_whitespace_tokens();
5253  }
5254  else if(first == ':')
5255  {
5256  _c4dbgp("mapflow[RKEY]: setting empty key");
5257  m_evt_handler->set_key_scalar_plain_empty();
5258  addrem_flags(RVAL, RKEY|QMRK);
5259  _line_progressed(1);
5260  _maybe_skip_whitespace_tokens();
5261  }
5262  else if(first == ',')
5263  {
5264  _c4dbgp("mapflow[RKEY]: empty key+val!");
5265  m_evt_handler->set_key_scalar_plain_empty();
5266  m_evt_handler->set_val_scalar_plain_empty();
5267  addrem_flags(RNXT, RKEY|QMRK);
5268  // keep going in this function
5269  }
5270  else if(first == '}') // this happens on a trailing comma like ", }"
5271  {
5272  _c4dbgp("mapflow[RKEY]: end!");
5273  _end_map_flow();
5274  _line_progressed(1);
5275  goto mapflow_finish;
5276  }
5277  else if(first == '&')
5278  {
5279  csubstr anchor = _scan_anchor();
5280  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5281  m_evt_handler->set_key_anchor(anchor);
5282  }
5283  else if(first == '*')
5284  {
5285  csubstr ref = _scan_ref_map();
5286  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5287  m_evt_handler->set_key_ref(ref);
5288  addrem_flags(RKCL, RKEY);
5289  }
5290  else if(first == '[')
5291  {
5292  // RYML's tree cannot store container keys, but that's
5293  // handled inside the tree event handler. Other handler
5294  // types may be able to handle it.
5295  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5296  addrem_flags(RKCL, RKEY);
5297  m_evt_handler->begin_seq_key_flow();
5298  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5299  _set_indentation(m_evt_handler->m_parent->indref);
5300  _line_progressed(1);
5301  goto mapflow_finish;
5302  }
5303  else if(first == '{')
5304  {
5305  // RYML's tree cannot store container keys, but that's
5306  // handled inside the tree event handler. Other handler
5307  // types may be able to handle it.
5308  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5309  addrem_flags(RKCL, RKEY);
5310  m_evt_handler->begin_map_key_flow();
5311  addrem_flags(RKEY, RVAL|RKCL);
5312  _set_indentation(m_evt_handler->m_parent->indref);
5313  _line_progressed(1);
5314  // keep going in this function
5315  }
5316  else if(first == '!')
5317  {
5318  csubstr tag = _scan_tag();
5319  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5320  _check_tag(tag);
5321  m_evt_handler->set_key_tag(tag);
5322  }
5323  else
5324  {
5325  _c4err("parse error");
5326  }
5327  }
5328  else if(has_any(RKCL)) // read the key colon
5329  {
5330  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5331  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5332  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5333  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5334  const char first = rem.str[0];
5335  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5336  if(first == ':')
5337  {
5338  _c4dbgp("mapflow[RKCL]: found the colon");
5339  addrem_flags(RVAL, RKCL);
5340  _line_progressed(1);
5341  }
5342  else if(first == '}')
5343  {
5344  _c4dbgp("mapflow[RKCL]: end with missing val!");
5345  addrem_flags(RVAL, RKCL);
5346  m_evt_handler->set_val_scalar_plain_empty();
5347  _end_map_flow();
5348  _line_progressed(1);
5349  goto mapflow_finish;
5350  }
5351  else if(first == ',')
5352  {
5353  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5354  m_evt_handler->set_val_scalar_plain_empty();
5355  m_evt_handler->add_sibling();
5356  addrem_flags(RKEY, RKCL);
5357  _line_progressed(1);
5358  }
5359  else
5360  {
5361  _c4err("parse error");
5362  }
5363  }
5364  else if(has_any(RVAL))
5365  {
5366  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5367  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5368  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5369  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5370  const char first = rem.str[0];
5371  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5372  ScannedScalar sc;
5373  if(first == '\'')
5374  {
5375  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5376  sc = _scan_scalar_squot();
5377  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5378  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5379  addrem_flags(RNXT, RVAL);
5380  }
5381  else if(first == '"')
5382  {
5383  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5384  sc = _scan_scalar_dquot();
5385  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5386  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5387  addrem_flags(RNXT, RVAL);
5388  }
5389  // block scalars (ie | and >) cannot appear in flow containers
5390  else if(_scan_scalar_plain_map_flow(&sc))
5391  {
5392  _c4dbgp("mapflow[RVAL]: plain scalar.");
5393  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5394  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5395  addrem_flags(RNXT, RVAL);
5396  }
5397  else if(first == '[')
5398  {
5399  _c4dbgp("mapflow[RVAL]: start val seqflow");
5400  addrem_flags(RNXT, RVAL);
5401  m_evt_handler->begin_seq_val_flow();
5402  _set_indentation(m_evt_handler->m_parent->indref);
5403  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5404  _line_progressed(1);
5405  goto mapflow_finish;
5406  }
5407  else if(first == '{')
5408  {
5409  _c4dbgp("mapflow[RVAL]: start val mapflow");
5410  addrem_flags(RNXT, RVAL);
5411  m_evt_handler->begin_map_val_flow();
5412  _set_indentation(m_evt_handler->m_parent->indref);
5413  addrem_flags(RKEY, RNXT);
5414  _line_progressed(1);
5415  // keep going in this function
5416  }
5417  else if(first == '}')
5418  {
5419  _c4dbgp("mapflow[RVAL]: end!");
5420  m_evt_handler->set_val_scalar_plain_empty();
5421  _end_map_flow();
5422  _line_progressed(1);
5423  goto mapflow_finish;
5424  }
5425  else if(first == ',')
5426  {
5427  _c4dbgp("mapflow[RVAL]: empty val!");
5428  m_evt_handler->set_val_scalar_plain_empty();
5429  addrem_flags(RNXT, RVAL);
5430  // keep going in this function
5431  }
5432  else if(first == '*')
5433  {
5434  csubstr ref = _scan_ref_map();
5435  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5436  m_evt_handler->set_val_ref(ref);
5437  addrem_flags(RNXT, RVAL);
5438  }
5439  else if(first == '&')
5440  {
5441  csubstr anchor = _scan_anchor();
5442  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5443  m_evt_handler->set_val_anchor(anchor);
5444  }
5445  else if(first == '!')
5446  {
5447  csubstr tag = _scan_tag();
5448  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5449  _check_tag(tag);
5450  m_evt_handler->set_val_tag(tag);
5451  }
5452  else
5453  {
5454  _c4err("parse error");
5455  }
5456  }
5457  else if(has_any(RNXT))
5458  {
5459  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5460  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5461  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5462  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5463  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5464  if(rem.begins_with(','))
5465  {
5466  _c4dbgp("mapflow[RNXT]: expect next keyval");
5467  m_evt_handler->add_sibling();
5468  addrem_flags(RKEY, RNXT);
5469  _line_progressed(1);
5470  }
5471  else if(rem.begins_with('}'))
5472  {
5473  _c4dbgp("mapflow[RNXT]: end!");
5474  _end_map_flow();
5475  _line_progressed(1);
5476  goto mapflow_finish;
5477  }
5478  else
5479  {
5480  _c4err("parse error");
5481  }
5482  }
5483  else if(has_any(QMRK))
5484  {
5485  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5486  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5487  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5488  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5489  const char first = rem.str[0];
5490  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5491  ScannedScalar sc;
5492  if(first == '\'')
5493  {
5494  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5495  sc = _scan_scalar_squot();
5496  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5497  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5498  addrem_flags(RKCL, QMRK);
5499  }
5500  else if(first == '"')
5501  {
5502  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5503  sc = _scan_scalar_dquot();
5504  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5505  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5506  addrem_flags(RKCL, QMRK);
5507  }
5508  // block scalars (ie | and >) cannot appear in flow containers
5509  else if(_scan_scalar_plain_map_flow(&sc))
5510  {
5511  _c4dbgp("mapflow[QMRK]: plain scalar");
5512  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5513  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5514  addrem_flags(RKCL, QMRK);
5515  }
5516  else if(first == ':')
5517  {
5518  _c4dbgp("mapflow[QMRK]: setting empty key");
5519  m_evt_handler->set_key_scalar_plain_empty();
5520  addrem_flags(RVAL, QMRK);
5521  _line_progressed(1);
5522  _maybe_skip_whitespace_tokens();
5523  }
5524  else if(first == '}') // this happens on a trailing comma like ", }"
5525  {
5526  _c4dbgp("mapflow[QMRK]: end!");
5527  m_evt_handler->set_key_scalar_plain_empty();
5528  m_evt_handler->set_val_scalar_plain_empty();
5529  _end_map_flow();
5530  _line_progressed(1);
5531  goto mapflow_finish;
5532  }
5533  else if(first == ',')
5534  {
5535  _c4dbgp("mapflow[QMRK]: empty key+val!");
5536  m_evt_handler->set_key_scalar_plain_empty();
5537  m_evt_handler->set_val_scalar_plain_empty();
5538  addrem_flags(RNXT, QMRK);
5539  }
5540  else if(first == '&')
5541  {
5542  csubstr anchor = _scan_anchor();
5543  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5544  m_evt_handler->set_key_anchor(anchor);
5545  }
5546  else if(first == '*')
5547  {
5548  csubstr ref = _scan_ref_map();
5549  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5550  m_evt_handler->set_key_ref(ref);
5551  addrem_flags(RKCL, QMRK);
5552  }
5553  else if(first == '[')
5554  {
5555  // RYML's tree cannot store container keys, but that's
5556  // handled inside the tree sink. Other sink types may be
5557  // able to handle it.
5558  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5559  addrem_flags(RKCL, QMRK);
5560  m_evt_handler->begin_seq_key_flow();
5561  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5562  _set_indentation(m_evt_handler->m_parent->indref);
5563  _line_progressed(1);
5564  goto mapflow_finish;
5565  }
5566  else if(first == '{')
5567  {
5568  // RYML's tree cannot store container keys, but that's
5569  // handled inside the tree sink. Other sink types may be
5570  // able to handle it.
5571  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5572  addrem_flags(RKCL, QMRK);
5573  m_evt_handler->begin_map_key_flow();
5574  _set_indentation(m_evt_handler->m_parent->indref);
5575  addrem_flags(RKEY, RKCL);
5576  _line_progressed(1);
5577  // keep going in this function
5578  }
5579  else if(first == '!')
5580  {
5581  csubstr tag = _scan_tag();
5582  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5583  _check_tag(tag);
5584  m_evt_handler->set_key_tag(tag);
5585  }
5586  else
5587  {
5588  _c4err("parse error");
5589  }
5590  }
5591 
5592  mapflow_again:
5593  _c4dbgt("mapflow: go again", 0);
5594  if(_finished_line())
5595  {
5596  if(C4_LIKELY(!_finished_file()))
5597  {
5598  _line_ended();
5599  _scan_line();
5600  _c4dbgnextline();
5601  }
5602  else
5603  {
5604  _c4err("missing terminating }");
5605  }
5606  }
5607  goto mapflow_start;
5608 
5609  mapflow_finish:
5610  _c4dbgp("mapflow: finish");
5611 }
5612 
5613 
5614 //-----------------------------------------------------------------------------
5615 
5616 template<class EventHandler>
5617 void ParseEngine<EventHandler>::_handle_seq_block()
5618 {
5619 seqblck_start:
5620  _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5621 
5622  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5623  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK));
5624  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5625  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5626 
5627  _maybe_skip_comment();
5628  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5629  if(!rem.len)
5630  goto seqblck_again;
5631 
5632  if(has_any(RVAL))
5633  {
5634  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5635  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5636  if(m_evt_handler->m_curr->at_line_beginning())
5637  {
5638  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5639  if(m_evt_handler->m_curr->indentation_ge())
5640  {
5641  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5642  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5643  rem = m_evt_handler->m_curr->line_contents.rem;
5644  if(!rem.len)
5645  goto seqblck_again;
5646  }
5647  else if(m_evt_handler->m_curr->indentation_lt())
5648  {
5649  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5650  _handle_indentation_pop_from_block_seq();
5651  goto seqblck_finish;
5652  }
5653  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5654  {
5655  _c4dbgp("seqblck[RVAL]: empty line!");
5656  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5657  goto seqblck_again;
5658  }
5659  }
5660  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5661  else
5662  {
5663  // accomodate annotation on the previous line. eg:
5664  // - &elm
5665  // foo # <-- on this line
5666  // - &elm
5667  // &foo foo: bar # <-- on this line
5668  if(rem.str[0] == ' ')
5669  {
5670  if(_handle_indentation_from_annotations())
5671  {
5672  _c4dbgp("seqblck[RVAL]: annotations!");
5673  rem = m_evt_handler->m_curr->line_contents.rem;
5674  if(!rem.len)
5675  goto seqblck_again;
5676  }
5677  }
5678  }
5679  #endif
5680  _RYML_ASSERT_BASIC_(callbacks(), rem.len);
5681  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5682  const char first = rem.str[0];
5683  const size_t startline = m_evt_handler->m_curr->pos.line;
5684  // warning: the gcc optimizer on x86 builds is brittle with
5685  // this function:
5686  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
5687  ScannedScalar sc;
5688  if(first == '\'')
5689  {
5690  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5691  sc = _scan_scalar_squot();
5692  if(!_maybe_scan_following_colon())
5693  {
5694  _c4dbgp("seqblck[RVAL]: set as val");
5695  _handle_annotations_before_blck_val_scalar();
5696  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5697  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5698  addrem_flags(RNXT, RVAL);
5699  }
5700  else
5701  {
5702  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5703  addrem_flags(RNXT, RVAL);
5704  _handle_annotations_before_start_mapblck(startline);
5705  _handle_colon();
5706  m_evt_handler->begin_map_val_block();
5707  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5708  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5709  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5710  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5711  _maybe_skip_whitespace_tokens();
5712  goto seqblck_finish;
5713  }
5714  }
5715  else if(first == '"')
5716  {
5717  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5718  sc = _scan_scalar_dquot();
5719  if(!_maybe_scan_following_colon())
5720  {
5721  _c4dbgp("seqblck[RVAL]: set as val");
5722  _handle_annotations_before_blck_val_scalar();
5723  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5724  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5725  addrem_flags(RNXT, RVAL);
5726  }
5727  else
5728  {
5729  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5730  addrem_flags(RNXT, RVAL);
5731  _handle_annotations_before_start_mapblck(startline);
5732  _handle_colon();
5733  m_evt_handler->begin_map_val_block();
5734  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5735  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5736  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5737  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5738  _maybe_skip_whitespace_tokens();
5739  goto seqblck_finish;
5740  }
5741  }
5742  // block scalars can only appear as keys when in QMRK scope
5743  // (ie, after ? tokens), so no need to scan following colon in
5744  // here.
5745  else if(first == '|')
5746  {
5747  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5748  ScannedBlock sb;
5749  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5750  _handle_annotations_before_blck_val_scalar();
5751  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5752  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5753  addrem_flags(RNXT, RVAL);
5754  }
5755  else if(first == '>')
5756  {
5757  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5758  ScannedBlock sb;
5759  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5760  _handle_annotations_before_blck_val_scalar();
5761  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5762  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5763  addrem_flags(RNXT, RVAL);
5764  }
5765  else if(_scan_scalar_plain_seq_blck(&sc))
5766  {
5767  _c4dbgp("seqblck[RVAL]: plain scalar.");
5768  if(!_maybe_scan_following_colon())
5769  {
5770  _c4dbgp("seqblck[RVAL]: set as val");
5771  _handle_annotations_before_blck_val_scalar();
5772  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5773  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5774  addrem_flags(RNXT, RVAL);
5775  }
5776  else
5777  {
5778  if(startindent > m_evt_handler->m_curr->indref)
5779  {
5780  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5781  addrem_flags(RNXT, RVAL);
5782  _handle_annotations_before_start_mapblck(startline);
5783  _handle_colon();
5784  m_evt_handler->begin_map_val_block();
5785  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5786  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5787  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5788  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5789  _maybe_skip_whitespace_tokens();
5790  goto seqblck_finish;
5791  }
5792  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|RBLCK, m_evt_handler->m_parent))
5793  {
5794  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5795  m_evt_handler->set_val_scalar_plain_empty();
5796  m_evt_handler->end_seq_block();
5797  m_evt_handler->add_sibling();
5798  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5799  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5800  addrem_flags(RVAL, RNXT|RKEY);
5801  _maybe_skip_whitespace_tokens();
5802  goto seqblck_finish;
5803  }
5804  else
5805  {
5806  _c4err("parse error");
5807  }
5808  }
5809  }
5810  else if(first == '[')
5811  {
5812  _c4dbgp("seqblck[RVAL]: start child seqflow");
5813  addrem_flags(RNXT, RVAL);
5814  _handle_annotations_before_blck_val_scalar();
5815  m_evt_handler->begin_seq_val_flow();
5816  addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
5817  _line_progressed(1);
5818  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5819  goto seqblck_finish;
5820  }
5821  else if(first == '{')
5822  {
5823  _c4dbgp("seqblck[RVAL]: start child mapflow");
5824  addrem_flags(RNXT, RVAL);
5825  _handle_annotations_before_blck_val_scalar();
5826  m_evt_handler->begin_map_val_flow();
5827  addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
5828  _line_progressed(1);
5829  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5830  goto seqblck_finish;
5831  }
5832  else if(first == '-')
5833  {
5834  if(startindent == m_evt_handler->m_curr->indref)
5835  {
5836  _c4dbgp("seqblck[RVAL]: prev val was empty");
5837  _handle_annotations_before_blck_val_scalar();
5838  m_evt_handler->set_val_scalar_plain_empty();
5839  // keep in RVAL, but for the next sibling
5840  m_evt_handler->add_sibling();
5841  }
5842  else
5843  {
5844  _c4dbgp("seqblck[RVAL]: start child seqblck");
5845  _RYML_ASSERT_BASIC_(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5846  addrem_flags(RNXT, RVAL);
5847  _handle_annotations_before_blck_val_scalar();
5848  m_evt_handler->begin_seq_val_block();
5849  addrem_flags(RVAL, RNXT);
5850  _set_indentation(startindent);
5851  // keep going on inside this function
5852  }
5853  _line_progressed(1);
5854  _maybe_skip_whitespace_tokens();
5855  }
5856  else if(first == ':')
5857  {
5858  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5859  addrem_flags(RNXT, RVAL);
5860  _handle_annotations_before_start_mapblck(startline);
5861  _handle_colon();
5862  m_evt_handler->begin_map_val_block();
5863  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5864  m_evt_handler->set_key_scalar_plain_empty();
5865  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5866  _line_progressed(1);
5867  _maybe_skip_whitespace_tokens();
5868  goto seqblck_finish;
5869  }
5870  else if(first == '&')
5871  {
5872  const csubstr anchor = _scan_anchor();
5873  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5874  // we need to buffer the anchors, as there may be two
5875  // consecutive anchors in here
5876  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5877  }
5878  else if(first == '*')
5879  {
5880  csubstr ref = _scan_ref_seq();
5881  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5882  if(!_maybe_scan_following_colon())
5883  {
5884  _c4dbgp("seqblck[RVAL]: set ref as val!");
5885  _handle_annotations_before_blck_val_scalar();
5886  m_evt_handler->set_val_ref(ref);
5887  addrem_flags(RNXT, RVAL);
5888  }
5889  else
5890  {
5891  _c4dbgp("seqblck[RVAL]: ref is key of map");
5892  addrem_flags(RNXT, RVAL);
5893  _handle_annotations_before_start_mapblck(startline);
5894  m_evt_handler->begin_map_val_block();
5895  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5896  m_evt_handler->set_key_ref(ref);
5897  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5898  _set_indentation(startindent);
5899  _maybe_skip_whitespace_tokens();
5900  goto seqblck_finish;
5901  }
5902  }
5903  else if(first == '!')
5904  {
5905  csubstr tag = _scan_tag();
5906  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5907  // we need to buffer the tags, as there may be two
5908  // consecutive tags in here
5909  _add_annotation(&m_pending_tags, tag, startindent, startline);
5910  }
5911  else if(first == '?')
5912  {
5913  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5914  addrem_flags(RNXT, RVAL);
5915  m_was_inside_qmrk = true;
5916  m_evt_handler->begin_map_val_block();
5917  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5918  _set_indentation(startindent);
5919  _line_progressed(1);
5920  _maybe_skip_whitespace_tokens();
5921  goto seqblck_finish;
5922  }
5923  else
5924  {
5925  _c4err("parse error");
5926  }
5927  }
5928  else // RNXT
5929  {
5930  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5931  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5932  //
5933  // handle indentation
5934  //
5935  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5936  if(C4_LIKELY(_at_line_begin()))
5937  {
5938  _c4dbgp("seqblck[RNXT]: at line begin");
5939  if(m_evt_handler->m_curr->indentation_ge())
5940  {
5941  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5942  _line_progressed(m_evt_handler->m_curr->indref);
5943  _maybe_skip_whitespace_tokens();
5944  rem = m_evt_handler->m_curr->line_contents.rem;
5945  if(!rem.len)
5946  goto seqblck_again;
5947  }
5948  else if(m_evt_handler->m_curr->indentation_lt())
5949  {
5950  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5951  _handle_indentation_pop_from_block_seq();
5952  if(has_all(RSEQ|RBLCK))
5953  {
5954  _c4dbgp("seqblck[RNXT]: still seqblck!");
5955  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5956  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5957  rem = m_evt_handler->m_curr->line_contents.rem;
5958  if(!rem.len)
5959  goto seqblck_again;
5960  }
5961  else
5962  {
5963  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5964  goto seqblck_finish;
5965  }
5966  }
5967  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5968  {
5969  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5970  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5971  rem = m_evt_handler->m_curr->line_contents.rem;
5972  if(!rem.len)
5973  goto seqblck_again;
5974  }
5975  }
5976  else
5977  {
5978  _c4dbgp("seqblck[RNXT]: NOT at line begin");
5979  if(!rem.begins_with_any(" \t"))
5980  {
5981  _c4err("parse error");
5982  }
5983  else
5984  {
5985  _skipchars(" \t");
5986  rem = m_evt_handler->m_curr->line_contents.rem;
5987  if(!rem.len)
5988  {
5989  _c4dbgp("seqblck[RNXT]: again");
5990  goto seqblck_again;
5991  }
5992  }
5993  }
5994  //
5995  // now handle the tokens
5996  //
5997  const char first = rem.str[0];
5998  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
5999  if(first == '-')
6000  {
6001  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6002  {
6003  _c4dbgp("seqblck[RNXT]: expect next val");
6004  addrem_flags(RVAL, RNXT);
6005  m_evt_handler->add_sibling();
6006  _line_progressed(1);
6007  _maybe_skip_whitespace_tokens();
6008  }
6009  else
6010  {
6011  _c4dbgp("seqblck[RNXT]: start doc");
6012  _start_doc_suddenly();
6013  _line_progressed(3);
6014  _maybe_skip_whitespace_tokens();
6015  goto seqblck_finish;
6016  }
6017  }
6018  else if(first == ':')
6019  {
6020  // This happens for example in `- [a: b]: c` (after
6021  // terminating the seq, ie, after `]`). All other cases
6022  // (ie colon after scalars) are caught elsewhere (ie, in
6023  // RVAL state).
6024  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6025  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
6026  {
6027  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6028  m_evt_handler->end_seq_block();
6029  goto seqblck_finish;
6030  }
6031  else
6032  {
6033  _c4err("parse error");
6034  }
6035  }
6036  else if(first == '.')
6037  {
6038  _c4dbgp("seqblck[RNXT]: maybe doc?");
6039  csubstr rs = rem.sub(1);
6040  if(rs == ".." || rs.begins_with(".. "))
6041  {
6042  _c4dbgp("seqblck[RNXT]: end+start doc");
6043  _end_doc_suddenly();
6044  _line_progressed(3);
6045  _maybe_skip_whitespace_tokens();
6046  goto seqblck_finish;
6047  }
6048  else
6049  {
6050  _c4err("parse error");
6051  }
6052  }
6053  else
6054  {
6055  // may be an indentless sequence nested in a map...
6056  //if(m_evt_handler->m_stack.size() >= 2)
6057  #ifdef RYML_DBG
6058  char flagbuf_[128];
6059  for(auto const& s : m_evt_handler->m_stack)
6060  {
6061  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
6062  }
6063  #endif
6064  if(m_evt_handler->m_parent && has_all(RMAP|RBLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6065  {
6066  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6067  _RYML_ASSERT_BASIC_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6068  _handle_indentation_pop(m_evt_handler->m_parent);
6069  _RYML_ASSERT_BASIC_(this->callbacks(), has_all(RMAP|RBLCK));
6070  m_evt_handler->add_sibling();
6071  addrem_flags(RKEY, RNXT);
6072  goto seqblck_finish;
6073  }
6074  else //if(first != '*')
6075  {
6076  _c4err("parse error");
6077  }
6078  }
6079  }
6080 
6081  seqblck_again:
6082  _c4dbgt("seqblck: go again", 0);
6083  if(_finished_line())
6084  {
6085  m_bom_len = 0;
6086  _line_ended();
6087  _scan_line();
6088  if(_finished_file())
6089  {
6090  _c4dbgp("seqblck: finish!");
6091  _end_seq_blck();
6092  goto seqblck_finish;
6093  }
6094  _c4dbgnextline();
6095  }
6096  goto seqblck_start;
6097 
6098  seqblck_finish:
6099  _c4dbgp("seqblck: finish");
6100 }
6101 
6102 
6103 //-----------------------------------------------------------------------------
6104 
6105 template<class EventHandler>
6106 void ParseEngine<EventHandler>::_handle_map_block()
6107 {
6108 mapblck_start:
6109  _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6110 
6111  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
6112  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
6113  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK));
6114  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
6115  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
6116 
6117  _maybe_skip_comment();
6118  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6119  if(!rem.len)
6120  goto mapblck_again;
6121 
6122  if(has_any(RKEY))
6123  {
6124  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6125  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6126  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6127  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6128  //
6129  // handle indentation
6130  //
6131  if(m_evt_handler->m_curr->at_line_beginning())
6132  {
6133  if(m_evt_handler->m_curr->indentation_eq())
6134  {
6135  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6136  _line_progressed(m_evt_handler->m_curr->indref);
6137  rem = m_evt_handler->m_curr->line_contents.rem;
6138  if(!rem.len)
6139  goto mapblck_again;
6140  }
6141  else if(m_evt_handler->m_curr->indentation_lt())
6142  {
6143  _c4dbgp("mapblck[RKEY]: smaller indentation!");
6144  _handle_indentation_pop_from_block_map();
6145  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6146  if(has_all(RMAP|RBLCK))
6147  {
6148  _c4dbgp("mapblck[RKEY]: still mapblck!");
6149  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6150  rem = m_evt_handler->m_curr->line_contents.rem;
6151  if(!rem.len)
6152  goto mapblck_again;
6153  }
6154  else
6155  {
6156  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6157  goto mapblck_finish;
6158  }
6159  }
6160  else
6161  {
6162  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6163  _c4err("invalid indentation");
6164  }
6165  }
6166  //
6167  // now handle the tokens
6168  //
6169  const char first = rem.str[0];
6170  const size_t startline = m_evt_handler->m_curr->pos.line;
6171  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6172  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6173  ScannedScalar sc;
6174  if(first == '\'')
6175  {
6176  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6177  sc = _scan_scalar_squot();
6178  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6179  _handle_annotations_before_blck_key_scalar();
6180  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6181  addrem_flags(RVAL, RKEY);
6182  if(!_maybe_scan_following_colon())
6183  _c4err("could not find ':' colon after key");
6184  _maybe_skip_whitespace_tokens();
6185  }
6186  else if(first == '"')
6187  {
6188  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6189  sc = _scan_scalar_dquot();
6190  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6191  _handle_annotations_before_blck_key_scalar();
6192  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6193  addrem_flags(RVAL, RKEY);
6194  if(!_maybe_scan_following_colon())
6195  _c4err("could not find ':' colon after key");
6196  _maybe_skip_whitespace_tokens();
6197  }
6198  // block scalars (| and >) cannot be used as keys unless they
6199  // appear in an explicit QMRK scope (ie, after the ? token).
6200  else if(C4_UNLIKELY(first == '|'))
6201  {
6202  _c4err("block literal keys must be enclosed in '?'");
6203  }
6204  else if(C4_UNLIKELY(first == '>'))
6205  {
6206  _c4err("block literal keys must be enclosed in '?'");
6207  }
6208  else if(_scan_scalar_plain_map_blck(&sc))
6209  {
6210  _c4dbgp("mapblck[RKEY]: plain scalar");
6211  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6212  _handle_annotations_before_blck_key_scalar();
6213  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6214  addrem_flags(RVAL, RKEY);
6215  if(!_maybe_scan_following_colon())
6216  _c4err("could not find ':' colon after key");
6217  _maybe_skip_whitespace_tokens();
6218  }
6219  else if(first == '?')
6220  {
6221  _c4dbgp("mapblck[RKEY]: key token!");
6222  addrem_flags(QMRK, RKEY);
6223  _line_progressed(1);
6224  _maybe_skip_whitespace_tokens();
6225  m_was_inside_qmrk = true;
6226  goto mapblck_again;
6227  }
6228  else if(first == ':')
6229  {
6230  _c4dbgp("mapblck[RKEY]: setting empty key");
6231  _handle_annotations_before_blck_key_scalar();
6232  m_evt_handler->set_key_scalar_plain_empty();
6233  addrem_flags(RVAL, RKEY);
6234  _line_progressed(1);
6235  _maybe_skip_whitespace_tokens();
6236  }
6237  else if(first == '*')
6238  {
6239  csubstr ref = _scan_ref_map();
6240  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6241  _handle_annotations_before_blck_key_scalar();
6242  m_evt_handler->set_key_ref(ref);
6243  addrem_flags(RVAL, RKEY);
6244  if(!_maybe_scan_following_colon())
6245  _c4err("could not find ':' colon after key");
6246  _maybe_skip_whitespace_tokens();
6247  }
6248  else if(first == '&')
6249  {
6250  csubstr anchor = _scan_anchor();
6251  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6252  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6253  }
6254  else if(first == '!')
6255  {
6256  csubstr tag = _scan_tag();
6257  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6258  _add_annotation(&m_pending_tags, tag, startindent, startline);
6259  }
6260  else if(first == '[')
6261  {
6262  // RYML's tree cannot store container keys, but that's
6263  // handled inside the tree handler. Other handlers may be
6264  // able to handle it.
6265  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6266  addrem_flags(RKCL, RKEY);
6267  _handle_annotations_before_blck_key_scalar();
6268  m_evt_handler->begin_seq_key_flow();
6269  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKCL);
6270  _line_progressed(1);
6271  _set_indentation(startindent);
6272  goto mapblck_finish;
6273  }
6274  else if(first == '{')
6275  {
6276  // RYML's tree cannot store container keys, but that's
6277  // handled inside the tree handler. Other handlers may be
6278  // able to handle it.
6279  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6280  addrem_flags(RKCL, RKEY);
6281  _handle_annotations_before_blck_key_scalar();
6282  m_evt_handler->begin_map_key_flow();
6283  addrem_flags(RFLOW|RKEY, RBLCK|RKCL);
6284  _line_progressed(1);
6285  _set_indentation(startindent);
6286  goto mapblck_finish;
6287  }
6288  else if(first == '-')
6289  {
6290  _c4dbgp("mapblck[RKEY]: maybe doc?");
6291  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6292  {
6293  _c4dbgp("mapblck[RKEY]: end+start doc");
6294  _start_doc_suddenly();
6295  _line_progressed(3);
6296  _maybe_skip_whitespace_tokens();
6297  goto mapblck_finish;
6298  }
6299  else
6300  {
6301  _c4err("parse error");
6302  }
6303  }
6304  else if(first == '.')
6305  {
6306  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6307  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6308  {
6309  _c4dbgp("mapblck[RKEY]: end doc");
6310  _end_doc_suddenly();
6311  _line_progressed(3);
6312  _maybe_skip_whitespace_tokens();
6313  goto mapblck_finish;
6314  }
6315  else
6316  {
6317  _c4err("parse error");
6318  }
6319  }
6321  else if(first == '\t')
6322  {
6323  _c4dbgp("mapblck[RKEY]: skip tabs");
6324  _maybe_skipchars('\t');
6325  })
6326  else
6327  {
6328  _c4err("parse error");
6329  }
6330  }
6331  else if(has_any(RKCL)) // read the key colon
6332  {
6333  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6334  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6335  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6336  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6337  //
6338  // handle indentation
6339  //
6340  if(m_evt_handler->m_curr->at_line_beginning())
6341  {
6342  if(m_evt_handler->m_curr->indentation_eq())
6343  {
6344  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6345  _line_progressed(m_evt_handler->m_curr->indref);
6346  rem = m_evt_handler->m_curr->line_contents.rem;
6347  if(!rem.len)
6348  goto mapblck_again;
6349  }
6350  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6351  {
6352  _c4err("invalid indentation");
6353  }
6354  }
6355  const char first = rem.str[0];
6356  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6357  if(first == ':')
6358  {
6359  _c4dbgp("mapblck[RKCL]: found the colon");
6360  addrem_flags(RVAL, RKCL);
6361  _line_progressed(1);
6362  _maybe_skip_whitespace_tokens();
6363  }
6364  else if(first == '?')
6365  {
6366  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6367  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6368  m_evt_handler->set_val_scalar_plain_empty();
6369  m_evt_handler->add_sibling();
6370  addrem_flags(QMRK, RKCL);
6371  _line_progressed(1);
6372  _maybe_skip_whitespace_tokens();
6373  }
6374  else if(first == '-')
6375  {
6376  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6377  {
6378  _c4dbgp("mapblck[RKCL]: end+start doc");
6379  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6380  _start_doc_suddenly();
6381  _line_progressed(3);
6382  _maybe_skip_whitespace_tokens();
6383  goto mapblck_finish;
6384  }
6385  else
6386  {
6387  _c4err("parse error");
6388  }
6389  }
6390  else if(first == '.')
6391  {
6392  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6393  csubstr rs = rem.sub(1);
6394  if(rs == ".." || rs.begins_with(".. "))
6395  {
6396  _c4dbgp("mapblck[RKCL]: end+start doc");
6397  _end_doc_suddenly();
6398  _line_progressed(3);
6399  goto mapblck_finish;
6400  }
6401  else
6402  {
6403  _c4err("parse error");
6404  }
6405  }
6406  else if(m_was_inside_qmrk)
6407  {
6408  _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6409  _c4dbgp("mapblck[RKCL]: missing :");
6410  m_evt_handler->set_val_scalar_plain_empty();
6411  m_evt_handler->add_sibling();
6412  m_was_inside_qmrk = false;
6413  addrem_flags(RKEY, RKCL);
6414  }
6415  else
6416  {
6417  _c4err("parse error");
6418  }
6419  }
6420  else if(has_any(RVAL))
6421  {
6422  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6423  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6424  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6425  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6426  //
6427  // handle indentation
6428  //
6429  if(m_evt_handler->m_curr->at_line_beginning())
6430  {
6431  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6432  m_evt_handler->m_curr->more_indented = false;
6433  if(m_evt_handler->m_curr->indref == npos)
6434  {
6435  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6436  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6437  _line_progressed(m_evt_handler->m_curr->indref);
6438  rem = m_evt_handler->m_curr->line_contents.rem;
6439  if(!rem.len)
6440  goto mapblck_again;
6441  }
6442  else if(m_evt_handler->m_curr->indentation_eq())
6443  {
6444  _c4dbgp("mapblck[RVAL]: skip indentation!");
6445  _line_progressed(m_evt_handler->m_curr->indref);
6446  rem = m_evt_handler->m_curr->line_contents.rem;
6447  if(!rem.len)
6448  goto mapblck_again;
6449  // TODO: this is valid:
6450  //
6451  // ```yaml
6452  // a:
6453  // b:
6454  // ---
6455  // a:
6456  // b
6457  // ---
6458  // a:
6459  // b: c
6460  // ```
6461  //
6462  // ... but this is not:
6463  //
6464  // ```yaml
6465  // a:
6466  // v
6467  // ---
6468  // a: b: c
6469  // ```
6470  //
6471  // here, we probably need to set a boolean on the state
6472  // to disambiguate between these cases.
6473  }
6474  else if(m_evt_handler->m_curr->indentation_gt())
6475  {
6476  _c4dbgp("mapblck[RVAL]: more indented!");
6477  m_evt_handler->m_curr->more_indented = true;
6478  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6479  rem = m_evt_handler->m_curr->line_contents.rem;
6480  if(!rem.len)
6481  goto mapblck_again;
6482  }
6483  else if(m_evt_handler->m_curr->indentation_lt())
6484  {
6485  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6486  _handle_indentation_pop_from_block_map();
6487  if(has_all(RMAP|RBLCK))
6488  {
6489  _c4dbgp("mapblck[RVAL]: still mapblck!");
6490  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6491  if(has_any(RNXT))
6492  {
6493  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6494  m_evt_handler->add_sibling();
6495  addrem_flags(RKEY, RNXT);
6496  }
6497  goto mapblck_again;
6498  }
6499  else
6500  {
6501  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6502  goto mapblck_finish;
6503  }
6504  }
6505  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6506  {
6507  _c4dbgp("mapblck[RVAL]: empty line!");
6508  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6509  goto mapblck_again;
6510  }
6511  }
6512  //
6513  // now handle the tokens
6514  //
6515  const char first = rem.str[0];
6516  const size_t startline = m_evt_handler->m_curr->pos.line;
6517  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6518  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6519  ScannedScalar sc;
6520  if(first == '\'')
6521  {
6522  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6523  sc = _scan_scalar_squot();
6524  if(!_maybe_scan_following_colon())
6525  {
6526  _c4dbgp("mapblck[RVAL]: set as val");
6527  _handle_annotations_before_blck_val_scalar();
6528  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6529  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6530  addrem_flags(RNXT, RVAL);
6531  }
6532  else
6533  {
6534  if(startindent != m_evt_handler->m_curr->indref)
6535  {
6536  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6537  _handle_annotations_before_start_mapblck(startline);
6538  addrem_flags(RNXT, RVAL);
6539  _handle_colon();
6540  m_evt_handler->begin_map_val_block();
6541  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6542  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6543  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6544  _maybe_skip_whitespace_tokens();
6545  // keep the child state on RVAL
6546  addrem_flags(RVAL, RNXT);
6547  }
6548  else
6549  {
6550  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6551  m_evt_handler->set_val_scalar_plain_empty();
6552  m_evt_handler->add_sibling();
6553  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6554  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6555  // keep going on RVAL
6556  _maybe_skip_whitespace_tokens();
6557  }
6558  }
6559  }
6560  else if(first == '"')
6561  {
6562  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6563  sc = _scan_scalar_dquot();
6564  if(!_maybe_scan_following_colon())
6565  {
6566  _c4dbgp("mapblck[RVAL]: set as val");
6567  _handle_annotations_before_blck_val_scalar();
6568  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6569  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6570  addrem_flags(RNXT, RVAL);
6571  }
6572  else
6573  {
6574  if(startindent != m_evt_handler->m_curr->indref)
6575  {
6576  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6577  _handle_annotations_before_start_mapblck(startline);
6578  addrem_flags(RNXT, RVAL);
6579  _handle_colon();
6580  m_evt_handler->begin_map_val_block();
6581  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6582  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6583  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6584  _maybe_skip_whitespace_tokens();
6585  // keep the child state on RVAL
6586  addrem_flags(RVAL, RNXT);
6587  }
6588  else
6589  {
6590  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6591  m_evt_handler->set_val_scalar_plain_empty();
6592  m_evt_handler->add_sibling();
6593  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6594  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6595  // keep going on RVAL
6596  _maybe_skip_whitespace_tokens();
6597  }
6598  }
6599  }
6600  // block scalars can only appear as keys when in QMRK scope
6601  // (ie, after ? tokens), so no need to scan following colon
6602  else if(first == '|')
6603  {
6604  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6605  ScannedBlock sb;
6606  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6607  _handle_annotations_before_blck_val_scalar();
6608  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6609  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6610  addrem_flags(RNXT, RVAL);
6611  }
6612  else if(first == '>')
6613  {
6614  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6615  ScannedBlock sb;
6616  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6617  _handle_annotations_before_blck_val_scalar();
6618  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6619  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6620  addrem_flags(RNXT, RVAL);
6621  }
6622  else if(_scan_scalar_plain_map_blck(&sc))
6623  {
6624  _c4dbgp("mapblck[RVAL]: plain scalar.");
6625  if(!_maybe_scan_following_colon())
6626  {
6627  _c4dbgp("mapblck[RVAL]: set as val");
6628  _handle_annotations_before_blck_val_scalar();
6629  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6630  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6631  addrem_flags(RNXT, RVAL);
6632  }
6633  else
6634  {
6635  if(startindent != m_evt_handler->m_curr->indref)
6636  {
6637  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6638  addrem_flags(RNXT, RVAL);
6639  _handle_annotations_before_start_mapblck(startline);
6640  _handle_colon();
6641  m_evt_handler->begin_map_val_block();
6642  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6643  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6644  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6645  _maybe_skip_whitespace_tokens();
6646  // keep the child state on RVAL
6647  addrem_flags(RVAL, RNXT);
6648  }
6649  else
6650  {
6651  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6652  _handle_annotations_before_blck_val_scalar();
6653  m_evt_handler->set_val_scalar_plain_empty();
6654  m_evt_handler->add_sibling();
6655  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6656  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6657  // keep going on RVAL
6658  _maybe_skip_whitespace_tokens();
6659  }
6660  }
6661  }
6662  else if(first == '-')
6663  {
6664  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6665  {
6666  _c4dbgp("mapblck[RVAL]: start val seqblck");
6667  addrem_flags(RNXT, RVAL);
6668  _handle_annotations_before_blck_val_scalar();
6669  m_evt_handler->begin_seq_val_block();
6670  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6671  _set_indentation(startindent);
6672  _line_progressed(1);
6673  _maybe_skip_whitespace_tokens();
6674  goto mapblck_finish;
6675  }
6676  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6677  {
6678  _c4dbgp("mapblck[RVAL]: end+start doc");
6679  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6680  _start_doc_suddenly();
6681  _line_progressed(3);
6682  _maybe_skip_whitespace_tokens();
6683  goto mapblck_finish;
6684  }
6685  else
6686  {
6687  _c4err("parse error");
6688  }
6689  }
6690  else if(first == '[')
6691  {
6692  _c4dbgp("mapblck[RVAL]: start val seqflow");
6693  addrem_flags(RNXT, RVAL);
6694  _handle_annotations_before_blck_val_scalar();
6695  m_evt_handler->begin_seq_val_flow();
6696  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
6697  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6698  _line_progressed(1);
6699  goto mapblck_finish;
6700  }
6701  else if(first == '{')
6702  {
6703  _c4dbgp("mapblck[RVAL]: start val mapflow");
6704  addrem_flags(RNXT, RVAL);
6705  _handle_annotations_before_blck_val_scalar();
6706  m_evt_handler->begin_map_val_flow();
6707  addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
6708  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6709  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6710  _line_progressed(1);
6711  goto mapblck_finish;
6712  }
6713  else if(first == '*')
6714  {
6715  csubstr ref = _scan_ref_map();
6716  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6717  if(startindent == m_evt_handler->m_curr->indref)
6718  {
6719  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6720  m_evt_handler->set_val_ref(ref);
6721  addrem_flags(RNXT, RVAL);
6722  }
6723  else
6724  {
6725  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6726  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6727  if(_maybe_scan_following_colon())
6728  {
6729  _c4dbgp("mapblck[RVAL]: start child map, block");
6730  addrem_flags(RNXT, RVAL);
6731  _handle_annotations_before_blck_val_scalar();
6732  m_evt_handler->begin_map_val_block();
6733  m_evt_handler->set_key_ref(ref);
6734  _set_indentation(startindent);
6735  // keep going in RVAL
6736  addrem_flags(RVAL, RNXT);
6737  }
6738  else
6739  {
6740  _c4dbgp("mapblck[RVAL]: was val ref");
6741  _handle_annotations_before_blck_val_scalar();
6742  m_evt_handler->set_val_ref(ref);
6743  addrem_flags(RNXT, RVAL);
6744  }
6745  }
6746  _maybe_skip_whitespace_tokens();
6747  }
6748  else if(first == '&')
6749  {
6750  csubstr anchor = _scan_anchor();
6751  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6752  if(startindent == m_evt_handler->m_curr->indref)
6753  {
6754  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6755  m_evt_handler->set_val_scalar_plain_empty();
6756  m_evt_handler->add_sibling();
6757  addrem_flags(RKEY, RVAL);
6758  }
6759  // we need to buffer the anchors, as there may be two
6760  // consecutive anchors in here
6761  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6762  }
6763  else if(first == '!')
6764  {
6765  csubstr tag = _scan_tag();
6766  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6767  if(startindent == m_evt_handler->m_curr->indref)
6768  {
6769  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6770  _handle_annotations_before_blck_val_scalar();
6771  m_evt_handler->set_val_scalar_plain_empty();
6772  m_evt_handler->add_sibling();
6773  addrem_flags(RKEY, RVAL);
6774  }
6775  // we need to buffer the tags, as there may be two
6776  // consecutive tags in here
6777  _add_annotation(&m_pending_tags, tag, startindent, startline);
6778  }
6779  else if(first == '?')
6780  {
6781  if(startindent == m_evt_handler->m_curr->indref)
6782  {
6783  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6784  _handle_annotations_before_blck_val_scalar();
6785  m_evt_handler->set_val_scalar_plain_empty();
6786  m_evt_handler->add_sibling();
6787  addrem_flags(QMRK, RVAL);
6788  }
6789  else if(startindent > m_evt_handler->m_curr->indref)
6790  {
6791  _c4dbgp("mapblck[RVAL]: start val mapblck");
6792  addrem_flags(RNXT, RVAL);
6793  _handle_annotations_before_blck_val_scalar();
6794  m_evt_handler->begin_map_val_block();
6795  addrem_flags(QMRK|RBLCK, RNXT);
6796  _set_indentation(startindent);
6797  }
6798  else
6799  {
6800  _c4err("parse error");
6801  }
6802  m_was_inside_qmrk = true;
6803  _line_progressed(1);
6804  _maybe_skip_whitespace_tokens();
6805  goto mapblck_again;
6806  }
6807  else if(first == ':')
6808  {
6809  if(startindent == m_evt_handler->m_curr->indref)
6810  {
6811  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6812  m_evt_handler->set_val_scalar_plain_empty();
6813  m_evt_handler->add_sibling();
6814  m_evt_handler->set_key_scalar_plain_empty();
6815  }
6816  else if(startindent > m_evt_handler->m_curr->indref)
6817  {
6818  _c4dbgp("mapblck[RVAL]: start val mapblck");
6819  addrem_flags(RNXT, RVAL);
6820  _handle_annotations_before_start_mapblck(startline);
6821  _handle_colon();
6822  m_evt_handler->begin_map_val_block();
6823  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6824  m_evt_handler->set_key_scalar_plain_empty();
6825  // keep the child state on RVAL
6826  addrem_flags(RVAL, RNXT);
6827  }
6828  else
6829  {
6830  _c4err("parse error");
6831  }
6832  _line_progressed(1);
6833  _maybe_skip_whitespace_tokens();
6834  goto mapblck_again;
6835  }
6836  else if(first == '.')
6837  {
6838  _c4dbgp("mapblck[RVAL]: maybe doc?");
6839  csubstr rs = rem.sub(1);
6840  if(rs == ".." || rs.begins_with(".. "))
6841  {
6842  _c4dbgp("seqblck[RVAL]: end doc expl");
6843  _end_doc_suddenly();
6844  _line_progressed(3);
6845  _maybe_skip_whitespace_tokens();
6846  goto mapblck_finish;
6847  }
6848  else
6849  {
6850  _c4err("parse error");
6851  }
6852  }
6854  else if(first == '\t')
6855  {
6856  _c4dbgp("mapblck[RVAL]: skip tabs");
6857  _maybe_skipchars('\t');
6858  })
6859  else
6860  {
6861  _c4err("parse error");
6862  }
6863  }
6864  else if(has_any(RNXT))
6865  {
6866  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6867  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6868  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6869  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6870  //
6871  // handle indentation
6872  //
6873  if(m_evt_handler->m_curr->at_line_beginning())
6874  {
6875  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6876  if(m_evt_handler->m_curr->indentation_eq())
6877  {
6878  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6879  _line_progressed(m_evt_handler->m_curr->indref);
6880  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6881  m_evt_handler->add_sibling();
6882  addrem_flags(RKEY, RNXT);
6883  goto mapblck_again;
6884  }
6885  else if(m_evt_handler->m_curr->indentation_lt())
6886  {
6887  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6888  _handle_indentation_pop_from_block_map();
6889  if(has_all(RMAP|RBLCK))
6890  {
6891  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6892  if(!has_any(RKCL))
6893  {
6894  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6895  m_evt_handler->add_sibling();
6896  addrem_flags(RKEY, RNXT);
6897  }
6898  goto mapblck_again;
6899  }
6900  else
6901  {
6902  goto mapblck_finish;
6903  }
6904  }
6905  }
6906  else
6907  {
6908  _c4dbgp("mapblck[RNXT]: NOT at line begin");
6909  if(!rem.begins_with_any(" \t"))
6910  {
6911  _c4err("parse error");
6912  }
6913  else
6914  {
6915  _skipchars(" \t");
6916  rem = m_evt_handler->m_curr->line_contents.rem;
6917  if(!rem.len)
6918  {
6919  _c4dbgp("seqblck[RNXT]: again");
6920  goto mapblck_again;
6921  }
6922  }
6923  }
6924  //
6925  // handle tokens
6926  //
6927  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6928  const char first = rem.str[0];
6929  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6930  if(first == ':')
6931  {
6932  if(m_evt_handler->m_curr->more_indented)
6933  {
6934  _c4dbgp("mapblck[RNXT]: start child block map");
6935  C4_NOT_IMPLEMENTED();
6936  //m_evt_handler->actually_as_block_map();
6937  _line_progressed(1);
6938  _set_indentation(m_evt_handler->m_curr->scalar_col);
6939  m_evt_handler->m_curr->more_indented = false;
6940  goto mapblck_again;
6941  }
6942  else
6943  {
6944  _c4err("parse error");
6945  }
6946  }
6947  else if(first == ' ')
6948  {
6949  _c4dbgp("mapblck[RNXT]: skip spaces");
6950  _maybe_skip_whitespace_tokens();
6951  }
6952  else
6953  {
6954  _c4err("parse error");
6955  }
6956  }
6957  else if(has_any(QMRK))
6958  {
6959  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6960  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6961  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6962  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6963  //
6964  // handle indentation
6965  //
6966  if(m_evt_handler->m_curr->at_line_beginning())
6967  {
6968  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6969  if(m_evt_handler->m_curr->indentation_eq())
6970  {
6971  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6972  _line_progressed(m_evt_handler->m_curr->indref);
6973  rem = m_evt_handler->m_curr->line_contents.rem;
6974  if(!rem.len)
6975  goto mapblck_again;
6976  }
6977  else if(m_evt_handler->m_curr->indentation_lt())
6978  {
6979  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6980  _handle_indentation_pop_from_block_map();
6981  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6982  if(has_all(RMAP|RBLCK))
6983  {
6984  _c4dbgp("mapblck[QMRK]: still mapblck!");
6985  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6986  rem = m_evt_handler->m_curr->line_contents.rem;
6987  if(!rem.len)
6988  goto mapblck_again;
6989  }
6990  else
6991  {
6992  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6993  goto mapblck_finish;
6994  }
6995  }
6996  // indentation can be larger in QMRK state
6997  else
6998  {
6999  _c4dbgp("mapblck[QMRK]: larger indentation !");
7000  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7001  rem = m_evt_handler->m_curr->line_contents.rem;
7002  if(!rem.len)
7003  goto mapblck_again;
7004  }
7005  }
7006  //
7007  // now handle the tokens
7008  //
7009  const char first = rem.str[0];
7010  const size_t startline = m_evt_handler->m_curr->pos.line;
7011  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7012  _c4dbgpf("mapblck[QMRK]: '{}'", first);
7013  ScannedScalar sc;
7014  if(first == '\'')
7015  {
7016  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7017  sc = _scan_scalar_squot();
7018  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7019  if(!_maybe_scan_following_colon())
7020  {
7021  _c4dbgp("mapblck[QMRK]: set as key");
7022  _handle_annotations_before_blck_key_scalar();
7023  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7024  addrem_flags(RKCL, QMRK);
7025  }
7026  else
7027  {
7028  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7029  addrem_flags(RKCL, QMRK);
7030  _handle_annotations_before_start_mapblck_as_key();
7031  m_evt_handler->begin_map_key_block();
7032  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7033  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7034  _maybe_skip_whitespace_tokens();
7035  _set_indentation(startindent);
7036  // keep the child state on RVAL
7037  addrem_flags(RVAL, RKCL|QMRK);
7038  }
7039  }
7040  else if(first == '"')
7041  {
7042  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7043  sc = _scan_scalar_dquot();
7044  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7045  if(!_maybe_scan_following_colon())
7046  {
7047  _c4dbgp("mapblck[QMRK]: set as key");
7048  _handle_annotations_before_blck_key_scalar();
7049  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7050  addrem_flags(RKCL, QMRK);
7051  }
7052  else
7053  {
7054  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7055  addrem_flags(RKCL, QMRK);
7056  _handle_annotations_before_start_mapblck_as_key();
7057  m_evt_handler->begin_map_key_block();
7058  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7059  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7060  _maybe_skip_whitespace_tokens();
7061  _set_indentation(startindent);
7062  // keep the child state on RVAL
7063  addrem_flags(RVAL, RKCL|QMRK);
7064  }
7065  }
7066  else if(first == '|')
7067  {
7068  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7069  ScannedBlock sb;
7070  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7071  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7072  _handle_annotations_before_blck_key_scalar();
7073  m_evt_handler->set_key_scalar_literal(maybe_filtered);
7074  addrem_flags(RKCL, QMRK);
7075  }
7076  else if(first == '>')
7077  {
7078  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7079  ScannedBlock sb;
7080  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7081  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7082  _handle_annotations_before_blck_key_scalar();
7083  m_evt_handler->set_key_scalar_folded(maybe_filtered);
7084  addrem_flags(RKCL, QMRK);
7085  }
7086  else if(_scan_scalar_plain_map_blck(&sc))
7087  {
7088  _c4dbgp("mapblck[QMRK]: plain scalar");
7089  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7090  if(!_maybe_scan_following_colon())
7091  {
7092  _c4dbgp("mapblck[QMRK]: set as key");
7093  _handle_annotations_before_blck_key_scalar();
7094  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7095  addrem_flags(RKCL, QMRK);
7096  }
7097  else
7098  {
7099  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7100  addrem_flags(RKCL, QMRK);
7101  _handle_annotations_before_start_mapblck_as_key();
7102  m_evt_handler->begin_map_key_block();
7103  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7104  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7105  _maybe_skip_whitespace_tokens();
7106  _set_indentation(startindent);
7107  // keep the child state on RVAL
7108  addrem_flags(RVAL, RKCL|QMRK);
7109  }
7110  }
7111  else if(first == ':')
7112  {
7113  if(startindent == m_evt_handler->m_curr->indref)
7114  {
7115  _c4dbgp("mapblck[QMRK]: empty key");
7116  addrem_flags(RVAL, QMRK);
7117  _handle_annotations_before_blck_key_scalar();
7118  m_evt_handler->set_key_scalar_plain_empty();
7119  _line_progressed(1);
7120  _maybe_skip_whitespace_tokens();
7121  }
7122  else
7123  {
7124  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7125  addrem_flags(RKCL, QMRK);
7126  _handle_annotations_before_start_mapblck_as_key();
7127  m_evt_handler->begin_map_key_block();
7128  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7129  m_evt_handler->set_key_scalar_plain_empty();
7130  _line_progressed(1);
7131  _maybe_skip_whitespace_tokens();
7132  _set_indentation(startindent);
7133  // keep the child state on RVAL
7134  addrem_flags(RVAL, RKCL|QMRK);
7135  }
7136  }
7137  else if(first == '*')
7138  {
7139  csubstr ref = _scan_ref_map();
7140  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7141  if(!_maybe_scan_following_colon())
7142  {
7143  _c4dbgp("mapblck[QMRK]: set ref as key");
7144  _handle_annotations_before_blck_key_scalar();
7145  m_evt_handler->set_key_ref(ref);
7146  addrem_flags(RKCL, QMRK);
7147  }
7148  else
7149  {
7150  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7151  addrem_flags(RKCL, QMRK);
7152  _handle_annotations_before_blck_key_scalar();
7153  m_evt_handler->begin_map_key_block();
7154  m_evt_handler->set_key_ref(ref);
7155  _set_indentation(startindent);
7156  // keep the child state on RVAL
7157  addrem_flags(RVAL, RKCL|QMRK);
7158  }
7159  _maybe_skip_whitespace_tokens();
7160  }
7161  else if(first == '&')
7162  {
7163  csubstr anchor = _scan_anchor();
7164  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7165  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7166  }
7167  else if(first == '!')
7168  {
7169  csubstr tag = _scan_tag();
7170  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7171  _add_annotation(&m_pending_tags, tag, startindent, startline);
7172  }
7173  else if(first == '-')
7174  {
7175  _c4dbgp("mapblck[QMRK]: maybe doc?");
7176  csubstr rs = rem.sub(1);
7177  if(rs == "--" || rs.begins_with("-- "))
7178  {
7179  _c4dbgp("mapblck[QMRK]: end+start doc");
7180  _start_doc_suddenly();
7181  _line_progressed(3);
7182  }
7183  else
7184  {
7185  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7186  addrem_flags(RKCL, RKEY|QMRK);
7187  _handle_annotations_before_blck_key_scalar();
7188  m_evt_handler->begin_seq_key_block();
7189  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7190  _set_indentation(startindent);
7191  _line_progressed(1);
7192  }
7193  _maybe_skip_whitespace_tokens();
7194  goto mapblck_finish;
7195  }
7196  else if(first == '[')
7197  {
7198  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7199  addrem_flags(RKCL, RKEY|QMRK);
7200  m_evt_handler->begin_seq_key_flow();
7201  addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|QMRK|RBLCK);
7202  _set_indentation(m_evt_handler->m_parent->indref);
7203  _line_progressed(1);
7204  goto mapblck_finish;
7205  }
7206  else if(first == '{')
7207  {
7208  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7209  addrem_flags(RKCL, RKEY|QMRK);
7210  m_evt_handler->begin_map_key_flow();
7211  addrem_flags(RKEY|RFLOW, RVAL|RKCL|QMRK|RBLCK);
7212  _set_indentation(m_evt_handler->m_parent->indref);
7213  _line_progressed(1);
7214  goto mapblck_finish;
7215  }
7216  else if(first == '?')
7217  {
7218  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7219  m_evt_handler->set_key_scalar_plain_empty();
7220  m_evt_handler->set_val_scalar_plain_empty();
7221  m_evt_handler->add_sibling();
7222  _line_progressed(1);
7223  }
7224  else if(first == '.')
7225  {
7226  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7227  csubstr rs = rem.sub(1);
7228  if(rs == ".." || rs.begins_with(".. "))
7229  {
7230  _c4dbgp("mapblck[QMRK]: end+start doc");
7231  _end_doc_suddenly();
7232  _line_progressed(3);
7233  goto mapblck_finish;
7234  }
7235  else
7236  {
7237  _c4err("parse error");
7238  }
7239  }
7240  else
7241  {
7242  _c4err("parse error");
7243  }
7244  }
7245 
7246  mapblck_again:
7247  _c4dbgt("mapblck: again", 0);
7248  if(_finished_line())
7249  {
7250  _line_ended();
7251  _scan_line();
7252  if(_finished_file())
7253  {
7254  _c4dbgp("mapblck: file finished!");
7255  _end_map_blck();
7256  goto mapblck_finish;
7257  }
7258  _c4dbgnextline();
7259  }
7260  goto mapblck_start;
7261 
7262  mapblck_finish:
7263  _c4dbgp("mapblck: finish");
7264 }
7265 
7266 
7267 //-----------------------------------------------------------------------------
7268 
7269 template<class EventHandler>
7270 void ParseEngine<EventHandler>::_handle_unk_json()
7271 {
7272  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7273 
7274  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7275  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7276 
7277  _maybe_skip_comment();
7278  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7279  if(!rem.len)
7280  return;
7281 
7282  size_t pos = rem.first_not_of(" \t");
7283  if(pos)
7284  {
7285  pos = pos != npos ? pos : rem.len;
7286  _c4dbgpf("skipping indentation of {}", pos);
7287  _line_progressed(pos);
7288  rem = m_evt_handler->m_curr->line_contents.rem;
7289  if(!rem.len)
7290  return;
7291  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7292  }
7293 
7294  if(rem.begins_with('['))
7295  {
7296  _c4dbgp("it's a seq");
7297  m_evt_handler->check_trailing_doc_token();
7298  _maybe_begin_doc();
7299  m_evt_handler->begin_seq_val_flow();
7300  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7301  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7302  m_doc_empty = false;
7303  _line_progressed(1);
7304  }
7305  else if(rem.begins_with('{'))
7306  {
7307  _c4dbgp("it's a map");
7308  m_evt_handler->check_trailing_doc_token();
7309  _maybe_begin_doc();
7310  m_evt_handler->begin_map_val_flow();
7311  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7312  m_doc_empty = false;
7313  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7314  _line_progressed(1);
7315  }
7316  else if(_handle_bom())
7317  {
7318  _c4dbgp("byte order mark");
7319  }
7320  else
7321  {
7322  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7323  _maybe_skip_whitespace_tokens();
7324  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7325  if(!s.len)
7326  return;
7327  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7328  const char first = s.str[0];
7329  ScannedScalar sc;
7330  if(first == '"')
7331  {
7332  _c4dbgp("runk_json: scanning double-quoted scalar");
7333  m_evt_handler->check_trailing_doc_token();
7334  _maybe_begin_doc();
7335  add_flags(RDOC);
7336  m_doc_empty = false;
7337  sc = _scan_scalar_dquot();
7338  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7339  if(!_maybe_scan_following_colon())
7340  {
7341  _c4dbgp("runk_json: set as val");
7342  _handle_annotations_before_blck_val_scalar();
7343  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7344  }
7345  else
7346  {
7347  _c4err("parse error");
7348  }
7349  }
7350  else if(_scan_scalar_plain_unk(&sc))
7351  {
7352  _c4dbgp("runk_json: got a plain scalar");
7353  m_evt_handler->check_trailing_doc_token();
7354  _maybe_begin_doc();
7355  add_flags(RDOC);
7356  m_doc_empty = false;
7357  if(!_maybe_scan_following_colon())
7358  {
7359  _c4dbgp("runk_json: set as val");
7360  _handle_annotations_before_blck_val_scalar();
7361  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7362  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7363  }
7364  else
7365  {
7366  _c4err("parse error");
7367  }
7368  }
7369  else
7370  {
7371  _c4err("parse error");
7372  }
7373  }
7374 }
7375 
7376 
7377 //-----------------------------------------------------------------------------
7378 
7379 template<class EventHandler>
7380 void ParseEngine<EventHandler>::_handle_unk()
7381 {
7382  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7383 
7384  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7385  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7386 
7387  _maybe_skip_comment();
7388  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7389  if(!rem.len)
7390  return;
7391 
7392  size_t pos = rem.first_not_of(" \t");
7393  if(pos)
7394  {
7395  pos = pos != npos ? pos : rem.len;
7396  _c4dbgpf("skipping {} whitespace characters", pos);
7397  _line_progressed(pos);
7398  rem = m_evt_handler->m_curr->line_contents.rem;
7399  if(!rem.len)
7400  return;
7401  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7402  }
7403 
7404  if(m_evt_handler->m_curr->line_contents.indentation == 0u && (_at_line_begin() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7405  {
7406  _c4dbgpf("rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7407  _c4dbgp("check BOM");
7408  if(_handle_bom())
7409  {
7410  m_bom_line = m_evt_handler->m_curr->pos.line;
7411  _c4dbgpf("byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7412  return;
7413  }
7414  const char first = rem.str[0];
7415  if(first == '-')
7416  {
7417  _c4dbgp("rtop: suspecting doc");
7418  if(_is_doc_begin_token(rem))
7419  {
7420  _c4dbgp("rtop: begin doc");
7421  _maybe_end_doc();
7422  _begin2_doc_expl();
7423  _set_indentation(0);
7424  addrem_flags(RDOC|RUNK, NDOC);
7425  _line_progressed(3u);
7426  _maybe_skip_whitespace_tokens();
7427  return;
7428  }
7429  }
7430  else if(first == '.')
7431  {
7432  _c4dbgp("rtop: suspecting doc end");
7433  if(_is_doc_end_token(rem))
7434  {
7435  _c4dbgp("rtop: end doc");
7436  if(has_any(RDOC))
7437  {
7438  _end2_doc_expl();
7439  }
7440  else
7441  {
7442  _c4dbgp("rtop: ignore end doc");
7443  }
7444  addrem_flags(NDOC|RUNK, RDOC);
7445  _line_progressed(3u);
7446  _maybe_skip_whitespace_tokens();
7447  return;
7448  }
7449  }
7450  else if(first == '%')
7451  {
7452  _c4dbgpf("directive: {}", rem);
7453  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7454  _c4err("need document footer before directives");
7455  _handle_directive(rem);
7456  return;
7457  }
7458  }
7459 
7460  /* no else-if! */
7461  char first = rem.str[0];
7462 
7463  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7464  size_t remindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7465  if(m_bom_len)
7466  {
7467  _c4dbgpf("prev BOMlen={}", m_bom_len);
7468  if(m_evt_handler->m_curr->pos.line == m_bom_line)
7469  {
7470  _c4dbgpf("BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7471  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len);
7472  remindent -= m_bom_len;
7473  }
7474  else
7475  {
7476  m_bom_len = 0;
7477  }
7478  }
7479 
7480  if(first == '[')
7481  {
7482  m_evt_handler->check_trailing_doc_token();
7483  _maybe_begin_doc();
7484  m_doc_empty = false;
7485  if(C4_LIKELY( ! _annotations_require_key_container()))
7486  {
7487  _c4dbgp("it's a seq, flow");
7488  _handle_annotations_before_blck_val_scalar();
7489  m_evt_handler->begin_seq_val_flow();
7490  addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7491  _set_indentation(remindent);
7492  }
7493  else
7494  {
7495  _c4dbgp("start new block map, set flow seq as key (!)");
7496  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7497  m_evt_handler->begin_map_val_block();
7498  addrem_flags(RMAP|RBLCK|RKCL, RUNK|RTOP|RDOC);
7499  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7500  m_evt_handler->begin_seq_key_flow();
7501  addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKCL);
7502  _set_indentation(remindent);
7503  }
7504  _line_progressed(1);
7505  }
7506  else if(first == '{')
7507  {
7508  m_evt_handler->check_trailing_doc_token();
7509  _maybe_begin_doc();
7510  m_doc_empty = false;
7511  if(C4_LIKELY( ! _annotations_require_key_container()))
7512  {
7513  _c4dbgp("it's a map, flow");
7514  _handle_annotations_before_blck_val_scalar();
7515  m_evt_handler->begin_map_val_flow();
7516  addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7517  _set_indentation(remindent);
7518  }
7519  else
7520  {
7521  _c4dbgp("start new block map, set flow map as key (!)");
7522  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7523  m_evt_handler->begin_map_val_block();
7524  addrem_flags(RMAP|RBLCK|RKCL, RUNK|RTOP|RDOC);
7525  _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7526  m_evt_handler->begin_map_key_flow();
7527  addrem_flags(RMAP|RFLOW|RKEY, RBLCK|RKCL);
7528  _set_indentation(remindent);
7529  }
7530  _line_progressed(1);
7531  }
7532  else if(first == '-' && _is_blck_token(rem))
7533  {
7534  _c4dbgp("it's a seq, block");
7535  m_evt_handler->check_trailing_doc_token();
7536  _maybe_begin_doc();
7537  _handle_annotations_before_blck_val_scalar();
7538  m_evt_handler->begin_seq_val_block();
7539  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7540  m_doc_empty = false;
7541  _set_indentation(remindent);
7542  _line_progressed(1);
7543  _maybe_skip_whitespace_tokens();
7544  }
7545  else if(first == '?' && _is_blck_token(rem))
7546  {
7547  _c4dbgp("it's a map + this key is complex");
7548  m_evt_handler->check_trailing_doc_token();
7549  _maybe_begin_doc();
7550  _handle_annotations_before_blck_val_scalar();
7551  m_evt_handler->begin_map_val_block();
7552  addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7553  m_doc_empty = false;
7554  m_was_inside_qmrk = true;
7555  _set_indentation(remindent); //_save_indentation();
7556  _line_progressed(1);
7557  _maybe_skip_whitespace_tokens();
7558  }
7559  else if(first == ':' && _is_blck_token(rem))
7560  {
7561  if(m_doc_empty)
7562  {
7563  _c4dbgp("it's a map with an empty key");
7564  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7565  m_evt_handler->check_trailing_doc_token();
7566  _maybe_begin_doc();
7567  _handle_annotations_before_start_mapblck(startline);
7568  _handle_colon();
7569  m_evt_handler->begin_map_val_block();
7570  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7571  m_evt_handler->set_key_scalar_plain_empty();
7572  m_doc_empty = false;
7573  _set_indentation(startindent);
7574  }
7575  else
7576  {
7577  _c4dbgp("actually prev val is a key!");
7578  size_t prev_indentation = m_evt_handler->m_curr->indref;
7579  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7580  _set_indentation(prev_indentation);
7581  }
7582  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7583  _line_progressed(1);
7584  _maybe_skip_whitespace_tokens();
7585  }
7586  else if(first == '&')
7587  {
7588  csubstr anchor = _scan_anchor();
7589  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7590  m_evt_handler->check_trailing_doc_token();
7591  _maybe_begin_doc();
7592  const size_t line = m_evt_handler->m_curr->pos.line;
7593  _add_annotation(&m_pending_anchors, anchor, remindent, line);
7594  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7595  m_doc_empty = false;
7596  }
7597  else if(first == '*')
7598  {
7599  csubstr ref = _scan_ref_map();
7600  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7601  m_evt_handler->check_trailing_doc_token();
7602  _maybe_begin_doc();
7603  m_doc_empty = false;
7604  if(!_maybe_scan_following_colon())
7605  {
7606  _c4dbgp("runk: set val ref");
7607  _handle_annotations_before_blck_val_scalar();
7608  m_evt_handler->set_val_ref(ref);
7609  }
7610  else
7611  {
7612  _c4dbgp("runk: start new block map, set ref as key");
7613  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7614  _handle_annotations_before_start_mapblck(startline);
7615  m_evt_handler->begin_map_val_block();
7616  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7617  m_evt_handler->set_key_ref(ref);
7618  _maybe_skip_whitespace_tokens();
7619  _set_indentation(startindent);
7620  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7621  }
7622  }
7623  else if(first == '!')
7624  {
7625  csubstr tag = _scan_tag();
7626  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7627  // we need to buffer the tags, as there may be two
7628  // consecutive tags in here
7629  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7630  const size_t line = m_evt_handler->m_curr->pos.line;
7631  _add_annotation(&m_pending_tags, tag, indentation, line);
7632  }
7633  else
7634  {
7635  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7636  _maybe_skip_whitespace_tokens();
7637  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7638  if(!s.len)
7639  return;
7640  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7641  first = s.str[0];
7642  ScannedScalar sc;
7643  if(first == '\'')
7644  {
7645  _c4dbgp("runk: scanning single-quoted scalar");
7646  m_evt_handler->check_trailing_doc_token();
7647  _maybe_begin_doc();
7648  add_flags(RDOC);
7649  m_doc_empty = false;
7650  sc = _scan_scalar_squot();
7651  if(!_maybe_scan_following_colon())
7652  {
7653  _c4dbgp("runk: set as val");
7654  _handle_annotations_before_blck_val_scalar();
7655  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7656  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7657  }
7658  else
7659  {
7660  _c4dbgp("runk: start new block map, set scalar as key");
7661  _handle_annotations_before_start_mapblck(startline);
7662  _handle_colon();
7663  m_evt_handler->begin_map_val_block();
7664  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7665  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7666  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7667  _maybe_skip_whitespace_tokens();
7668  _set_indentation(startindent);
7669  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7670  }
7671  }
7672  else if(first == '"')
7673  {
7674  _c4dbgp("runk: scanning double-quoted scalar");
7675  m_evt_handler->check_trailing_doc_token();
7676  _maybe_begin_doc();
7677  add_flags(RDOC);
7678  m_doc_empty = false;
7679  sc = _scan_scalar_dquot();
7680  if(!_maybe_scan_following_colon())
7681  {
7682  _c4dbgp("runk: set as val");
7683  _handle_annotations_before_blck_val_scalar();
7684  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7685  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7686  }
7687  else
7688  {
7689  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7690  _handle_annotations_before_start_mapblck(startline);
7691  m_evt_handler->begin_map_val_block();
7692  _handle_colon();
7693  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7694  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7695  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7696  _maybe_skip_whitespace_tokens();
7697  _set_indentation(startindent);
7698  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7699  }
7700  }
7701  else if(first == '|')
7702  {
7703  _c4dbgp("runk: scanning block-literal scalar");
7704  m_evt_handler->check_trailing_doc_token();
7705  _maybe_begin_doc();
7706  add_flags(RDOC);
7707  m_doc_empty = false;
7708  ScannedBlock sb;
7709  _scan_block(&sb, startindent);
7710  if(C4_LIKELY(!_maybe_scan_following_colon()))
7711  {
7712  _c4dbgp("runk: set as val");
7713  _handle_annotations_before_blck_val_scalar();
7714  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7715  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7716  }
7717  else
7718  {
7719  _c4err("block literal keys must be enclosed in '?'");
7720  }
7721  }
7722  else if(first == '>')
7723  {
7724  _c4dbgp("runk: scanning block-folded scalar");
7725  m_evt_handler->check_trailing_doc_token();
7726  _maybe_begin_doc();
7727  add_flags(RDOC);
7728  m_doc_empty = false;
7729  ScannedBlock sb;
7730  _scan_block(&sb, startindent);
7731  if(C4_LIKELY(!_maybe_scan_following_colon()))
7732  {
7733  _c4dbgp("runk: set as val");
7734  _handle_annotations_before_blck_val_scalar();
7735  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7736  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7737  }
7738  else
7739  {
7740  _c4err("block folded keys must be enclosed in '?'");
7741  }
7742  }
7743  else if(_scan_scalar_plain_unk(&sc))
7744  {
7745  _c4dbgp("runk: got a plain scalar");
7746  m_evt_handler->check_trailing_doc_token();
7747  _maybe_begin_doc();
7748  add_flags(RDOC);
7749  m_doc_empty = false;
7750  if(!_maybe_scan_following_colon())
7751  {
7752  _c4dbgp("runk: set as val");
7753  _handle_annotations_before_blck_val_scalar();
7754  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7755  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7756  }
7757  else
7758  {
7759  _c4dbgp("runk: start new block map, set scalar as key");
7760  _handle_annotations_before_start_mapblck(startline);
7761  _handle_colon();
7762  m_evt_handler->begin_map_val_block();
7763  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7764  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7765  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7766  _maybe_skip_whitespace_tokens();
7767  _set_indentation(startindent);
7768  addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
7769  }
7770  }
7771  }
7772 }
7773 
7774 
7775 //-----------------------------------------------------------------------------
7776 
7777 template<class EventHandler>
7778 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7779 {
7780  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7781 
7782  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW));
7783 
7784  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7785  if(has_any(RNXT))
7786  {
7787  _c4dbgp("usty[RNXT]: finishing!");
7788  _end_stream();
7789  }
7790  #endif
7791 
7792  _maybe_skip_comment();
7793  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7794  if(!rem.len)
7795  return;
7796 
7797  size_t pos = rem.first_not_of(" \t");
7798  if(pos)
7799  {
7800  pos = pos != npos ? pos : rem.len;
7801  _c4dbgpf("skipping indentation of {}", pos);
7802  _line_progressed(pos);
7803  rem = m_evt_handler->m_curr->line_contents.rem;
7804  if(!rem.len)
7805  return;
7806  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7807  }
7808 
7809  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7810  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7811  char first = rem.str[0];
7812  if(has_any(RSEQ)) // destination is a sequence
7813  {
7814  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7815  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7816  if(first == '[')
7817  {
7818  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7819  add_flags(RNXT);
7820  m_evt_handler->_push();
7821  addrem_flags(RFLOW|RVAL, RNXT|USTY);
7822  _set_indentation(startindent);
7823  _line_progressed(1);
7824  _maybe_skip_whitespace_tokens();
7825  }
7826  else if(first == '-' && _is_blck_token(rem))
7827  {
7828  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7829  add_flags(RNXT);
7830  m_evt_handler->_push();
7831  addrem_flags(RBLCK|RVAL, RNXT|USTY);
7832  _set_indentation(startindent);
7833  _line_progressed(1);
7834  _maybe_skip_whitespace_tokens();
7835  }
7836  else
7837  {
7838  _c4err("can only parse a seq into an existing seq");
7839  }
7840  }
7841  else if(has_any(RMAP)) // destination is a map
7842  {
7843  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7844  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7845  if(first == '{')
7846  {
7847  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7848  add_flags(RNXT);
7849  _handle_annotations_before_blck_val_scalar();
7850  m_evt_handler->_push();
7851  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
7852  _set_indentation(startindent);
7853  _line_progressed(1);
7854  _maybe_skip_whitespace_tokens();
7855  }
7856  else if(first == '?' && _is_blck_token(rem))
7857  {
7858  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7859  add_flags(RNXT);
7860  _handle_annotations_before_blck_val_scalar();
7861  m_evt_handler->_push();
7862  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
7863  m_was_inside_qmrk = true;
7864  _save_indentation();
7865  _line_progressed(1);
7866  _maybe_skip_whitespace_tokens();
7867  }
7868  else if(first == ':' && _is_blck_token(rem))
7869  {
7870  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7871  add_flags(RNXT);
7872  _handle_annotations_before_blck_val_scalar();
7873  m_evt_handler->_push();
7874  m_evt_handler->set_key_scalar_plain_empty();
7875  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7876  _save_indentation();
7877  _line_progressed(1);
7878  _maybe_skip_whitespace_tokens();
7879  }
7880  else if(rem.begins_with('&'))
7881  {
7882  csubstr anchor = _scan_anchor();
7883  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7884  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7885  const size_t line = m_evt_handler->m_curr->pos.line;
7886  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7887  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7888  }
7889  else if(first == '*')
7890  {
7891  csubstr ref = _scan_ref_map();
7892  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7893  if(!_maybe_scan_following_colon())
7894  {
7895  _c4err("cannot read a VAL to a map");
7896  }
7897  else
7898  {
7899  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7900  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7901  add_flags(RNXT);
7902  _handle_annotations_before_start_mapblck(startline);
7903  m_evt_handler->_push();
7904  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7905  m_evt_handler->set_key_ref(ref);
7906  _maybe_skip_whitespace_tokens();
7907  _set_indentation(startindent);
7908  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7909  }
7910  }
7911  else if(first == '!')
7912  {
7913  csubstr tag = _scan_tag();
7914  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7915  // we need to buffer the tags, as there may be two
7916  // consecutive tags in here
7917  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7918  const size_t line = m_evt_handler->m_curr->pos.line;
7919  _add_annotation(&m_pending_tags, tag, indentation, line);
7920  }
7921  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7922  {
7923  _c4err("cannot parse a seq into an existing map");
7924  }
7925  else
7926  {
7927  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7928  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7929  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7930  ScannedScalar sc;
7931  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7932  if(first == '\'')
7933  {
7934  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7935  sc = _scan_scalar_squot();
7936  if(!_maybe_scan_following_colon())
7937  {
7938  _c4err("cannot read a VAL to a map");
7939  }
7940  else
7941  {
7942  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7943  add_flags(RNXT);
7944  _handle_annotations_before_start_mapblck(startline);
7945  m_evt_handler->_push();
7946  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7947  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7948  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7949  _set_indentation(startindent);
7950  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7951  _maybe_skip_whitespace_tokens();
7952  }
7953  }
7954  else if(first == '"')
7955  {
7956  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7957  sc = _scan_scalar_dquot();
7958  if(!_maybe_scan_following_colon())
7959  {
7960  _c4err("cannot read a VAL to a map");
7961  }
7962  else
7963  {
7964  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7965  add_flags(RNXT);
7966  _handle_annotations_before_start_mapblck(startline);
7967  m_evt_handler->_push();
7968  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7969  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7970  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7971  _set_indentation(startindent);
7972  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
7973  _maybe_skip_whitespace_tokens();
7974  }
7975  }
7976  else if(first == '|')
7977  {
7978  _c4err("block literal keys must be enclosed in '?'");
7979  }
7980  else if(first == '>')
7981  {
7982  _c4err("block literal keys must be enclosed in '?'");
7983  }
7984  else if(_scan_scalar_plain_unk(&sc))
7985  {
7986  _c4dbgp("usty[RMAP]: got a plain scalar");
7987  if(!_maybe_scan_following_colon())
7988  {
7989  _c4err("cannot read a VAL to a map");
7990  }
7991  else
7992  {
7993  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7994  add_flags(RNXT);
7995  _handle_annotations_before_start_mapblck(startline);
7996  m_evt_handler->_push();
7997  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7998  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7999  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8000  _set_indentation(startindent);
8001  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8002  _maybe_skip_whitespace_tokens();
8003  }
8004  }
8005  else
8006  {
8007  _c4err("parse error");
8008  }
8009  }
8010  }
8011  else // destination is unknown
8012  {
8013  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
8014  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8015  if(first == '[')
8016  {
8017  _c4dbgp("usty[UNK]: it's a flow seq");
8018  add_flags(RNXT);
8019  _handle_annotations_before_blck_val_scalar();
8020  m_evt_handler->begin_seq_val_flow();
8021  addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8022  _set_indentation(startindent);
8023  _line_progressed(1);
8024  _maybe_skip_whitespace_tokens();
8025  }
8026  else if(first == '-' && _is_blck_token(rem))
8027  {
8028  _c4dbgp("usty[UNK]: it's a block seq");
8029  add_flags(RNXT);
8030  _handle_annotations_before_blck_val_scalar();
8031  m_evt_handler->begin_seq_val_block();
8032  addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8033  _set_indentation(startindent);
8034  _line_progressed(1);
8035  _maybe_skip_whitespace_tokens();
8036  }
8037  else if(first == '{')
8038  {
8039  _c4dbgp("usty[UNK]: it's a flow map");
8040  add_flags(RNXT);
8041  _handle_annotations_before_blck_val_scalar();
8042  m_evt_handler->begin_map_val_flow();
8043  addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8044  _set_indentation(startindent);
8045  _line_progressed(1);
8046  _maybe_skip_whitespace_tokens();
8047  }
8048  else if(first == '?' && _is_blck_token(rem))
8049  {
8050  _c4dbgp("usty[UNK]: it's a map + this key is complex");
8051  add_flags(RNXT);
8052  _handle_annotations_before_blck_val_scalar();
8053  m_evt_handler->begin_map_val_block();
8054  addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8055  m_was_inside_qmrk = true;
8056  _save_indentation();
8057  _line_progressed(1);
8058  _maybe_skip_whitespace_tokens();
8059  }
8060  else if(first == ':' && _is_blck_token(rem))
8061  {
8062  _c4dbgp("usty[UNK]: it's a map with an empty key");
8063  add_flags(RNXT);
8064  _handle_annotations_before_blck_val_scalar();
8065  m_evt_handler->begin_map_val_block();
8066  m_evt_handler->set_key_scalar_plain_empty();
8067  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8068  _save_indentation();
8069  _line_progressed(1);
8070  _maybe_skip_whitespace_tokens();
8071  }
8072  else if(first == '&')
8073  {
8074  csubstr anchor = _scan_anchor();
8075  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8076  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8077  const size_t line = m_evt_handler->m_curr->pos.line;
8078  _add_annotation(&m_pending_anchors, anchor, indentation, line);
8079  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8080  }
8081  else if(first == '*')
8082  {
8083  csubstr ref = _scan_ref_map();
8084  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8085  if(!_maybe_scan_following_colon())
8086  {
8087  _c4dbgp("usty[UNK]: set val ref");
8088  _handle_annotations_before_blck_val_scalar();
8089  m_evt_handler->set_val_ref(ref);
8090  }
8091  else
8092  {
8093  _c4dbgp("usty[UNK]: start new block map, set ref as key");
8094  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8095  add_flags(RNXT);
8096  _handle_annotations_before_start_mapblck(startline);
8097  m_evt_handler->begin_map_val_block();
8098  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8099  m_evt_handler->set_key_ref(ref);
8100  _maybe_skip_whitespace_tokens();
8101  _set_indentation(startindent);
8102  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8103  }
8104  }
8105  else if(first == '!')
8106  {
8107  csubstr tag = _scan_tag();
8108  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8109  // we need to buffer the tags, as there may be two
8110  // consecutive tags in here
8111  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8112  const size_t line = m_evt_handler->m_curr->pos.line;
8113  _add_annotation(&m_pending_tags, tag, indentation, line);
8114  }
8115  else
8116  {
8117  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
8118  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8119  const size_t startline = m_evt_handler->m_curr->pos.line; // save
8120  first = rem.str[0];
8121  ScannedScalar sc;
8122  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8123  if(first == '\'')
8124  {
8125  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8126  sc = _scan_scalar_squot();
8127  if(!_maybe_scan_following_colon())
8128  {
8129  _c4dbgp("usty[UNK]: set as val");
8130  _handle_annotations_before_blck_val_scalar();
8131  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8132  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8133  _end_stream();
8134  }
8135  else
8136  {
8137  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8138  add_flags(RNXT);
8139  _handle_annotations_before_start_mapblck(startline);
8140  m_evt_handler->begin_map_val_block();
8141  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8142  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8143  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8144  _set_indentation(startindent);
8145  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8146  _maybe_skip_whitespace_tokens();
8147  }
8148  }
8149  else if(first == '"')
8150  {
8151  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8152  sc = _scan_scalar_dquot();
8153  if(!_maybe_scan_following_colon())
8154  {
8155  _c4dbgp("usty[UNK]: set as val");
8156  _handle_annotations_before_blck_val_scalar();
8157  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8158  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8159  _end_stream();
8160  }
8161  else
8162  {
8163  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8164  add_flags(RNXT);
8165  _handle_annotations_before_start_mapblck(startline);
8166  m_evt_handler->begin_map_val_block();
8167  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8168  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8169  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8170  _set_indentation(startindent);
8171  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8172  _maybe_skip_whitespace_tokens();
8173  }
8174  }
8175  else if(first == '|')
8176  {
8177  _c4dbgp("usty[UNK]: scanning block-literal scalar");
8178  ScannedBlock sb;
8179  _scan_block(&sb, startindent);
8180  _c4dbgp("usty[UNK]: set as val");
8181  _handle_annotations_before_blck_val_scalar();
8182  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8183  m_evt_handler->set_val_scalar_literal(maybe_filtered);
8184  _end_stream();
8185  }
8186  else if(first == '>')
8187  {
8188  _c4dbgp("usty[UNK]: scanning block-folded scalar");
8189  ScannedBlock sb;
8190  _scan_block(&sb, startindent);
8191  _c4dbgp("usty[UNK]: set as val");
8192  _handle_annotations_before_blck_val_scalar();
8193  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8194  m_evt_handler->set_val_scalar_folded(maybe_filtered);
8195  _end_stream();
8196  }
8197  else if(_scan_scalar_plain_unk(&sc))
8198  {
8199  _c4dbgp("usty[UNK]: got a plain scalar");
8200  if(!_maybe_scan_following_colon())
8201  {
8202  _c4dbgp("usty[UNK]: set as val");
8203  _handle_annotations_before_blck_val_scalar();
8204  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8205  m_evt_handler->set_val_scalar_plain(maybe_filtered);
8206  _end_stream();
8207  }
8208  else
8209  {
8210  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8211  add_flags(RNXT);
8212  _handle_annotations_before_start_mapblck(startline);
8213  m_evt_handler->begin_map_val_block();
8214  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8215  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8216  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8217  _set_indentation(startindent);
8218  addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8219  _maybe_skip_whitespace_tokens();
8220  }
8221  }
8222  else
8223  {
8224  _c4err("parse error");
8225  }
8226  }
8227  }
8228 }
8229 
8230 
8231 //-----------------------------------------------------------------------------
8232 
8233 template<class EventHandler>
8234 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8235 {
8236  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8237  m_file = filename;
8238  m_buf = src;
8239  _reset();
8240  m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena, this);
8241  m_evt_handler->begin_stream();
8242  while( ! _finished_file())
8243  {
8244  _scan_line();
8245  while( ! _finished_line())
8246  {
8247  _c4dbgnextline();
8248  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8249  if(has_any(RSEQ))
8250  {
8251  _handle_seq_json();
8252  }
8253  else if(has_any(RMAP))
8254  {
8255  _handle_map_json();
8256  }
8257  else if(has_any(RUNK))
8258  {
8259  _handle_unk_json();
8260  }
8261  else
8262  {
8263  _c4err("internal error");
8264  }
8265  }
8266  if(_finished_file())
8267  break; // it may have finished because of multiline blocks
8268  _line_ended();
8269  }
8270  _end_stream();
8271  m_evt_handler->finish_parse();
8272 }
8273 
8274 
8275 //-----------------------------------------------------------------------------
8276 
8277 template<class EventHandler>
8278 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8279 {
8280  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8281  m_file = filename;
8282  m_buf = src;
8283  _reset();
8284  m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena, this);
8285  m_evt_handler->begin_stream();
8286  while( ! _finished_file())
8287  {
8288  _scan_line();
8289  while( ! _finished_line())
8290  {
8291  _c4dbgnextline();
8292  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8293  if(has_any(RFLOW))
8294  {
8295  if(has_none(RSEQIMAP))
8296  {
8297  if(has_any(RSEQ))
8298  {
8299  _handle_seq_flow();
8300  }
8301  else
8302  {
8303  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8304  _handle_map_flow();
8305  }
8306  }
8307  else
8308  {
8309  _handle_seq_imap();
8310  }
8311  }
8312  else if(has_any(RBLCK))
8313  {
8314  if(has_any(RSEQ))
8315  {
8316  _handle_seq_block();
8317  }
8318  else
8319  {
8320  _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8321  _handle_map_block();
8322  }
8323  }
8324  else if(has_any(RUNK))
8325  {
8326  _handle_unk();
8327  }
8328  else if(has_any(USTY))
8329  {
8330  _handle_usty();
8331  }
8332  else
8333  {
8334  _c4err("internal error");
8335  }
8336  }
8337  if(_finished_file())
8338  break; // it may have finished because of multiline blocks
8339  _line_ended();
8340  }
8341  _end_stream();
8342  m_evt_handler->finish_parse();
8343 }
8344 /** @endcond */
8345 
8346 } // namespace yml
8347 } // namespace c4
8348 
8349 // NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
8350 
8351 #undef _c4dbgnextline
8352 
8353 #if defined(_MSC_VER)
8354 # pragma warning(pop)
8355 #elif defined(__clang__)
8356 # pragma clang diagnostic pop
8357 #elif defined(__GNUC__)
8358 # pragma GCC diagnostic pop
8359 #endif
8360 
8361 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:28
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1546
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition: common.cpp:210
enum c4::yml::BlockChomp_ BlockChomp_e
@ CHOMP_CLIP
single newline at end (default)
@ CHOMP_KEEP
all newlines from end (+)
@ CHOMP_STRIP
no newline at end (-)
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
Definition: common.hpp:258
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with x
@ UTF16BE
UTF16, Big-Endian.
Definition: common.hpp:266
@ UTF8
UTF8.
Definition: common.hpp:264
@ UTF16LE
UTF16, Little-Endian.
Definition: common.hpp:265
@ NOBOM
No Byte Order Mark was found.
Definition: common.hpp:263
@ UTF32BE
UTF32, Big-Endian.
Definition: common.hpp:268
@ UTF32LE
UTF32, Little-Endian.
Definition: common.hpp:267
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
Definition: common.hpp:347
utilities for UTF and Byte Order Mark