rapidyaml  0.7.1
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 #include <c4/dump.hpp>
9 
10 #include <ctype.h>
11 
12 #include "c4/yml/detail/parser_dbg.hpp"
14 #ifdef RYML_DBG
15 #include "c4/yml/detail/print.hpp"
16 #endif
17 
18 
// Compile-time switches for treating tabs as token separators.
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments only when
//    RYML_WITH_TAB_TOKENS is defined, and to nothing otherwise.
//  - _RYML_WITHOUT_TAB_TOKENS(...) is the exact opposite.
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) selects the first
//    argument when RYML_WITH_TAB_TOKENS is defined, else the second.
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
23 #else
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
27 #endif
28 
29 
// Debug helper: emits a separator followed by the line number and byte
// offset of the handler's current state. It reads m_evt_handler, so it
// can only be used where that member is in scope.
30 // scaffold:
31 #define _c4dbgnextline() \
32  do { \
33  _c4dbgq("\n-----------"); \
34  _c4dbgt("handling line={}, offset={}B", \
35  m_evt_handler->m_curr->pos.line, \
36  m_evt_handler->m_curr->pos.offset); \
37  } while(0)
38 
39 
40 #if defined(_MSC_VER)
41 # pragma warning(push)
42 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
43 # pragma warning(disable: 4702/*unreachable code*/)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
54 # if __GNUC__ >= 7
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
56 # endif
57 #endif
58 
59 namespace c4 {
60 namespace yml {
61 
62 namespace {
63 
64 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
65 {
66  RYML_ASSERT(s.len > 0);
67  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
68  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
69 }
70 
71 inline bool _is_doc_begin_token(csubstr s)
72 {
73  RYML_ASSERT(s.begins_with('-'));
74  RYML_ASSERT(!s.ends_with("\n"));
75  RYML_ASSERT(!s.ends_with("\r"));
76  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
77  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
78 }
79 
80 inline bool _is_doc_end_token(csubstr s)
81 {
82  RYML_ASSERT(s.begins_with('.'));
83  RYML_ASSERT(!s.ends_with("\n"));
84  RYML_ASSERT(!s.ends_with("\r"));
85  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
86  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
87 }
88 
/** Report whether @p s starts with a document marker: "---" or "...",
 * either as the whole string or followed by a space (or by a tab when
 * tab tokens are enabled). Strings shorter than three characters are
 * never markers. The two checks are deliberately written inline instead
 * of delegating to the begin/end helpers; see the note in the body. */
89 inline bool _is_doc_token(csubstr s) noexcept
90 {
91  //
92  // NOTE: this function was failing under some scenarios when
93  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
94  // related to optimizer assumptions on the input string and
95  // possibly caused from UB around assignment to that string (the
96  // call site was in _scan_block()). For more details see:
97  //
98  // https://github.com/biojppm/rapidyaml/issues/440
99  //
100  // The current version does not suffer this problem, but it may
101  // appear again.
102  //
103  if(s.len >= 3)
104  {
105  switch(s.str[0])
106  {
107  case '-':
108  //return _is_doc_begin_token(s); // this was failing with gcc -O2
109  return (s.str[1] == '-' && s.str[2] == '-')
110  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
111  case '.':
112  //return _is_doc_end_token(s); // this was failing with gcc -O2
113  return (s.str[1] == '.' && s.str[2] == '.')
114  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
115  }
116  }
// any other first character, or fewer than three characters
117  return false;
118 }
119 
120 inline size_t _is_special_json_scalar(csubstr s)
121 {
122  RYML_ASSERT(s.len);
123  switch(s.str[0])
124  {
125  case 'f':
126  if(s.len >= 5 && s.begins_with("false"))
127  return 5u;
128  break;
129  case 't':
130  if(s.len >= 4 && s.begins_with("true"))
131  return 4u;
132  break;
133  case 'n':
134  if(s.len >= 4 && s.begins_with("null"))
135  return 4u;
136  break;
137  }
138  return 0u;
139 }
140 
141 
142 //-----------------------------------------------------------------------------
143 
144 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
145 {
146  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
147 }
148 
149 //! look for the next newline chars, and jump to the right of those
150 inline substr from_next_line(substr rem)
151 {
152  size_t nlpos = rem.first_of("\r\n");
153  if(nlpos == csubstr::npos)
154  return {};
155  const char nl = rem[nlpos];
156  rem = rem.right_of(nlpos);
157  if(rem.empty())
158  return {};
159  if(_extend_from_combined_newline(nl, rem.front()))
160  rem = rem.sub(1);
161  return rem;
162 }
163 
164 
165 //-----------------------------------------------------------------------------
166 
167 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
168 {
169  RYML_ASSERT(r[*i] == '\n');
170  size_t numnl_following = 0;
171  ++(*i);
172  for( ; *i < r.len; ++(*i))
173  {
174  if(r.str[*i] == '\n')
175  ++numnl_following;
176  // skip leading whitespace
177  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
178  ;
179  else
180  break;
181  }
182  return numnl_following;
183 }
184 
185 /** @p i is set to the first non whitespace character after the line
186  * @return the number of empty lines after the initial position */
187 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
188 {
189  RYML_ASSERT(r[*i] == '\n');
190  size_t numnl_following = 0;
191  ++(*i);
192  if(indentation == 0)
193  {
194  for( ; *i < r.len; ++(*i))
195  {
196  if(r.str[*i] == '\n')
197  ++numnl_following;
198  // skip leading whitespace
199  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
200  ;
201  else
202  break;
203  }
204  }
205  else
206  {
207  for( ; *i < r.len; ++(*i))
208  {
209  if(r.str[*i] == '\n')
210  {
211  ++numnl_following;
212  // skip the indentation after the newline
213  size_t stop = *i + indentation;
214  for( ; *i < r.len; ++(*i))
215  {
216  if(r.str[*i] != ' ' && r.str[*i] != '\r')
217  break;
218  RYML_ASSERT(*i < stop);
219  }
220  C4_UNUSED(stop);
221  }
222  // skip leading whitespace
223  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
224  ;
225  else
226  break;
227  }
228  }
229  return numnl_following;
230 }
231 
232 } // anon namespace
233 
234 
235 //-----------------------------------------------------------------------------
236 //-----------------------------------------------------------------------------
237 //-----------------------------------------------------------------------------
238 
// NOTE(review): the declarator line (listing line 240) is absent from this
// extract; from the body this is presumably the destructor. It calls
// _free() and then _clr().
239 template<class EventHandler>
241 {
242  _free();
243  _clr();
244 }
245 
// NOTE(review): the declarator line (listing line 247) is absent from this
// extract; presumably the constructor taking `opts` and `evt_handler`.
// It stores the options and the handler, leaves every other member
// default/zero initialized, and checks that a handler was supplied.
246 template<class EventHandler>
248  : m_options(opts)
249  , m_file()
250  , m_buf()
251  , m_evt_handler(evt_handler)
252  , m_pending_anchors()
253  , m_pending_tags()
254  , m_newline_offsets()
255  , m_newline_offsets_size(0)
256  , m_newline_offsets_capacity(0)
257  , m_newline_offsets_buf()
258 {
259  RYML_CHECK(evt_handler);
260 }
261 
// NOTE(review): the declarator line (listing line 263) is absent from this
// extract; presumably the move constructor. Every member of `that`,
// including the m_newline_offsets pointer, is copied member-wise, and
// `that` is then reset with _clr() so that it no longer refers to the
// array.
262 template<class EventHandler>
264  : m_options(that.m_options)
265  , m_file(that.m_file)
266  , m_buf(that.m_buf)
267  , m_evt_handler(that.m_evt_handler)
268  , m_pending_anchors(that.m_pending_anchors)
269  , m_pending_tags(that.m_pending_tags)
270  , m_newline_offsets(that.m_newline_offsets)
271  , m_newline_offsets_size(that.m_newline_offsets_size)
272  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
273  , m_newline_offsets_buf(that.m_newline_offsets_buf)
274 {
275  that._clr();
276 }
277 
// NOTE(review): the declarator line (listing line 279) is absent from this
// extract; presumably the copy constructor. The plain members are copied,
// while the newline-offsets array is re-allocated to the same capacity and
// its used entries are copied over.
// NOTE(review): m_newline_offsets_buf is left default-initialized here even
// though the offsets themselves are copied; the copy-assignment operator
// does copy it. Confirm whether the difference is intended.
278 template<class EventHandler>
280  : m_options(that.m_options)
281  , m_file(that.m_file)
282  , m_buf(that.m_buf)
283  , m_evt_handler(that.m_evt_handler)
284  , m_pending_anchors(that.m_pending_anchors)
285  , m_pending_tags(that.m_pending_tags)
286  , m_newline_offsets()
287  , m_newline_offsets_size()
288  , m_newline_offsets_capacity()
289  , m_newline_offsets_buf()
290 {
291  if(that.m_newline_offsets_capacity)
292  {
293  _resize_locations(that.m_newline_offsets_capacity);
294  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
295  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
296  m_newline_offsets_size = that.m_newline_offsets_size;
297  }
298 }
299 
// NOTE(review): the declarator line (listing line 301) is absent from this
// extract; presumably the move-assignment operator. The current offsets
// array is released, every member of `that` is copied member-wise
// (including the m_newline_offsets pointer), and `that` is reset.
// NOTE(review): there is no self-assignment guard; assigning an object to
// itself would free its array and then clear all of its members.
300 template<class EventHandler>
302 {
303  _free();
304  m_options = (that.m_options);
305  m_file = (that.m_file);
306  m_buf = (that.m_buf);
307  m_evt_handler = that.m_evt_handler;
308  m_pending_anchors = that.m_pending_anchors;
309  m_pending_tags = that.m_pending_tags;
310  m_newline_offsets = (that.m_newline_offsets);
311  m_newline_offsets_size = (that.m_newline_offsets_size);
312  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
313  m_newline_offsets_buf = (that.m_newline_offsets_buf);
314  that._clr();
315  return *this;
316 }
317 
// NOTE(review): the declarator line (listing line 319) is absent from this
// extract; presumably the copy-assignment operator. The current offsets
// array is released, the plain members are copied, the array is grown when
// `that` has a larger capacity, and the used entries are copied over.
// NOTE(review): there is no self-assignment guard; with `this == &that`
// the array is freed before it is read as the memcpy source.
318 template<class EventHandler>
320 {
321  _free();
322  m_options = (that.m_options);
323  m_file = (that.m_file);
324  m_buf = (that.m_buf);
325  m_evt_handler = that.m_evt_handler;
326  m_pending_anchors = that.m_pending_anchors;
327  m_pending_tags = that.m_pending_tags;
328  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
329  _resize_locations(that.m_newline_offsets_capacity);
// the checks below use the handler that was just copied from `that`
330  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
331  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
332  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
333  m_newline_offsets_size = that.m_newline_offsets_size;
334  m_newline_offsets_buf = that.m_newline_offsets_buf;
335  return *this;
336 }
337 
// NOTE(review): the declarator line (listing line 339) is absent from this
// extract; presumably this is _clr(), which the other special members call.
// It value-initializes every member and does not free the offsets array.
338 template<class EventHandler>
340 {
341  m_options = {};
342  m_file = {};
343  m_buf = {};
344  m_evt_handler = {};
345  m_pending_anchors = {};
346  m_pending_tags = {};
347  m_newline_offsets = {};
348  m_newline_offsets_size = {};
349  m_newline_offsets_capacity = {};
350  m_newline_offsets_buf = {};
351 }
352 
353 template<class EventHandler>
354 void ParseEngine<EventHandler>::_free()
355 {
356  if(m_newline_offsets)
357  {
358  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
359  m_newline_offsets = nullptr;
360  m_newline_offsets_size = 0u;
361  m_newline_offsets_capacity = 0u;
362  m_newline_offsets_buf = 0u;
363  }
364 }
365 
366 
367 //-----------------------------------------------------------------------------
368 
369 template<class EventHandler>
370 void ParseEngine<EventHandler>::_reset()
371 {
372  m_pending_anchors = {};
373  m_pending_tags = {};
374  if(m_options.locations())
375  {
376  _prepare_locations();
377  }
378  m_was_inside_qmrk = false;
379 }
380 
381 
382 //-----------------------------------------------------------------------------
383 
384 template<class EventHandler>
385 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
386 {
387  #define _ryml_relocate(s) \
388  if(s.is_sub(prev_arena)) \
389  { \
390  s.str = next_arena.str + (s.str - prev_arena.str); \
391  }
392  _ryml_relocate(m_buf);
393  _ryml_relocate(m_newline_offsets_buf);
394  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
395  _ryml_relocate(m_pending_tags.annotations[i].str);
396  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
397  _ryml_relocate(m_pending_anchors.annotations[i].str);
398  #undef _ryml_relocate
399 }
400 
401 template<class EventHandler>
402 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
403 {
404  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
405 }
406 
407 
408 //-----------------------------------------------------------------------------
409 
410 template<class EventHandler>
411 template<class DumpFn>
412 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
413 {
414  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
415  auto const& lc = st->line_contents;
416  csubstr contents = lc.stripped;
417  if(contents.len)
418  {
419  // print the yaml src line
420  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
421  if(m_file.len)
422  {
423  detail::_dump(dumpfn, "{}:", m_file);
424  offs += m_file.len + 1;
425  }
426  detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col);
427  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
428  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
429  detail::_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
430  // highlight the remaining portion of the previous line
431  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
432  size_t lastcol = firstcol + lc.rem.len;
433  for(size_t i = 0; i < offs + firstcol; ++i)
434  dumpfn(" ");
435  dumpfn("^");
436  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
437  dumpfn("~");
438  detail::_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
439  }
440  else
441  {
442  dumpfn("\n");
443  }
444 
445 #ifdef RYML_DBG
446  // next line: print the state flags
447  {
448  char flagbuf_[128];
449  detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
450  }
451 #endif
452 }
453 
454 
455 //-----------------------------------------------------------------------------
456 
457 template<class EventHandler>
458 template<class ...Args>
459 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
460 {
461  char errmsg[RYML_ERRMSG_SIZE];
462  detail::_SubstrWriter writer(errmsg);
463  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
464  detail::_dump(dumpfn, fmt, args...);
465  writer.append('\n');
466  _fmt_msg(dumpfn);
467  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
468  m_evt_handler->cancel_parse();
469  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
470 }
471 
472 
473 //-----------------------------------------------------------------------------
474 #ifdef RYML_DBG
475 template<class EventHandler>
476 template<class ...Args>
477 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
478 {
479  if(_dbg_enabled())
480  {
481  auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
482  detail::_dump(dumpfn, fmt, args...);
483  dumpfn("\n");
484  _fmt_msg(dumpfn);
485  }
486 }
487 #endif
488 
489 
490 //-----------------------------------------------------------------------------
491 template<class EventHandler>
492 bool ParseEngine<EventHandler>::_finished_file() const
493 {
494  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
495  if(ret)
496  {
497  _c4dbgp("finished file!!!");
498  }
499  return ret;
500 }
501 
502 template<class EventHandler>
503 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
504 {
505  return m_evt_handler->m_curr->line_contents.rem.empty();
506 }
507 
508 
509 //-----------------------------------------------------------------------------
510 
511 template<class EventHandler>
512 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
513 {
514  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
515  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
516  {
517  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
518  if(pos == npos)
519  pos = rem.len; // maybe the line is just all whitespace
520  _c4dbgpf("skip {} whitespace characters", pos);
521  _line_progressed(pos);
522  }
523 }
524 
525 template<class EventHandler>
526 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
527 {
528  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
529  if(rem.len && rem.str[0] == c)
530  {
531  size_t pos = rem.first_not_of(c);
532  if(pos == npos)
533  pos = rem.len; // maybe the line is just all c
534  _c4dbgpf("skip {}x'{}'", pos, c);
535  _line_progressed(pos);
536  }
537 }
538 
539 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
540 template<class EventHandler>
541 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
542 {
543  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
544  if(rem.len && rem.str[0] == c)
545  {
546  size_t pos = rem.first_not_of(c);
547  if(pos == npos)
548  pos = rem.len; // maybe the line is just all c
549  if(pos > max_to_skip)
550  pos = max_to_skip;
551  _c4dbgpf("skip {}x'{}'", pos, c);
552  _line_progressed(pos);
553  }
554 }
555 #endif
556 
557 template<class EventHandler>
558 template<size_t N>
559 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
560 {
561  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
562  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
563  if(pos == npos)
564  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
565  _c4dbgpf("skip {} characters", pos);
566  _line_progressed(pos);
567 }
568 
569 template<class EventHandler>
570 void ParseEngine<EventHandler>::_skip_comment()
571 {
572  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
573  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
574  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575  csubstr full = m_evt_handler->m_curr->line_contents.full;
576  // raise an error if the comment is not preceded by whitespace
577  if(!full.begins_with('#'))
578  {
579  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
580  const char c = full[(size_t)(rem.str - full.str - 1)];
581  if(C4_UNLIKELY(c != ' ' && c != '\t'))
582  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
583  }
584  else
585  {
586  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
587  }
588  _c4dbgpf("comment was '{}'", rem);
589  _line_progressed(rem.len);
590 }
591 
592 template<class EventHandler>
593 void ParseEngine<EventHandler>::_maybe_skip_comment()
594 {
595  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
596  if(s.begins_with('#'))
597  {
598  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
599  _skip_comment();
600  }
601 }
602 
603 template<class EventHandler>
604 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
605 {
606  if(m_evt_handler->m_curr->line_contents.rem.len)
607  {
608  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
609  {
610  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
611  if(pos == npos)
612  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
613  _c4dbgpf("skip {}x'{}'", pos, ' ');
614  _line_progressed(pos);
615  }
616  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
617  {
618  _c4dbgp("found ':' colon next");
619  _line_progressed(1);
620  return true;
621  }
622  }
623  return false;
624 }
625 
626 template<class EventHandler>
627 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
628 {
629  if(m_evt_handler->m_curr->line_contents.rem.len)
630  {
631  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
632  {
633  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
634  if(pos == npos)
635  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
636  _c4dbgpf("skip {}x'{}'", pos, ' ');
637  _line_progressed(pos);
638  }
639  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
640  {
641  _c4dbgp("found ',' comma next");
642  _line_progressed(1);
643  return true;
644  }
645  }
646  return false;
647 }
648 
649 
650 //-----------------------------------------------------------------------------
651 
652 template<class EventHandler>
653 csubstr ParseEngine<EventHandler>::_scan_anchor()
654 {
655  csubstr s = m_evt_handler->m_curr->line_contents.rem;
656  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
657  csubstr anchor = s.range(1, s.first_of(' '));
658  _line_progressed(1u + anchor.len);
659  _maybe_skipchars(' ');
660  return anchor;
661 }
662 
663 template<class EventHandler>
664 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
665 {
666  csubstr s = m_evt_handler->m_curr->line_contents.rem;
667  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
668  csubstr ref = s.first(s.first_of(",] :"));
669  _line_progressed(ref.len);
670  return ref;
671 }
672 
673 template<class EventHandler>
674 csubstr ParseEngine<EventHandler>::_scan_ref_map()
675 {
676  csubstr s = m_evt_handler->m_curr->line_contents.rem;
677  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
678  csubstr ref = s.first(s.first_of(",} "));
679  _line_progressed(ref.len);
680  return ref;
681 }
682 
683 template<class EventHandler>
684 csubstr ParseEngine<EventHandler>::_scan_tag()
685 {
686  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
687  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
688  csubstr t;
689  if(rem.begins_with("!!"))
690  {
691  _c4dbgp("begins with '!!'");
692  if(has_any(FLOW))
693  t = rem.left_of(rem.first_of(" ,"));
694  else
695  t = rem.left_of(rem.first_of(' '));
696  }
697  else if(rem.begins_with("!<"))
698  {
699  _c4dbgp("begins with '!<'");
700  t = rem.left_of(rem.first_of('>'), true);
701  }
702  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
703  else if(rem.begins_with("!h!"))
704  {
705  _c4dbgp("begins with '!h!'");
706  t = rem.left_of(rem.first_of(' '));
707  }
708  #endif
709  else
710  {
711  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
712  _c4dbgp("begins with '!'");
713  if(has_any(FLOW))
714  t = rem.left_of(rem.first_of(" ,"));
715  else
716  t = rem.left_of(rem.first_of(' '));
717  }
718  _line_progressed(t.len);
719  _maybe_skip_whitespace_tokens();
720  return t;
721 }
722 
723 
724 //-----------------------------------------------------------------------------
725 
726 template<class EventHandler>
727 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
728 {
729  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
730 
731  // it's not a scalar if it starts with any of these characters:
732  switch(s.str[0])
733  {
734  // these are all legal tokens which mean no scalar is starting:
735  case '[':
736  case ']':
737  case '{':
738  case '}':
739  case '!':
740  case '&':
741  case '*':
742  case '|':
743  case '>':
744  case '#':
745  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
746  return false;
747  // '-' and ':' are illegal at the beginning if not followed by a scalar character
748  case '-':
749  case ':':
750  if(s.len > 1)
751  {
752  switch(s.str[1])
753  {
754  case '\n':
755  case '\r':
756  case '{':
757  case '[':
758  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
759  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
760  break;
761  case ' ':
762  case '}':
763  case ']':
764  if(s.str[0] == ':')
765  {
766  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
767  return false;
768  }
769  break;
770  default:
771  break;
772  }
773  }
774  else
775  {
776  return false;
777  }
778  break;
779  case '?':
780  if(s.len > 1)
781  {
782  switch(s.str[1])
783  {
784  case ' ':
785  case '\n':
786  case '\r':
787  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
788  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
789  return false;
790  case '{':
791  case '}':
792  case '[':
793  case ']':
794  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
795  break;
796  default:
797  break;
798  }
799  }
800  else
801  {
802  return false;
803  }
804  break;
805  // everything else is a legal starting character
806  default:
807  break;
808  }
809 
810  return true;
811 }
812 
/** Scan a plain (unquoted) scalar while reading a value inside a flow
 * sequence; the required state flags are asserted on entry.
 *
 * Scanning starts at the current position and may continue over several
 * lines. It ends at a ',' or ']', at a '#' that starts the line remainder
 * or follows a space (or a tab when tab tokens are enabled), at a ':'
 * followed by a space or ',' (or a tab when tab tokens are enabled), or
 * at the end of the file. A '[', '{' or '}' inside the scalar is an error.
 *
 * @param sc receives the scanned text and whether it needs filtering
 * @return true when a scalar was scanned; false when no scalar starts
 * here (empty remainder, invalid start token, or a terminator / map
 * start found right at the beginning) */
813 template<class EventHandler>
814 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
815 {
816  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
817  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
818  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
819  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
820  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
821 
822  substr s = m_evt_handler->m_curr->line_contents.rem;
823  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
824  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
825 
826  if(!s.len)
827  return false;
828 
829  if(!_is_valid_start_scalar_plain_flow(s))
830  return false;
831 
832  _c4dbgp("scanning seqflow scalar...");
833 
// the scalar is later cut out of m_buf between this offset and the
// offset reached when scanning stops
834  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
835  bool needs_filter = false;
// one iteration per source line
836  while(true)
837  {
838  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
839  for(size_t i = 0; i < s.len; ++i)
840  {
841  const char c = s.str[i];
842  switch(c)
843  {
844  case ',':
845  _c4dbgpf("found terminating character at {}: '{}'", i, c);
846  _line_progressed(i);
// NOTE(review): assuming _line_progressed(i) advances pos.offset by i,
// i is counted twice in this comparison; the ':' branch below compares
// before progressing. Confirm this is intended.
847  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
848  {
849  goto ended_scalar;
850  }
851  else
852  {
853  _c4dbgp("at the beginning. no scalar here.");
854  return false;
855  }
856  break;
857  case ']':
858  _c4dbgpf("found terminating character at {}: '{}'", i, c);
859  _line_progressed(i);
860  goto ended_scalar;
861  break;
862  case '#':
863  _c4dbgp("found suspicious '#'");
864  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
865  {
866  _c4dbgpf("found terminating character at {}: '{}'", i, c);
867  _line_progressed(i);
868  goto ended_scalar;
869  }
870  break;
871  case ':':
872  _c4dbgp("found suspicious ':'");
873  if(s.len > i+1)
874  {
875  const char next = s.str[i+1];
876  _c4dbgpf("next char is '{}'", _c4prc(next));
877  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
878  {
879  _c4dbgp("map starting!");
880  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
881  {
882  _c4dbgp("scalar finished!");
883  _line_progressed(i);
884  goto ended_scalar;
885  }
886  else
887  {
888  _c4dbgp("at the beginning. no scalar here.");
889  return false;
890  }
891  }
892  else
893  {
894  _c4dbgp("it's a scalar indeed.");
895  ++i; // skip the next char
896  }
897  }
898  else if(s.len == i+1)
899  {
900  _c4dbgp("':' at line end. map starting!");
901  return false;
902  }
903  break;
904  case '[':
905  case '{':
906  case '}':
907  _line_progressed(i);
908  _c4err("invalid character: '{}'", c); // noreturn
909  default:
910  ;
911  }
912  }
// no terminator on this line: consume it and continue on the next one
913  _line_progressed(s.len);
914  if(!_finished_file())
915  {
916  _c4dbgp("next line!");
917  _line_ended();
918  _scan_line();
919  }
920  else
921  {
922  _c4dbgp("file finished!");
923  goto ended_scalar;
924  }
925  s = m_evt_handler->m_curr->line_contents.rem;
// set as soon as the scalar continues onto another line
926  needs_filter = true;
927  }
928 
929 ended_scalar:
930 
// trailing spaces (and tabs, when tab tokens are enabled) are trimmed
931  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
932  sc->needs_filter = needs_filter;
933 
934  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
935 
936  return true;
937 }
938 
939 template<class EventHandler>
940 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
941 {
942  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
943  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
944  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
945  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
946  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
947 
948  substr s = m_evt_handler->m_curr->line_contents.rem;
949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
950 
951  if(!s.len)
952  return false;
953 
954  if(!_is_valid_start_scalar_plain_flow(s))
955  return false;
956 
957  _c4dbgp("scanning scalar...");
958 
959  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
960  bool needs_filter = false;
961  while(true)
962  {
963  for(size_t i = 0; i < s.len; ++i)
964  {
965  const char c = s.str[i];
966  switch(c)
967  {
968  case ',':
969  case '}':
970  _line_progressed(i);
971  _c4dbgpf("found terminating character: '{}'", c);
972  goto ended_scalar;
973  case ':':
974  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
975  {
976  _line_progressed(i);
977  _c4dbgpf("found terminating character: '{}'", c);
978  goto ended_scalar;
979  }
980  break;
981  case '{':
982  case '[':
983  _line_progressed(i);
984  _c4err("invalid character: '{}'", c); // noreturn
985  break;
986  case ']':
987  _line_progressed(i);
988  if(has_any(RSEQIMAP))
989  goto ended_scalar;
990  else
991  _c4err("invalid character: '{}'", c); // noreturn
992  break;
993  case '#':
994  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
995  {
996  _line_progressed(i);
997  _c4dbgpf("found terminating character: '{}'", c);
998  goto ended_scalar;
999  }
1000  break;
1001  default:
1002  ;
1003  }
1004  }
1005  _c4dbgp("next line!");
1006  _line_progressed(s.len);
1007  if(!_finished_file())
1008  {
1009  _c4dbgp("next line!");
1010  _line_ended();
1011  _scan_line();
1012  }
1013  else
1014  {
1015  _c4dbgp("file finished!");
1016  goto ended_scalar;
1017  }
1018  s = m_evt_handler->m_curr->line_contents.rem;
1019  needs_filter = true;
1020  }
1021 
1022 ended_scalar:
1023 
1024  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1025  sc->needs_filter = needs_filter;
1026 
1027  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1028 
1029  return true;
1030 }
1031 
/** Scan an unquoted JSON scalar while reading a flow sequence; the
 * required state flags are asserted on entry.
 *
 * The literals recognized by _is_special_json_scalar() are taken as they
 * are. Anything else is read up to the first ',', ']', space or tab, or
 * a '#' at the very start, or the end of the line. Unlike the plain
 * scalar scanners, this one never continues onto another line.
 *
 * @param sc receives the scanned text; needs_filter is always false
 * @return true when a non-empty scalar was scanned, false otherwise */
1032 template<class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1034 {
1035  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1036  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1037  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1038  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1039 
1040  substr s = m_evt_handler->m_curr->line_contents.rem;
1041  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1042 
1043  if(!s.len)
1044  return false;
1045 
1046  _c4dbgp("scanning scalar...");
1047 
// these tokens cannot start a scalar
1048  switch(s.str[0])
1049  {
1050  case ']':
1051  case '{':
1052  case ',':
1053  _c4dbgp("not a scalar.");
1054  return false;
1055  }
1056 
1057  {
1058  const size_t len = _is_special_json_scalar(s);
1059  if(len)
1060  {
1061  sc->scalar = s.first(len);
1062  sc->needs_filter = false;
1063  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1064  _line_progressed(len);
1065  return true;
1066  }
1067  }
1068 
1069  // must be a number
1070  size_t i = 0;
1071  for( ; i < s.len; ++i)
1072  {
1073  const char c = s.str[i];
1074  switch(c)
1075  {
1076  case ',':
1077  case ']':
1078  case ' ':
1079  case '\t':
1080  _c4dbgpf("found terminating character: '{}'", c);
1081  goto ended_scalar;
1082  case '#':
// a space at i-1 would already have ended the scan above, so in
// practice only the i == 0 part of this test can hold
1083  if(!i || s.str[i-1] == ' ')
1084  {
1085  _c4dbgpf("found terminating character: '{}'", c);
1086  goto ended_scalar;
1087  }
1088  break;
1089  default:
1090  ;
1091  }
1092  }
1093 
1094 ended_scalar:
1095 
// i is the number of characters scanned; zero means there is no scalar
1096  if(C4_LIKELY(i > 0))
1097  {
1098  _line_progressed(i);
1099  sc->scalar = s.first(i);
1100  sc->needs_filter = false;
1101  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1102  return true;
1103  }
1104 
1105  return false;
1106 }
1107 
1108 template<class EventHandler>
1109 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1110 {
1111  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1112  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1113  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1114  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1115  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1116 
1117  substr s = m_evt_handler->m_curr->line_contents.rem;
1118  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1119 
1120  if(!s.len)
1121  return false;
1122 
1123  _c4dbgp("scanning scalar...");
1124 
1125  {
1126  const size_t len = _is_special_json_scalar(s);
1127  if(len)
1128  {
1129  sc->scalar = s.first(len);
1130  sc->needs_filter = false;
1131  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1132  _line_progressed(len);
1133  return true;
1134  }
1135  }
1136 
1137  // must be a number
1138  size_t i = 0;
1139  for( ; i < s.len; ++i)
1140  {
1141  const char c = s.str[i];
1142  switch(c)
1143  {
1144  case ',':
1145  case '}':
1146  case ' ':
1147  case '\t':
1148  _c4dbgpf("found terminating character: '{}'", c);
1149  goto ended_scalar;
1150  case '#':
1151  if(!i || s.str[i-1] == ' ')
1152  {
1153  _c4dbgpf("found terminating character: '{}'", c);
1154  goto ended_scalar;
1155  }
1156  break;
1157  default:
1158  ;
1159  }
1160  }
1161 
1162 ended_scalar:
1163 
1164  if(C4_LIKELY(i > 0))
1165  {
1166  _line_progressed(i);
1167  sc->scalar = s.first(i);
1168  sc->needs_filter = false;
1169  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1170  return true;
1171  }
1172 
1173  return false;
1174 }
1175 
1176 template<class EventHandler>
1177 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1178 {
1179  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1180  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1181 }
1182 
1183 template<class EventHandler>
1184 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1185 {
1186  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1187  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1188 }
1189 
1190 template<class EventHandler>
1191 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1192 {
1193  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1194  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1195  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
1196 
1197  substr s = m_evt_handler->m_curr->line_contents.rem;
1198  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1199 
1200  if(!s.len)
1201  return false;
1202 
1203  switch(s.str[0])
1204  {
1205  case '-':
1206  if(_is_blck_token(s))
1207  {
1208  return false;
1209  }
1210  else if(_is_doc_begin(s))
1211  {
1212  _c4dbgp("token is doc start");
1213  return false;
1214  }
1215  break;
1216  case ':':
1217  case '?':
1218  if(_is_blck_token(s))
1219  return false;
1220  break;
1221  case '[':
1222  case '{':
1223  case '&':
1224  case '*':
1225  case '!':
1226  _RYML_WITH_TAB_TOKENS(case '\t':)
1227  return false;
1228  case '.':
1229  if(_is_doc_end(s))
1230  {
1231  _c4dbgp("token is doc end");
1232  return false;
1233  }
1234  break;
1235  }
1236 
1237  _c4dbgpf("plain scalar! indentation={}", indentation);
1238 
1239  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1240  const size_t start_line = m_evt_handler->m_curr->pos.line;
1241 
1242  bool needs_filter = false;
1243  while(true)
1244  {
1245  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1246  for(size_t i = 0; i < s.len; ++i)
1247  {
1248  const char curr = s.str[i];
1249  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1250  switch(curr)
1251  {
1252  case ':':
1253  _c4dbgpf("[{}]: got suspicious ':'", i);
1254  // are there more characters?
1255  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1256  {
1257  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1258  _line_progressed(i);
1259  // ': ' is accepted only on the first line
1260  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1261  {
1262  _c4dbgp("start line. scalar ends here");
1263  goto ended_scalar;
1264  }
1265  else
1266  {
1267  _c4err("parse error");
1268  }
1269  }
1270  else
1271  {
1272  size_t j = i;
1273  while(j + 1 < s.len && s.str[j+1] == ':')
1274  {
1275  _c4dbgp("skip colon");
1276  ++j;
1277  }
1278  i = j > i ? j-1 : i;
1279  _c4dbgp("nothing to see here");
1280  }
1281  break;
1282  case '#':
1283  _c4dbgp("got suspicious '#'");
1284  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1285  {
1286  _c4dbgp("comment! scalar ends here");
1287  _line_progressed(i);
1288  goto ended_scalar;
1289  }
1290  else
1291  {
1292  _c4dbgp("nothing to see here");
1293  }
1294  break;
1295  }
1296  }
1297  _line_progressed(s.len);
1298  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1299  next_peeked = next_peeked.trimr("\n\r");
1300  const size_t next_indentation = next_peeked.first_not_of(' ');
1301  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1302  if(next_indentation < indentation)
1303  {
1304  _c4dbgp("smaller indentation! scalar ended");
1305  goto ended_scalar;
1306  }
1307  else if(next_indentation == 0 && next_peeked.len > 0)
1308  {
1309  const char first = next_peeked.str[0];
1310  switch(first)
1311  {
1312  case '-':
1313  next_peeked = next_peeked.trimr("\n\r");
1314  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1315  if(_is_doc_begin_token(next_peeked))
1316  {
1317  _c4dbgp("doc begin! scalar ended");
1318  goto ended_scalar;
1319  }
1320  break;
1321  case '.':
1322  next_peeked = next_peeked.trimr("\n\r");
1323  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1324  if(_is_doc_end_token(next_peeked))
1325  {
1326  _c4dbgp("doc end! scalar ended");
1327  goto ended_scalar;
1328  }
1329  break;
1330  }
1331  }
1332  // load with next line
1333  _c4dbgp("next line!");
1334  if(!_finished_file())
1335  {
1336  _c4dbgp("next line!");
1337  _line_ended();
1338  _scan_line();
1339  }
1340  else
1341  {
1342  _c4dbgp("file finished!");
1343  goto ended_scalar;
1344  }
1345  s = m_evt_handler->m_curr->line_contents.rem;
1346  needs_filter = true;
1347  }
1348 
1349 ended_scalar:
1350 
1351  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1352  sc->needs_filter = needs_filter;
1353 
1354  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1355 
1356  return true;
1357 }
1358 
1359 template<class EventHandler>
1360 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1361 {
1362  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1363  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1364  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1365  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1366  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1367  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1368  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1369 }
1370 
1371 template<class EventHandler>
1372 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1373 {
1374  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1375  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1376  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1377  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1378  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1379  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1380 }
1381 
1382 template<class EventHandler>
1383 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1384 {
1385  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1386  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1387 }
1388 
1389 
1390 //-----------------------------------------------------------------------------
1391 
1392 template<class EventHandler>
1393 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1394 {
1395  substr rem{}; // declare here because of the goto
1396  size_t nlpos{}; // declare here because of the goto
1397  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1398  if(pos >= m_buf.len)
1399  goto next_is_empty;
1400 
1401  // look for the next newline chars, and jump to the right of those
1402  rem = from_next_line(m_buf.sub(pos));
1403  if(rem.empty())
1404  goto next_is_empty;
1405 
1406  // now get everything up to and including the following newline chars
1407  nlpos = rem.first_of("\r\n");
1408  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1409  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1410  rem = rem.left_of(nlpos, /*include_pos*/true);
1411 
1412  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1413  return rem;
1414 
1415 next_is_empty:
1416  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1417  return {};
1418 }
1419 
1420 //-----------------------------------------------------------------------------
1421 
1422 template<class EventHandler>
1423 void ParseEngine<EventHandler>::_scan_line()
1424 {
1425  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1426  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1427  else
1428  m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
1429 }
1430 
1431 template<class EventHandler>
1432 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1433 {
1434  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1435  m_evt_handler->m_curr->pos.offset += ahead;
1436  m_evt_handler->m_curr->pos.col += ahead;
1437  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1438  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1439 }
1440 
1441 template<class EventHandler>
1442 void ParseEngine<EventHandler>::_line_ended()
1443 {
1444  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1445  m_evt_handler->m_curr->pos.line,
1446  m_evt_handler->m_curr->line_contents.full.len,
1447  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1448  m_evt_handler->m_curr->pos.col, 1);
1449  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1450  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1451  ++m_evt_handler->m_curr->pos.line;
1452  m_evt_handler->m_curr->pos.col = 1;
1453 }
1454 
1455 template<class EventHandler>
1456 void ParseEngine<EventHandler>::_line_ended_undo()
1457 {
1458  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1459  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1460  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1461  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1462  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1463  m_evt_handler->m_curr->pos.offset -= delta;
1464  --m_evt_handler->m_curr->pos.line;
1465  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1466  // don't forget to undo also the changes to the remainder of the line
1467  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1468  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1469 }
1470 
1471 
1472 //-----------------------------------------------------------------------------
1473 template<class EventHandler>
1474 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1475 {
1476  m_evt_handler->m_curr->indref = indentation;
1477  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1478 }
1479 
1480 template<class EventHandler>
1481 void ParseEngine<EventHandler>::_save_indentation()
1482 {
1483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1484  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1485  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1486 }
1487 
1488 
1489 //-----------------------------------------------------------------------------
1490 
1491 template<class EventHandler>
1492 void ParseEngine<EventHandler>::_end_map_blck()
1493 {
1494  _c4dbgp("mapblck: end");
1495  if(has_any(RKCL|RVAL))
1496  {
1497  _c4dbgp("mapblck: set missing val");
1498  _handle_annotations_before_blck_val_scalar();
1499  m_evt_handler->set_val_scalar_plain({});
1500  }
1501  else if(has_any(QMRK))
1502  {
1503  _c4dbgp("mapblck: set missing keyval");
1504  _handle_annotations_before_blck_key_scalar();
1505  m_evt_handler->set_key_scalar_plain({});
1506  _handle_annotations_before_blck_val_scalar();
1507  m_evt_handler->set_val_scalar_plain({});
1508  }
1509  m_evt_handler->end_map();
1510 }
1511 
1512 template<class EventHandler>
1513 void ParseEngine<EventHandler>::_end_seq_blck()
1514 {
1515  if(has_any(RVAL))
1516  {
1517  _c4dbgp("seqblck: set missing val");
1518  _handle_annotations_before_blck_val_scalar();
1519  m_evt_handler->set_val_scalar_plain({});
1520  }
1521  m_evt_handler->end_seq();
1522 }
1523 
1524 template<class EventHandler>
1525 void ParseEngine<EventHandler>::_end2_map()
1526 {
1527  _c4dbgp("map: end");
1528  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1529  if(has_any(BLCK))
1530  {
1531  _end_map_blck();
1532  }
1533  else
1534  {
1535  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1536  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1537  m_evt_handler->_pop();
1538  }
1539 }
1540 
1541 template<class EventHandler>
1542 void ParseEngine<EventHandler>::_end2_seq()
1543 {
1544  _c4dbgp("seq: end");
1545  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1546  if(has_any(BLCK))
1547  {
1548  _end_seq_blck();
1549  }
1550  else
1551  {
1552  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1553  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1554  m_evt_handler->_pop();
1555  }
1556 }
1557 
1558 template<class EventHandler>
1559 void ParseEngine<EventHandler>::_begin2_doc()
1560 {
1561  m_doc_empty = true;
1562  add_flags(RDOC);
1563  m_evt_handler->begin_doc();
1564  m_evt_handler->m_curr->indref = 0; // ?
1565 }
1566 
1567 template<class EventHandler>
1568 void ParseEngine<EventHandler>::_begin2_doc_expl()
1569 {
1570  m_doc_empty = true;
1571  add_flags(RDOC);
1572  m_evt_handler->begin_doc_expl();
1573  m_evt_handler->m_curr->indref = 0; // ?
1574 }
1575 
1576 template<class EventHandler>
1577 void ParseEngine<EventHandler>::_end2_doc()
1578 {
1579  _c4dbgp("doc: end");
1580  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1581  if(m_doc_empty)
1582  {
1583  _c4dbgp("doc was empty; add empty val");
1584  m_evt_handler->set_val_scalar_plain({});
1585  }
1586  m_evt_handler->end_doc();
1587 }
1588 
1589 template<class EventHandler>
1590 void ParseEngine<EventHandler>::_end2_doc_expl()
1591 {
1592  _c4dbgp("doc: end");
1593  if(m_doc_empty)
1594  {
1595  _c4dbgp("doc: no children; add empty val");
1596  m_evt_handler->set_val_scalar_plain({});
1597  }
1598  m_evt_handler->end_doc_expl();
1599 }
1600 
1601 template<class EventHandler>
1602 void ParseEngine<EventHandler>::_maybe_begin_doc()
1603 {
1604  if(has_none(RDOC))
1605  {
1606  _c4dbgp("doc must be started");
1607  _begin2_doc();
1608  }
1609 }
1610 template<class EventHandler>
1611 void ParseEngine<EventHandler>::_maybe_end_doc()
1612 {
1613  if(has_any(RDOC))
1614  {
1615  _c4dbgp("doc must be finished");
1616  _end2_doc();
1617  }
1618 }
1619 
1620 template<class EventHandler>
1621 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1622 {
1623  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1624  if(m_evt_handler->m_stack[0].flags & RDOC)
1625  {
1626  _c4dbgp("root is RDOC");
1627  if(m_evt_handler->m_curr->level != 0)
1628  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1629  }
1630  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1631  {
1632  _c4dbgp("root is STREAM");
1633  if(m_evt_handler->m_curr->level != 1)
1634  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1635  }
1636  else
1637  {
1638  _c4err("internal error");
1639  }
1640  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1641 }
1642 
1643 template<class EventHandler>
1644 void ParseEngine<EventHandler>::_end_doc_suddenly()
1645 {
1646  _c4dbgp("end doc suddenly");
1647  _end_doc_suddenly__pop();
1648  _end2_doc_expl();
1649  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1650 }
1651 
1652 template<class EventHandler>
1653 void ParseEngine<EventHandler>::_start_doc_suddenly()
1654 {
1655  _c4dbgp("start doc suddenly");
1656  _end_doc_suddenly__pop();
1657  _end2_doc();
1658  _begin2_doc_expl();
1659 }
1660 
1661 template<class EventHandler>
1662 void ParseEngine<EventHandler>::_end_stream()
1663 {
1664  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1665  if(has_all(RSEQ|FLOW))
1666  _c4err("missing terminating ]");
1667  else if(has_all(RMAP|FLOW))
1668  _c4err("missing terminating }");
1669  if(m_evt_handler->m_stack.size() > 1)
1670  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1671  if(has_all(RDOC))
1672  {
1673  _end2_doc();
1674  }
1675  else if(has_all(RTOP|RUNK))
1676  {
1677  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1678  {
1679  if(m_doc_empty)
1680  {
1681  m_evt_handler->begin_doc();
1682  _handle_annotations_before_blck_val_scalar();
1683  m_evt_handler->set_val_scalar_plain({});
1684  m_evt_handler->end_doc();
1685  }
1686  }
1687  }
1688  m_evt_handler->end_stream();
1689 }
1690 
1691 
1692 template<class EventHandler>
1693 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1694 {
1695  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1696  while(m_evt_handler->m_curr != popto)
1697  {
1698  if(has_any(RSEQ))
1699  {
1700  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1701  _end2_seq();
1702  }
1703  else if(has_any(RMAP))
1704  {
1705  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1706  _end2_map();
1707  }
1708  else
1709  {
1710  break;
1711  }
1712  }
1713  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1714 }
1715 
1716 template<class EventHandler>
1717 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1718 {
1719  // search the stack frame to jump to based on its indentation
1720  using state_type = typename EventHandler::state;
1721  state_type const* popto = nullptr;
1722  auto &stack = m_evt_handler->m_stack;
1723  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1724  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1725  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1726  #ifdef RYML_DBG
1727  if(_dbg_enabled())
1728  {
1729  char flagbuf_[128];
1730  for(state_type const& s : stack)
1731  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1732  }
1733  #endif
1734  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1735  {
1736  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1737  if(s->indref == ind)
1738  {
1739  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1740  popto = s;
1741  break;
1742  }
1743  }
1744  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1745  {
1746  _c4err("parse error: incorrect indentation?");
1747  }
1748  _handle_indentation_pop(popto);
1749 }
1750 
1751 template<class EventHandler>
1752 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1753 {
1754  // search the stack frame to jump to based on its indentation
1755  using state_type = typename EventHandler::state;
1756  auto &stack = m_evt_handler->m_stack;
1757  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1758  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1759  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1760  state_type const* popto = nullptr;
1761  #ifdef RYML_DBG
1762  char flagbuf_[128];
1763  if(_dbg_enabled())
1764  {
1765  for(state_type const& s : stack)
1766  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1767  }
1768  #endif
1769  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1770  {
1771  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1772  if(s->indref < ind)
1773  {
1774  break;
1775  }
1776  else if(s->indref == ind)
1777  {
1778  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
1779  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1780  {
1781  break;
1782  }
1783  popto = s;
1784  if(has_all(RSEQ|BLCK, s))
1785  {
1786  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1787  const size_t first = rem.first_not_of(' ');
1788  _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
1789  rem = rem.right_of(first, true);
1790  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1791  if(rem.begins_with('-') && _is_blck_token(rem))
1792  {
1793  _c4dbgp("parent was indentless seq");
1794  break;
1795  }
1796  }
1797  }
1798  }
1799  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1800  {
1801  _c4err("parse error: incorrect indentation?");
1802  }
1803  _handle_indentation_pop(popto);
1804 }
1805 
1806 
1807 //-----------------------------------------------------------------------------
1808 template<class EventHandler>
1809 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1810 {
1811  // quoted scalars can spread over multiple lines!
1812  // nice explanation here: http://yaml-multiline.info/
1813 
1814  // a span to the end of the file
1815  size_t b = m_evt_handler->m_curr->pos.offset;
1816  substr s = m_buf.sub(b);
1817  if(s.begins_with(' '))
1818  {
1819  s = s.triml(' ');
1820  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1821  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1822  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1823  }
1824  b = m_evt_handler->m_curr->pos.offset; // take this into account
1825  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1826 
1827  // skip the opening quote
1828  _line_progressed(1);
1829  s = s.sub(1);
1830 
1831  bool needs_filter = false;
1832 
1833  size_t numlines = 1; // we already have one line
1834  size_t pos = npos; // find the pos of the matching quote
1835  while( ! _finished_file())
1836  {
1837  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1838  bool line_is_blank = true;
1839  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1840  for(size_t i = 0; i < line.len; ++i)
1841  {
1842  const char curr = line.str[i];
1843  if(curr == '\'') // single quotes are escaped with two single quotes
1844  {
1845  const char next = i+1 < line.len ? line.str[i+1] : '~';
1846  if(next != '\'') // so just look for the first quote
1847  { // without another after it
1848  pos = i;
1849  break;
1850  }
1851  else
1852  {
1853  needs_filter = true; // needs filter to remove escaped quotes
1854  ++i; // skip the escaped quote
1855  }
1856  }
1857  else if(curr != ' ')
1858  {
1859  line_is_blank = false;
1860  }
1861  }
1862 
1863  // leading whitespace also needs filtering
1864  needs_filter = needs_filter
1865  || (numlines > 1)
1866  || line_is_blank
1867  || (_at_line_begin() && line.begins_with(' '));
1868 
1869  if(pos == npos)
1870  {
1871  _line_progressed(line.len);
1872  ++numlines;
1873  }
1874  else
1875  {
1876  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1877  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1878  _line_progressed(pos + 1); // progress beyond the quote
1879  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1880  break;
1881  }
1882 
1883  _line_ended();
1884  _scan_line();
1885  }
1886 
1887  if(pos == npos)
1888  {
1889  _c4err("reached end of file while looking for closing quote");
1890  }
1891  else
1892  {
1893  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1894  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1895  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
1896  s = s.sub(0, pos-1);
1897  }
1898 
1899  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1900 
1901  return ScannedScalar { s, needs_filter };
1902 }
1903 
1904 
1905 //-----------------------------------------------------------------------------
1906 template<class EventHandler>
1907 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1908 {
1909  // quoted scalars can spread over multiple lines!
1910  // nice explanation here: http://yaml-multiline.info/
1911 
1912  // a span to the end of the file
1913  size_t b = m_evt_handler->m_curr->pos.offset;
1914  substr s = m_buf.sub(b);
1915  if(s.begins_with(' '))
1916  {
1917  s = s.triml(' ');
1918  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1919  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1920  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1921  }
1922  b = m_evt_handler->m_curr->pos.offset; // take this into account
1923  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1924 
1925  // skip the opening quote
1926  _line_progressed(1);
1927  s = s.sub(1);
1928 
1929  bool needs_filter = false;
1930 
1931  size_t numlines = 1; // we already have one line
1932  size_t pos = npos; // find the pos of the matching quote
1933  while( ! _finished_file())
1934  {
1935  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1936  bool line_is_blank = true;
1937  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1938  for(size_t i = 0; i < line.len; ++i)
1939  {
1940  const char curr = line.str[i];
1941  if(curr != ' ')
1942  line_is_blank = false;
1943  // every \ is an escape
1944  if(curr == '\\')
1945  {
1946  const char next = i+1 < line.len ? line.str[i+1] : '~';
1947  needs_filter = true;
1948  if(next == '"' || next == '\\')
1949  ++i;
1950  }
1951  else if(curr == '"')
1952  {
1953  pos = i;
1954  break;
1955  }
1956  }
1957 
1958  // leading whitespace also needs filtering
1959  needs_filter = needs_filter
1960  || (numlines > 1)
1961  || line_is_blank
1962  || (_at_line_begin() && line.begins_with(' '));
1963 
1964  if(pos == npos)
1965  {
1966  _line_progressed(line.len);
1967  ++numlines;
1968  }
1969  else
1970  {
1971  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1972  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
1973  _line_progressed(pos + 1); // progress beyond the quote
1974  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1975  break;
1976  }
1977 
1978  _line_ended();
1979  _scan_line();
1980  }
1981 
1982  if(pos == npos)
1983  {
1984  _c4err("reached end of file looking for closing quote");
1985  }
1986  else
1987  {
1988  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
1990  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1991  s = s.sub(0, pos-1);
1992  }
1993 
1994  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
1995 
1996  return ScannedScalar { s, needs_filter };
1997 }
1998 
1999 
2000 //-----------------------------------------------------------------------------
2001 template<class EventHandler>
2002 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2003 {
2004  _c4dbgpf("blck: indref={}", indref);
2005  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
2006 
2007  // nice explanation here: http://yaml-multiline.info/
2008  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2009  csubstr trimmed = s.triml(' ');
2010  if(trimmed.str > s.str)
2011  {
2012  _c4dbgp("skipping whitespace");
2013  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2014  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2015  s = trimmed;
2016  }
2017  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2018 
2019  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2020 
2021  // parse the spec
2022  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2023  size_t indentation = npos; // have to find out if no spec is given
2024  csubstr digits;
2025  if(s.len > 1)
2026  {
2027  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2028  csubstr t = s.sub(1);
2029  _c4dbgpf("blck: spec is multichar: '{}'", t);
2030  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2031  size_t pos = t.first_of("-+");
2032  _c4dbgpf("blck: spec chomp char at {}", pos);
2033  if(pos != npos)
2034  {
2035  if(t[pos] == '-')
2036  chomp = CHOMP_STRIP;
2037  else if(t[pos] == '+')
2038  chomp = CHOMP_KEEP;
2039  if(pos == 0)
2040  t = t.sub(1);
2041  else
2042  t = t.first(pos);
2043  }
2044  // from here to the end, only digits are considered
2045  digits = t.left_of(t.first_not_of("0123456789"));
2046  if( ! digits.empty())
2047  {
2048  if(C4_UNLIKELY(digits.len > 1))
2049  _c4err("parse error: invalid indentation");
2050  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2051  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2052  _c4err("parse error: could not read indentation as decimal");
2053  if(C4_UNLIKELY( ! indentation))
2054  _c4err("parse error: null indentation");
2055  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2056  indentation += m_evt_handler->m_curr->indref;
2057  }
2058  }
2059 
2060  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2061 
2062  // finish the current line
2063  _line_progressed(s.len);
2064  _line_ended();
2065  _scan_line();
2066 
2067  // start with a zero-length block, already pointing at the right place
2068  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2069  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2070 
2071  // read every full line into a raw block,
2072  // from which newlines are to be stripped as needed.
2073  //
2074  // If no explicit indentation was given, pick it from the first
2075  // non-empty line. See
2076  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2077  size_t num_lines = 0;
2078  size_t first = m_evt_handler->m_curr->pos.line;
2079  size_t provisional_indentation = npos;
2080  LineContents lc;
2081  while(( ! _finished_file()))
2082  {
2083  // peek next line, but do not advance immediately
2084  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2085  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2086  // evaluate termination conditions
2087  if(indentation != npos)
2088  {
2089  _c4dbgpf("blck: indentation={}", indentation);
2090  // stop when the line is deindented and not empty
2091  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2092  {
2093  if(raw_block.len)
2094  {
2095  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2096  }
2097  else
2098  {
2099  _c4err("indentation decreased without any scalar");
2100  }
2101  break;
2102  }
2103  else if(indentation == 0)
2104  {
2105  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2106  if(_is_doc_token(lc.rem))
2107  {
2108  _c4dbgp("blck: stop. indentation=0 and doc ended");
2109  break;
2110  }
2111  }
2112  }
2113  else
2114  {
2115  const size_t fns = lc.stripped.first_not_of(' ');
2116  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2117  if(fns != npos) // non-empty line
2118  {
2120  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2121  _c4err("parse error");
2122  )
2123  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2124  if(provisional_indentation == npos)
2125  {
2126  if(lc.indentation < indref)
2127  {
2128  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2129  if(raw_block.len == 0)
2130  {
2131  _c4dbgp("blck: was empty, undo next line");
2132  _line_ended_undo();
2133  }
2134  break;
2135  }
2136  else if(lc.indentation == m_evt_handler->m_curr->indref)
2137  {
2138  if(has_any(RSEQ|RMAP))
2139  {
2140  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2141  break;
2142  }
2143  }
2144  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2145  indentation = lc.indentation;
2146  }
2147  else
2148  {
2149  if(lc.indentation >= provisional_indentation)
2150  {
2151  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2152  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2153  indentation = lc.indentation;
2154  }
2155  else
2156  {
2157  break;
2158  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2159  }
2160  }
2161  }
2162  else // empty line
2163  {
2164  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2165  if(provisional_indentation != npos)
2166  {
2167  if(lc.stripped.len >= provisional_indentation)
2168  {
2169  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2170  provisional_indentation = lc.stripped.len;
2171  }
2172  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2173  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2174  {
2175  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2176  provisional_indentation = lc.indentation;
2177  }
2178  #endif
2179  }
2180  else
2181  {
2182  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2183  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2184  if(provisional_indentation == npos)
2185  {
2186  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2187  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2188  }
2189  if(provisional_indentation < indref)
2190  {
2191  provisional_indentation = indref;
2192  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2193  }
2194  }
2195  }
2196  }
2197  // advance now that we know the folded scalar continues
2198  m_evt_handler->m_curr->line_contents = lc;
2199  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2200  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2201  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2202  _line_ended();
2203  ++num_lines;
2204  }
2205  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2206  C4_UNUSED(num_lines);
2207  C4_UNUSED(first);
2208 
2209  if(indentation == npos)
2210  {
2211  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2212  indentation = provisional_indentation;
2213  }
2214 
2215  if(num_lines)
2216  _line_ended_undo();
2217 
2218  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2219 
2220  sb->scalar = raw_block;
2221  sb->indentation = indentation;
2222  sb->chomp = chomp;
2223 }
2224 
2225 
2226 //-----------------------------------------------------------------------------
2227 //-----------------------------------------------------------------------------
2228 //-----------------------------------------------------------------------------
2229 
2230 // a debugging scaffold:
2231 #if 0
2232 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2233 #else
2234 #define _c4dbgfws(...)
2235 #endif
2236 
2237 template<class EventHandler>
2238 template<class FilterProcessor>
2239 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2240 {
2241  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2242  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2243 
2244  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2245  if(first_pos != npos)
2246  {
2247  const char first_char = proc.src[first_pos];
2248  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2249  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2250  {
2251  _c4dbgfws("whitespace is trailing on line", "");
2252  proc.skip(first_pos - proc.rpos);
2253  }
2254  else // a legit whitespace
2255  {
2256  proc.copy();
2257  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2258  }
2259  return true;
2260  }
2261  _c4dbgfws("whitespace is trailing on line", "");
2262  return false;
2263 }
2264 
2265 template<class EventHandler>
2266 template<class FilterProcessor>
2267 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2268 {
2269  if(!_filter_ws_handle_to_first_non_space(proc))
2270  {
2271  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2272  proc.copy(proc.src.len - proc.rpos);
2273  }
2274 }
2275 
2276 template<class EventHandler>
2277 template<class FilterProcessor>
2278 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2279 {
2280  if(!_filter_ws_handle_to_first_non_space(proc))
2281  {
2282  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2283  proc.skip(proc.src.len - proc.rpos);
2284  }
2285 }
2286 
2287 #undef _c4dbgfws
2288 
2289 
2290 //-----------------------------------------------------------------------------
2291 //-----------------------------------------------------------------------------
2292 //-----------------------------------------------------------------------------
2293 /* plain scalars */
2294 
2295 // a debugging scaffold:
2296 #if 0
2297 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2298 #else
2299 #define _c4dbgfps(fmt, ...)
2300 #endif
2301 
2302 template<class EventHandler>
2303 template<class FilterProcessor>
2304 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2305 {
2306  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2307 
2308  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2309  size_t ii = proc.rpos;
2310  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2311  if(numnl_following)
2312  {
2313  proc.set('\n', numnl_following);
2314  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2315  }
2316  else
2317  {
2318  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2319  if(ret != npos)
2320  {
2321  proc.set(' ');
2322  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2323  }
2324  else
2325  {
2326  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2327  ii = proc.src.len;
2328  }
2329  }
2330  proc.rpos = ii;
2331 }
2332 
2333 template<class EventHandler>
2334 template<class FilterProcessor>
2335 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2336 {
2337  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2338  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2339 
2340  while(proc.has_more_chars())
2341  {
2342  const char curr = proc.curr();
2343  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2344  switch(curr)
2345  {
2346  case ' ':
2347  _RYML_WITH_TAB_TOKENS(case '\t':)
2348  _c4dbgfps("whitespace", curr);
2349  _filter_ws_skip_trailing(proc);
2350  break;
2351  case '\n':
2352  _c4dbgfps("newline", curr);
2353  _filter_nl_plain(proc, /*indentation*/indentation);
2354  break;
2355  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2356  _c4dbgfps("carriage return, ignore", curr);
2357  proc.skip();
2358  break;
2359  default:
2360  proc.copy();
2361  break;
2362  }
2363  }
2364 
2365  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2366 
2367  return proc.result();
2368 }
2369 
2370 #undef _c4dbgfps
2371 
2372 
2373 template<class EventHandler>
2374 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2375 {
2376  FilterProcessorSrcDst proc(scalar, dst);
2377  return _filter_plain(proc, indentation);
2378 }
2379 
2380 template<class EventHandler>
2381 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2382 {
2383  FilterProcessorInplaceEndExtending proc(dst, cap);
2384  return _filter_plain(proc, indentation);
2385 }
2386 
2387 
2388 //-----------------------------------------------------------------------------
2389 //-----------------------------------------------------------------------------
2390 //-----------------------------------------------------------------------------
2391 /* single quoted */
2392 
2393 // a debugging scaffold:
2394 #if 0
2395 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2396 #else
2397 #define _c4dbgfsq(fmt, ...)
2398 #endif
2399 
2400 template<class EventHandler>
2401 template<class FilterProcessor>
2402 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2403 {
2404  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2405 
2406  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2407  size_t ii = proc.rpos;
2408  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2409  if(numnl_following)
2410  {
2411  proc.set('\n', numnl_following);
2412  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2413  }
2414  else
2415  {
2416  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2417  if(ret != npos)
2418  {
2419  proc.set(' ');
2420  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2421  }
2422  else
2423  {
2424  proc.set(' ');
2425  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2426  }
2427  }
2428  proc.rpos = ii;
2429 }
2430 
2431 template<class EventHandler>
2432 template<class FilterProcessor>
2433 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2434 {
2435  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2436 
2437  // from the YAML spec for double-quoted scalars:
2438  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2439  while(proc.has_more_chars())
2440  {
2441  const char curr = proc.curr();
2442  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2443  switch(curr)
2444  {
2445  case ' ':
2446  case '\t':
2447  _c4dbgfsq("whitespace", curr);
2448  _filter_ws_copy_trailing(proc);
2449  break;
2450  case '\n':
2451  _c4dbgfsq("newline", curr);
2452  _filter_nl_squoted(proc);
2453  break;
2454  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2455  _c4dbgfsq("skip cr", curr);
2456  proc.skip();
2457  break;
2458  case '\'':
2459  _c4dbgfsq("squote", curr);
2460  if(proc.next() == '\'')
2461  {
2462  _c4dbgfsq("two consecutive squotes", curr);
2463  proc.skip();
2464  proc.copy();
2465  }
2466  else
2467  {
2468  _c4err("filter error");
2469  }
2470  break;
2471  default:
2472  proc.copy();
2473  break;
2474  }
2475  }
2476 
2477  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2478 
2479  return proc.result();
2480 }
2481 
2482 #undef _c4dbgfsq
2483 
2484 template<class EventHandler>
2485 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2486 {
2487  FilterProcessorSrcDst proc(scalar, dst);
2488  return _filter_squoted(proc);
2489 }
2490 
2491 template<class EventHandler>
2493 {
2494  FilterProcessorInplaceEndExtending proc(dst, cap);
2495  return _filter_squoted(proc);
2496 }
2497 
2498 
2499 //-----------------------------------------------------------------------------
2500 //-----------------------------------------------------------------------------
2501 //-----------------------------------------------------------------------------
2502 /* double quoted */
2503 
2504 // a debugging scaffold:
2505 #if 0
2506 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2507 #else
2508 #define _c4dbgfdq(...)
2509 #endif
2510 
2511 template<class EventHandler>
2512 template<class FilterProcessor>
2513 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2514 {
2515  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2516 
2517  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2518  size_t ii = proc.rpos;
2519  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2520  if(numnl_following)
2521  {
2522  proc.set('\n', numnl_following);
2523  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2524  }
2525  else
2526  {
2527  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2528  if(ret != npos)
2529  {
2530  proc.set(' ');
2531  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2532  }
2533  else
2534  {
2535  proc.set(' ');
2536  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2537  }
2538  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2539  {
2540  _c4dbgfdq("backslash at [{}]", ii);
2541  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2542  if(next == ' ' || next == '\t')
2543  {
2544  _c4dbgfdq("extend skip to backslash", "");
2545  ++ii;
2546  }
2547  }
2548  }
2549  proc.rpos = ii;
2550 }
2551 
2552 template<class EventHandler>
2553 template<class FilterProcessor>
2554 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2555 {
2556  char next = proc.next();
2557  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2558  if(next == '\r')
2559  {
2560  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2561  {
2562  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2563  next = '\n';
2564  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2565  }
2566  }
2567 
2568  if(next == '\n')
2569  {
2570  size_t ii = proc.rpos + 2;
2571  for( ; ii < proc.src.len; ++ii)
2572  {
2573  // skip leading whitespace
2574  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2575  ;
2576  else
2577  break;
2578  }
2579  proc.skip(ii - proc.rpos);
2580  }
2581  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2582  {
2583  // escapes for json compatibility
2584  proc.translate_esc(next);
2585  _c4dbgfdq("here, used '{}'", _c4prc(next));
2586  }
2587  else if(next == '\r')
2588  {
2589  proc.skip();
2590  }
2591  else if(next == 'n')
2592  {
2593  proc.translate_esc('\n');
2594  }
2595  else if(next == 'r')
2596  {
2597  proc.translate_esc('\r');
2598  }
2599  else if(next == 't')
2600  {
2601  proc.translate_esc('\t');
2602  }
2603  else if(next == '\\')
2604  {
2605  proc.translate_esc('\\');
2606  }
2607  else if(next == 'x') // UTF8
2608  {
2609  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2610  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2611  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2612  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2613  uint8_t byteval = {};
2614  if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
2615  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2616  proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
2617  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2618  }
2619  else if(next == 'u') // UTF16
2620  {
2621  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2622  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2623  char readbuf[8];
2624  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2625  uint32_t codepoint_val = {};
2626  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2627  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2628  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2629  if(C4_UNLIKELY(numbytes == 0))
2630  _c4err("failed to decode code point={}", proc.rpos);
2631  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2632  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2633  }
2634  else if(next == 'U') // UTF32
2635  {
2636  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2637  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2638  char readbuf[8];
2639  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2640  uint32_t codepoint_val = {};
2641  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2642  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2643  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2644  if(C4_UNLIKELY(numbytes == 0))
2645  _c4err("failed to decode code point={}", proc.rpos);
2646  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2647  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2648  }
2649  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2650  else if(next == '0')
2651  {
2652  proc.translate_esc('\0');
2653  }
2654  else if(next == 'b') // backspace
2655  {
2656  proc.translate_esc('\b');
2657  }
2658  else if(next == 'f') // form feed
2659  {
2660  proc.translate_esc('\f');
2661  }
2662  else if(next == 'a') // bell character
2663  {
2664  proc.translate_esc('\a');
2665  }
2666  else if(next == 'v') // vertical tab
2667  {
2668  proc.translate_esc('\v');
2669  }
2670  else if(next == 'e') // escape character
2671  {
2672  proc.translate_esc('\x1b');
2673  }
2674  else if(next == '_') // unicode non breaking space \u00a0
2675  {
2676  // https://www.compart.com/en/unicode/U+00a0
2677  const char payload[] = {
2678  _RYML_CHCONST(-0x3e, 0xc2),
2679  _RYML_CHCONST(-0x60, 0xa0),
2680  };
2681  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2682  }
2683  else if(next == 'N') // unicode next line \u0085
2684  {
2685  // https://www.compart.com/en/unicode/U+0085
2686  const char payload[] = {
2687  _RYML_CHCONST(-0x3e, 0xc2),
2688  _RYML_CHCONST(-0x7b, 0x85),
2689  };
2690  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2691  }
2692  else if(next == 'L') // unicode line separator \u2028
2693  {
2694  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2695  const char payload[] = {
2696  _RYML_CHCONST(-0x1e, 0xe2),
2697  _RYML_CHCONST(-0x80, 0x80),
2698  _RYML_CHCONST(-0x58, 0xa8),
2699  };
2700  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2701  }
2702  else if(next == 'P') // unicode paragraph separator \u2029
2703  {
2704  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2705  const char payload[] = {
2706  _RYML_CHCONST(-0x1e, 0xe2),
2707  _RYML_CHCONST(-0x80, 0x80),
2708  _RYML_CHCONST(-0x57, 0xa9),
2709  };
2710  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2711  }
2712  else if(next == '\0')
2713  {
2714  proc.skip();
2715  }
2716  else
2717  {
2718  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2719  }
2720  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2721 }
2722 
2723 
2724 template<class EventHandler>
2725 template<class FilterProcessor>
2726 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2727 {
2728  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2729  // from the YAML spec for double-quoted scalars:
2730  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2731  while(proc.has_more_chars())
2732  {
2733  const char curr = proc.curr();
2734  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2735  switch(curr)
2736  {
2737  case ' ':
2738  case '\t':
2739  {
2740  _c4dbgfdq("whitespace", curr);
2741  _filter_ws_copy_trailing(proc);
2742  break;
2743  }
2744  case '\n':
2745  {
2746  _c4dbgfdq("newline", curr);
2747  _filter_nl_dquoted(proc);
2748  break;
2749  }
2750  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2751  {
2752  _c4dbgfdq("carriage return, ignore", curr);
2753  proc.skip();
2754  break;
2755  }
2756  case '\\':
2757  {
2758  _filter_dquoted_backslash(proc);
2759  break;
2760  }
2761  default:
2762  {
2763  proc.copy();
2764  break;
2765  }
2766  }
2767  }
2768  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2769  return proc.result();
2770 }
2771 
2772 #undef _c4dbgfdq
2773 
2774 
2775 template<class EventHandler>
2776 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2777 {
2778  FilterProcessorSrcDst proc(scalar, dst);
2779  return _filter_dquoted(proc);
2780 }
2781 
2782 template<class EventHandler>
2783 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2784 {
2785  FilterProcessorInplaceMidExtending proc(dst, cap);
2786  return _filter_dquoted(proc);
2787 }
2788 
2789 
2790 //-----------------------------------------------------------------------------
2791 //-----------------------------------------------------------------------------
2792 //-----------------------------------------------------------------------------
2793 // block filtering helpers
2794 
2795 template<class EventHandler>
2796 template<class FilterProcessor>
2797 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2798 {
2799  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2800  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2801 
2802  // a debugging scaffold:
2803  #if 0
2804  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2805  #else
2806  #define _c4dbgchomp(...)
2807  #endif
2808 
2809  // advance to the last line having spaces beyond the indentation
2810  {
2811  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2812  if(last != npos)
2813  {
2814  _c4dbgchomp("found newline and larger indentation. last={}", last);
2815  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2816  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2817  // remove indentation spaces, copy the rest
2818  while((proc.rpos < last) && proc.has_more_chars())
2819  {
2820  const char curr = proc.curr();
2821  _c4dbgchomp("curr='{}'", _c4prc(curr));
2822  switch(curr)
2823  {
2824  case '\n':
2825  {
2826  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2827  proc.copy();
2828  // are there spaces after the newline?
2829  csubstr at_next_line = proc.rem();
2830  if(at_next_line.begins_with(' '))
2831  {
2832  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2833  // there are spaces.
2834  size_t first_non_space = at_next_line.first_not_of(' ');
2835  _c4dbgchomp("first_non_space={}", first_non_space);
2836  if(first_non_space == npos)
2837  {
2838  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2839  first_non_space = at_next_line.len;
2840  }
2841  if(first_non_space <= indentation)
2842  {
2843  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2844  proc.skip(first_non_space);
2845  }
2846  else
2847  {
2848  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2849  proc.skip(indentation);
2850  // copy the spaces after the indentation
2851  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2852  proc.copy(first_non_space - indentation);
2853  }
2854  }
2855  break;
2856  }
2857  case '\r':
2858  proc.skip();
2859  break;
2860  default:
2861  _c4err("parse error");
2862  break;
2863  }
2864  }
2865  }
2866  }
2867 
2868  // from now on, we only have line ends (or indentation spaces)
2869  switch(chomp)
2870  {
2871  case CHOMP_CLIP:
2872  {
2873  bool had_one = false;
2874  while(proc.has_more_chars())
2875  {
2876  const char curr = proc.curr();
2877  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2878  switch(curr)
2879  {
2880  case '\n':
2881  {
2882  _c4dbgchomp("copy newline!", curr);
2883  proc.copy();
2884  proc.set_at_end();
2885  had_one = true;
2886  break;
2887  }
2888  case ' ':
2889  case '\r':
2890  _c4dbgchomp("skip!", curr);
2891  proc.skip();
2892  break;
2893  }
2894  }
2895  if(!had_one) // there were no newline characters. add one.
2896  {
2897  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2898  proc.set('\n');
2899  }
2900  break;
2901  }
2902  case CHOMP_KEEP:
2903  {
2904  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2905  while(proc.has_more_chars())
2906  {
2907  const char curr = proc.curr();
2908  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2909  switch(curr)
2910  {
2911  case '\n':
2912  _c4dbgchomp("copy newline!", curr);
2913  proc.copy();
2914  break;
2915  case ' ':
2916  case '\r':
2917  _c4dbgchomp("skip!", curr);
2918  proc.skip();
2919  break;
2920  }
2921  }
2922  break;
2923  }
2924  case CHOMP_STRIP:
2925  {
2926  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
2927  // nothing to do!
2928  break;
2929  }
2930  }
2931 
2932  #undef _c4dbgchomp
2933 }
2934 
2935 
2936 // a debugging scaffold:
// _c4dbgfb() is the trace macro used by the block-filter helpers that
// follow. As written (#if 0) it expands to nothing. Changing the
// condition to 1 forwards to _c4dbgpf(), prefixing each message with
// the read and write positions of a variable named `proc`, which must
// therefore be in scope at every use. The enabled form places a comma
// before __VA_ARGS__, which is presumably why message-only call sites
// still pass a dummy argument.
2937 #if 0
2938 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2939 #else
2940 #define _c4dbgfb(...)
2941 #endif
2942 
2943 template<class EventHandler>
2944 template<class FilterProcessor>
2945 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2946 {
2947  csubstr rem = proc.rem(); // remaining
2948  if(rem.len)
2949  {
2950  size_t first = rem.first_not_of(' ');
2951  if(first != npos)
2952  {
2953  _c4dbgfb("{} spaces follow before next nonws character", first);
2954  if(first < indentation)
2955  {
2956  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
2957  proc.skip(first);
2958  }
2959  else
2960  {
2961  _c4dbgfb("skip {} spaces from indentation", indentation);
2962  proc.skip(indentation);
2963  }
2964  }
2965  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2966  else
2967  {
2968  _c4dbgfb("all spaces to the end: {} spaces", first);
2969  first = rem.len;
2970  if(first)
2971  {
2972  if(first < indentation)
2973  {
2974  _c4dbgfb("skip everything", first);
2975  proc.skip(proc.src.len - proc.rpos);
2976  }
2977  else
2978  {
2979  _c4dbgfb("skip {} spaces from indentation", indentation);
2980  proc.skip(indentation);
2981  }
2982  }
2983  }
2984  #endif
2985  }
2986 }
2987 
2988 template<class EventHandler>
2989 template<class FilterProcessor>
2990 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
2991 {
2992  csubstr contents = proc.src.trimr(" \n\r");
2993  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
2994  if(!contents.len)
2995  {
2996  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
2997  if(chomp == CHOMP_KEEP && proc.src.len)
2998  {
2999  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3000  while(proc.has_more_chars())
3001  {
3002  const char curr = proc.curr();
3003  if(curr == '\n')
3004  proc.copy();
3005  else
3006  proc.skip();
3007  }
3008  if(!proc.wpos)
3009  {
3010  proc.set('\n');
3011  }
3012  }
3013  }
3014  return contents.len;
3015 }
3016 
3017 template<class EventHandler>
3018 template<class FilterProcessor>
3019 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3020 {
3021  _c4dbgfb("contents_len={}", contents_len);
3022 
3023  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3024 
3025  // extend contents to just before the first newline at the end,
3026  // in case it is preceded by spaces
3027  size_t firstnewl = proc.src.first_of('\n', contents_len);
3028  if(firstnewl != npos)
3029  {
3030  contents_len = firstnewl;
3031  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3032  }
3033  else
3034  {
3035  contents_len = proc.src.len;
3036  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3037  }
3038 
3039  return contents_len;
3040 }
3041 
3042 #undef _c4dbgfb
3043 
3044 
3045 //-----------------------------------------------------------------------------
3046 //-----------------------------------------------------------------------------
3047 //-----------------------------------------------------------------------------
3048 
3049 // a debugging scaffold:
// _c4dbgfbl() is the trace macro for the literal block filter. As
// written (#if 0) it expands to nothing. Changing the condition to 1
// forwards to _c4dbgpf(), prefixing each message with the read and
// write positions of a variable named `proc`, which must be in scope.
3050 #if 0
3051 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3052 #else
3053 #define _c4dbgfbl(...)
3054 #endif
3055 
3056 template<class EventHandler>
3057 template<class FilterProcessor>
3058 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3059 {
3060  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3061 
3062  size_t contents_len = _handle_all_whitespace(proc, chomp);
3063  if(!contents_len)
3064  return proc.result();
3065 
3066  contents_len = _extend_to_chomp(proc, contents_len);
3067 
3068  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3069 
3070  _filter_block_indentation(proc, indentation);
3071 
3072  // now filter the bulk
3073  while(proc.has_more_chars(/*maxpos*/contents_len))
3074  {
3075  const char curr = proc.curr();
3076  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3077  switch(curr)
3078  {
3079  case '\n':
3080  {
3081  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3082  proc.copy(); // copy the newline
3083  _filter_block_indentation(proc, indentation);
3084  break;
3085  }
3086  case '\r':
3087  proc.skip();
3088  break;
3089  default:
3090  proc.copy();
3091  break;
3092  }
3093  }
3094 
3095  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3096 
3097  _filter_chomp(proc, chomp, indentation);
3098 
3099  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3100 
3101  return proc.result();
3102 }
3103 
3104 #undef _c4dbgfbl
3105 
3106 template<class EventHandler>
3107 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3108 {
3109  FilterProcessorSrcDst proc(scalar, dst);
3110  return _filter_block_literal(proc, indentation, chomp);
3111 }
3112 
3113 template<class EventHandler>
3114 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3115 {
3116  FilterProcessorInplaceEndExtending proc(scalar, cap);
3117  return _filter_block_literal(proc, indentation, chomp);
3118 }
3119 
3120 
3121 //-----------------------------------------------------------------------------
3122 //-----------------------------------------------------------------------------
3123 //-----------------------------------------------------------------------------
3124 
3125 // a debugging scaffold:
// _c4dbgfbf() is the trace macro for the folded block filter. As
// written (#if 0) it expands to nothing. Changing the condition to 1
// forwards to _c4dbgpf(), prefixing each message with the read and
// write positions of a variable named `proc`, which must be in scope.
// The enabled form places a comma before __VA_ARGS__, which is
// presumably why message-only call sites pass a dummy argument.
3126 #if 0
3127 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3128 #else
3129 #define _c4dbgfbf(...)
3130 #endif
3131 
3132 
3133 template<class EventHandler>
3134 template<class FilterProcessor>
3135 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3136 {
3137  _filter_block_indentation(proc, indentation);
3138  while(proc.has_more_chars(len))
3139  {
3140  const char curr = proc.curr();
3141  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3142  switch(curr)
3143  {
3144  case '\n':
3145  _c4dbgfbf("newline.", curr);
3146  proc.copy();
3147  _filter_block_indentation(proc, indentation);
3148  break;
3149  case '\r':
3150  proc.skip();
3151  break;
3152  case ' ':
3153  case '\t':
3154  {
3155  size_t first = proc.rem().first_not_of(" \t");
3156  _c4dbgfbf("space. first={}", first);
3157  if(first == npos)
3158  first = proc.rem().len;
3159  _c4dbgfbf("... indentation increased to {}", first);
3160  _filter_block_folded_indented_block(proc, indentation, len, first);
3161  break;
3162  }
3163  default:
3164  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3165  return;
3166  }
3167  }
3168 }
3169 
// Apply the folding rule for the num_newl-th consecutive newline at the
// current read position:
//  - 1st: the newline is skipped and a space is written in its place;
//    the write position of that space is recorded.
//  - 2nd: the newline is skipped and the previously written space is
//    overwritten with '\n'.
//  - 3rd and later: the newline is copied.
// Returns the write position recorded for the first newline; it is
// `wpos_at_first_newl` unchanged unless num_newl == 1.
// NOTE(review): this lives in its own function only to work around a
// miscompilation described in _filter_block_folded_newlines(); do not
// inline or restructure it without re-testing that configuration.
3170 template<class EventHandler>
3171 template<class FilterProcessor>
3172 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3173 {
3174  switch(num_newl)
3175  {
3176  case 1u:
3177  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3178  wpos_at_first_newl = proc.wpos;
3179  proc.skip();
3180  proc.set(' ');
3181  break;
3182  case 2u:
3183  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3184  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3185  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3186  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3187  proc.skip();
3188  proc.set_at(wpos_at_first_newl, '\n');
3189  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3190  break;
3191  default:
3192  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3193  proc.copy();
3194  break;
3195  }
3196  return wpos_at_first_newl;
3197 }
3198 
// Process a run of newlines inside a folded block scalar, up to
// position `len`. Must be entered with the read position on a '\n'
// (asserted).
//  - '\n': folded through _filter_block_folded_newlines_compress(),
//    then the indentation of the following line is removed.
//  - ' ' or '\t': hands over to _filter_block_folded_indented_block().
//    Before that, if any newline was seen, the space written for the
//    first one is turned back into '\n'; if more than one was seen, one
//    more '\n' is written. The newline count is then reset.
//  - '\r': dropped.
//  - anything else: returns.
3199 template<class EventHandler>
3200 template<class FilterProcessor>
3201 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3202 {
3203  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
3204  size_t num_newl = 0;
3205  size_t wpos_at_first_newl = npos;
3206  while(proc.has_more_chars(len))
3207  {
3208  const char curr = proc.curr();
3209  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3210  switch(curr)
3211  {
3212  case '\n':
3213  {
3214  _c4dbgfbf("newline. sofar={}", num_newl);
3215  // NOTE: vs2022-32bit-release builds were giving wrong
3216  // results in this block, if it was written either as
3217  // a switch(num_newl) or as its equivalent if-form.
3218  //
3219  // For this reason, we're using a dedicated function
3220  // (**_compress), which seems to work around the issue.
3221  //
3222  // The manifested problem was that somewhere between the
3223  // assignment to curr and this point, proc.wpos (the
3224  // write-position of the processor) jumped to npos, which
3225  // made the write wrap-around! To make things worse,
3226  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3227  // problem go away!
3228  //
3229  // The only way to make the problem appear with prints
3230  // enabled was by disabling all prints in this function
3231  // (including in the block which was moved to the compress
3232  // function) and then selectively enabling only some of
3233  // those prints.
3234  //
3235  // This may be due to some bug in the cl-x86 optimizer; or
3236  // it may be triggered by some UB which may be
3237  // inadvertently present in this function or in the filter
3238  // processor. This is despite our best efforts to weed out
3239  // any such UB problem: neither clang-tidy nor any of the
3240  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3241  // in this code.
3242  //
3243  // In the end, moving this block to a separate function
3244  // was the only way to bury the problem. But it may
3245  // resurface again, as The Undead, rising from the
3246  // grave to haunt us with his terrible presence.
3247  //
3248  // We may have to revisit this. With a stake, and lots of
3249  // garlic.
3250  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3251  _filter_block_indentation(proc, indentation);
3252  break;
3253  }
3254  case ' ':
3255  case '\t':
3256  {
3257  size_t first = proc.rem().first_not_of(" \t");
3258  _c4dbgfbf("space. first={}", first);
3259  if(first == npos)
3260  first = proc.rem().len;
3261  _c4dbgfbf("... indentation increased to {}", first);
3262  if(num_newl)
3263  {
3264  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3265  proc.set_at(wpos_at_first_newl, '\n');
3266  }
3267  if(num_newl > 1u)
3268  {
3269  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3270  proc.set('\n');
3271  }
3272  _filter_block_folded_indented_block(proc, indentation, len, first);
3273  num_newl = 0;
3274  wpos_at_first_newl = npos;
3275  break;
3276  }
3277  case '\r':
3278  proc.skip();
3279  break;
3280  default:
3281  _c4dbgfbf("not space, not newline. stop.", 0);
3282  return;
3283  }
3284  }
3285 }
3286 
3287 
3288 template<class EventHandler>
3289 template<class FilterProcessor>
3290 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3291 {
3292  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3293  if(curr_indentation)
3294  proc.copy(curr_indentation);
3295  while(proc.has_more_chars(len))
3296  {
3297  const char curr = proc.curr();
3298  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3299  switch(curr)
3300  {
3301  case '\n':
3302  {
3303  proc.copy();
3304  _filter_block_indentation(proc, indentation);
3305  csubstr rem = proc.rem();
3306  const size_t first = rem.first_not_of(' ');
3307  _c4dbgfbf("newline. firstns={}", first);
3308  if(first == 0)
3309  {
3310  const char c = rem[first];
3311  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3312  if(c == '\n' || c == '\r')
3313  {
3314  ;
3315  }
3316  else
3317  {
3318  _c4dbgfbf("done with indented block", first);
3319  goto endloop;
3320  }
3321  }
3322  else if(first != npos)
3323  {
3324  proc.copy(first);
3325  _c4dbgfbf("copy all {} spaces", first);
3326  }
3327  break;
3328  }
3329  break;
3330  case '\r':
3331  proc.skip();
3332  break;
3333  default:
3334  proc.copy();
3335  break;
3336  }
3337  }
3338  endloop:
3339  return;
3340 }
3341 
3342 
3343 template<class EventHandler>
3344 template<class FilterProcessor>
3345 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3346 {
3347  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3348 
3349  size_t contents_len = _handle_all_whitespace(proc, chomp);
3350  if(!contents_len)
3351  return proc.result();
3352 
3353  contents_len = _extend_to_chomp(proc, contents_len);
3354 
3355  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3356 
3357  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3358 
3359  // now filter the bulk
3360  while(proc.has_more_chars(/*maxpos*/contents_len))
3361  {
3362  const char curr = proc.curr();
3363  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3364  switch(curr)
3365  {
3366  case '\n':
3367  {
3368  _c4dbgfbf("found newline", curr);
3369  _filter_block_folded_newlines(proc, indentation, contents_len);
3370  break;
3371  }
3372  case '\r':
3373  proc.skip();
3374  break;
3375  default:
3376  proc.copy();
3377  break;
3378  }
3379  }
3380 
3381  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3382 
3383  _filter_chomp(proc, chomp, indentation);
3384 
3385  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3386 
3387  return proc.result();
3388 }
3389 
3390 #undef _c4dbgfbf
3391 
3392 template<class EventHandler>
3393 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3394 {
3395  FilterProcessorSrcDst proc(scalar, dst);
3396  return _filter_block_folded(proc, indentation, chomp);
3397 }
3398 
3399 template<class EventHandler>
3400 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3401 {
3402  FilterProcessorInplaceEndExtending proc(scalar, cap);
3403  return _filter_block_folded(proc, indentation, chomp);
3404 }
3405 
3406 
3407 //-----------------------------------------------------------------------------
3408 //-----------------------------------------------------------------------------
3409 //-----------------------------------------------------------------------------
3410 
3411 template<class EventHandler>
3412 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3413 {
3414  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3415  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3416  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3417  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3418  return r.get();
3419 }
3420 
3421 //-----------------------------------------------------------------------------
3422 
3423 template<class EventHandler>
3424 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3425 {
3426  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3427  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3428  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3429  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3430  return r.get();
3431 }
3432 
3433 
3434 //-----------------------------------------------------------------------------
3435 
3436 template<class EventHandler>
3437 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3438 {
3439  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3440  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3441  if(C4_LIKELY(r.valid()))
3442  {
3443  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3444  return r.get();
3445  }
3446  else
3447  {
3448  const size_t len = r.required_len();
3449  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3450  substr dst = m_evt_handler->alloc_arena(len, &s);
3451  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3452  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3453  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3454  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3455  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3456  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3457  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3458  return rsd.get();
3459  }
3460 }
3461 
3462 
3463 //-----------------------------------------------------------------------------
3464 template<class EventHandler>
3465 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3466 {
3467  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3468  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3469  if(C4_LIKELY(r.valid()))
3470  {
3471  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3472  return r.get();
3473  }
3474  else
3475  {
3476  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3477  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3478  FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3479  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3480  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3481  return rsd.get();
3482  }
3483 }
3484 
3485 
3486 //-----------------------------------------------------------------------------
3487 template<class EventHandler>
3488 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3489 {
3490  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3491  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3492  if(C4_LIKELY(r.valid()))
3493  {
3494  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3495  return r.get();
3496  }
3497  else
3498  {
3499  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3500  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3501  FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3502  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3503  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3504  return rsd.get();
3505  }
3506 }
3507 
3508 
3509 //-----------------------------------------------------------------------------
3510 
3511 template<class EventHandler>
3512 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3513 {
3514  csubstr maybe_filtered = sc.scalar;
3515  if(sc.needs_filter)
3516  {
3517  if(m_options.scalar_filtering())
3518  {
3519  maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3520  }
3521  else
3522  {
3523  _c4dbgp("plain scalar left unfiltered");
3524  m_evt_handler->mark_key_scalar_unfiltered();
3525  }
3526  }
3527  else
3528  {
3529  _c4dbgp("plain scalar doesn't need filtering");
3530  }
3531  return maybe_filtered;
3532 }
3533 
3534 template<class EventHandler>
3535 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3536 {
3537  csubstr maybe_filtered = sc.scalar;
3538  if(sc.needs_filter)
3539  {
3540  if(m_options.scalar_filtering())
3541  {
3542  maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3543  }
3544  else
3545  {
3546  _c4dbgp("plain scalar left unfiltered");
3547  m_evt_handler->mark_val_scalar_unfiltered();
3548  }
3549  }
3550  else
3551  {
3552  _c4dbgp("plain scalar doesn't need filtering");
3553  }
3554  return maybe_filtered;
3555 }
3556 
3557 
3558 //-----------------------------------------------------------------------------
3559 
3560 template<class EventHandler>
3561 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3562 {
3563  csubstr maybe_filtered = sc.scalar;
3564  if(sc.needs_filter)
3565  {
3566  if(m_options.scalar_filtering())
3567  {
3568  maybe_filtered = _filter_scalar_squot(sc.scalar);
3569  }
3570  else
3571  {
3572  _c4dbgp("squo key scalar left unfiltered");
3573  m_evt_handler->mark_key_scalar_unfiltered();
3574  }
3575  }
3576  else
3577  {
3578  _c4dbgp("squo key scalar doesn't need filtering");
3579  }
3580  return maybe_filtered;
3581 }
3582 
3583 template<class EventHandler>
3584 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3585 {
3586  csubstr maybe_filtered = sc.scalar;
3587  if(sc.needs_filter)
3588  {
3589  if(m_options.scalar_filtering())
3590  {
3591  maybe_filtered = _filter_scalar_squot(sc.scalar);
3592  }
3593  else
3594  {
3595  _c4dbgp("squo val scalar left unfiltered");
3596  m_evt_handler->mark_val_scalar_unfiltered();
3597  }
3598  }
3599  else
3600  {
3601  _c4dbgp("squo val scalar doesn't need filtering");
3602  }
3603  return maybe_filtered;
3604 }
3605 
3606 
3607 //-----------------------------------------------------------------------------
3608 
3609 template<class EventHandler>
3610 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3611 {
3612  csubstr maybe_filtered = sc.scalar;
3613  if(sc.needs_filter)
3614  {
3615  if(m_options.scalar_filtering())
3616  {
3617  maybe_filtered = _filter_scalar_dquot(sc.scalar);
3618  }
3619  else
3620  {
3621  _c4dbgp("dquo scalar left unfiltered");
3622  m_evt_handler->mark_key_scalar_unfiltered();
3623  }
3624  }
3625  else
3626  {
3627  _c4dbgp("dquo scalar doesn't need filtering");
3628  }
3629  return maybe_filtered;
3630 }
3631 
3632 template<class EventHandler>
3633 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3634 {
3635  csubstr maybe_filtered = sc.scalar;
3636  if(sc.needs_filter)
3637  {
3638  if(m_options.scalar_filtering())
3639  {
3640  maybe_filtered = _filter_scalar_dquot(sc.scalar);
3641  }
3642  else
3643  {
3644  _c4dbgp("dquo scalar left unfiltered");
3645  m_evt_handler->mark_val_scalar_unfiltered();
3646  }
3647  }
3648  else
3649  {
3650  _c4dbgp("dquo scalar doesn't need filtering");
3651  }
3652  return maybe_filtered;
3653 }
3654 
3655 
3656 //-----------------------------------------------------------------------------
3657 
3658 template<class EventHandler>
3659 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3660 {
3661  csubstr maybe_filtered = sb.scalar;
3662  if(m_options.scalar_filtering())
3663  {
3664  maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3665  }
3666  else
3667  {
3668  _c4dbgp("literal scalar left unfiltered");
3669  m_evt_handler->mark_key_scalar_unfiltered();
3670  }
3671  return maybe_filtered;
3672 }
3673 
3674 template<class EventHandler>
3675 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3676 {
3677  csubstr maybe_filtered = sb.scalar;
3678  if(m_options.scalar_filtering())
3679  {
3680  maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3681  }
3682  else
3683  {
3684  _c4dbgp("literal scalar left unfiltered");
3685  m_evt_handler->mark_val_scalar_unfiltered();
3686  }
3687  return maybe_filtered;
3688 }
3689 
3690 
3691 //-----------------------------------------------------------------------------
3692 
3693 template<class EventHandler>
3694 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3695 {
3696  csubstr maybe_filtered = sb.scalar;
3697  if(m_options.scalar_filtering())
3698  {
3699  maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3700  }
3701  else
3702  {
3703  _c4dbgp("folded scalar left unfiltered");
3704  m_evt_handler->mark_key_scalar_unfiltered();
3705  }
3706  return maybe_filtered;
3707 }
3708 
3709 template<class EventHandler>
3710 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3711 {
3712  csubstr maybe_filtered = sb.scalar;
3713  if(m_options.scalar_filtering())
3714  {
3715  maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3716  }
3717  else
3718  {
3719  _c4dbgp("folded scalar left unfiltered");
3720  m_evt_handler->mark_val_scalar_unfiltered();
3721  }
3722  return maybe_filtered;
3723 }
3724 
3725 
3726 //-----------------------------------------------------------------------------
3727 //-----------------------------------------------------------------------------
3728 //-----------------------------------------------------------------------------
3729 
3730 #ifdef RYML_DBG // !!! <----------------------------------
3731 
3732 template<class EventHandler>
3733 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3734 {
3735  char buf1_[64], buf2_[64], buf3_[64];
3736  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3737  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3738  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3739  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3740  s->flags |= on;
3741 }
3742 
3743 template<class EventHandler>
3744 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3745 {
3746  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3747  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3748  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3749  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3750  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3751  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3752  s->flags |= on;
3753  s->flags &= ~off;
3754 }
3755 
3756 template<class EventHandler>
3757 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3758 {
3759  char buf1_[64], buf2_[64], buf3_[64];
3760  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3761  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3762  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3763  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3764  s->flags &= ~off;
3765 }
3766 
3767 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3768 {
3769  size_t pos = 0;
3770  bool gotone = false;
3771 
3772  #define _prflag(fl) \
3773  if((flags & fl) == (fl)) \
3774  { \
3775  if(gotone) \
3776  { \
3777  if(pos + 1 < buf.len) \
3778  buf[pos] = '|'; \
3779  ++pos; \
3780  } \
3781  csubstr fltxt = #fl; \
3782  if(pos + fltxt.len <= buf.len) \
3783  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3784  pos += fltxt.len; \
3785  gotone = true; \
3786  }
3787 
3788  _prflag(RTOP);
3789  _prflag(RUNK);
3790  _prflag(RMAP);
3791  _prflag(RSEQ);
3792  _prflag(FLOW);
3793  _prflag(BLCK);
3794  _prflag(QMRK);
3795  _prflag(RKEY);
3796  _prflag(RVAL);
3797  _prflag(RKCL);
3798  _prflag(RNXT);
3799  _prflag(SSCL);
3800  _prflag(QSCL);
3801  _prflag(RSET);
3802  _prflag(RDOC);
3803  _prflag(NDOC);
3804  _prflag(USTY);
3805  _prflag(RSEQIMAP);
3806 
3807  #undef _prflag
3808 
3809  if(pos == 0)
3810  if(buf.len > 0)
3811  buf[pos++] = '0';
3812 
3813  RYML_CHECK(pos <= buf.len);
3814 
3815  return buf.first(pos);
3816 }
3817 
3818 #endif // RYML_DBG !!! <----------------------------------
3819 
3820 
3821 //-----------------------------------------------------------------------------
3822 //-----------------------------------------------------------------------------
3823 //-----------------------------------------------------------------------------
3824 
// NOTE(review): the declarator line between the template header and
// the opening brace is absent from this listing (the embedded numbering
// jumps from 3825 to 3827). Restore it from the original source before
// building.
// The body returns the part of the parse buffer (m_buf) that starts at
// loc.offset, after asserting that the offset lies inside the buffer.
3825 template<class EventHandler>
3827 {
3828  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3829  return m_buf.sub(loc.offset);
3830 }
3831 
// NOTE(review): the declarator line between the template header and
// the opening brace is absent from this listing (the embedded numbering
// jumps from 3832 to 3834). Restore it from the original source before
// building.
// The body asserts that `node` is readable and forwards to the
// location() overload taking the node's tree and id.
3832 template<class EventHandler>
3834 {
3835  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3836  return location(*node.tree(), node.id());
3837 }
3838 
// NOTE(review): the declarator line between the template header and
// the opening brace is absent from this listing (the embedded numbering
// jumps from 3839 to 3841). Restore it from the original source before
// building.
// The body asks _location_from_node() for the location of `node` in
// `tree`, starting at level 0; when that fails it falls back to the
// location of the start of the parse buffer.
3839 template<class EventHandler>
3841 {
3842  // try hard to avoid getting the location from a null string.
3843  Location loc;
3844  if(_location_from_node(tree, node, &loc, 0))
3845  return loc;
3846  return val_location(m_buf.str);
3847 }
3848 
3849 template<class EventHandler>
3850 bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
3851 {
3852  if(tree.has_key(node))
3853  {
3854  csubstr k = tree.key(node);
3855  if(C4_LIKELY(k.str != nullptr))
3856  {
3857  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3858  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3859  *loc = val_location(k.str);
3860  return true;
3861  }
3862  }
3863 
3864  if(tree.has_val(node))
3865  {
3866  csubstr v = tree.val(node);
3867  if(C4_LIKELY(v.str != nullptr))
3868  {
3869  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3870  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3871  *loc = val_location(v.str);
3872  return true;
3873  }
3874  }
3875 
3876  if(tree.is_container(node))
3877  {
3878  if(_location_from_cont(tree, node, loc))
3879  return true;
3880  }
3881 
3882  if(tree.type(node) != NOTYPE && level == 0)
3883  {
3884  // try the prev sibling
3885  {
3886  const id_type prev = tree.prev_sibling(node);
3887  if(prev != NONE)
3888  {
3889  if(_location_from_node(tree, prev, loc, level+1))
3890  return true;
3891  }
3892  }
3893  // try the next sibling
3894  {
3895  const id_type next = tree.next_sibling(node);
3896  if(next != NONE)
3897  {
3898  if(_location_from_node(tree, next, loc, level+1))
3899  return true;
3900  }
3901  }
3902  // try the parent
3903  {
3904  const id_type parent = tree.parent(node);
3905  if(parent != NONE)
3906  {
3907  if(_location_from_node(tree, parent, loc, level+1))
3908  return true;
3909  }
3910  }
3911  }
3912 
3913  return false;
3914 }
3915 
3916 template<class EventHandler>
3917 bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
3918 {
3919  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3920  if(!tree.is_stream(node))
3921  {
3922  const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
3923  if(tree.has_children(node))
3924  {
3925  id_type child = tree.first_child(node);
3926  if(tree.has_key(child))
3927  {
3928  // when a map starts, the container was set after the key
3929  csubstr k = tree.key(child);
3930  if(k.str && node_start > k.str)
3931  node_start = k.str;
3932  }
3933  }
3934  *loc = val_location(node_start);
3935  return true;
3936  }
3937  else // it's a stream
3938  {
3939  *loc = val_location(m_buf.str); // just return the front of the buffer
3940  }
3941  return true;
3942 }
3943 
3944 
// Translates a pointer `val` into the parsed buffer into a Location
// (name, offset, line, col), using the table of newline offsets
// (m_newline_offsets / m_newline_offsets_size).
//
// A null `val` yields {m_file, 0, 0, 0}. Otherwise the parser must have been
// created with locations enabled (checked), and `val` must lie within
// [m_buf.begin(), m_buf.end()] (checked). The line is the index of the table
// entry selected by the search below; the column is the distance from the
// previous table entry (or from the buffer start for the first line).
//
// NOTE(review): the declarator line (listing line 3946) between the template
// header and the opening brace is absent from this listing; restore it from
// the original source before compiling.
3945 template<class EventHandler>
3947 {
      // a null pointer cannot be located: report only the file name
3948  if(C4_UNLIKELY(val == nullptr))
3949  return {m_file, 0, 0, 0};
3950  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3951  // NOTE: if any of these checks fails, the parser needs to be
3952  // instantiated with locations enabled.
3953  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3954  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3955  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3956  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3957  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3958  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3959  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3960  csubstr src = m_buf;
      // NOTE(review): val was already found non-null on entry, so the first
      // check below cannot fail and the null alternative of the second one is
      // never taken at this point.
3961  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3962  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3963  // ok. search the first stored newline after the given ptr
3964  using lineptr_type = size_t const* C4_RESTRICT;
3965  lineptr_type lineptr = nullptr;
3966  size_t offset = (size_t)(val - src.begin());
3967  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3968  {
3969  // just do a linear search if the size is small.
      // NOTE(review): this picks the first entry strictly greater than
      // offset, whereas the bisection below picks the first entry that is
      // not less than offset; the two branches disagree when offset equals a
      // stored entry. If no entry is greater than offset (the range check
      // above admits val == src.end()), lineptr stays nullptr and is then
      // dereferenced / subtracted further down -- confirm and fix upstream.
3970  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3971  {
3972  if(*curr > offset)
3973  {
3974  lineptr = curr;
3975  break;
3976  }
3977  }
3978  }
3979  else
3980  {
3981  // do a bisection search if the size is not small.
3982  //
3983  // We could use std::lower_bound but this is simple enough and
3984  // spares the costly include of <algorithm>.
3985  size_t count = m_newline_offsets_size;
3986  size_t step;
3987  lineptr_type it;
3988  lineptr = m_newline_offsets;
3989  while(count)
3990  {
3991  step = count >> 1;
3992  it = lineptr + step;
3993  if(*it < offset)
3994  {
3995  lineptr = ++it;
3996  count -= step + 1;
3997  }
3998  else
3999  {
4000  count = step;
4001  }
4002  }
4003  }
4004  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4005  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4006  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4007  Location loc;
4008  loc.name = m_file;
4009  loc.offset = offset;
      // the line is the index of the selected table entry
4010  loc.line = (size_t)(lineptr - m_newline_offsets);
      // the column is counted from the character after the previous table
      // entry; on the first line it is simply the offset
4011  if(lineptr > m_newline_offsets)
4012  loc.col = (offset - *(lineptr-1) - 1u);
4013  else
4014  loc.col = offset;
4015  return loc;
4016 }
4017 
// Rebuilds the newline-offset table for the current buffer: records m_buf as
// the buffer the table refers to, makes room for one entry per '\n' plus one,
// then stores the offset of every '\n' followed by m_buf.len as the final
// entry. On exit m_newline_offsets_size equals the number of stored entries.
// NOTE(review): the declarator line (listing line 4019) between the template
// header and the opening brace is absent from this listing, so the function
// name is not visible here (presumably _prepare_locations -- confirm against
// the original source) and must be restored before compiling.
4018 template<class EventHandler>
4020 {
4021  m_newline_offsets_buf = m_buf;
      // one slot per newline, plus one for the end-of-buffer entry
4022  size_t numnewlines = 1u + m_buf.count('\n');
4023  _resize_locations(numnewlines);
4024  m_newline_offsets_size = 0;
4025  for(size_t i = 0; i < m_buf.len; i++)
4026  if(m_buf[i] == '\n')
4027  m_newline_offsets[m_newline_offsets_size++] = i;
      // final entry: the buffer length
4028  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4029  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4030 }
4031 
4032 template<class EventHandler>
4033 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4034 {
4035  if(numnewlines > m_newline_offsets_capacity)
4036  {
4037  if(m_newline_offsets)
4038  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4039  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4040  m_newline_offsets_capacity = numnewlines;
4041  }
4042 }
4043 
4044 template<class EventHandler>
4045 bool ParseEngine<EventHandler>::_locations_dirty() const
4046 {
4047  return !m_newline_offsets_size;
4048 }
4049 
4050 
4051 //-----------------------------------------------------------------------------
4052 //-----------------------------------------------------------------------------
4053 //-----------------------------------------------------------------------------
4054 
4055 template<class EventHandler>
4056 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4057 {
4058  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4059  {
4060  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4061  if(rem.str[0] == ' ' || rem.str[0] == '\t')
4062  {
4063  _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0]));
4064  _skipchars(" \t");
4065  rem = m_evt_handler->m_curr->line_contents.rem;
4066  }
4067  // comments
4068  if(rem.begins_with('#'))
4069  {
4070  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4071  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4072  }
4073  }
4074 }
4075 
4076 
4077 //-----------------------------------------------------------------------------
4078 
4079 
4080 template<class EventHandler>
4081 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4082 {
4083  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4084  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4085  _c4err("too many annotations");
4086  dst->annotations[dst->num_entries].str = str;
4087  dst->annotations[dst->num_entries].indentation = indentation;
4088  dst->annotations[dst->num_entries].line = line;
4089  ++dst->num_entries;
4090 }
4091 
4092 template<class EventHandler>
4093 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4094 {
4095  dst->num_entries = 0;
4096 }
4097 
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
/** When exactly one anchor and/or exactly one tag is pending, skip
 * leading spaces on the current line up to the largest of: the current
 * reference indentation and the indentation of each pending annotation.
 *
 * @return true when annotations were pending and the skip was attempted,
 *         false when neither list holds exactly one entry */
template<class EventHandler>
bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
{
    if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
    {
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
        // Each list holds at most one entry here, so only entry 0 can be
        // valid, and only when that list is not empty. (The tag assertion
        // previously read entry 1, which is never populated on this path.)
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_anchors.num_entries || m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_tags.num_entries || m_pending_tags.annotations[0].line < m_evt_handler->m_curr->pos.line);
        size_t to_skip = m_evt_handler->m_curr->indref;
        if(m_pending_anchors.num_entries)
            to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
        if(m_pending_tags.num_entries)
            to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
        _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
        _maybe_skipchars_up_to(' ', to_skip);
        return true;
    }
    return false;
}
#endif
4119 
4120 template<class EventHandler>
4121 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4122 {
4123  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4124 }
4125 
4126 template<class EventHandler>
4127 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4128 {
4129  if(!tag.begins_with("!<"))
4130  {
4131  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4132  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4133  }
4134  else
4135  {
4136  if(C4_UNLIKELY(!tag.ends_with('>')))
4137  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
4138  }
4139 }
4140 
4141 template<class EventHandler>
4142 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4143 {
4144  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4145  if(m_pending_tags.num_entries)
4146  {
4147  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4148  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4149  {
4150  _check_tag(m_pending_tags.annotations[0].str);
4151  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4152  _clear_annotations(&m_pending_tags);
4153  }
4154  else
4155  {
4156  _c4err("too many tags");
4157  }
4158  }
4159  if(m_pending_anchors.num_entries)
4160  {
4161  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4162  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4163  {
4164  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4165  _clear_annotations(&m_pending_anchors);
4166  }
4167  else
4168  {
4169  _c4err("too many anchors");
4170  }
4171  }
4172 }
4173 
4174 template<class EventHandler>
4175 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4176 {
4177  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4178  if(m_pending_tags.num_entries)
4179  {
4180  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4181  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4182  {
4183  _check_tag(m_pending_tags.annotations[0].str);
4184  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4185  _clear_annotations(&m_pending_tags);
4186  }
4187  else
4188  {
4189  _c4err("too many tags");
4190  }
4191  }
4192  if(m_pending_anchors.num_entries)
4193  {
4194  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4195  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4196  {
4197  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4198  _clear_annotations(&m_pending_anchors);
4199  }
4200  else
4201  {
4202  _c4err("too many anchors");
4203  }
4204  }
4205 }
4206 
4207 template<class EventHandler>
4208 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4209 {
4210  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4211  if(m_pending_tags.num_entries == 2)
4212  {
4213  _c4dbgp("2 tags, setting entry 0");
4214  _check_tag(m_pending_tags.annotations[0].str);
4215  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4216  }
4217  else if(m_pending_tags.num_entries == 1)
4218  {
4219  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4220  if(m_pending_tags.annotations[0].line < current_line)
4221  {
4222  _c4dbgp("...tag is for the map. setting it.");
4223  _check_tag(m_pending_tags.annotations[0].str);
4224  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4225  _clear_annotations(&m_pending_tags);
4226  }
4227  }
4228  //
4229  if(m_pending_anchors.num_entries == 2)
4230  {
4231  _c4dbgp("2 anchors, setting entry 0");
4232  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4233  }
4234  else if(m_pending_anchors.num_entries == 1)
4235  {
4236  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4237  if(m_pending_anchors.annotations[0].line < current_line)
4238  {
4239  _c4dbgp("...anchor is for the map. setting it.");
4240  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4241  _clear_annotations(&m_pending_anchors);
4242  }
4243  }
4244 }
4245 
4246 template<class EventHandler>
4247 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4248 {
4249  _c4dbgp("annotations_before_start_mapblck_as_key");
4250  if(m_pending_tags.num_entries == 2)
4251  {
4252  _check_tag(m_pending_tags.annotations[0].str);
4253  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4254  }
4255  if(m_pending_anchors.num_entries == 2)
4256  {
4257  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4258  }
4259 }
4260 
4261 template<class EventHandler>
4262 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4263 {
4264  _c4dbgp("annotations_after_start_mapblck");
4265  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4266  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4267  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4268  {
4269  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4270  switch(m_pending_tags.num_entries)
4271  {
4272  case 1u:
4273  _check_tag(m_pending_tags.annotations[0].str);
4274  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4275  _clear_annotations(&m_pending_tags);
4276  break;
4277  case 2u:
4278  _check_tag(m_pending_tags.annotations[1].str);
4279  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4280  _clear_annotations(&m_pending_tags);
4281  break;
4282  }
4283  switch(m_pending_anchors.num_entries)
4284  {
4285  case 1u:
4286  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4287  _clear_annotations(&m_pending_anchors);
4288  break;
4289  case 2u:
4290  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4291  _clear_annotations(&m_pending_anchors);
4292  break;
4293  }
4294  }
4295  _set_indentation(key_indentation);
4296 }
4297 
4298 template<class EventHandler>
4299 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4300 {
4301  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4302  // select the left-most annotation on the max line
4303  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4304  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4305  {
4306  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4307  if(ann.line > curr->line)
4308  curr = &ann;
4309  else if(ann.indentation < curr->indentation)
4310  curr = &ann;
4311  }
4312  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4313  {
4314  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4315  if(ann.line > curr->line)
4316  curr = &ann;
4317  else if(ann.indentation < curr->indentation)
4318  curr = &ann;
4319  }
4320  return curr->line < val_line ? val_indentation : curr->indentation;
4321 }
4322 
4323 template<class EventHandler>
4324 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4325 {
4326  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4327  const size_t pos = rem.find('#');
4328  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4329  if(pos == npos) // no comments
4330  {
4331  m_evt_handler->add_directive(rem);
4332  _line_progressed(rem.len);
4333  }
4334  else
4335  {
4336  csubstr to_comment = rem.first(pos);
4337  csubstr trimmed = to_comment.trimr(" \t");
4338  m_evt_handler->add_directive(trimmed);
4339  _line_progressed(pos);
4340  _skip_comment();
4341  }
4342 }
4343 
4344 
4345 //-----------------------------------------------------------------------------
4346 
4347 template<class EventHandler>
4348 void ParseEngine<EventHandler>::_handle_seq_json()
4349 {
4350 seqjson_start:
4351  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4352 
4353  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4354  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4355  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4356  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4357  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4358 
4359  _handle_flow_skip_whitespace();
4360  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4361  if(!rem.len)
4362  goto seqjson_again;
4363 
4364  if(has_any(RVAL))
4365  {
4366  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4367  const char first = rem.str[0];
4368  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4369  switch(first)
4370  {
4371  case '"':
4372  {
4373  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4374  ScannedScalar sc = _scan_scalar_dquot();
4375  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4376  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4377  addrem_flags(RNXT, RVAL);
4378  break;
4379  }
4380  case '[':
4381  {
4382  _c4dbgp("seqjson[RVAL]: start child seqjson");
4383  addrem_flags(RNXT, RVAL);
4384  m_evt_handler->begin_seq_val_flow();
4385  addrem_flags(RVAL, RNXT);
4386  _line_progressed(1);
4387  break;
4388  }
4389  case '{':
4390  {
4391  _c4dbgp("seqjson[RVAL]: start child mapjson");
4392  addrem_flags(RNXT, RVAL);
4393  m_evt_handler->begin_map_val_flow();
4394  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4395  _line_progressed(1);
4396  goto seqjson_finish;
4397  }
4398  case ']': // this happens on a trailing comma like ", ]"
4399  {
4400  _c4dbgp("seqjson[RVAL]: end!");
4401  rem_flags(RSEQ);
4402  m_evt_handler->end_seq();
4403  _line_progressed(1);
4404  if(!has_all(RSEQ|FLOW))
4405  goto seqjson_finish;
4406  break;
4407  }
4408  default:
4409  {
4410  ScannedScalar sc;
4411  if(_scan_scalar_seq_json(&sc))
4412  {
4413  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4414  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4415  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4416  addrem_flags(RNXT, RVAL);
4417  }
4418  else
4419  {
4420  _c4err("parse error");
4421  }
4422  }
4423  }
4424  }
4425  else // RNXT
4426  {
4427  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4428  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4429  const char first = rem.str[0];
4430  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4431  switch(first)
4432  {
4433  case ',':
4434  {
4435  _c4dbgp("seqjson[RNXT]: expect next val");
4436  addrem_flags(RVAL, RNXT);
4437  m_evt_handler->add_sibling();
4438  _line_progressed(1);
4439  break;
4440  }
4441  case ']':
4442  {
4443  _c4dbgp("seqjson[RNXT]: end!");
4444  m_evt_handler->end_seq();
4445  _line_progressed(1);
4446  goto seqjson_finish;
4447  }
4448  default:
4449  _c4err("parse error");
4450  }
4451  }
4452 
4453  seqjson_again:
4454  _c4dbgt("seqjson: go again", 0);
4455  if(_finished_line())
4456  {
4457  if(C4_LIKELY(!_finished_file()))
4458  {
4459  _line_ended();
4460  _scan_line();
4461  _c4dbgnextline();
4462  }
4463  else
4464  {
4465  _c4err("missing terminating ]");
4466  }
4467  }
4468  goto seqjson_start;
4469 
4470  seqjson_finish:
4471  _c4dbgp("seqjson: finish");
4472 }
4473 
4474 
4475 //-----------------------------------------------------------------------------
4476 
// Parse the contents of a JSON flow map.
//
// The function loops (through the local labels) over the input, reading
// further lines as needed. Exactly one of four state flags is set on each
// pass (asserted below):
//   RKEY: a double-quoted key, or a closing '}' after a trailing comma
//   RKCL: the ':' following a key
//   RVAL: a value: double-quoted scalar, nested '[', nested '{', or a
//         plain scalar
//   RNXT: a ',' (another key/val follows) or the closing '}'
// It returns when the map is closed or when a nested sequence is opened as a
// value; a nested map opened as a value keeps being handled here. Anything
// unexpected is reported as "parse error"; reaching the end of the input is
// reported as "missing terminating }".
4477 template<class EventHandler>
4478 void ParseEngine<EventHandler>::_handle_map_json()
4479 {
4480 mapjson_start:
4481  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4482 
4483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4484  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4485  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4486  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4487  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4488 
      // skip blanks/comments; with nothing left on the line, go read more
4489  _handle_flow_skip_whitespace();
4490  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4491  if(!rem.len)
4492  goto mapjson_again;
4493 
      // state: expecting a key
4494  if(has_any(RKEY))
4495  {
4496  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4497  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4498  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4499  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4500  const char first = rem.str[0];
4501  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4502  switch(first)
4503  {
4504  case '"':
4505  {
4506  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4507  ScannedScalar sc = _scan_scalar_dquot();
4508  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4509  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
      // next: the key colon
4510  addrem_flags(RKCL, RKEY);
4511  break;
4512  }
4513  case '}': // this happens on a trailing comma like ", }"
4514  {
4515  _c4dbgp("mapjson[RKEY]: end!");
4516  m_evt_handler->end_map();
4517  _line_progressed(1);
4518  goto mapjson_finish;
4519  }
4520  default:
4521  _c4err("parse error");
4522  }
4523  }
      // state: expecting a value
4524  else if(has_any(RVAL))
4525  {
4526  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4527  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4528  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4529  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4530  const char first = rem.str[0];
4531  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4532  switch(first)
4533  {
4534  case '"':
4535  {
4536  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4537  ScannedScalar sc = _scan_scalar_dquot();
4538  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4539  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4540  addrem_flags(RNXT, RVAL);
4541  break;
4542  }
4543  case '[':
4544  {
4545  _c4dbgp("mapjson[RVAL]: start val seqjson");
      // flags are switched to RNXT before the nested sequence begins, then
      // set to RSEQ|RVAL afterwards; the function returns after this
4546  addrem_flags(RNXT, RVAL);
4547  m_evt_handler->begin_seq_val_flow();
4548  _set_indentation(m_evt_handler->m_parent->indref);
4549  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4550  _line_progressed(1);
4551  goto mapjson_finish;
4552  }
4553  case '{':
4554  {
4555  _c4dbgp("mapjson[RVAL]: start val mapjson");
      // same flag sequence as above, but the nested map starts in RKEY
4556  addrem_flags(RNXT, RVAL);
4557  m_evt_handler->begin_map_val_flow();
4558  _set_indentation(m_evt_handler->m_parent->indref);
4559  addrem_flags(RKEY, RNXT);
4560  _line_progressed(1);
4561  // keep going in this function
4562  break;
4563  }
4564  default:
4565  {
4566  ScannedScalar sc;
4567  if(_scan_scalar_map_json(&sc))
4568  {
4569  _c4dbgp("mapjson[RVAL]: plain scalar.");
4570  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4571  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4572  addrem_flags(RNXT, RVAL);
4573  }
4574  else
4575  {
4576  _c4err("parse error");
4577  }
4578  break;
4579  }
4580  }
4581  }
4582  else if(has_any(RKCL)) // read the key colon
4583  {
4584  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4585  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4586  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4587  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4588  const char first = rem.str[0];
4589  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4590  if(first == ':')
4591  {
4592  _c4dbgp("mapjson[RKCL]: found the colon");
4593  addrem_flags(RVAL, RKCL);
4594  _line_progressed(1);
4595  }
4596  else
4597  {
4598  _c4err("parse error");
4599  }
4600  }
      // state: after a value, expecting ',' or '}'
4601  else if(has_any(RNXT))
4602  {
4603  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4604  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4605  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4606  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4607  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4608  if(rem.begins_with(','))
4609  {
4610  _c4dbgp("mapjson[RNXT]: expect next keyval");
4611  m_evt_handler->add_sibling();
4612  addrem_flags(RKEY, RNXT);
4613  _line_progressed(1);
4614  }
4615  else if(rem.begins_with('}'))
4616  {
4617  _c4dbgp("mapjson[RNXT]: end!");
4618  m_evt_handler->end_map();
4619  _line_progressed(1);
4620  goto mapjson_finish;
4621  }
4622  else
4623  {
4624  _c4err("parse error");
4625  }
4626  }
4627 
      // loop: when the line is exhausted, read the next one (or fail at the
      // end of the input), then start over
4628  mapjson_again:
4629  _c4dbgt("mapjson: go again", 0);
4630  if(_finished_line())
4631  {
4632  if(C4_LIKELY(!_finished_file()))
4633  {
4634  _line_ended();
4635  _scan_line();
4636  _c4dbgnextline();
4637  }
4638  else
4639  {
4640  _c4err("missing terminating }");
4641  }
4642  }
4643  goto mapjson_start;
4644 
4645  mapjson_finish:
4646  _c4dbgp("mapjson: finish");
4647 }
4648 
4649 
4650 //-----------------------------------------------------------------------------
4651 
4652 template<class EventHandler>
4653 void ParseEngine<EventHandler>::_handle_seq_imap()
4654 {
4655 seqimap_start:
4656  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4657 
4658  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4659  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4660  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4661  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4662  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4663 
4664  _handle_flow_skip_whitespace();
4665  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4666  if(!rem.len)
4667  goto seqimap_again;
4668 
4669  if(has_any(RVAL))
4670  {
4671  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4672  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4673  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4674  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4675  const char first = rem.str[0];
4676  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4677  ScannedScalar sc;
4678  if(first == '\'')
4679  {
4680  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4681  sc = _scan_scalar_squot();
4682  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4683  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4684  m_evt_handler->end_map();
4685  goto seqimap_finish;
4686  }
4687  else if(first == '"')
4688  {
4689  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4690  sc = _scan_scalar_dquot();
4691  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4692  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4693  m_evt_handler->end_map();
4694  goto seqimap_finish;
4695  }
4696  // block scalars (ie | and >) cannot appear in flow containers
4697  else if(_scan_scalar_plain_map_flow(&sc))
4698  {
4699  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4700  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4701  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4702  m_evt_handler->end_map();
4703  goto seqimap_finish;
4704  }
4705  else if(first == '[')
4706  {
4707  _c4dbgp("seqimap[RVAL]: start child seqflow");
4708  addrem_flags(RNXT, RVAL);
4709  m_evt_handler->begin_seq_val_flow();
4710  addrem_flags(RVAL, RNXT|RSEQIMAP);
4711  _set_indentation(m_evt_handler->m_parent->indref);
4712  _line_progressed(1);
4713  goto seqimap_finish;
4714  }
4715  else if(first == '{')
4716  {
4717  _c4dbgp("seqimap[RVAL]: start child mapflow");
4718  addrem_flags(RNXT, RVAL);
4719  m_evt_handler->begin_map_val_flow();
4720  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4721  _set_indentation(m_evt_handler->m_parent->indref);
4722  _line_progressed(1);
4723  goto seqimap_finish;
4724  }
4725  else if(first == ',' || first == ']')
4726  {
4727  _c4dbgp("seqimap[RVAL]: finish without val.");
4728  m_evt_handler->set_val_scalar_plain({});
4729  m_evt_handler->end_map();
4730  goto seqimap_finish;
4731  }
4732  else if(first == '&')
4733  {
4734  csubstr anchor = _scan_anchor();
4735  _c4dbgp("seqimap[RVAL]: anchor!");
4736  m_evt_handler->set_val_anchor(anchor);
4737  }
4738  else if(first == '*')
4739  {
4740  csubstr ref = _scan_ref_seq();
4741  _c4dbgp("seqimap[RVAL]: ref!");
4742  m_evt_handler->set_val_ref(ref);
4743  addrem_flags(RNXT, RVAL);
4744  }
4745  else
4746  {
4747  _c4err("parse error");
4748  }
4749  }
4750  else if(has_any(RNXT))
4751  {
4752  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4753  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4754  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4755  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4756  const char first = rem.str[0];
4757  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4758  if(first == ',' || first == ']')
4759  {
4760  // we may get here because a map or a seq started and we
4761  // return later
4762  _c4dbgp("seqimap: done");
4763  m_evt_handler->end_map();
4764  goto seqimap_finish;
4765  }
4766  else
4767  {
4768  _c4err("parse error");
4769  }
4770  }
4771  else if(has_any(QMRK))
4772  {
4773  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4774  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4775  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4776  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4777  const char first = rem.str[0];
4778  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4779  ScannedScalar sc;
4780  if(first == '\'')
4781  {
4782  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4783  sc = _scan_scalar_squot();
4784  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4785  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4786  addrem_flags(RKCL, QMRK);
4787  goto seqimap_again;
4788  }
4789  else if(first == '"')
4790  {
4791  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4792  sc = _scan_scalar_dquot();
4793  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4794  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4795  addrem_flags(RKCL, QMRK);
4796  goto seqimap_again;
4797  }
4798  // block scalars (ie | and >) cannot appear in flow containers
4799  else if(_scan_scalar_plain_map_flow(&sc))
4800  {
4801  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4802  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4803  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4804  addrem_flags(RKCL, QMRK);
4805  goto seqimap_again;
4806  }
4807  else if(first == '[')
4808  {
4809  _c4dbgp("seqimap[QMRK]: start child seqflow");
4810  addrem_flags(RKCL, QMRK);
4811  m_evt_handler->begin_seq_key_flow();
4812  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4813  _set_indentation(m_evt_handler->m_parent->indref);
4814  _line_progressed(1);
4815  goto seqimap_finish;
4816  }
4817  else if(first == '{')
4818  {
4819  _c4dbgp("seqimap[QMRK]: start child mapflow");
4820  addrem_flags(RKCL, QMRK);
4821  m_evt_handler->begin_map_key_flow();
4822  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4823  _set_indentation(m_evt_handler->m_parent->indref);
4824  _line_progressed(1);
4825  goto seqimap_finish;
4826  }
4827  else if(first == ',' || first == ']')
4828  {
4829  _c4dbgp("seqimap[QMRK]: finish without key.");
4830  m_evt_handler->set_key_scalar_plain({});
4831  m_evt_handler->set_val_scalar_plain({});
4832  m_evt_handler->end_map();
4833  goto seqimap_finish;
4834  }
4835  else if(first == '&')
4836  {
4837  csubstr anchor = _scan_anchor();
4838  _c4dbgp("seqimap[QMRK]: anchor!");
4839  m_evt_handler->set_key_anchor(anchor);
4840  }
4841  else if(first == '*')
4842  {
4843  csubstr ref = _scan_ref_seq();
4844  _c4dbgp("seqimap[QMRK]: ref!");
4845  m_evt_handler->set_key_ref(ref);
4846  addrem_flags(RKCL, QMRK);
4847  }
4848  else
4849  {
4850  _c4err("parse error");
4851  }
4852  }
4853  else if(has_any(RKCL))
4854  {
4855  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4856  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4857  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4858  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4859  const char first = rem.str[0];
4860  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4861  if(first == ':')
4862  {
4863  _c4dbgp("seqimap[RKCL]: found ':'");
4864  addrem_flags(RVAL, RKCL);
4865  _line_progressed(1);
4866  goto seqimap_again;
4867  }
4868  else if(first == ',' || first == ']')
4869  {
4870  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4871  m_evt_handler->set_val_scalar_plain({});
4872  m_evt_handler->end_map();
4873  goto seqimap_finish;
4874  }
4875  else
4876  {
4877  _c4err("parse error");
4878  }
4879  }
4880 
4881  seqimap_again:
4882  _c4dbgt("seqimap: go again", 0);
4883  if(_finished_line())
4884  {
4885  if(C4_LIKELY(!_finished_file()))
4886  {
4887  _line_ended();
4888  _scan_line();
4889  _c4dbgnextline();
4890  }
4891  else
4892  {
4893  _c4err("parse error");
4894  }
4895  }
4896  goto seqimap_start;
4897 
4898  seqimap_finish:
4899  _c4dbgp("seqimap: finish");
4900 }
4901 
4902 
4903 //-----------------------------------------------------------------------------
4904 
// Parser step for the interior of a flow sequence ("[ ... ]").
//
// Preconditions (asserted below): RSEQ and FLOW are set, RKEY is not set,
// exactly one of RVAL / RNXT is set, and the current indentation reference
// is valid (not npos).
//  - RVAL: a value (or a closing ']') is expected next.
//  - RNXT: a value was just read; ',', ']' or ':' is expected next.
//
// The function is a goto-driven loop:
//  - seqflow_start:  skip whitespace, then dispatch on the first remaining
//                    character of the current line.
//  - seqflow_again:  when the line is exhausted, load the next one (hitting
//                    the end of the file here is reported through _c4err
//                    as "missing terminating ]"), then jump back to
//                    seqflow_start.
//  - seqflow_finish: return. Reached when the sequence is closed with ']'
//                    or when a flow map / implicit map (RSEQIMAP) is
//                    opened.
// A nested '[' does not return: the child sequence is processed by looping
// in this same function.
4905 template<class EventHandler>
4906 void ParseEngine<EventHandler>::_handle_seq_flow()
4907 {
4908 seqflow_start:
4909  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4910 
4911  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4912  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4913  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4914  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4915  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4916  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
4917 
4918  _handle_flow_skip_whitespace();
4919  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
      // nothing left on this line: go fetch the next one
4920  if(!rem.len)
4921  goto seqflow_again;
4922 
      // ---- RVAL: a value (or the closing ']') is expected ----
4923  if(has_any(RVAL))
4924  {
4925  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4926  const char first = rem.str[0];
4927  ScannedScalar sc;
4928  if(first == '\'')
4929  {
4930  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
4931  sc = _scan_scalar_squot();
4932  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4933  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4934  addrem_flags(RNXT, RVAL);
4935  }
4936  else if(first == '"')
4937  {
4938  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
4939  sc = _scan_scalar_dquot();
4940  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4941  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4942  addrem_flags(RNXT, RVAL);
4943  }
4944  // block scalars (ie | and >) cannot appear in flow containers
      // NOTE: the plain-scalar scan is attempted before any of the
      // structural-character checks below; the remaining branches are
      // only reached when it returns false. Keep this order.
4945  else if(_scan_scalar_plain_seq_flow(&sc))
4946  {
4947  _c4dbgp("seqflow[RVAL]: it's a scalar.");
4948  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4949  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4950  addrem_flags(RNXT, RVAL);
4951  }
4952  else if(first == '[')
4953  {
      // nested flow sequence. The flags are flipped to RNXT before
      // begin_seq_val_flow() and back to RVAL after it; presumably the
      // first change stays with the parent level and the second applies
      // to the new child level -- TODO confirm against the event handler.
      // No goto here: the child is handled by the next loop iteration.
4954  _c4dbgp("seqflow[RVAL]: start child seqflow");
4955  addrem_flags(RNXT, RVAL);
4956  m_evt_handler->begin_seq_val_flow();
4957  _set_indentation(m_evt_handler->m_parent->indref);
4958  addrem_flags(RVAL, RNXT);
4959  _line_progressed(1);
4960  }
4961  else if(first == '{')
4962  {
      // nested flow map: switch the flags to RMAP|RKEY and return
4963  _c4dbgp("seqflow[RVAL]: start child mapflow");
4964  addrem_flags(RNXT, RVAL);
4965  m_evt_handler->begin_map_val_flow();
4966  _set_indentation(m_evt_handler->m_parent->indref);
4967  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4968  _line_progressed(1);
4969  goto seqflow_finish;
4970  }
4971  else if(first == ']') // this happens on a trailing comma like ", ]"
4972  {
4973  _c4dbgp("seqflow[RVAL]: end!");
4974  _line_progressed(1);
4975  m_evt_handler->end_seq();
4976  goto seqflow_finish;
4977  }
4978  else if(first == '*')
4979  {
4980  csubstr ref = _scan_ref_seq();
4981  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
4982  m_evt_handler->set_val_ref(ref);
4983  addrem_flags(RNXT, RVAL);
4984  }
4985  else if(first == '&')
4986  {
      // the anchor is attached to the upcoming value; RVAL is kept.
      // If _maybe_scan_following_comma() reports a comma, the anchored
      // value is emitted as an empty plain scalar and a sibling is added.
4987  csubstr anchor = _scan_anchor();
4988  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
4989  m_evt_handler->set_val_anchor(anchor);
4990  if(_maybe_scan_following_comma())
4991  {
4992  _c4dbgp("seqflow[RVAL]: empty scalar!");
4993  m_evt_handler->set_val_scalar_plain({});
4994  m_evt_handler->add_sibling();
4995  }
4996  }
4997  else if(first == '!')
4998  {
      // same handling as for anchors, but for a tag (checked by _check_tag)
4999  csubstr tag = _scan_tag();
5000  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5001  _check_tag(tag);
5002  m_evt_handler->set_val_tag(tag);
5003  if(_maybe_scan_following_comma())
5004  {
5005  _c4dbgp("seqflow[RVAL]: empty scalar!");
5006  m_evt_handler->set_val_scalar_plain({});
5007  m_evt_handler->add_sibling();
5008  }
5009  }
5010  else if(first == ':')
5011  {
      // ':' where a value was expected: open a map with an empty plain
      // key, switch to RSEQIMAP|RVAL and return
5012  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5013  addrem_flags(RNXT, RVAL);
5014  m_evt_handler->begin_map_val_flow();
5015  _set_indentation(m_evt_handler->m_parent->indref);
5016  m_evt_handler->set_key_scalar_plain({});
5017  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5018  _line_progressed(1);
5019  goto seqflow_finish;
5020  }
5021  else if(first == '?')
5022  {
      // explicit key marker: open a map, record that a '?' was seen in
      // m_was_inside_qmrk, switch to RSEQIMAP|QMRK and return
5023  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5024  addrem_flags(RNXT, RVAL);
5025  m_was_inside_qmrk = true;
5026  m_evt_handler->begin_map_val_flow();
5027  _set_indentation(m_evt_handler->m_parent->indref);
5028  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5029  _line_progressed(1);
5030  _maybe_skip_whitespace_tokens();
5031  goto seqflow_finish;
5032  }
5033  else
5034  {
5035  _c4err("parse error");
5036  }
5037  }
      // ---- RNXT: a value was read; expect ',', ']' or ':' ----
5038  else // RNXT
5039  {
5040  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5041  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5042  const char first = rem.str[0];
5043  if(first == ',')
5044  {
5045  _c4dbgp("seqflow[RNXT]: expect next val");
5046  addrem_flags(RVAL, RNXT);
5047  m_evt_handler->add_sibling();
5048  _line_progressed(1);
5049  }
5050  else if(first == ']')
5051  {
5052  _c4dbgp("seqflow[RNXT]: end!");
5053  m_evt_handler->end_seq();
5054  _line_progressed(1);
5055  goto seqflow_finish;
5056  }
5057  else if(first == ':')
5058  {
      // ':' after a value: the handler is asked to reinterpret that
      // value as the first key of a new flow map; the flags switch to
      // RSEQIMAP|RVAL and the function returns
5059  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5060  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5061  _set_indentation(m_evt_handler->m_parent->indref);
5062  _line_progressed(1);
5063  addrem_flags(RSEQIMAP|RVAL, RNXT);
5064  goto seqflow_finish;
5065  }
5066  else
5067  {
5068  _c4err("parse error");
5069  }
5070  }
5071 
      // advance to the next line when the current one is exhausted, then
      // loop; running out of input inside the sequence is an error
5072  seqflow_again:
5073  _c4dbgt("seqflow: go again", 0);
5074  if(_finished_line())
5075  {
5076  if(C4_LIKELY(!_finished_file()))
5077  {
5078  _line_ended();
5079  _scan_line();
5080  _c4dbgnextline();
5081  }
5082  else
5083  {
5084  _c4err("missing terminating ]");
5085  }
5086  }
5087  goto seqflow_start;
5088 
5089  seqflow_finish:
5090  _c4dbgp("seqflow: finish");
5091 }
5092 
5093 
5094 //-----------------------------------------------------------------------------
5095 
5096 template<class EventHandler>
5097 void ParseEngine<EventHandler>::_handle_map_flow()
5098 {
5099 mapflow_start:
5100  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5101 
5102  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5103  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5104  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5105  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5106 
5107  _handle_flow_skip_whitespace();
5108  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5109  if(!rem.len)
5110  goto mapflow_again;
5111 
5112  if(has_any(RKEY))
5113  {
5114  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5115  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5116  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5117  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5118  const char first = rem.str[0];
5119  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5120  ScannedScalar sc;
5121  if(first == '\'')
5122  {
5123  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5124  sc = _scan_scalar_squot();
5125  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5126  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5127  addrem_flags(RKCL, RKEY|QMRK);
5128  }
5129  else if(first == '"')
5130  {
5131  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5132  sc = _scan_scalar_dquot();
5133  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5134  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5135  addrem_flags(RKCL, RKEY|QMRK);
5136  }
5137  // block scalars (ie | and >) cannot appear in flow containers
5138  else if(_scan_scalar_plain_map_flow(&sc))
5139  {
5140  _c4dbgp("mapflow[RKEY]: plain scalar");
5141  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5142  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5143  addrem_flags(RKCL, RKEY|QMRK);
5144  }
5145  else if(first == '?')
5146  {
5147  _c4dbgp("mapflow[RKEY]: explicit key");
5148  _line_progressed(1);
5149  addrem_flags(QMRK, RKEY);
5150  _maybe_skip_whitespace_tokens();
5151  }
5152  else if(first == ':')
5153  {
5154  _c4dbgp("mapflow[RKEY]: setting empty key");
5155  m_evt_handler->set_key_scalar_plain({});
5156  addrem_flags(RVAL, RKEY|QMRK);
5157  _line_progressed(1);
5158  _maybe_skip_whitespace_tokens();
5159  }
5160  else if(first == '}') // this happens on a trailing comma like ", }"
5161  {
5162  _c4dbgp("mapflow[RKEY]: end!");
5163  m_evt_handler->end_map();
5164  _line_progressed(1);
5165  goto mapflow_finish;
5166  }
5167  else if(first == '&')
5168  {
5169  csubstr anchor = _scan_anchor();
5170  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5171  m_evt_handler->set_key_anchor(anchor);
5172  }
5173  else if(first == '*')
5174  {
5175  csubstr ref = _scan_ref_map();
5176  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5177  m_evt_handler->set_key_ref(ref);
5178  addrem_flags(RKCL, RKEY);
5179  }
5180  else if(first == '[')
5181  {
5182  // RYML's tree cannot store container keys, but that's
5183  // handled inside the tree sink. Other sink types may be
5184  // able to handle it.
5185  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5186  addrem_flags(RKCL, RKEY);
5187  m_evt_handler->begin_seq_key_flow();
5188  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5189  _set_indentation(m_evt_handler->m_parent->indref);
5190  _line_progressed(1);
5191  goto mapflow_finish;
5192  }
5193  else if(first == '{')
5194  {
5195  // RYML's tree cannot store container keys, but that's
5196  // handled inside the tree sink. Other sink types may be
5197  // able to handle it.
5198  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5199  addrem_flags(RKCL, RKEY);
5200  m_evt_handler->begin_map_key_flow();
5201  addrem_flags(RKEY, RVAL|RKCL);
5202  _set_indentation(m_evt_handler->m_parent->indref);
5203  _line_progressed(1);
5204  // keep going in this function
5205  }
5206  else if(first == '!')
5207  {
5208  csubstr tag = _scan_tag();
5209  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5210  _check_tag(tag);
5211  m_evt_handler->set_key_tag(tag);
5212  }
5213  else
5214  {
5215  _c4err("parse error");
5216  }
5217  }
5218  else if(has_any(RKCL)) // read the key colon
5219  {
5220  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5221  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5222  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5223  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5224  const char first = rem.str[0];
5225  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5226  if(first == ':')
5227  {
5228  _c4dbgp("mapflow[RKCL]: found the colon");
5229  addrem_flags(RVAL, RKCL);
5230  _line_progressed(1);
5231  }
5232  else if(first == '}')
5233  {
5234  _c4dbgp("mapflow[RKCL]: end with missing val!");
5235  addrem_flags(RVAL, RKCL);
5236  m_evt_handler->set_val_scalar_plain({});
5237  m_evt_handler->end_map();
5238  _line_progressed(1);
5239  goto mapflow_finish;
5240  }
5241  else if(first == ',')
5242  {
5243  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5244  m_evt_handler->set_val_scalar_plain({});
5245  m_evt_handler->add_sibling();
5246  addrem_flags(RKEY, RKCL);
5247  _line_progressed(1);
5248  }
5249  else
5250  {
5251  _c4err("parse error");
5252  }
5253  }
5254  else if(has_any(RVAL))
5255  {
5256  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5257  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5258  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5259  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5260  const char first = rem.str[0];
5261  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5262  ScannedScalar sc;
5263  if(first == '\'')
5264  {
5265  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5266  sc = _scan_scalar_squot();
5267  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5268  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5269  addrem_flags(RNXT, RVAL);
5270  }
5271  else if(first == '"')
5272  {
5273  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5274  sc = _scan_scalar_dquot();
5275  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5276  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5277  addrem_flags(RNXT, RVAL);
5278  }
5279  // block scalars (ie | and >) cannot appear in flow containers
5280  else if(_scan_scalar_plain_map_flow(&sc))
5281  {
5282  _c4dbgp("mapflow[RVAL]: plain scalar.");
5283  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5284  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5285  addrem_flags(RNXT, RVAL);
5286  }
5287  else if(first == '[')
5288  {
5289  _c4dbgp("mapflow[RVAL]: start val seqflow");
5290  addrem_flags(RNXT, RVAL);
5291  m_evt_handler->begin_seq_val_flow();
5292  _set_indentation(m_evt_handler->m_parent->indref);
5293  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5294  _line_progressed(1);
5295  goto mapflow_finish;
5296  }
5297  else if(first == '{')
5298  {
5299  _c4dbgp("mapflow[RVAL]: start val mapflow");
5300  addrem_flags(RNXT, RVAL);
5301  m_evt_handler->begin_map_val_flow();
5302  _set_indentation(m_evt_handler->m_parent->indref);
5303  addrem_flags(RKEY, RNXT);
5304  _line_progressed(1);
5305  // keep going in this function
5306  }
5307  else if(first == '}')
5308  {
5309  _c4dbgp("mapflow[RVAL]: end!");
5310  m_evt_handler->set_val_scalar_plain({});
5311  m_evt_handler->end_map();
5312  _line_progressed(1);
5313  goto mapflow_finish;
5314  }
5315  else if(first == '*')
5316  {
5317  csubstr ref = _scan_ref_map();
5318  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5319  m_evt_handler->set_val_ref(ref);
5320  addrem_flags(RNXT, RVAL);
5321  }
5322  else if(first == '&')
5323  {
5324  csubstr anchor = _scan_anchor();
5325  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5326  m_evt_handler->set_val_anchor(anchor);
5327  }
5328  else if(first == '!')
5329  {
5330  csubstr tag = _scan_tag();
5331  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5332  _check_tag(tag);
5333  m_evt_handler->set_val_tag(tag);
5334  }
5335  else
5336  {
5337  _c4err("parse error");
5338  }
5339  }
5340  else if(has_any(RNXT))
5341  {
5342  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5343  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5344  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5345  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5346  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5347  if(rem.begins_with(','))
5348  {
5349  _c4dbgp("mapflow[RNXT]: expect next keyval");
5350  m_evt_handler->add_sibling();
5351  addrem_flags(RKEY, RNXT);
5352  _line_progressed(1);
5353  }
5354  else if(rem.begins_with('}'))
5355  {
5356  _c4dbgp("mapflow[RNXT]: end!");
5357  m_evt_handler->end_map();
5358  _line_progressed(1);
5359  goto mapflow_finish;
5360  }
5361  else
5362  {
5363  _c4err("parse error");
5364  }
5365  }
5366  else if(has_any(QMRK))
5367  {
5368  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5369  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5370  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5371  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5372  const char first = rem.str[0];
5373  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5374  ScannedScalar sc;
5375  if(first == '\'')
5376  {
5377  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5378  sc = _scan_scalar_squot();
5379  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5380  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5381  addrem_flags(RKCL, QMRK);
5382  }
5383  else if(first == '"')
5384  {
5385  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5386  sc = _scan_scalar_dquot();
5387  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5388  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5389  addrem_flags(RKCL, QMRK);
5390  }
5391  // block scalars (ie | and >) cannot appear in flow containers
5392  else if(_scan_scalar_plain_map_flow(&sc))
5393  {
5394  _c4dbgp("mapflow[QMRK]: plain scalar");
5395  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5396  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5397  addrem_flags(RKCL, QMRK);
5398  }
5399  else if(first == ':')
5400  {
5401  _c4dbgp("mapflow[QMRK]: setting empty key");
5402  m_evt_handler->set_key_scalar_plain({});
5403  addrem_flags(RVAL, QMRK);
5404  _line_progressed(1);
5405  _maybe_skip_whitespace_tokens();
5406  }
5407  else if(first == '}') // this happens on a trailing comma like ", }"
5408  {
5409  _c4dbgp("mapflow[QMRK]: end!");
5410  m_evt_handler->set_key_scalar_plain({});
5411  m_evt_handler->set_val_scalar_plain({});
5412  m_evt_handler->end_map();
5413  _line_progressed(1);
5414  goto mapflow_finish;
5415  }
5416  else if(first == '&')
5417  {
5418  csubstr anchor = _scan_anchor();
5419  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5420  m_evt_handler->set_key_anchor(anchor);
5421  }
5422  else if(first == '*')
5423  {
5424  csubstr ref = _scan_ref_map();
5425  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5426  m_evt_handler->set_key_ref(ref);
5427  addrem_flags(RKCL, QMRK);
5428  }
5429  else if(first == '[')
5430  {
5431  // RYML's tree cannot store container keys, but that's
5432  // handled inside the tree sink. Other sink types may be
5433  // able to handle it.
5434  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5435  addrem_flags(RKCL, QMRK);
5436  m_evt_handler->begin_seq_key_flow();
5437  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5438  _set_indentation(m_evt_handler->m_parent->indref);
5439  _line_progressed(1);
5440  goto mapflow_finish;
5441  }
5442  else if(first == '{')
5443  {
5444  // RYML's tree cannot store container keys, but that's
5445  // handled inside the tree sink. Other sink types may be
5446  // able to handle it.
5447  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5448  addrem_flags(RKCL, QMRK);
5449  m_evt_handler->begin_map_key_flow();
5450  _set_indentation(m_evt_handler->m_parent->indref);
5451  addrem_flags(RKEY, RKCL);
5452  _line_progressed(1);
5453  // keep going in this function
5454  }
5455  else if(first == '!')
5456  {
5457  csubstr tag = _scan_tag();
5458  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5459  _check_tag(tag);
5460  m_evt_handler->set_key_tag(tag);
5461  }
5462  else
5463  {
5464  _c4err("parse error");
5465  }
5466  }
5467 
5468  mapflow_again:
5469  _c4dbgt("mapflow: go again", 0);
5470  if(_finished_line())
5471  {
5472  if(C4_LIKELY(!_finished_file()))
5473  {
5474  _line_ended();
5475  _scan_line();
5476  _c4dbgnextline();
5477  }
5478  else
5479  {
5480  _c4err("missing terminating }");
5481  }
5482  }
5483  goto mapflow_start;
5484 
5485  mapflow_finish:
5486  _c4dbgp("mapflow: finish");
5487 }
5488 
5489 
5490 //-----------------------------------------------------------------------------
5491 
5492 template<class EventHandler>
5493 void ParseEngine<EventHandler>::_handle_seq_block()
5494 {
5495 seqblck_start:
5496  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5497 
5498  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5499  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5500  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5501  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5502 
5503  _maybe_skip_comment();
5504  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5505  if(!rem.len)
5506  goto seqblck_again;
5507 
5508  if(has_any(RVAL))
5509  {
5510  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5511  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5512  if(m_evt_handler->m_curr->at_line_beginning())
5513  {
5514  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5515  if(m_evt_handler->m_curr->indentation_ge())
5516  {
5517  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5518  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5519  rem = m_evt_handler->m_curr->line_contents.rem;
5520  if(!rem.len)
5521  goto seqblck_again;
5522  }
5523  else if(m_evt_handler->m_curr->indentation_lt())
5524  {
5525  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5526  _handle_indentation_pop_from_block_seq();
5527  goto seqblck_finish;
5528  }
5529  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5530  {
5531  _c4dbgp("seqblck[RVAL]: empty line!");
5532  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5533  goto seqblck_again;
5534  }
5535  }
5536  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5537  else
5538  {
5539  // accomodate annotation on the previous line. eg:
5540  // - &elm
5541  // foo # <-- on this line
5542  // - &elm
5543  // &foo foo: bar # <-- on this line
5544  if(rem.str[0] == ' ')
5545  {
5546  if(_handle_indentation_from_annotations())
5547  {
5548  _c4dbgp("seqblck[RVAL]: annotations!");
5549  rem = m_evt_handler->m_curr->line_contents.rem;
5550  if(!rem.len)
5551  goto seqblck_again;
5552  }
5553  }
5554  }
5555  #endif
5556  _RYML_CB_ASSERT(callbacks(), rem.len);
5557  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5558  const char first = rem.str[0];
5559  const size_t startline = m_evt_handler->m_curr->pos.line;
5560  // warning: the gcc optimizer on x86 builds is brittle with
5561  // this function:
5562  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
5563  ScannedScalar sc;
5564  if(first == '\'')
5565  {
5566  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5567  sc = _scan_scalar_squot();
5568  if(!_maybe_scan_following_colon())
5569  {
5570  _c4dbgp("seqblck[RVAL]: set as val");
5571  _handle_annotations_before_blck_val_scalar();
5572  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5573  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5574  addrem_flags(RNXT, RVAL);
5575  }
5576  else
5577  {
5578  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5579  addrem_flags(RNXT, RVAL);
5580  _handle_annotations_before_start_mapblck(startline);
5581  m_evt_handler->begin_map_val_block();
5582  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5583  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5584  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5585  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5586  _maybe_skip_whitespace_tokens();
5587  goto seqblck_finish;
5588  }
5589  }
5590  else if(first == '"')
5591  {
5592  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5593  sc = _scan_scalar_dquot();
5594  if(!_maybe_scan_following_colon())
5595  {
5596  _c4dbgp("seqblck[RVAL]: set as val");
5597  _handle_annotations_before_blck_val_scalar();
5598  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5599  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5600  addrem_flags(RNXT, RVAL);
5601  }
5602  else
5603  {
5604  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5605  addrem_flags(RNXT, RVAL);
5606  _handle_annotations_before_start_mapblck(startline);
5607  m_evt_handler->begin_map_val_block();
5608  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5609  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5610  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5611  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5612  _maybe_skip_whitespace_tokens();
5613  goto seqblck_finish;
5614  }
5615  }
5616  // block scalars can only appear as keys when in QMRK scope
5617  // (ie, after ? tokens), so no need to scan following colon in
5618  // here.
5619  else if(first == '|')
5620  {
5621  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5622  ScannedBlock sb;
5623  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5624  _handle_annotations_before_blck_val_scalar();
5625  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5626  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5627  addrem_flags(RNXT, RVAL);
5628  }
5629  else if(first == '>')
5630  {
5631  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5632  ScannedBlock sb;
5633  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5634  _handle_annotations_before_blck_val_scalar();
5635  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5636  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5637  addrem_flags(RNXT, RVAL);
5638  }
5639  else if(_scan_scalar_plain_seq_blck(&sc))
5640  {
5641  _c4dbgp("seqblck[RVAL]: plain scalar.");
5642  if(!_maybe_scan_following_colon())
5643  {
5644  _c4dbgp("seqblck[RVAL]: set as val");
5645  _handle_annotations_before_blck_val_scalar();
5646  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5647  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5648  addrem_flags(RNXT, RVAL);
5649  }
5650  else
5651  {
5652  if(startindent > m_evt_handler->m_curr->indref)
5653  {
5654  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5655  addrem_flags(RNXT, RVAL);
5656  _handle_annotations_before_start_mapblck(startline);
5657  m_evt_handler->begin_map_val_block();
5658  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5659  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5660  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5661  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5662  _maybe_skip_whitespace_tokens();
5663  goto seqblck_finish;
5664  }
5665  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5666  {
5667  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5668  m_evt_handler->set_val_scalar_plain({});
5669  m_evt_handler->end_seq();
5670  m_evt_handler->add_sibling();
5671  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5672  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5673  addrem_flags(RVAL, RNXT|RKEY);
5674  _maybe_skip_whitespace_tokens();
5675  goto seqblck_finish;
5676  }
5677  else
5678  {
5679  _c4err("parse error");
5680  }
5681  }
5682  }
5683  else if(first == '[')
5684  {
5685  _c4dbgp("seqblck[RVAL]: start child seqflow");
5686  addrem_flags(RNXT, RVAL);
5687  m_evt_handler->begin_seq_val_flow();
5688  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5689  _line_progressed(1);
5690  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5691  goto seqblck_finish;
5692  }
5693  else if(first == '{')
5694  {
5695  _c4dbgp("seqblck[RVAL]: start child mapflow");
5696  addrem_flags(RNXT, RVAL);
5697  _handle_annotations_before_blck_val_scalar();
5698  m_evt_handler->begin_map_val_flow();
5699  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5700  _line_progressed(1);
5701  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5702  goto seqblck_finish;
5703  }
5704  else if(first == '-')
5705  {
5706  if(startindent == m_evt_handler->m_curr->indref)
5707  {
5708  _c4dbgp("seqblck[RVAL]: prev val was empty");
5709  _handle_annotations_before_blck_val_scalar();
5710  m_evt_handler->set_val_scalar_plain({});
5711  // keep in RVAL, but for the next sibling
5712  m_evt_handler->add_sibling();
5713  }
5714  else
5715  {
5716  _c4dbgp("seqblck[RVAL]: start child seqblck");
5717  _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5718  addrem_flags(RNXT, RVAL);
5719  _handle_annotations_before_blck_val_scalar();
5720  m_evt_handler->begin_seq_val_block();
5721  addrem_flags(RVAL, RNXT);
5722  _save_indentation();
5723  // keep going on inside this function
5724  }
5725  _line_progressed(1);
5726  _maybe_skip_whitespace_tokens();
5727  }
5728  else if(first == ':')
5729  {
5730  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5731  addrem_flags(RNXT, RVAL);
5732  _handle_annotations_before_start_mapblck(startline);
5733  m_evt_handler->begin_map_val_block();
5734  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5735  m_evt_handler->set_key_scalar_plain({});
5736  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5737  _line_progressed(1);
5738  _maybe_skip_whitespace_tokens();
5739  goto seqblck_finish;
5740  }
5741  else if(first == '&')
5742  {
5743  const csubstr anchor = _scan_anchor();
5744  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5745  // we need to buffer the anchors, as there may be two
5746  // consecutive anchors in here
5747  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5748  }
5749  else if(first == '*')
5750  {
5751  csubstr ref = _scan_ref_seq();
5752  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5753  if(!_maybe_scan_following_colon())
5754  {
5755  _c4dbgp("seqblck[RVAL]: set ref as val!");
5756  _handle_annotations_before_blck_val_scalar();
5757  m_evt_handler->set_val_ref(ref);
5758  addrem_flags(RNXT, RVAL);
5759  }
5760  else
5761  {
5762  _c4dbgp("seqblck[RVAL]: ref is key of map");
5763  addrem_flags(RNXT, RVAL);
5764  _handle_annotations_before_start_mapblck(startline);
5765  m_evt_handler->begin_map_val_block();
5766  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5767  m_evt_handler->set_key_ref(ref);
5768  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5769  _set_indentation(startindent);
5770  _maybe_skip_whitespace_tokens();
5771  goto seqblck_finish;
5772  }
5773  }
5774  else if(first == '!')
5775  {
5776  csubstr tag = _scan_tag();
5777  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5778  // we need to buffer the tags, as there may be two
5779  // consecutive tags in here
5780  _add_annotation(&m_pending_tags, tag, startindent, startline);
5781  }
5782  else if(first == '?')
5783  {
5784  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5785  addrem_flags(RNXT, RVAL);
5786  m_was_inside_qmrk = true;
5787  m_evt_handler->begin_map_val_block();
5788  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5789  _save_indentation();
5790  _line_progressed(1);
5791  _maybe_skip_whitespace_tokens();
5792  goto seqblck_finish;
5793  }
5794  else
5795  {
5796  _c4err("parse error");
5797  }
5798  }
5799  else // RNXT
5800  {
5801  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5802  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5803  //
5804  // handle indentation
5805  //
5806  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5807  if(C4_UNLIKELY(!_at_line_begin()))
5808  _c4err("parse error");
5809  if(m_evt_handler->m_curr->indentation_ge())
5810  {
5811  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5812  _line_progressed(m_evt_handler->m_curr->indref);
5813  _maybe_skip_whitespace_tokens();
5814  rem = m_evt_handler->m_curr->line_contents.rem;
5815  if(!rem.len)
5816  goto seqblck_again;
5817  }
5818  else if(m_evt_handler->m_curr->indentation_lt())
5819  {
5820  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5821  _handle_indentation_pop_from_block_seq();
5822  if(has_all(RSEQ|BLCK))
5823  {
5824  _c4dbgp("seqblck[RNXT]: still seqblck!");
5825  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5826  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5827  rem = m_evt_handler->m_curr->line_contents.rem;
5828  if(!rem.len)
5829  goto seqblck_again;
5830  }
5831  else
5832  {
5833  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5834  goto seqblck_finish;
5835  }
5836  }
5837  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5838  {
5839  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5840  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5841  rem = m_evt_handler->m_curr->line_contents.rem;
5842  if(!rem.len)
5843  goto seqblck_again;
5844  }
5845  //
5846  // now handle the tokens
5847  //
5848  const char first = rem.str[0];
5849  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
5850  if(first == '-')
5851  {
5852  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5853  {
5854  _c4dbgp("seqblck[RNXT]: expect next val");
5855  addrem_flags(RVAL, RNXT);
5856  m_evt_handler->add_sibling();
5857  _line_progressed(1);
5858  _maybe_skip_whitespace_tokens();
5859  }
5860  else
5861  {
5862  _c4dbgp("seqblck[RNXT]: start doc");
5863  _start_doc_suddenly();
5864  _line_progressed(3);
5865  _maybe_skip_whitespace_tokens();
5866  goto seqblck_finish;
5867  }
5868  }
5869  else if(first == ':')
5870  {
5871  // This happens for example in `- [a: b]: c` (after
5872  // terminating the seq, ie, after `]`). All other cases
5873  // (ie colon after scalars) are caught elsewhere (ie, in
5874  // RVAL state).
5875  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5876  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
5877  {
5878  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
5879  m_evt_handler->end_seq();
5880  goto seqblck_finish;
5881  }
5882  else
5883  {
5884  _c4err("parse error");
5885  }
5886  }
5887  else if(first == '.')
5888  {
5889  _c4dbgp("seqblck[RNXT]: maybe doc?");
5890  csubstr rs = rem.sub(1);
5891  if(rs == ".." || rs.begins_with(".. "))
5892  {
5893  _c4dbgp("seqblck[RNXT]: end+start doc");
5894  _end_doc_suddenly();
5895  _line_progressed(3);
5896  _maybe_skip_whitespace_tokens();
5897  goto seqblck_finish;
5898  }
5899  else
5900  {
5901  _c4err("parse error");
5902  }
5903  }
5904  else
5905  {
5906  // may be an indentless sequence nested in a map...
5907  //if(m_evt_handler->m_stack.size() >= 2)
5908  #ifdef RYML_DBG
5909  char flagbuf_[128];
5910  for(auto const& s : m_evt_handler->m_stack)
5911  {
5912  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
5913  }
5914  #endif
5915  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
5916  {
5917  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
5918  _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
5919  _handle_indentation_pop(m_evt_handler->m_parent);
5920  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
5921  m_evt_handler->add_sibling();
5922  addrem_flags(RKEY, RNXT);
5923  goto seqblck_finish;
5924  }
5925  else //if(first != '*')
5926  {
5927  _c4err("parse error");
5928  }
5929  }
5930  }
5931 
5932  seqblck_again:
5933  _c4dbgt("seqblck: go again", 0);
5934  if(_finished_line())
5935  {
5936  _line_ended();
5937  _scan_line();
5938  if(_finished_file())
5939  {
5940  _c4dbgp("seqblck: finish!");
5941  _end_seq_blck();
5942  goto seqblck_finish;
5943  }
5944  _c4dbgnextline();
5945  }
5946  goto seqblck_start;
5947 
5948  seqblck_finish:
5949  _c4dbgp("seqblck: finish");
5950 }
5951 
5952 
5953 //-----------------------------------------------------------------------------
5954 
5955 template<class EventHandler>
5956 void ParseEngine<EventHandler>::_handle_map_block()
5957 {
5958 mapblck_start:
5959  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5960 
5961  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
5962  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5963  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5964  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5965  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5966 
5967  _maybe_skip_comment();
5968  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5969  if(!rem.len)
5970  goto mapblck_again;
5971 
5972  if(has_any(RKEY))
5973  {
5974  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5975  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5976  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5977  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5978  //
5979  // handle indentation
5980  //
5981  if(m_evt_handler->m_curr->at_line_beginning())
5982  {
5983  if(m_evt_handler->m_curr->indentation_eq())
5984  {
5985  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
5986  _line_progressed(m_evt_handler->m_curr->indref);
5987  rem = m_evt_handler->m_curr->line_contents.rem;
5988  if(!rem.len)
5989  goto mapblck_again;
5990  }
5991  else if(m_evt_handler->m_curr->indentation_lt())
5992  {
5993  _c4dbgp("mapblck[RKEY]: smaller indentation!");
5994  _handle_indentation_pop_from_block_map();
5995  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5996  if(has_all(RMAP|BLCK))
5997  {
5998  _c4dbgp("mapblck[RKEY]: still mapblck!");
5999  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
6000  rem = m_evt_handler->m_curr->line_contents.rem;
6001  if(!rem.len)
6002  goto mapblck_again;
6003  }
6004  else
6005  {
6006  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6007  goto mapblck_finish;
6008  }
6009  }
6010  else
6011  {
6012  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6013  _c4err("invalid indentation");
6014  }
6015  }
6016  //
6017  // now handle the tokens
6018  //
6019  const char first = rem.str[0];
6020  const size_t startline = m_evt_handler->m_curr->pos.line;
6021  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6022  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6023  ScannedScalar sc;
6024  if(first == '\'')
6025  {
6026  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6027  sc = _scan_scalar_squot();
6028  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6029  _handle_annotations_before_blck_key_scalar();
6030  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6031  addrem_flags(RVAL, RKEY);
6032  if(!_maybe_scan_following_colon())
6033  _c4err("could not find ':' colon after key");
6034  _maybe_skip_whitespace_tokens();
6035  }
6036  else if(first == '"')
6037  {
6038  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6039  sc = _scan_scalar_dquot();
6040  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6041  _handle_annotations_before_blck_key_scalar();
6042  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6043  addrem_flags(RVAL, RKEY);
6044  if(!_maybe_scan_following_colon())
6045  _c4err("could not find ':' colon after key");
6046  _maybe_skip_whitespace_tokens();
6047  }
6048  // block scalars (| and >) cannot be used as keys unless they
6049  // appear in an explicit QMRK scope (ie, after the ? token).
6050  else if(C4_UNLIKELY(first == '|'))
6051  {
6052  _c4err("block literal keys must be enclosed in '?'");
6053  }
6054  else if(C4_UNLIKELY(first == '>'))
6055  {
6056  _c4err("block literal keys must be enclosed in '?'");
6057  }
6058  else if(_scan_scalar_plain_map_blck(&sc))
6059  {
6060  _c4dbgp("mapblck[RKEY]: plain scalar");
6061  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6062  _handle_annotations_before_blck_key_scalar();
6063  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6064  addrem_flags(RVAL, RKEY);
6065  if(!_maybe_scan_following_colon())
6066  _c4err("could not find ':' colon after key");
6067  _maybe_skip_whitespace_tokens();
6068  }
6069  else if(first == '?')
6070  {
6071  _c4dbgp("mapblck[RKEY]: key token!");
6072  addrem_flags(QMRK, RKEY);
6073  _line_progressed(1);
6074  _maybe_skip_whitespace_tokens();
6075  m_was_inside_qmrk = true;
6076  goto mapblck_again;
6077  }
6078  else if(first == ':')
6079  {
6080  _c4dbgp("mapblck[RKEY]: setting empty key");
6081  _handle_annotations_before_blck_key_scalar();
6082  m_evt_handler->set_key_scalar_plain({});
6083  addrem_flags(RVAL, RKEY);
6084  _line_progressed(1);
6085  _maybe_skip_whitespace_tokens();
6086  }
6087  else if(first == '*')
6088  {
6089  csubstr ref = _scan_ref_map();
6090  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6091  _handle_annotations_before_blck_key_scalar();
6092  m_evt_handler->set_key_ref(ref);
6093  addrem_flags(RVAL, RKEY);
6094  if(!_maybe_scan_following_colon())
6095  _c4err("could not find ':' colon after key");
6096  _maybe_skip_whitespace_tokens();
6097  }
6098  else if(first == '&')
6099  {
6100  csubstr anchor = _scan_anchor();
6101  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6102  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6103  }
6104  else if(first == '!')
6105  {
6106  csubstr tag = _scan_tag();
6107  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6108  _add_annotation(&m_pending_tags, tag, startindent, startline);
6109  }
6110  else if(first == '[')
6111  {
6112  // RYML's tree cannot store container keys, but that's
6113  // handled inside the tree handler. Other handlers may be
6114  // able to handle it.
6115  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6116  addrem_flags(RKCL, RKEY);
6117  _handle_annotations_before_blck_key_scalar();
6118  m_evt_handler->begin_seq_key_flow();
6119  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6120  _line_progressed(1);
6121  _set_indentation(startindent);
6122  goto mapblck_finish;
6123  }
6124  else if(first == '{')
6125  {
6126  // RYML's tree cannot store container keys, but that's
6127  // handled inside the tree handler. Other handlers may be
6128  // able to handle it.
6129  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6130  addrem_flags(RKCL, RKEY);
6131  _handle_annotations_before_blck_key_scalar();
6132  m_evt_handler->begin_map_key_flow();
6133  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6134  _line_progressed(1);
6135  _set_indentation(startindent);
6136  goto mapblck_finish;
6137  }
6138  else if(first == '-')
6139  {
6140  _c4dbgp("mapblck[RKEY]: maybe doc?");
6141  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6142  {
6143  _c4dbgp("mapblck[RKEY]: end+start doc");
6144  _start_doc_suddenly();
6145  _line_progressed(3);
6146  _maybe_skip_whitespace_tokens();
6147  goto mapblck_finish;
6148  }
6149  else
6150  {
6151  _c4err("parse error");
6152  }
6153  }
6154  else if(first == '.')
6155  {
6156  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6157  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6158  {
6159  _c4dbgp("mapblck[RKEY]: end doc");
6160  _end_doc_suddenly();
6161  _line_progressed(3);
6162  _maybe_skip_whitespace_tokens();
6163  goto mapblck_finish;
6164  }
6165  else
6166  {
6167  _c4err("parse error");
6168  }
6169  }
6171  else if(first == '\t')
6172  {
6173  _c4dbgp("mapblck[RKEY]: skip tabs");
6174  _maybe_skipchars('\t');
6175  })
6176  else
6177  {
6178  _c4err("parse error");
6179  }
6180  }
6181  else if(has_any(RKCL)) // read the key colon
6182  {
6183  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6184  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6185  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6186  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6187  //
6188  // handle indentation
6189  //
6190  if(m_evt_handler->m_curr->at_line_beginning())
6191  {
6192  if(m_evt_handler->m_curr->indentation_eq())
6193  {
6194  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6195  _line_progressed(m_evt_handler->m_curr->indref);
6196  rem = m_evt_handler->m_curr->line_contents.rem;
6197  if(!rem.len)
6198  goto mapblck_again;
6199  }
6200  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6201  {
6202  _c4err("invalid indentation");
6203  }
6204  }
6205  const char first = rem.str[0];
6206  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6207  if(first == ':')
6208  {
6209  _c4dbgp("mapblck[RKCL]: found the colon");
6210  addrem_flags(RVAL, RKCL);
6211  _line_progressed(1);
6212  _maybe_skip_whitespace_tokens();
6213  }
6214  else if(first == '?')
6215  {
6216  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6217  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6218  m_evt_handler->set_val_scalar_plain({});
6219  m_evt_handler->add_sibling();
6220  addrem_flags(QMRK, RKCL);
6221  _line_progressed(1);
6222  _maybe_skip_whitespace_tokens();
6223  }
6224  else if(first == '-')
6225  {
6226  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6227  {
6228  _c4dbgp("mapblck[RKCL]: end+start doc");
6229  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6230  _start_doc_suddenly();
6231  _line_progressed(3);
6232  _maybe_skip_whitespace_tokens();
6233  goto mapblck_finish;
6234  }
6235  else
6236  {
6237  _c4err("parse error");
6238  }
6239  }
6240  else if(first == '.')
6241  {
6242  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6243  csubstr rs = rem.sub(1);
6244  if(rs == ".." || rs.begins_with(".. "))
6245  {
6246  _c4dbgp("mapblck[RKCL]: end+start doc");
6247  _end_doc_suddenly();
6248  _line_progressed(3);
6249  goto mapblck_finish;
6250  }
6251  else
6252  {
6253  _c4err("parse error");
6254  }
6255  }
6256  else if(m_was_inside_qmrk)
6257  {
6258  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6259  _c4dbgp("mapblck[RKCL]: missing :");
6260  m_evt_handler->set_val_scalar_plain({});
6261  m_evt_handler->add_sibling();
6262  m_was_inside_qmrk = false;
6263  addrem_flags(RKEY, RKCL);
6264  }
6265  else
6266  {
6267  _c4err("parse error");
6268  }
6269  }
6270  else if(has_any(RVAL))
6271  {
6272  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6273  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6274  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6275  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6276  //
6277  // handle indentation
6278  //
6279  if(m_evt_handler->m_curr->at_line_beginning())
6280  {
6281  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6282  m_evt_handler->m_curr->more_indented = false;
6283  if(m_evt_handler->m_curr->indref == npos)
6284  {
6285  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6286  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6287  _line_progressed(m_evt_handler->m_curr->indref);
6288  rem = m_evt_handler->m_curr->line_contents.rem;
6289  if(!rem.len)
6290  goto mapblck_again;
6291  }
6292  else if(m_evt_handler->m_curr->indentation_eq())
6293  {
6294  _c4dbgp("mapblck[RVAL]: skip indentation!");
6295  _line_progressed(m_evt_handler->m_curr->indref);
6296  rem = m_evt_handler->m_curr->line_contents.rem;
6297  if(!rem.len)
6298  goto mapblck_again;
6299  // TODO: this is valid:
6300  //
6301  // ```yaml
6302  // a:
6303  // b:
6304  // ---
6305  // a:
6306  // b
6307  // ---
6308  // a:
6309  // b: c
6310  // ```
6311  //
6312  // ... but this is not:
6313  //
6314  // ```yaml
6315  // a:
6316  // v
6317  // ---
6318  // a: b: c
6319  // ```
6320  //
6321  // here, we probably need to set a boolean on the state
6322  // to disambiguate between these cases.
6323  }
6324  else if(m_evt_handler->m_curr->indentation_gt())
6325  {
6326  _c4dbgp("mapblck[RVAL]: more indented!");
6327  m_evt_handler->m_curr->more_indented = true;
6328  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6329  rem = m_evt_handler->m_curr->line_contents.rem;
6330  if(!rem.len)
6331  goto mapblck_again;
6332  }
6333  else if(m_evt_handler->m_curr->indentation_lt())
6334  {
6335  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6336  _handle_indentation_pop_from_block_map();
6337  if(has_all(RMAP|BLCK))
6338  {
6339  _c4dbgp("mapblck[RVAL]: still mapblck!");
6340  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6341  goto mapblck_again;
6342  }
6343  else
6344  {
6345  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6346  goto mapblck_finish;
6347  }
6348  }
6349  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6350  {
6351  _c4dbgp("mapblck[RVAL]: empty line!");
6352  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6353  goto mapblck_again;
6354  }
6355  }
6356  //
6357  // now handle the tokens
6358  //
6359  const char first = rem.str[0];
6360  const size_t startline = m_evt_handler->m_curr->pos.line;
6361  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6362  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6363  ScannedScalar sc;
6364  if(first == '\'')
6365  {
6366  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6367  sc = _scan_scalar_squot();
6368  if(!_maybe_scan_following_colon())
6369  {
6370  _c4dbgp("mapblck[RVAL]: set as val");
6371  _handle_annotations_before_blck_val_scalar();
6372  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6373  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6374  addrem_flags(RNXT, RVAL);
6375  }
6376  else
6377  {
6378  if(startindent != m_evt_handler->m_curr->indref)
6379  {
6380  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6381  _handle_annotations_before_start_mapblck(startline);
6382  addrem_flags(RNXT, RVAL);
6383  m_evt_handler->begin_map_val_block();
6384  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6385  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6386  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6387  _maybe_skip_whitespace_tokens();
6388  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6389  // keep the child state on RVAL
6390  addrem_flags(RVAL, RNXT);
6391  }
6392  else
6393  {
6394  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6395  m_evt_handler->set_val_scalar_plain({});
6396  m_evt_handler->add_sibling();
6397  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6398  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6399  // keep going on RVAL
6400  _maybe_skip_whitespace_tokens();
6401  }
6402  }
6403  }
6404  else if(first == '"')
6405  {
6406  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6407  sc = _scan_scalar_dquot();
6408  if(!_maybe_scan_following_colon())
6409  {
6410  _c4dbgp("mapblck[RVAL]: set as val");
6411  _handle_annotations_before_blck_val_scalar();
6412  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6413  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6414  addrem_flags(RNXT, RVAL);
6415  }
6416  else
6417  {
6418  if(startindent != m_evt_handler->m_curr->indref)
6419  {
6420  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6421  _handle_annotations_before_start_mapblck(startline);
6422  addrem_flags(RNXT, RVAL);
6423  m_evt_handler->begin_map_val_block();
6424  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6425  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6426  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6427  _maybe_skip_whitespace_tokens();
6428  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6429  // keep the child state on RVAL
6430  addrem_flags(RVAL, RNXT);
6431  }
6432  else
6433  {
6434  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6435  m_evt_handler->set_val_scalar_plain({});
6436  m_evt_handler->add_sibling();
6437  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6438  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6439  // keep going on RVAL
6440  _maybe_skip_whitespace_tokens();
6441  }
6442  }
6443  }
6444  // block scalars can only appear as keys when in QMRK scope
6445  // (ie, after ? tokens), so no need to scan following colon
6446  else if(first == '|')
6447  {
6448  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6449  ScannedBlock sb;
6450  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6451  _handle_annotations_before_blck_val_scalar();
6452  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6453  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6454  addrem_flags(RNXT, RVAL);
6455  }
6456  else if(first == '>')
6457  {
6458  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6459  ScannedBlock sb;
6460  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6461  _handle_annotations_before_blck_val_scalar();
6462  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6463  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6464  addrem_flags(RNXT, RVAL);
6465  }
6466  else if(_scan_scalar_plain_map_blck(&sc))
6467  {
6468  _c4dbgp("mapblck[RVAL]: plain scalar.");
6469  if(!_maybe_scan_following_colon())
6470  {
6471  _c4dbgp("mapblck[RVAL]: set as val");
6472  _handle_annotations_before_blck_val_scalar();
6473  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6474  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6475  addrem_flags(RNXT, RVAL);
6476  }
6477  else
6478  {
6479  if(startindent != m_evt_handler->m_curr->indref)
6480  {
6481  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6482  addrem_flags(RNXT, RVAL);
6483  _handle_annotations_before_start_mapblck(startline);
6484  m_evt_handler->begin_map_val_block();
6485  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6486  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6487  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6488  _maybe_skip_whitespace_tokens();
6489  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6490  // keep the child state on RVAL
6491  addrem_flags(RVAL, RNXT);
6492  }
6493  else
6494  {
6495  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6496  _handle_annotations_before_blck_val_scalar();
6497  m_evt_handler->set_val_scalar_plain({});
6498  m_evt_handler->add_sibling();
6499  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6500  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6501  // keep going on RVAL
6502  _maybe_skip_whitespace_tokens();
6503  }
6504  }
6505  }
6506  else if(first == '-')
6507  {
6508  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6509  {
6510  _c4dbgp("mapblck[RVAL]: start val seqblck");
6511  addrem_flags(RNXT, RVAL);
6512  _handle_annotations_before_blck_val_scalar();
6513  m_evt_handler->begin_seq_val_block();
6514  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6515  _set_indentation(startindent);
6516  _line_progressed(1);
6517  _maybe_skip_whitespace_tokens();
6518  goto mapblck_finish;
6519  }
6520  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6521  {
6522  _c4dbgp("mapblck[RVAL]: end+start doc");
6523  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6524  _start_doc_suddenly();
6525  _line_progressed(3);
6526  _maybe_skip_whitespace_tokens();
6527  goto mapblck_finish;
6528  }
6529  else
6530  {
6531  _c4err("parse error");
6532  }
6533  }
6534  else if(first == '[')
6535  {
6536  _c4dbgp("mapblck[RVAL]: start val seqflow");
6537  addrem_flags(RNXT, RVAL);
6538  _handle_annotations_before_blck_val_scalar();
6539  m_evt_handler->begin_seq_val_flow();
6540  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK);
6541  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6542  _line_progressed(1);
6543  goto mapblck_finish;
6544  }
6545  else if(first == '{')
6546  {
6547  _c4dbgp("mapblck[RVAL]: start val mapflow");
6548  addrem_flags(RNXT, RVAL);
6549  _handle_annotations_before_blck_val_scalar();
6550  m_evt_handler->begin_map_val_flow();
6551  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6552  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6553  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6554  _line_progressed(1);
6555  goto mapblck_finish;
6556  }
6557  else if(first == '*')
6558  {
6559  csubstr ref = _scan_ref_map();
6560  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6561  if(startindent == m_evt_handler->m_curr->indref)
6562  {
6563  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6564  m_evt_handler->set_val_ref(ref);
6565  addrem_flags(RNXT, RVAL);
6566  }
6567  else
6568  {
6569  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6570  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6571  if(_maybe_scan_following_colon())
6572  {
6573  _c4dbgp("mapblck[RVAL]: start child map, block");
6574  addrem_flags(RNXT, RVAL);
6575  _handle_annotations_before_blck_val_scalar();
6576  m_evt_handler->begin_map_val_block();
6577  m_evt_handler->set_key_ref(ref);
6578  _set_indentation(startindent);
6579  // keep going in RVAL
6580  addrem_flags(RVAL, RNXT);
6581  }
6582  else
6583  {
6584  _c4dbgp("mapblck[RVAL]: was val ref");
6585  _handle_annotations_before_blck_val_scalar();
6586  m_evt_handler->set_val_ref(ref);
6587  addrem_flags(RNXT, RVAL);
6588  }
6589  }
6590  _maybe_skip_whitespace_tokens();
6591  }
6592  else if(first == '&')
6593  {
6594  csubstr anchor = _scan_anchor();
6595  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6596  if(startindent == m_evt_handler->m_curr->indref)
6597  {
6598  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6599  m_evt_handler->set_val_scalar_plain({});
6600  m_evt_handler->add_sibling();
6601  addrem_flags(RKEY, RVAL);
6602  }
6603  // we need to buffer the anchors, as there may be two
6604  // consecutive anchors in here
6605  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6606  }
6607  else if(first == '!')
6608  {
6609  csubstr tag = _scan_tag();
6610  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6611  if(startindent == m_evt_handler->m_curr->indref)
6612  {
6613  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6614  _handle_annotations_before_blck_val_scalar();
6615  m_evt_handler->set_val_scalar_plain({});
6616  m_evt_handler->add_sibling();
6617  addrem_flags(RKEY, RVAL);
6618  }
6619  // we need to buffer the tags, as there may be two
6620  // consecutive tags in here
6621  _add_annotation(&m_pending_tags, tag, startindent, startline);
6622  }
6623  else if(first == '?')
6624  {
6625  if(startindent == m_evt_handler->m_curr->indref)
6626  {
6627  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6628  _handle_annotations_before_blck_val_scalar();
6629  m_evt_handler->set_val_scalar_plain({});
6630  m_evt_handler->add_sibling();
6631  addrem_flags(QMRK, RVAL);
6632  }
6633  else if(startindent > m_evt_handler->m_curr->indref)
6634  {
6635  _c4dbgp("mapblck[RVAL]: start val mapblck");
6636  addrem_flags(RNXT, RVAL);
6637  _handle_annotations_before_blck_val_scalar();
6638  m_evt_handler->begin_map_val_block();
6639  addrem_flags(QMRK|BLCK, RNXT);
6640  _set_indentation(startindent);
6641  }
6642  else
6643  {
6644  _c4err("parse error");
6645  }
6646  m_was_inside_qmrk = true;
6647  _line_progressed(1);
6648  _maybe_skip_whitespace_tokens();
6649  goto mapblck_again;
6650  }
6651  else if(first == ':')
6652  {
6653  if(startindent == m_evt_handler->m_curr->indref)
6654  {
6655  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6656  m_evt_handler->set_val_scalar_plain({});
6657  m_evt_handler->add_sibling();
6658  m_evt_handler->set_key_scalar_plain({});
6659  _line_progressed(1);
6660  _maybe_skip_whitespace_tokens();
6661  goto mapblck_again;
6662  }
6663  else
6664  {
6665  _c4err("parse error");
6666  }
6667  }
6668  else if(first == '.')
6669  {
6670  _c4dbgp("mapblck[RVAL]: maybe doc?");
6671  csubstr rs = rem.sub(1);
6672  if(rs == ".." || rs.begins_with(".. "))
6673  {
6674  _c4dbgp("seqblck[RVAL]: end doc expl");
6675  _end_doc_suddenly();
6676  _line_progressed(3);
6677  _maybe_skip_whitespace_tokens();
6678  goto mapblck_finish;
6679  }
6680  else
6681  {
6682  _c4err("parse error");
6683  }
6684  }
6686  else if(first == '\t')
6687  {
6688  _c4dbgp("mapblck[RVAL]: skip tabs");
6689  _maybe_skipchars('\t');
6690  })
6691  else
6692  {
6693  _c4err("parse error");
6694  }
6695  }
6696  else if(has_any(RNXT))
6697  {
6698  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6699  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6700  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6701  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6702  //
6703  // handle indentation
6704  //
6705  if(m_evt_handler->m_curr->at_line_beginning())
6706  {
6707  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6708  if(m_evt_handler->m_curr->indentation_eq())
6709  {
6710  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6711  _line_progressed(m_evt_handler->m_curr->indref);
6712  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6713  m_evt_handler->add_sibling();
6714  addrem_flags(RKEY, RNXT);
6715  goto mapblck_again;
6716  }
6717  else if(m_evt_handler->m_curr->indentation_lt())
6718  {
6719  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6720  _handle_indentation_pop_from_block_map();
6721  if(has_all(RMAP|BLCK))
6722  {
6723  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6724  if(!has_any(RKCL))
6725  {
6726  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6727  m_evt_handler->add_sibling();
6728  addrem_flags(RKEY, RNXT);
6729  }
6730  goto mapblck_again;
6731  }
6732  else
6733  {
6734  goto mapblck_finish;
6735  }
6736  }
6737  }
6738  //
6739  // handle tokens
6740  //
6741  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6742  const char first = rem.str[0];
6743  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6744  if(first == ':')
6745  {
6746  if(m_evt_handler->m_curr->more_indented)
6747  {
6748  _c4dbgp("mapblck[RNXT]: start child block map");
6749  C4_NOT_IMPLEMENTED();
6750  //m_evt_handler->actually_as_block_map();
6751  _line_progressed(1);
6752  _set_indentation(m_evt_handler->m_curr->scalar_col);
6753  m_evt_handler->m_curr->more_indented = false;
6754  goto mapblck_again;
6755  }
6756  else
6757  {
6758  _c4err("parse error");
6759  }
6760  }
6761  else if(first == ' ')
6762  {
6763  _c4dbgp("mapblck[RNXT]: skip spaces");
6764  _maybe_skip_whitespace_tokens();
6765  }
6766  else
6767  {
6768  _c4err("parse error");
6769  }
6770  }
6771  else if(has_any(QMRK))
6772  {
6773  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6774  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6775  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6776  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6777  //
6778  // handle indentation
6779  //
6780  if(m_evt_handler->m_curr->at_line_beginning())
6781  {
6782  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6783  if(m_evt_handler->m_curr->indentation_eq())
6784  {
6785  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6786  _line_progressed(m_evt_handler->m_curr->indref);
6787  rem = m_evt_handler->m_curr->line_contents.rem;
6788  if(!rem.len)
6789  goto mapblck_again;
6790  }
6791  else if(m_evt_handler->m_curr->indentation_lt())
6792  {
6793  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6794  _handle_indentation_pop_from_block_map();
6795  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6796  if(has_all(RMAP|BLCK))
6797  {
6798  _c4dbgp("mapblck[QMRK]: still mapblck!");
6799  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6800  rem = m_evt_handler->m_curr->line_contents.rem;
6801  if(!rem.len)
6802  goto mapblck_again;
6803  }
6804  else
6805  {
6806  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6807  goto mapblck_finish;
6808  }
6809  }
6810  // indentation can be larger in QMRK state
6811  else
6812  {
6813  _c4dbgp("mapblck[QMRK]: larger indentation !");
6814  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6815  rem = m_evt_handler->m_curr->line_contents.rem;
6816  if(!rem.len)
6817  goto mapblck_again;
6818  }
6819  }
6820  //
6821  // now handle the tokens
6822  //
6823  const char first = rem.str[0];
6824  const size_t startline = m_evt_handler->m_curr->pos.line;
6825  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6826  _c4dbgpf("mapblck[QMRK]: '{}'", first);
6827  ScannedScalar sc;
6828  if(first == '\'')
6829  {
6830  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
6831  sc = _scan_scalar_squot();
6832  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6833  if(!_maybe_scan_following_colon())
6834  {
6835  _c4dbgp("mapblck[QMRK]: set as key");
6836  _handle_annotations_before_blck_key_scalar();
6837  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6838  addrem_flags(RKCL, QMRK);
6839  }
6840  else
6841  {
6842  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6843  addrem_flags(RKCL, QMRK);
6844  _handle_annotations_before_start_mapblck_as_key();
6845  m_evt_handler->begin_map_key_block();
6846  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6847  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6848  _maybe_skip_whitespace_tokens();
6849  _set_indentation(startindent);
6850  // keep the child state on RVAL
6851  addrem_flags(RVAL, RKCL|QMRK);
6852  }
6853  }
6854  else if(first == '"')
6855  {
6856  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
6857  sc = _scan_scalar_dquot();
6858  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6859  if(!_maybe_scan_following_colon())
6860  {
6861  _c4dbgp("mapblck[QMRK]: set as key");
6862  _handle_annotations_before_blck_key_scalar();
6863  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6864  addrem_flags(RKCL, QMRK);
6865  }
6866  else
6867  {
6868  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6869  addrem_flags(RKCL, QMRK);
6870  _handle_annotations_before_start_mapblck_as_key();
6871  m_evt_handler->begin_map_key_block();
6872  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6873  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6874  _maybe_skip_whitespace_tokens();
6875  _set_indentation(startindent);
6876  // keep the child state on RVAL
6877  addrem_flags(RVAL, RKCL|QMRK);
6878  }
6879  }
6880  else if(first == '|')
6881  {
6882  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6883  ScannedBlock sb;
6884  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6885  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
6886  _handle_annotations_before_blck_key_scalar();
6887  m_evt_handler->set_key_scalar_literal(maybe_filtered);
6888  addrem_flags(RKCL, QMRK);
6889  }
6890  else if(first == '>')
6891  {
6892  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6893  ScannedBlock sb;
6894  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6895  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
6896  _handle_annotations_before_blck_key_scalar();
6897  m_evt_handler->set_key_scalar_folded(maybe_filtered);
6898  addrem_flags(RKCL, QMRK);
6899  }
6900  else if(_scan_scalar_plain_map_blck(&sc))
6901  {
6902  _c4dbgp("mapblck[QMRK]: plain scalar");
6903  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6904  if(!_maybe_scan_following_colon())
6905  {
6906  _c4dbgp("mapblck[QMRK]: set as key");
6907  _handle_annotations_before_blck_key_scalar();
6908  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6909  addrem_flags(RKCL, QMRK);
6910  }
6911  else
6912  {
6913  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6914  addrem_flags(RKCL, QMRK);
6915  _handle_annotations_before_start_mapblck_as_key();
6916  m_evt_handler->begin_map_key_block();
6917  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6918  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6919  _maybe_skip_whitespace_tokens();
6920  _set_indentation(startindent);
6921  // keep the child state on RVAL
6922  addrem_flags(RVAL, RKCL|QMRK);
6923  }
6924  }
6925  else if(first == ':')
6926  {
6927  if(startindent == m_evt_handler->m_curr->indref)
6928  {
6929  _c4dbgp("mapblck[QMRK]: empty key");
6930  addrem_flags(RVAL, QMRK);
6931  _handle_annotations_before_blck_key_scalar();
6932  m_evt_handler->set_key_scalar_plain({});
6933  _line_progressed(1);
6934  _maybe_skip_whitespace_tokens();
6935  }
6936  else
6937  {
6938  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
6939  addrem_flags(RKCL, QMRK);
6940  _handle_annotations_before_start_mapblck_as_key();
6941  m_evt_handler->begin_map_key_block();
6942  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6943  m_evt_handler->set_key_scalar_plain({});
6944  _line_progressed(1);
6945  _maybe_skip_whitespace_tokens();
6946  _set_indentation(startindent);
6947  // keep the child state on RVAL
6948  addrem_flags(RVAL, RKCL|QMRK);
6949  }
6950  }
6951  else if(first == '*')
6952  {
6953  csubstr ref = _scan_ref_map();
6954  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
6955  if(!_maybe_scan_following_colon())
6956  {
6957  _c4dbgp("mapblck[QMRK]: set ref as key");
6958  _handle_annotations_before_blck_key_scalar();
6959  m_evt_handler->set_key_ref(ref);
6960  addrem_flags(RKCL, QMRK);
6961  }
6962  else
6963  {
6964  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
6965  addrem_flags(RKCL, QMRK);
6966  _handle_annotations_before_blck_key_scalar();
6967  m_evt_handler->begin_map_key_block();
6968  m_evt_handler->set_key_ref(ref);
6969  _set_indentation(startindent);
6970  // keep the child state on RVAL
6971  addrem_flags(RVAL, RKCL|QMRK);
6972  }
6973  _maybe_skip_whitespace_tokens();
6974  }
6975  else if(first == '&')
6976  {
6977  csubstr anchor = _scan_anchor();
6978  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6979  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6980  }
6981  else if(first == '!')
6982  {
6983  csubstr tag = _scan_tag();
6984  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
6985  _add_annotation(&m_pending_tags, tag, startindent, startline);
6986  }
6987  else if(first == '-')
6988  {
6989  _c4dbgp("mapblck[QMRK]: maybe doc?");
6990  csubstr rs = rem.sub(1);
6991  if(rs == "--" || rs.begins_with("-- "))
6992  {
6993  _c4dbgp("mapblck[QMRK]: end+start doc");
6994  _start_doc_suddenly();
6995  _line_progressed(3);
6996  }
6997  else
6998  {
6999  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7000  addrem_flags(RKCL, RKEY|QMRK);
7001  m_evt_handler->begin_seq_key_block();
7002  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
7003  _set_indentation(startindent);
7004  _line_progressed(1);
7005  }
7006  _maybe_skip_whitespace_tokens();
7007  goto mapblck_finish;
7008  }
7009  else if(first == '[')
7010  {
7011  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7012  addrem_flags(RKCL, RKEY|QMRK);
7013  m_evt_handler->begin_seq_key_flow();
7014  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
7015  _set_indentation(m_evt_handler->m_parent->indref);
7016  _line_progressed(1);
7017  goto mapblck_finish;
7018  }
7019  else if(first == '{')
7020  {
7021  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7022  addrem_flags(RKCL, RKEY|QMRK);
7023  m_evt_handler->begin_map_key_flow();
7024  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7025  _set_indentation(m_evt_handler->m_parent->indref);
7026  _line_progressed(1);
7027  goto mapblck_finish;
7028  }
7029  else if(first == '?')
7030  {
7031  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7032  m_evt_handler->set_key_scalar_plain({});
7033  m_evt_handler->set_val_scalar_plain({});
7034  m_evt_handler->add_sibling();
7035  _line_progressed(1);
7036  }
7037  else if(first == '.')
7038  {
7039  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7040  csubstr rs = rem.sub(1);
7041  if(rs == ".." || rs.begins_with(".. "))
7042  {
7043  _c4dbgp("mapblck[QMRK]: end+start doc");
7044  _end_doc_suddenly();
7045  _line_progressed(3);
7046  goto mapblck_finish;
7047  }
7048  else
7049  {
7050  _c4err("parse error");
7051  }
7052  }
7053  else
7054  {
7055  _c4err("parse error");
7056  }
7057  }
7058 
7059  mapblck_again:
7060  _c4dbgt("mapblck: again", 0);
7061  if(_finished_line())
7062  {
7063  _line_ended();
7064  _scan_line();
7065  if(_finished_file())
7066  {
7067  _c4dbgp("mapblck: file finished!");
7068  _end_map_blck();
7069  goto mapblck_finish;
7070  }
7071  _c4dbgnextline();
7072  }
7073  goto mapblck_start;
7074 
7075  mapblck_finish:
7076  _c4dbgp("mapblck: finish");
7077 }
7078 
7079 
7080 //-----------------------------------------------------------------------------
7081 
7082 template<class EventHandler>
7083 void ParseEngine<EventHandler>::_handle_unk_json()
7084 {
7085  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7086 
7087  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7088  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7089 
7090  _maybe_skip_comment();
7091  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7092  if(!rem.len)
7093  return;
7094 
7095  size_t pos = rem.first_not_of(" \t");
7096  if(pos)
7097  {
7098  pos = pos != npos ? pos : rem.len;
7099  _c4dbgpf("skipping indentation of {}", pos);
7100  _line_progressed(pos);
7101  rem = m_evt_handler->m_curr->line_contents.rem;
7102  if(!rem.len)
7103  return;
7104  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7105  }
7106 
7107  if(rem.begins_with('['))
7108  {
7109  _c4dbgp("it's a seq");
7110  m_evt_handler->check_trailing_doc_token();
7111  _maybe_begin_doc();
7112  m_evt_handler->begin_seq_val_flow();
7113  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7114  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7115  m_doc_empty = false;
7116  _line_progressed(1);
7117  }
7118  else if(rem.begins_with('{'))
7119  {
7120  _c4dbgp("it's a map");
7121  m_evt_handler->check_trailing_doc_token();
7122  _maybe_begin_doc();
7123  m_evt_handler->begin_map_val_flow();
7124  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7125  m_doc_empty = false;
7126  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7127  _line_progressed(1);
7128  }
7129  else
7130  {
7131  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7132  _maybe_skip_whitespace_tokens();
7133  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7134  if(!s.len)
7135  return;
7136  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7137  const char first = s.str[0];
7138  ScannedScalar sc;
7139  if(first == '"')
7140  {
7141  _c4dbgp("runk_json: scanning double-quoted scalar");
7142  m_evt_handler->check_trailing_doc_token();
7143  _maybe_begin_doc();
7144  add_flags(RDOC);
7145  m_doc_empty = false;
7146  sc = _scan_scalar_dquot();
7147  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7148  if(!_maybe_scan_following_colon())
7149  {
7150  _c4dbgp("runk_json: set as val");
7151  _handle_annotations_before_blck_val_scalar();
7152  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7153  }
7154  else
7155  {
7156  _c4err("parse error");
7157  }
7158  }
7159  else if(_scan_scalar_plain_unk(&sc))
7160  {
7161  _c4dbgp("runk_json: got a plain scalar");
7162  m_evt_handler->check_trailing_doc_token();
7163  _maybe_begin_doc();
7164  add_flags(RDOC);
7165  m_doc_empty = false;
7166  if(!_maybe_scan_following_colon())
7167  {
7168  _c4dbgp("runk_json: set as val");
7169  _handle_annotations_before_blck_val_scalar();
7170  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7171  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7172  }
7173  else
7174  {
7175  _c4err("parse error");
7176  }
7177  }
7178  else
7179  {
7180  _c4err("parse error");
7181  }
7182  }
7183 }
7184 
7185 
7186 //-----------------------------------------------------------------------------
7187 
7188 template<class EventHandler>
7189 void ParseEngine<EventHandler>::_handle_unk()
7190 {
7191  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7192 
7193  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7194  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7195 
7196  _maybe_skip_comment();
7197  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7198  if(!rem.len)
7199  return;
7200 
7201  size_t pos = rem.first_not_of(" \t");
7202  if(pos)
7203  {
7204  pos = pos != npos ? pos : rem.len;
7205  _c4dbgpf("skipping {} whitespace characters", pos);
7206  _line_progressed(pos);
7207  rem = m_evt_handler->m_curr->line_contents.rem;
7208  if(!rem.len)
7209  return;
7210  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7211  }
7212 
7213  if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7214  {
7215  const char first = rem.str[0];
7216  _c4dbgp("rtop: zero indent + at line begin");
7217  if(first == '-')
7218  {
7219  _c4dbgp("rtop: suspecting doc");
7220  if(_is_doc_begin_token(rem))
7221  {
7222  _c4dbgp("rtop: begin doc");
7223  _maybe_end_doc();
7224  _begin2_doc_expl();
7225  _set_indentation(0);
7226  addrem_flags(RDOC|RUNK, NDOC);
7227  _line_progressed(3u);
7228  _maybe_skip_whitespace_tokens();
7229  return;
7230  }
7231  }
7232  else if(first == '.')
7233  {
7234  _c4dbgp("rtop: suspecting doc end");
7235  if(_is_doc_end_token(rem))
7236  {
7237  _c4dbgp("rtop: end doc");
7238  if(has_any(RDOC))
7239  {
7240  _end2_doc_expl();
7241  }
7242  else
7243  {
7244  _c4dbgp("rtop: ignore end doc");
7245  }
7246  addrem_flags(NDOC|RUNK, RDOC);
7247  _line_progressed(3u);
7248  _maybe_skip_whitespace_tokens();
7249  return;
7250  }
7251  }
7252  else if(first == '%')
7253  {
7254  _c4dbgpf("directive: {}", rem);
7255  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7256  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7257  _handle_directive(rem);
7258  return;
7259  }
7260  }
7261 
7262  /* no else-if! */
7263  char first = rem.str[0];
7264 
7265  if(first == '[')
7266  {
7267  m_evt_handler->check_trailing_doc_token();
7268  _maybe_begin_doc();
7269  m_doc_empty = false;
7270  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7271  if(C4_LIKELY( ! _annotations_require_key_container()))
7272  {
7273  _c4dbgp("it's a seq, flow");
7274  _handle_annotations_before_blck_val_scalar();
7275  m_evt_handler->begin_seq_val_flow();
7276  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7277  _set_indentation(startindent);
7278  }
7279  else
7280  {
7281  _c4dbgp("start new block map, set flow seq as key (!)");
7282  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7283  m_evt_handler->begin_map_val_block();
7284  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7285  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7286  m_evt_handler->begin_seq_key_flow();
7287  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7288  _set_indentation(startindent);
7289  }
7290  _line_progressed(1);
7291  }
7292  else if(first == '{')
7293  {
7294  m_evt_handler->check_trailing_doc_token();
7295  _maybe_begin_doc();
7296  m_doc_empty = false;
7297  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7298  if(C4_LIKELY( ! _annotations_require_key_container()))
7299  {
7300  _c4dbgp("it's a map, flow");
7301  _handle_annotations_before_blck_val_scalar();
7302  m_evt_handler->begin_map_val_flow();
7303  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7304  _set_indentation(startindent);
7305  }
7306  else
7307  {
7308  _c4dbgp("start new block map, set flow map as key (!)");
7309  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7310  m_evt_handler->begin_map_val_block();
7311  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7312  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7313  m_evt_handler->begin_map_key_flow();
7314  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7315  _set_indentation(startindent);
7316  }
7317  _line_progressed(1);
7318  }
7319  else if(first == '-' && _is_blck_token(rem))
7320  {
7321  _c4dbgp("it's a seq, block");
7322  m_evt_handler->check_trailing_doc_token();
7323  _maybe_begin_doc();
7324  _handle_annotations_before_blck_val_scalar();
7325  m_evt_handler->begin_seq_val_block();
7326  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7327  m_doc_empty = false;
7328  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7329  _line_progressed(1);
7330  _maybe_skip_whitespace_tokens();
7331  }
7332  else if(first == '?' && _is_blck_token(rem))
7333  {
7334  _c4dbgp("it's a map + this key is complex");
7335  m_evt_handler->check_trailing_doc_token();
7336  _maybe_begin_doc();
7337  _handle_annotations_before_blck_val_scalar();
7338  m_evt_handler->begin_map_val_block();
7339  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7340  m_doc_empty = false;
7341  m_was_inside_qmrk = true;
7342  _save_indentation();
7343  _line_progressed(1);
7344  _maybe_skip_whitespace_tokens();
7345  }
7346  else if(first == ':' && _is_blck_token(rem))
7347  {
7348  if(m_doc_empty)
7349  {
7350  _c4dbgp("it's a map with an empty key");
7351  m_evt_handler->check_trailing_doc_token();
7352  _maybe_begin_doc();
7353  _handle_annotations_before_blck_val_scalar();
7354  m_evt_handler->begin_map_val_block();
7355  m_evt_handler->set_key_scalar_plain({});
7356  m_doc_empty = false;
7357  _save_indentation();
7358  }
7359  else
7360  {
7361  _c4dbgp("actually prev val is a key!");
7362  size_t prev_indentation = m_evt_handler->m_curr->indref;
7363  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7364  _set_indentation(prev_indentation);
7365  }
7366  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7367  _line_progressed(1);
7368  _maybe_skip_whitespace_tokens();
7369  }
7370  else if(first == '&')
7371  {
7372  csubstr anchor = _scan_anchor();
7373  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7374  m_evt_handler->check_trailing_doc_token();
7375  _maybe_begin_doc();
7376  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7377  const size_t line = m_evt_handler->m_curr->pos.line;
7378  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7379  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7380  m_doc_empty = false;
7381  }
7382  else if(first == '*')
7383  {
7384  csubstr ref = _scan_ref_map();
7385  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7386  m_evt_handler->check_trailing_doc_token();
7387  _maybe_begin_doc();
7388  m_doc_empty = false;
7389  if(!_maybe_scan_following_colon())
7390  {
7391  _c4dbgp("runk: set val ref");
7392  _handle_annotations_before_blck_val_scalar();
7393  m_evt_handler->set_val_ref(ref);
7394  }
7395  else
7396  {
7397  _c4dbgp("runk: start new block map, set ref as key");
7398  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7399  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7400  _handle_annotations_before_start_mapblck(startline);
7401  m_evt_handler->begin_map_val_block();
7402  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7403  m_evt_handler->set_key_ref(ref);
7404  _maybe_skip_whitespace_tokens();
7405  _set_indentation(startindent);
7406  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7407  }
7408  }
7409  else if(first == '!')
7410  {
7411  csubstr tag = _scan_tag();
7412  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7413  // we need to buffer the tags, as there may be two
7414  // consecutive tags in here
7415  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7416  const size_t line = m_evt_handler->m_curr->pos.line;
7417  _add_annotation(&m_pending_tags, tag, indentation, line);
7418  }
7419  else
7420  {
7421  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7422  _maybe_skip_whitespace_tokens();
7423  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7424  if(!s.len)
7425  return;
7426  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7427  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7428  first = s.str[0];
7429  ScannedScalar sc;
7430  if(first == '\'')
7431  {
7432  _c4dbgp("runk: scanning single-quoted scalar");
7433  m_evt_handler->check_trailing_doc_token();
7434  _maybe_begin_doc();
7435  add_flags(RDOC);
7436  m_doc_empty = false;
7437  sc = _scan_scalar_squot();
7438  if(!_maybe_scan_following_colon())
7439  {
7440  _c4dbgp("runk: set as val");
7441  _handle_annotations_before_blck_val_scalar();
7442  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7443  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7444  }
7445  else
7446  {
7447  _c4dbgp("runk: start new block map, set scalar as key");
7448  _handle_annotations_before_start_mapblck(startline);
7449  m_evt_handler->begin_map_val_block();
7450  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7451  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7452  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7453  _maybe_skip_whitespace_tokens();
7454  _set_indentation(startindent);
7455  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7456  }
7457  }
7458  else if(first == '"')
7459  {
7460  _c4dbgp("runk: scanning double-quoted scalar");
7461  m_evt_handler->check_trailing_doc_token();
7462  _maybe_begin_doc();
7463  add_flags(RDOC);
7464  m_doc_empty = false;
7465  sc = _scan_scalar_dquot();
7466  if(!_maybe_scan_following_colon())
7467  {
7468  _c4dbgp("runk: set as val");
7469  _handle_annotations_before_blck_val_scalar();
7470  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7471  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7472  }
7473  else
7474  {
7475  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7476  _handle_annotations_before_start_mapblck(startline);
7477  m_evt_handler->begin_map_val_block();
7478  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7479  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7480  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7481  _maybe_skip_whitespace_tokens();
7482  _set_indentation(startindent);
7483  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7484  }
7485  }
7486  else if(first == '|')
7487  {
7488  _c4dbgp("runk: scanning block-literal scalar");
7489  m_evt_handler->check_trailing_doc_token();
7490  _maybe_begin_doc();
7491  add_flags(RDOC);
7492  m_doc_empty = false;
7493  ScannedBlock sb;
7494  _scan_block(&sb, startindent);
7495  if(C4_LIKELY(!_maybe_scan_following_colon()))
7496  {
7497  _c4dbgp("runk: set as val");
7498  _handle_annotations_before_blck_val_scalar();
7499  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7500  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7501  }
7502  else
7503  {
7504  _c4err("block literal keys must be enclosed in '?'");
7505  }
7506  }
7507  else if(first == '>')
7508  {
7509  _c4dbgp("runk: scanning block-folded scalar");
7510  m_evt_handler->check_trailing_doc_token();
7511  _maybe_begin_doc();
7512  add_flags(RDOC);
7513  m_doc_empty = false;
7514  ScannedBlock sb;
7515  _scan_block(&sb, startindent);
7516  if(C4_LIKELY(!_maybe_scan_following_colon()))
7517  {
7518  _c4dbgp("runk: set as val");
7519  _handle_annotations_before_blck_val_scalar();
7520  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7521  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7522  }
7523  else
7524  {
7525  _c4err("block folded keys must be enclosed in '?'");
7526  }
7527  }
7528  else if(_scan_scalar_plain_unk(&sc))
7529  {
7530  _c4dbgp("runk: got a plain scalar");
7531  m_evt_handler->check_trailing_doc_token();
7532  _maybe_begin_doc();
7533  add_flags(RDOC);
7534  m_doc_empty = false;
7535  if(!_maybe_scan_following_colon())
7536  {
7537  _c4dbgp("runk: set as val");
7538  _handle_annotations_before_blck_val_scalar();
7539  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7540  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7541  }
7542  else
7543  {
7544  _c4dbgp("runk: start new block map, set scalar as key");
7545  _handle_annotations_before_start_mapblck(startline);
7546  m_evt_handler->begin_map_val_block();
7547  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7548  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7549  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7550  _maybe_skip_whitespace_tokens();
7551  _set_indentation(startindent);
7552  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7553  }
7554  }
7555  }
7556 }
7557 
7558 
7559 //-----------------------------------------------------------------------------
7560 
// Handle the remainder of the current line while the container style is
// still undecided (USTY): on entry neither BLCK nor FLOW is set (asserted
// below). After skipping leading blanks, the first remaining character
// selects what to do. Three situations are distinguished by the flags that
// are already set:
//   - RSEQ: the destination is a sequence; only '[' or a block '-' token is
//     accepted, anything else is an error.
//   - RMAP: the destination is a map; '{', '?', ':' and keys (ref, quoted or
//     plain scalar followed by a colon) are accepted; a lone value or a
//     sequence is an error.
//   - otherwise: the destination is unknown; any container may be started,
//     and a lone scalar is set as the value.
// Every branch that enters a container removes USTY (and RNXT) through
// addrem_flags(). The anchor ('&') and tag ('!') branches only record a
// pending annotation and make no flag change in this function.
// Errors are reported through _c4err().
7561 template<class EventHandler>
7562 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7563 {
 // NOTE(review): the format string has a single {} placeholder but two
 // arguments (indref, node_id) are passed - confirm which one is meant
 // to be printed.
7564  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7565 
 // precondition: the style has not been decided yet
7566  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7567 
 // this block is only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined
7568  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7569  if(has_any(RNXT))
7570  {
7571  _c4dbgp("usty[RNXT]: finishing!");
7572  _end_stream();
7573  }
7574  #endif
7575 
 // presumably consumes a comment if one starts here (helper not visible
 // in this chunk); if nothing remains on the line there is nothing to do
7576  _maybe_skip_comment();
7577  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7578  if(!rem.len)
7579  return;
7580 
 // skip leading spaces/tabs. pos is nonzero both when there are leading
 // blanks and when the remainder is entirely blank (npos); in the latter
 // case the whole remainder is consumed.
7581  size_t pos = rem.first_not_of(" \t");
7582  if(pos)
7583  {
7584  pos = pos != npos ? pos : rem.len;
7585  _c4dbgpf("skipping indentation of {}", pos);
7586  _line_progressed(pos);
7587  rem = m_evt_handler->m_curr->line_contents.rem;
7588  if(!rem.len)
7589  return;
7590  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7591  }
7592 
 // from here on rem is non-empty; dispatch on its first character
7593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7594  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7595  char first = rem.str[0];
7596  if(has_any(RSEQ)) // destination is a sequence
7597  {
7598  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7599  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7600  if(first == '[')
7601  {
 // flow seq: push a handler level, switch to FLOW|RVAL, and
 // consume the '[' character
7602  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7603  add_flags(RNXT);
7604  m_evt_handler->_push();
7605  addrem_flags(FLOW|RVAL, RNXT|USTY);
7606  _set_indentation(startindent);
7607  _line_progressed(1);
7608  _maybe_skip_whitespace_tokens();
7609  }
7610  else if(first == '-' && _is_blck_token(rem))
7611  {
 // block seq: same sequence of steps, but switching to BLCK|RVAL,
 // and consuming the '-' character
7612  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7613  add_flags(RNXT);
7614  m_evt_handler->_push();
7615  addrem_flags(BLCK|RVAL, RNXT|USTY);
7616  _set_indentation(startindent);
7617  _line_progressed(1);
7618  _maybe_skip_whitespace_tokens();
7619  }
7620  else
7621  {
 // anything that does not start a seq is rejected
7622  _c4err("can only parse a seq into an existing seq");
7623  }
7624  }
7625  else if(has_any(RMAP)) // destination is a map
7626  {
7627  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7628  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7629  if(first == '{')
7630  {
 // flow map: next thing to read is a key (RKEY)
7631  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7632  add_flags(RNXT);
7633  _handle_annotations_before_blck_val_scalar();
7634  m_evt_handler->_push();
7635  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7636  _set_indentation(startindent);
7637  _line_progressed(1);
7638  _maybe_skip_whitespace_tokens();
7639  }
7640  else if(first == '?' && _is_blck_token(rem))
7641  {
 // explicit key indicator: enter a block map in QMRK state and
 // record that a '?' key was entered (m_was_inside_qmrk)
7642  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7643  add_flags(RNXT);
7644  _handle_annotations_before_blck_val_scalar();
7645  m_evt_handler->_push();
7646  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7647  m_was_inside_qmrk = true;
7648  _save_indentation();
7649  _line_progressed(1);
7650  _maybe_skip_whitespace_tokens();
7651  }
7652  else if(first == ':' && _is_blck_token(rem))
7653  {
 // a ':' with no preceding key: the key is set to an empty plain
 // scalar, and the parser goes on to read the value (RVAL)
7654  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7655  add_flags(RNXT);
7656  _handle_annotations_before_blck_val_scalar();
7657  m_evt_handler->_push();
7658  m_evt_handler->set_key_scalar_plain({});
7659  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7660  _save_indentation();
7661  _line_progressed(1);
7662  _maybe_skip_whitespace_tokens();
7663  }
7664  else if(rem.begins_with('&'))
7665  {
 // anchor: buffer it in m_pending_anchors together with its
 // column and line; the style remains undecided
7666  csubstr anchor = _scan_anchor();
7667  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7668  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7669  const size_t line = m_evt_handler->m_curr->pos.line;
7670  _add_annotation(&m_pending_anchors, anchor, indentation, line);
 // NOTE(review): this is the same expression already stored in
 // `indentation` above - looks like it could be reused; confirm.
7671  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7672  }
7673  else if(first == '*')
7674  {
 // reference: in a map destination it is only accepted as a key,
 // ie when it is followed by a colon
7675  csubstr ref = _scan_ref_map();
7676  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7677  if(!_maybe_scan_following_colon())
7678  {
7679  _c4err("cannot read a VAL to a map");
7680  }
7681  else
7682  {
7683  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7684  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7685  add_flags(RNXT);
7686  _handle_annotations_before_start_mapblck(startline);
7687  m_evt_handler->_push();
7688  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7689  m_evt_handler->set_key_ref(ref);
7690  _maybe_skip_whitespace_tokens();
7691  _set_indentation(startindent);
7692  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7693  }
7694  }
7695  else if(first == '!')
7696  {
 // tag: buffer it in m_pending_tags together with its column and
 // line; the style remains undecided
7697  csubstr tag = _scan_tag();
7698  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7699  // we need to buffer the tags, as there may be two
7700  // consecutive tags in here
7701  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7702  const size_t line = m_evt_handler->m_curr->pos.line;
7703  _add_annotation(&m_pending_tags, tag, indentation, line);
7704  }
7705  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7706  {
 // a seq cannot be merged into a map destination
7707  _c4err("cannot parse a seq into an existing map");
7708  }
7709  else
7710  {
 // fallback: try to read a scalar. In a map destination the
 // scalar must be a key, ie it must be followed by a colon.
7711  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
 // NOTE(review): startindent is re-read from the same field as
 // at the top of the function - presumably unchanged; confirm.
7712  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7713  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7714  ScannedScalar sc;
7715  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7716  if(first == '\'')
7717  {
7718  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7719  sc = _scan_scalar_squot();
7720  if(!_maybe_scan_following_colon())
7721  {
7722  _c4err("cannot read a VAL to a map");
7723  }
7724  else
7725  {
 // scalar followed by colon: start a block map with the
 // (possibly filtered) single-quoted scalar as its first key
7726  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7727  add_flags(RNXT);
7728  _handle_annotations_before_start_mapblck(startline);
7729  m_evt_handler->_push();
7730  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7731  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7732  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7733  _set_indentation(startindent);
7734  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7735  _maybe_skip_whitespace_tokens();
7736  }
7737  }
7738  else if(first == '"')
7739  {
7740  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7741  sc = _scan_scalar_dquot();
7742  if(!_maybe_scan_following_colon())
7743  {
7744  _c4err("cannot read a VAL to a map");
7745  }
7746  else
7747  {
 // same steps as the single-quoted case, using the
 // double-quoted filter and setter
7748  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7749  add_flags(RNXT);
7750  _handle_annotations_before_start_mapblck(startline);
7751  m_evt_handler->_push();
7752  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7753  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7754  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7755  _set_indentation(startindent);
7756  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7757  _maybe_skip_whitespace_tokens();
7758  }
7759  }
 // block scalars ('|' and '>') are rejected here with the same message
7760  else if(first == '|')
7761  {
7762  _c4err("block literal keys must be enclosed in '?'");
7763  }
7764  else if(first == '>')
7765  {
7766  _c4err("block literal keys must be enclosed in '?'");
7767  }
7768  else if(_scan_scalar_plain_unk(&sc))
7769  {
7770  _c4dbgp("usty[RMAP]: got a plain scalar");
7771  if(!_maybe_scan_following_colon())
7772  {
7773  _c4err("cannot read a VAL to a map");
7774  }
7775  else
7776  {
 // plain scalar key: the plain filter also receives the
 // starting indentation
7777  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7778  add_flags(RNXT);
7779  _handle_annotations_before_start_mapblck(startline);
7780  m_evt_handler->_push();
7781  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7782  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7783  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7784  _set_indentation(startindent);
7785  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7786  _maybe_skip_whitespace_tokens();
7787  }
7788  }
7789  else
7790  {
 // no scalar could be scanned
7791  _c4err("parse error");
7792  }
7793  }
7794  }
7795  else // destination is unknown
7796  {
 // Neither RSEQ nor RMAP is set. Unlike the two branches above,
 // containers are opened here with the handler's begin_*() calls
 // instead of _push(), and RSEQ/RMAP is added to the flags.
7797  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7798  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
7799  if(first == '[')
7800  {
7801  _c4dbgp("usty[UNK]: it's a flow seq");
7802  add_flags(RNXT);
7803  _handle_annotations_before_blck_val_scalar();
7804  m_evt_handler->begin_seq_val_flow();
7805  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
7806  _set_indentation(startindent);
7807  _line_progressed(1);
7808  _maybe_skip_whitespace_tokens();
7809  }
7810  else if(first == '-' && _is_blck_token(rem))
7811  {
7812  _c4dbgp("usty[UNK]: it's a block seq");
7813  add_flags(RNXT);
7814  _handle_annotations_before_blck_val_scalar();
7815  m_evt_handler->begin_seq_val_block();
7816  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
7817  _set_indentation(startindent);
7818  _line_progressed(1);
7819  _maybe_skip_whitespace_tokens();
7820  }
7821  else if(first == '{')
7822  {
7823  _c4dbgp("usty[UNK]: it's a flow map");
7824  add_flags(RNXT);
7825  _handle_annotations_before_blck_val_scalar();
7826  m_evt_handler->begin_map_val_flow();
7827  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7828  _set_indentation(startindent);
7829  _line_progressed(1);
7830  _maybe_skip_whitespace_tokens();
7831  }
7832  else if(first == '?' && _is_blck_token(rem))
7833  {
 // explicit key indicator: begin a block map in QMRK state
7834  _c4dbgp("usty[UNK]: it's a map + this key is complex");
7835  add_flags(RNXT);
7836  _handle_annotations_before_blck_val_scalar();
7837  m_evt_handler->begin_map_val_block();
7838  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7839  m_was_inside_qmrk = true;
7840  _save_indentation();
7841  _line_progressed(1);
7842  _maybe_skip_whitespace_tokens();
7843  }
7844  else if(first == ':' && _is_blck_token(rem))
7845  {
 // begin a block map whose first key is an empty plain scalar
7846  _c4dbgp("usty[UNK]: it's a map with an empty key");
7847  add_flags(RNXT);
7848  _handle_annotations_before_blck_val_scalar();
7849  m_evt_handler->begin_map_val_block();
7850  m_evt_handler->set_key_scalar_plain({});
7851  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7852  _save_indentation();
7853  _line_progressed(1);
7854  _maybe_skip_whitespace_tokens();
7855  }
7856  else if(first == '&')
7857  {
 // anchor: buffered in m_pending_anchors; style remains undecided
7858  csubstr anchor = _scan_anchor();
7859  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7860  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7861  const size_t line = m_evt_handler->m_curr->pos.line;
7862  _add_annotation(&m_pending_anchors, anchor, indentation, line);
 // NOTE(review): same expression as `indentation` above; confirm
 // whether it could be reused.
7863  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7864  }
7865  else if(first == '*')
7866  {
 // reference: a value when no colon follows, otherwise the first
 // key of a new block map
7867  csubstr ref = _scan_ref_map();
7868  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
7869  if(!_maybe_scan_following_colon())
7870  {
 // note: no _end_stream() call is made in this branch, unlike
 // the scalar-as-value branches further below
7871  _c4dbgp("usty[UNK]: set val ref");
7872  _handle_annotations_before_blck_val_scalar();
7873  m_evt_handler->set_val_ref(ref);
7874  }
7875  else
7876  {
7877  _c4dbgp("usty[UNK]: start new block map, set ref as key");
7878  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7879  add_flags(RNXT);
7880  _handle_annotations_before_start_mapblck(startline);
7881  m_evt_handler->begin_map_val_block();
7882  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7883  m_evt_handler->set_key_ref(ref);
7884  _maybe_skip_whitespace_tokens();
7885  _set_indentation(startindent);
7886  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7887  }
7888  }
7889  else if(first == '!')
7890  {
 // tag: buffered in m_pending_tags; style remains undecided
7891  csubstr tag = _scan_tag();
7892  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
7893  // we need to buffer the tags, as there may be two
7894  // consecutive tags in here
7895  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7896  const size_t line = m_evt_handler->m_curr->pos.line;
7897  _add_annotation(&m_pending_tags, tag, indentation, line);
7898  }
7899  else
7900  {
 // fallback: try to read a scalar. When a colon follows, it
 // becomes the first key of a new block map; otherwise it is set
 // as the value and _end_stream() is called.
7901  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
 // NOTE(review): startindent and first are re-read from the same
 // sources as at the top of the function; rem is not reassigned
 // in between - presumably redundant, confirm.
7902  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7903  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7904  first = rem.str[0];
7905  ScannedScalar sc;
7906  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
7907  if(first == '\'')
7908  {
7909  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
7910  sc = _scan_scalar_squot();
7911  if(!_maybe_scan_following_colon())
7912  {
7913  _c4dbgp("usty[UNK]: set as val");
7914  _handle_annotations_before_blck_val_scalar();
7915  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7916  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7917  _end_stream();
7918  }
7919  else
7920  {
7921  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7922  add_flags(RNXT);
7923  _handle_annotations_before_start_mapblck(startline);
7924  m_evt_handler->begin_map_val_block();
7925  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7926  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7927  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7928  _set_indentation(startindent);
7929  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7930  _maybe_skip_whitespace_tokens();
7931  }
7932  }
7933  else if(first == '"')
7934  {
7935  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
7936  sc = _scan_scalar_dquot();
7937  if(!_maybe_scan_following_colon())
7938  {
7939  _c4dbgp("usty[UNK]: set as val");
7940  _handle_annotations_before_blck_val_scalar();
7941  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7942  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7943  _end_stream();
7944  }
7945  else
7946  {
7947  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
7948  add_flags(RNXT);
7949  _handle_annotations_before_start_mapblck(startline);
7950  m_evt_handler->begin_map_val_block();
7951  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7952  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7953  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7954  _set_indentation(startindent);
7955  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7956  _maybe_skip_whitespace_tokens();
7957  }
7958  }
7959  else if(first == '|')
7960  {
 // block-literal scalar: always set as the value here (no
 // colon check is made in this branch)
7961  _c4dbgp("usty[UNK]: scanning block-literal scalar");
7962  ScannedBlock sb;
7963  _scan_block(&sb, startindent);
7964  _c4dbgp("usty[UNK]: set as val");
7965  _handle_annotations_before_blck_val_scalar();
7966  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7967  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7968  _end_stream();
7969  }
7970  else if(first == '>')
7971  {
 // block-folded scalar: same handling, with the folded filter
 // and setter
7972  _c4dbgp("usty[UNK]: scanning block-folded scalar");
7973  ScannedBlock sb;
7974  _scan_block(&sb, startindent);
7975  _c4dbgp("usty[UNK]: set as val");
7976  _handle_annotations_before_blck_val_scalar();
7977  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7978  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7979  _end_stream();
7980  }
7981  else if(_scan_scalar_plain_unk(&sc))
7982  {
7983  _c4dbgp("usty[UNK]: got a plain scalar");
7984  if(!_maybe_scan_following_colon())
7985  {
7986  _c4dbgp("usty[UNK]: set as val");
7987  _handle_annotations_before_blck_val_scalar();
7988  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7989  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7990  _end_stream();
7991  }
7992  else
7993  {
7994  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7995  add_flags(RNXT);
7996  _handle_annotations_before_start_mapblck(startline);
7997  m_evt_handler->begin_map_val_block();
7998  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7999  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8000  m_evt_handler->set_key_scalar_plain(maybe_filtered);
8001  _set_indentation(startindent);
8002  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
8003  _maybe_skip_whitespace_tokens();
8004  }
8005  }
8006  else
8007  {
 // no scalar could be scanned
8008  _c4err("parse error");
8009  }
8010  }
8011  }
8012 }
8013 
8014 
8015 //-----------------------------------------------------------------------------
8016 
8017 template<class EventHandler>
8018 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8019 {
8020  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8021  m_file = filename;
8022  m_buf = src;
8023  _reset();
8024  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8025  m_evt_handler->begin_stream();
8026  while( ! _finished_file())
8027  {
8028  _scan_line();
8029  while( ! _finished_line())
8030  {
8031  _c4dbgnextline();
8032  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8033  if(has_any(RSEQ))
8034  {
8035  _handle_seq_json();
8036  }
8037  else if(has_any(RMAP))
8038  {
8039  _handle_map_json();
8040  }
8041  else if(has_any(RUNK))
8042  {
8043  _handle_unk_json();
8044  }
8045  else
8046  {
8047  _c4err("internal error");
8048  }
8049  }
8050  if(_finished_file())
8051  break; // it may have finished because of multiline blocks
8052  _line_ended();
8053  }
8054  _end_stream();
8055  m_evt_handler->finish_parse();
8056 }
8057 
8058 
8059 //-----------------------------------------------------------------------------
8060 
8061 template<class EventHandler>
8062 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8063 {
8064  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8065  m_file = filename;
8066  m_buf = src;
8067  _reset();
8068  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8069  m_evt_handler->begin_stream();
8070  while( ! _finished_file())
8071  {
8072  _scan_line();
8073  while( ! _finished_line())
8074  {
8075  _c4dbgnextline();
8076  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8077  if(has_any(FLOW))
8078  {
8079  if(has_none(RSEQIMAP))
8080  {
8081  if(has_any(RSEQ))
8082  {
8083  _handle_seq_flow();
8084  }
8085  else
8086  {
8087  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8088  _handle_map_flow();
8089  }
8090  }
8091  else
8092  {
8093  _handle_seq_imap();
8094  }
8095  }
8096  else if(has_any(BLCK))
8097  {
8098  if(has_any(RSEQ))
8099  {
8100  _handle_seq_block();
8101  }
8102  else
8103  {
8104  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8105  _handle_map_block();
8106  }
8107  }
8108  else if(has_any(RUNK))
8109  {
8110  _handle_unk();
8111  }
8112  else if(has_any(USTY))
8113  {
8114  _handle_usty();
8115  }
8116  else
8117  {
8118  _c4err("internal error");
8119  }
8120  }
8121  if(_finished_file())
8122  break; // it may have finished because of multiline blocks
8123  _line_ended();
8124  }
8125  _end_stream();
8126  m_evt_handler->finish_parse();
8127 }
8128 
8129 } // namespace yml
8130 } // namespace c4
8131 
8132 #undef _c4dbgnextline
8133 
8134 #if defined(_MSC_VER)
8135 # pragma warning(pop)
8136 #elif defined(__clang__)
8137 # pragma clang diagnostic pop
8138 #elif defined(__GNUC__)
8139 # pragma GCC diagnostic pop
8140 #endif
8141 
8142 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
Holds a pointer to an existing tree, and a node id.
Definition: node.hpp:836
Tree const * tree() const noexcept
Definition: node.hpp:908
id_type id() const noexcept
Definition: node.hpp:909
bool readable() const noexcept
because a ConstNodeRef cannot be used to write to the tree, readable() has the same meaning as !...
Definition: node.hpp:894
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
NodeType type(id_type node) const
Definition: tree.hpp:378
id_type prev_sibling(id_type node) const
Definition: tree.hpp:497
bool has_key(id_type node) const
Definition: tree.hpp:409
id_type parent(id_type node) const
Definition: tree.hpp:495
id_type next_sibling(id_type node) const
Definition: tree.hpp:498
csubstr const & key(id_type node) const
Definition: tree.hpp:381
bool has_val(id_type node) const
Definition: tree.hpp:410
csubstr const & val(id_type node) const
Definition: tree.hpp:387
bool is_container(id_type node) const
Definition: tree.hpp:406
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:23
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:48
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1548
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:893
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:252
@ npos
a null string position
Definition: common.hpp:266
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
Definition: parse.cpp:132
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK. (RSEQIMAP) reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _c4dbgfbf(...)
#define _c4dbgchomp(...)
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4dbgfsq(fmt,...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _c4dbgfdq(...)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4dbgfws(...)
#define _c4dbgfps(fmt,...)
#define _c4dbgfbl(...)
#define _c4dbgfb(...)
Filters an input string into a different output string.
a source file position
Definition: common.hpp:296
size_t col
column
Definition: common.hpp:302
size_t line
line
Definition: common.hpp:300
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:298
csubstr name
file name
Definition: common.hpp:304
Options to give to the parser to control its behavior.