rapidyaml  0.7.2
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 #include <c4/dump.hpp>
9 
10 #include <ctype.h>
11 
12 #include "c4/yml/detail/parser_dbg.hpp"
14 #ifdef RYML_DBG
15 #include "c4/yml/detail/print.hpp"
16 #endif
17 
18 
// Helper macros to select code depending on whether RYML_WITH_TAB_TOKENS
// is defined:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments only when it is defined
//  - _RYML_WITHOUT_TAB_TOKENS(...) expands to its arguments only when it is not
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to the first
//    argument when it is defined, and to the second otherwise
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
23 #else
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
27 #endif
28 
29 
30 // scaffold:
// Debug helper: emits a separator, then the line number and the byte offset
// stored in the current parser state (m_evt_handler->m_curr->pos), using the
// _c4dbgq/_c4dbgt debug macros. Must be used inside a ParseEngine member,
// since it refers to m_evt_handler.
31 #define _c4dbgnextline() \
32  do { \
33  _c4dbgq("\n-----------"); \
34  _c4dbgt("handling line={}, offset={}B", \
35  m_evt_handler->m_curr->pos.line, \
36  m_evt_handler->m_curr->pos.offset); \
37  } while(0)
38 
39 
40 #if defined(_MSC_VER)
41 # pragma warning(push)
42 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
43 # pragma warning(disable: 4702/*unreachable code*/)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
54 # if __GNUC__ >= 7
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
56 # endif
57 #endif
58 
59 namespace c4 {
60 namespace yml {
61 
62 namespace {
63 
64 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
65 {
66  RYML_ASSERT(s.len > 0);
67  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
68  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
69 }
70 
71 inline bool _is_doc_begin_token(csubstr s)
72 {
73  RYML_ASSERT(s.begins_with('-'));
74  RYML_ASSERT(!s.ends_with("\n"));
75  RYML_ASSERT(!s.ends_with("\r"));
76  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
77  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
78 }
79 
80 inline bool _is_doc_end_token(csubstr s)
81 {
82  RYML_ASSERT(s.begins_with('.'));
83  RYML_ASSERT(!s.ends_with("\n"));
84  RYML_ASSERT(!s.ends_with("\r"));
85  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
86  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
87 }
88 
// Tell whether s starts with a document token: either `---` or `...`,
// which must end the string or be followed by a space (or by a tab, when
// tab tokens are enabled). Strings shorter than 3 characters, or starting
// with any other character, yield false.
89 inline bool _is_doc_token(csubstr s) noexcept
90 {
91  //
92  // NOTE: this function was failing under some scenarios when
93  // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
94  // related to optimizer assumptions on the input string and
95  // possibly caused by UB around assignment to that string (the
96  // call site was in _scan_block()). For more details see:
97  //
98  // https://github.com/biojppm/rapidyaml/issues/440
99  //
100  // The current version does not suffer from this problem, but it
101  // may appear again.
102  //
103  if(s.len >= 3)
104  {
105  switch(s.str[0])
106  {
107  case '-':
// the check is spelled out here instead of delegating to the helper:
108  //return _is_doc_begin_token(s); // this was failing with gcc -O2
109  return (s.str[1] == '-' && s.str[2] == '-')
110  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
111  case '.':
112  //return _is_doc_end_token(s); // this was failing with gcc -O2
113  return (s.str[1] == '.' && s.str[2] == '.')
114  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
115  }
116  }
117  return false;
118 }
119 
120 inline size_t _is_special_json_scalar(csubstr s)
121 {
122  RYML_ASSERT(s.len);
123  switch(s.str[0])
124  {
125  case 'f':
126  if(s.len >= 5 && s.begins_with("false"))
127  return 5u;
128  break;
129  case 't':
130  if(s.len >= 4 && s.begins_with("true"))
131  return 4u;
132  break;
133  case 'n':
134  if(s.len >= 4 && s.begins_with("null"))
135  return 4u;
136  break;
137  }
138  return 0u;
139 }
140 
141 
142 //-----------------------------------------------------------------------------
143 
144 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
145 {
146  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
147 }
148 
149 //! look for the next newline chars, and jump to the right of those
150 inline substr from_next_line(substr rem)
151 {
152  size_t nlpos = rem.first_of("\r\n");
153  if(nlpos == csubstr::npos)
154  return {};
155  const char nl = rem[nlpos];
156  rem = rem.right_of(nlpos);
157  if(rem.empty())
158  return {};
159  if(_extend_from_combined_newline(nl, rem.front()))
160  rem = rem.sub(1);
161  return rem;
162 }
163 
164 
165 //-----------------------------------------------------------------------------
166 
167 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
168 {
169  RYML_ASSERT(r[*i] == '\n');
170  size_t numnl_following = 0;
171  ++(*i);
172  for( ; *i < r.len; ++(*i))
173  {
174  if(r.str[*i] == '\n')
175  ++numnl_following;
176  // skip leading whitespace
177  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
178  ;
179  else
180  break;
181  }
182  return numnl_following;
183 }
184 
185 /** @p i is set to the first non whitespace character after the line
186  * @return the number of empty lines after the initial position */
187 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
188 {
189  RYML_ASSERT(r[*i] == '\n');
190  size_t numnl_following = 0;
191  ++(*i);
192  if(indentation == 0)
193  {
194  for( ; *i < r.len; ++(*i))
195  {
196  if(r.str[*i] == '\n')
197  ++numnl_following;
198  // skip leading whitespace
199  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
200  ;
201  else
202  break;
203  }
204  }
205  else
206  {
207  for( ; *i < r.len; ++(*i))
208  {
209  if(r.str[*i] == '\n')
210  {
211  ++numnl_following;
212  // skip the indentation after the newline
213  size_t stop = *i + indentation;
214  for( ; *i < r.len; ++(*i))
215  {
216  if(r.str[*i] != ' ' && r.str[*i] != '\r')
217  break;
218  RYML_ASSERT(*i < stop);
219  }
220  C4_UNUSED(stop);
221  }
222  // skip leading whitespace
223  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
224  ;
225  else
226  break;
227  }
228  }
229  return numnl_following;
230 }
231 
232 } // anon namespace
233 
234 
235 //-----------------------------------------------------------------------------
236 //-----------------------------------------------------------------------------
237 //-----------------------------------------------------------------------------
238 
// NOTE(review): the declarator line of this member is missing from this
// listing. Judging by the body it is presumably the ParseEngine destructor;
// confirm against the class declaration.
// It releases the newline-offsets array through _free() and then calls _clr().
239 template<class EventHandler>
241 {
242  _free();
243  _clr();
244 }
245 
// NOTE(review): the declarator line is missing from this listing. Judging by
// the initializers this is presumably the constructor taking an event handler
// pointer (evt_handler) and the parser options (opts); confirm against the
// class declaration.
// All the remaining members are value-initialized, and the newline-offsets
// array starts out empty (size and capacity of 0).
246 template<class EventHandler>
248  : m_options(opts)
249  , m_file()
250  , m_buf()
251  , m_evt_handler(evt_handler)
252  , m_pending_anchors()
253  , m_pending_tags()
254  , m_newline_offsets()
255  , m_newline_offsets_size(0)
256  , m_newline_offsets_capacity(0)
257  , m_newline_offsets_buf()
258 {
// evt_handler is checked with RYML_CHECK
259  RYML_CHECK(evt_handler);
260 }
261 
// NOTE(review): the declarator line is missing from this listing. Since every
// member (including the m_newline_offsets pointer) is taken over from `that`
// and `that` is then cleared, this is presumably the move constructor;
// confirm against the class declaration.
262 template<class EventHandler>
264  : m_options(that.m_options)
265  , m_file(that.m_file)
266  , m_buf(that.m_buf)
267  , m_evt_handler(that.m_evt_handler)
268  , m_pending_anchors(that.m_pending_anchors)
269  , m_pending_tags(that.m_pending_tags)
270  , m_newline_offsets(that.m_newline_offsets)
271  , m_newline_offsets_size(that.m_newline_offsets_size)
272  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
273  , m_newline_offsets_buf(that.m_newline_offsets_buf)
274 {
// the offsets array now belongs to this object: clear the source
275  that._clr();
276 }
277 
// NOTE(review): the declarator line is missing from this listing. Since the
// newline-offsets array is duplicated rather than taken over, this is
// presumably the copy constructor; confirm against the class declaration.
// NOTE(review): m_newline_offsets_buf is left empty here, whereas the
// assignment which copies the offsets also copies that member; check whether
// the difference is intended.
278 template<class EventHandler>
280  : m_options(that.m_options)
281  , m_file(that.m_file)
282  , m_buf(that.m_buf)
283  , m_evt_handler(that.m_evt_handler)
284  , m_pending_anchors(that.m_pending_anchors)
285  , m_pending_tags(that.m_pending_tags)
286  , m_newline_offsets()
287  , m_newline_offsets_size()
288  , m_newline_offsets_capacity()
289  , m_newline_offsets_buf()
290 {
// only allocate when the source has an offsets array
291  if(that.m_newline_offsets_capacity)
292  {
293  _resize_locations(that.m_newline_offsets_capacity);
294  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
// copy only the entries in use
295  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
296  m_newline_offsets_size = that.m_newline_offsets_size;
297  }
298 }
299 
// NOTE(review): the declarator line is missing from this listing. Since all
// members are taken over from `that`, which is then cleared, and *this is
// returned, this is presumably the move assignment operator; confirm against
// the class declaration.
// NOTE(review): there is no guard against self-assignment: _free() runs
// before the members of `that` are read.
300 template<class EventHandler>
302 {
// release the offsets array currently held, before taking over the other one
303  _free();
304  m_options = (that.m_options);
305  m_file = (that.m_file);
306  m_buf = (that.m_buf);
307  m_evt_handler = that.m_evt_handler;
308  m_pending_anchors = that.m_pending_anchors;
309  m_pending_tags = that.m_pending_tags;
310  m_newline_offsets = (that.m_newline_offsets);
311  m_newline_offsets_size = (that.m_newline_offsets_size);
312  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
313  m_newline_offsets_buf = (that.m_newline_offsets_buf);
314  that._clr();
315  return *this;
316 }
317 
// NOTE(review): the declarator line is missing from this listing. Since the
// newline offsets are duplicated rather than taken over, and *this is
// returned, this is presumably the copy assignment operator; confirm against
// the class declaration.
// NOTE(review): there is no guard against self-assignment: _free() runs
// before the members of `that` are read.
318 template<class EventHandler>
320 {
321  _free();
322  m_options = (that.m_options);
323  m_file = (that.m_file);
324  m_buf = (that.m_buf);
325  m_evt_handler = that.m_evt_handler;
326  m_pending_anchors = that.m_pending_anchors;
327  m_pending_tags = that.m_pending_tags;
// grow the offsets array when the source needs more room than is available
328  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
329  _resize_locations(that.m_newline_offsets_capacity);
330  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
331  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): when `that` holds no offsets, no resize happens above, so
// this memcpy may receive null pointers together with a byte count of 0;
// confirm this is acceptable.
332  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
333  m_newline_offsets_size = that.m_newline_offsets_size;
334  m_newline_offsets_buf = that.m_newline_offsets_buf;
335  return *this;
336 }
337 
// NOTE(review): the declarator line is missing from this listing; this is
// presumably _clr(), which is called elsewhere in this file after an object
// has been moved from. Confirm against the class declaration.
// Every member is reset to its value-initialized state. Nothing is
// deallocated here: releasing the offsets array is the job of _free().
338 template<class EventHandler>
340 {
341  m_options = {};
342  m_file = {};
343  m_buf = {};
344  m_evt_handler = {};
345  m_pending_anchors = {};
346  m_pending_tags = {};
347  m_newline_offsets = {};
348  m_newline_offsets_size = {};
349  m_newline_offsets_capacity = {};
350  m_newline_offsets_buf = {};
351 }
352 
353 template<class EventHandler>
354 void ParseEngine<EventHandler>::_free()
355 {
356  if(m_newline_offsets)
357  {
358  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
359  m_newline_offsets = nullptr;
360  m_newline_offsets_size = 0u;
361  m_newline_offsets_capacity = 0u;
362  m_newline_offsets_buf = 0u;
363  }
364 }
365 
366 
367 //-----------------------------------------------------------------------------
368 
369 template<class EventHandler>
370 void ParseEngine<EventHandler>::_reset()
371 {
372  m_pending_anchors = {};
373  m_pending_tags = {};
374  if(m_options.locations())
375  {
376  _prepare_locations();
377  }
378  m_was_inside_qmrk = false;
379 }
380 
381 
382 //-----------------------------------------------------------------------------
383 
// Re-point the strings kept by the parser after the arena moved from
// prev_arena to next_arena: every kept string which is a substring of
// prev_arena gets its pointer moved to the same offset within next_arena.
// The strings visited are m_buf, m_newline_offsets_buf, and the strings of
// the pending tags and pending anchors. Lengths are left untouched.
384 template<class EventHandler>
385 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
386 {
// local helper macro, undefined again at the end of the function
387  #define _ryml_relocate(s) \
388  if(s.is_sub(prev_arena)) \
389  { \
390  s.str = next_arena.str + (s.str - prev_arena.str); \
391  }
392  _ryml_relocate(m_buf);
393  _ryml_relocate(m_newline_offsets_buf);
394  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
395  _ryml_relocate(m_pending_tags.annotations[i].str);
396  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
397  _ryml_relocate(m_pending_anchors.annotations[i].str);
398  #undef _ryml_relocate
399 }
400 
401 template<class EventHandler>
402 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
403 {
404  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
405 }
406 
407 
408 //-----------------------------------------------------------------------------
409 
// Write a description of the current parse position through dumpfn:
// first the current source line (truncated to 80 characters), prefixed by
// "file:line:col: " (the file part only if m_file is set), then a second line
// with a '^~~~' marker placed under the remaining (not yet parsed) portion of
// that line. When the current line is empty, only a newline is written.
// With RYML_DBG defined, the flags of the current state are also written.
// dumpfn is a callable invoked with string pieces to output.
410 template<class EventHandler>
411 template<class DumpFn>
412 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
413 {
414  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
415  auto const& lc = st->line_contents;
416  csubstr contents = lc.stripped;
417  if(contents.len)
418  {
419  // print the yaml src line
// offs is the width of the "file:line:col: " prefix, needed to align the
// marker in the next line: 3 accounts for the two ':' and the space.
// NOTE(review): to_chars() is called with an empty buffer, presumably to
// obtain just the number of characters required -- confirm.
420  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
421  if(m_file.len)
422  {
423  detail::_dump(dumpfn, "{}:", m_file);
424  offs += m_file.len + 1;
425  }
426  detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col);
427  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
428  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
429  detail::_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
430  // highlight the remaining portion of the previous line
431  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
432  size_t lastcol = firstcol + lc.rem.len;
// pad up to the column where the remaining portion starts
433  for(size_t i = 0; i < offs + firstcol; ++i)
434  dumpfn(" ");
435  dumpfn("^");
// the marker is at most 80 characters wide, counting the '^'
436  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
437  dumpfn("~");
// the columns are reported 1-based
438  detail::_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
439  }
440  else
441  {
442  dumpfn("\n");
443  }
444 
445 #ifdef RYML_DBG
446  // next line: print the state flags
447  {
448  char flagbuf_[128];
449  detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
450  }
451 #endif
452 }
453 
454 
455 //-----------------------------------------------------------------------------
456 
457 template<class EventHandler>
458 template<class ...Args>
459 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
460 {
461  char errmsg[RYML_ERRMSG_SIZE];
462  detail::_SubstrWriter writer(errmsg);
463  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
464  detail::_dump(dumpfn, fmt, args...);
465  writer.append('\n');
466  _fmt_msg(dumpfn);
467  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
468  m_evt_handler->cancel_parse();
469  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
470 }
471 
472 
473 //-----------------------------------------------------------------------------
#ifdef RYML_DBG
/** Debug print (only with RYML_DBG): when debugging is enabled, write
 * the formatted message to stdout, followed by the description of the
 * current parse position. */
template<class EventHandler>
template<class ...Args>
void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
{
    if(!_dbg_enabled())
        return;
    auto to_stdout = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
    detail::_dump(to_stdout, fmt, args...);
    to_stdout("\n");
    _fmt_msg(to_stdout);
}
#endif
488 
489 
490 //-----------------------------------------------------------------------------
491 template<class EventHandler>
492 bool ParseEngine<EventHandler>::_finished_file() const
493 {
494  bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
495  if(ret)
496  {
497  _c4dbgp("finished file!!!");
498  }
499  return ret;
500 }
501 
502 template<class EventHandler>
503 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
504 {
505  return m_evt_handler->m_curr->line_contents.rem.empty();
506 }
507 
508 
509 //-----------------------------------------------------------------------------
510 
511 template<class EventHandler>
512 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
513 {
514  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
515  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
516  {
517  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
518  if(pos == npos)
519  pos = rem.len; // maybe the line is just all whitespace
520  _c4dbgpf("skip {} whitespace characters", pos);
521  _line_progressed(pos);
522  }
523 }
524 
525 template<class EventHandler>
526 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
527 {
528  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
529  if(rem.len && rem.str[0] == c)
530  {
531  size_t pos = rem.first_not_of(c);
532  if(pos == npos)
533  pos = rem.len; // maybe the line is just all c
534  _c4dbgpf("skip {}x'{}'", pos, c);
535  _line_progressed(pos);
536  }
537 }
538 
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
/** If the remainder of the current line starts with @p c, advance past
 * the leading run of @p c, but by no more than @p max_to_skip
 * characters. Otherwise do nothing. */
template<class EventHandler>
void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
{
    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
    if(rem.len == 0 || rem.str[0] != c)
        return;
    size_t num = rem.first_not_of(c);
    if(num == npos)
        num = rem.len; // the rest of the line consists only of c
    if(num > max_to_skip)
        num = max_to_skip;
    _c4dbgpf("skip {}x'{}'", num, c);
    _line_progressed(num);
}
#endif
556 
557 template<class EventHandler>
558 template<size_t N>
559 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
560 {
561  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
562  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
563  if(pos == npos)
564  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
565  _c4dbgpf("skip {} characters", pos);
566  _line_progressed(pos);
567 }
568 
569 template<class EventHandler>
570 void ParseEngine<EventHandler>::_skip_comment()
571 {
572  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
573  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
574  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575  csubstr full = m_evt_handler->m_curr->line_contents.full;
576  // raise an error if the comment is not preceded by whitespace
577  if(!full.begins_with('#'))
578  {
579  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
580  const char c = full[(size_t)(rem.str - full.str - 1)];
581  if(C4_UNLIKELY(c != ' ' && c != '\t'))
582  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
583  }
584  else
585  {
586  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
587  }
588  _c4dbgpf("comment was '{}'", rem);
589  _line_progressed(rem.len);
590 }
591 
592 template<class EventHandler>
593 void ParseEngine<EventHandler>::_maybe_skip_comment()
594 {
595  csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
596  if(s.begins_with('#'))
597  {
598  _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
599  _skip_comment();
600  }
601 }
602 
603 template<class EventHandler>
604 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
605 {
606  if(m_evt_handler->m_curr->line_contents.rem.len)
607  {
608  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
609  {
610  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
611  if(pos == npos)
612  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
613  _c4dbgpf("skip {}x'{}'", pos, ' ');
614  _line_progressed(pos);
615  }
616  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
617  {
618  _c4dbgp("found ':' colon next");
619  _line_progressed(1);
620  return true;
621  }
622  }
623  return false;
624 }
625 
626 template<class EventHandler>
627 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
628 {
629  if(m_evt_handler->m_curr->line_contents.rem.len)
630  {
631  if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
632  {
633  size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
634  if(pos == npos)
635  pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
636  _c4dbgpf("skip {}x'{}'", pos, ' ');
637  _line_progressed(pos);
638  }
639  if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
640  {
641  _c4dbgp("found ',' comma next");
642  _line_progressed(1);
643  return true;
644  }
645  }
646  return false;
647 }
648 
649 
650 //-----------------------------------------------------------------------------
651 
652 template<class EventHandler>
653 csubstr ParseEngine<EventHandler>::_scan_anchor()
654 {
655  csubstr s = m_evt_handler->m_curr->line_contents.rem;
656  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
657  csubstr anchor = s.range(1, s.first_of(' '));
658  _line_progressed(1u + anchor.len);
659  _maybe_skipchars(' ');
660  return anchor;
661 }
662 
663 template<class EventHandler>
664 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
665 {
666  csubstr s = m_evt_handler->m_curr->line_contents.rem;
667  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
668  csubstr ref = s.first(s.first_of(",] :"));
669  _line_progressed(ref.len);
670  return ref;
671 }
672 
673 template<class EventHandler>
674 csubstr ParseEngine<EventHandler>::_scan_ref_map()
675 {
676  csubstr s = m_evt_handler->m_curr->line_contents.rem;
677  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
678  csubstr ref = s.first(s.first_of(",} "));
679  _line_progressed(ref.len);
680  return ref;
681 }
682 
683 template<class EventHandler>
684 csubstr ParseEngine<EventHandler>::_scan_tag()
685 {
686  csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
687  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
688  csubstr t;
689  if(rem.begins_with("!!"))
690  {
691  _c4dbgp("begins with '!!'");
692  if(has_any(FLOW))
693  t = rem.left_of(rem.first_of(" ,"));
694  else
695  t = rem.left_of(rem.first_of(' '));
696  }
697  else if(rem.begins_with("!<"))
698  {
699  _c4dbgp("begins with '!<'");
700  t = rem.left_of(rem.first_of('>'), true);
701  }
702  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
703  else if(rem.begins_with("!h!"))
704  {
705  _c4dbgp("begins with '!h!'");
706  t = rem.left_of(rem.first_of(' '));
707  }
708  #endif
709  else
710  {
711  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
712  _c4dbgp("begins with '!'");
713  if(has_any(FLOW))
714  t = rem.left_of(rem.first_of(" ,"));
715  else
716  t = rem.left_of(rem.first_of(' '));
717  }
718  _line_progressed(t.len);
719  _maybe_skip_whitespace_tokens();
720  return t;
721 }
722 
723 
724 //-----------------------------------------------------------------------------
725 
726 template<class EventHandler>
727 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
728 {
729  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
730 
731  // it's not a scalar if it starts with any of these characters:
732  switch(s.str[0])
733  {
734  // these are all legal tokens which mean no scalar is starting:
735  case '[':
736  case ']':
737  case '{':
738  case '}':
739  case '!':
740  case '&':
741  case '*':
742  case '|':
743  case '>':
744  case '#':
745  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
746  return false;
747  // '-' and ':' are illegal at the beginning if not followed by a scalar character
748  case '-':
749  case ':':
750  if(s.len > 1)
751  {
752  switch(s.str[1])
753  {
754  case '\n':
755  case '\r':
756  case '{':
757  case '[':
758  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
759  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
760  break;
761  case ' ':
762  case '}':
763  case ']':
764  if(s.str[0] == ':')
765  {
766  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
767  return false;
768  }
769  break;
770  default:
771  break;
772  }
773  }
774  else
775  {
776  return false;
777  }
778  break;
779  case '?':
780  if(s.len > 1)
781  {
782  switch(s.str[1])
783  {
784  case ' ':
785  case '\n':
786  case '\r':
787  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
788  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
789  return false;
790  case '{':
791  case '}':
792  case '[':
793  case ']':
794  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
795  break;
796  default:
797  break;
798  }
799  }
800  else
801  {
802  return false;
803  }
804  break;
805  // everything else is a legal starting character
806  default:
807  break;
808  }
809 
810  return true;
811 }
812 
// Scan a plain (unquoted) scalar while reading a value inside a flow
// sequence. The scan starts at the remainder of the current line and may
// continue over the following lines, until a terminating character is found
// or the file ends.
// On success, sc->scalar is set to the range of the source buffer covered by
// the scalar, with trailing spaces (and tabs, when tab tokens are enabled)
// removed; sc->needs_filter is set to true if the scan went past the first
// line.
// Returns false (leaving *sc untouched) when no scalar starts at the current
// position.
813 template<class EventHandler>
814 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
815 {
816  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
817  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
818  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
819  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
820  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
821 
822  substr s = m_evt_handler->m_curr->line_contents.rem;
823  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
824  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
825 
826  if(!s.len)
827  return false;
828 
829  if(!_is_valid_start_scalar_plain_flow(s))
830  return false;
831 
832  _c4dbgp("scanning seqflow scalar...");
833 
// the scalar is extracted at the end from the offsets into m_buf
834  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
835  bool needs_filter = false;
// each iteration of this loop scans one line
836  while(true)
837  {
838  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
839  for(size_t i = 0; i < s.len; ++i)
840  {
841  const char c = s.str[i];
842  switch(c)
843  {
844  case ',':
845  _c4dbgpf("found terminating character at {}: '{}'", i, c);
846  _line_progressed(i);
// NOTE(review): _line_progressed(i) has already run at this point; if it
// advances pos.offset by i, then the `+ i` below counts it twice. This does
// not appear to change the outcome of the comparison -- confirm.
847  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
848  {
849  goto ended_scalar;
850  }
851  else
852  {
853  _c4dbgp("at the beginning. no scalar here.");
854  return false;
855  }
856  break;
857  case ']':
858  _c4dbgpf("found terminating character at {}: '{}'", i, c);
859  _line_progressed(i);
860  goto ended_scalar;
861  break;
862  case '#':
863  _c4dbgp("found suspicious '#'");
// '#' only terminates the scalar at the start of the line or after whitespace
864  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
865  {
866  _c4dbgpf("found terminating character at {}: '{}'", i, c);
867  _line_progressed(i);
868  goto ended_scalar;
869  }
870  break;
871  case ':':
872  _c4dbgp("found suspicious ':'");
873  if(s.len > i+1)
874  {
875  const char next = s.str[i+1];
876  _c4dbgpf("next char is '{}'", _c4prc(next));
877  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
878  {
879  _c4dbgp("map starting!");
880  if(m_evt_handler->m_curr->pos.offset + i > start_offset)
881  {
882  _c4dbgp("scalar finished!");
883  _line_progressed(i);
884  goto ended_scalar;
885  }
886  else
887  {
888  _c4dbgp("at the beginning. no scalar here.");
889  return false;
890  }
891  }
892  else
893  {
894  _c4dbgp("it's a scalar indeed.");
895  ++i; // skip the next char
896  }
897  }
898  else if(s.len == i+1)
899  {
900  _c4dbgp("':' at line end. map starting!");
901  return false;
902  }
903  break;
904  case '[':
905  case '{':
906  case '}':
907  _line_progressed(i);
908  _c4err("invalid character: '{}'", c); // noreturn
909  default:
910  ;
911  }
912  }
// no terminating character in this line: consume it and go on to the next
913  _line_progressed(s.len);
914  if(!_finished_file())
915  {
916  _c4dbgp("next line!");
917  _line_ended();
918  _scan_line();
919  }
920  else
921  {
922  _c4dbgp("file finished!");
923  goto ended_scalar;
924  }
925  s = m_evt_handler->m_curr->line_contents.rem;
926  needs_filter = true;
927  }
928 
929 ended_scalar:
930 
931  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
932  sc->needs_filter = needs_filter;
933 
934  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
935 
936  return true;
937 }
938 
// Scan a plain (unquoted) scalar while reading a key or a value inside a
// flow map (or inside an implicit map in a flow sequence, RSEQIMAP). The scan
// starts at the remainder of the current line and may continue over the
// following lines, until a terminating character is found or the file ends.
// On success, sc->scalar is set to the range of the source buffer covered by
// the scalar, with trailing spaces and newline characters (and tabs, when tab
// tokens are enabled) removed; sc->needs_filter is set to true if the scan
// went past the first line.
// Returns false (leaving *sc untouched) when no scalar starts at the current
// position.
939 template<class EventHandler>
940 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
941 {
942  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
943  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
944  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
945  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
946  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
947 
948  substr s = m_evt_handler->m_curr->line_contents.rem;
949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
950 
951  if(!s.len)
952  return false;
953 
954  if(!_is_valid_start_scalar_plain_flow(s))
955  return false;
956 
957  _c4dbgp("scanning scalar...");
958 
// the scalar is extracted at the end from the offsets into m_buf
959  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
960  bool needs_filter = false;
// each iteration of this loop scans one line
961  while(true)
962  {
963  for(size_t i = 0; i < s.len; ++i)
964  {
965  const char c = s.str[i];
966  switch(c)
967  {
968  case ',':
969  case '}':
970  _line_progressed(i);
971  _c4dbgpf("found terminating character: '{}'", c);
972  goto ended_scalar;
973  case ':':
// ':' only terminates the scalar at the end of the line, or when followed by
// a space, ',' or '}' (or a tab, when tab tokens are enabled)
974  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
975  {
976  _line_progressed(i);
977  _c4dbgpf("found terminating character: '{}'", c);
978  goto ended_scalar;
979  }
980  break;
981  case '{':
982  case '[':
983  _line_progressed(i);
984  _c4err("invalid character: '{}'", c); // noreturn
985  break;
986  case ']':
987  _line_progressed(i);
// ']' is only accepted as a terminator inside an implicit map of a flow sequence
988  if(has_any(RSEQIMAP))
989  goto ended_scalar;
990  else
991  _c4err("invalid character: '{}'", c); // noreturn
992  break;
993  case '#':
// '#' only terminates the scalar at the start of the line or after whitespace
994  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
995  {
996  _line_progressed(i);
997  _c4dbgpf("found terminating character: '{}'", c);
998  goto ended_scalar;
999  }
1000  break;
1001  default:
1002  ;
1003  }
1004  }
// no terminating character in this line: consume it and go on to the next
// NOTE(review): the "next line!" debug message is emitted twice (here and
// inside the branch below).
1005  _c4dbgp("next line!");
1006  _line_progressed(s.len);
1007  if(!_finished_file())
1008  {
1009  _c4dbgp("next line!");
1010  _line_ended();
1011  _scan_line();
1012  }
1013  else
1014  {
1015  _c4dbgp("file finished!");
1016  goto ended_scalar;
1017  }
1018  s = m_evt_handler->m_curr->line_contents.rem;
1019  needs_filter = true;
1020  }
1021 
1022 ended_scalar:
1023 
1024  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1025  sc->needs_filter = needs_filter;
1026 
1027  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1028 
1029  return true;
1030 }
1031 
// Scan an unquoted json scalar inside a flow sequence, looking only at the
// remainder of the current line.
// On success, sc->scalar is set to the scalar, sc->needs_filter is set to
// false, and the scalar is consumed from the line.
// Returns false when the remainder is empty, when it starts with ']', '{' or
// ',', or when the scalar found would be empty.
1032 template<class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1034 {
1035  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1036  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1037  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1038  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1039 
1040  substr s = m_evt_handler->m_curr->line_contents.rem;
1041  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1042 
1043  if(!s.len)
1044  return false;
1045 
1046  _c4dbgp("scanning scalar...");
1047 
1048  switch(s.str[0])
1049  {
1050  case ']':
1051  case '{':
1052  case ',':
1053  _c4dbgp("not a scalar.");
1054  return false;
1055  }
1056 
// first try the literals false, true and null
1057  {
1058  const size_t len = _is_special_json_scalar(s);
1059  if(len)
1060  {
1061  sc->scalar = s.first(len);
1062  sc->needs_filter = false;
1063  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1064  _line_progressed(len);
1065  return true;
1066  }
1067  }
1068 
1069  // must be a number
// the scalar extends up to the first terminating character, or up to the end
// of the line when there is none; no validation of the characters is done here
1070  size_t i = 0;
1071  for( ; i < s.len; ++i)
1072  {
1073  const char c = s.str[i];
1074  switch(c)
1075  {
1076  case ',':
1077  case ']':
1078  case ' ':
1079  case '\t':
1080  _c4dbgpf("found terminating character: '{}'", c);
1081  goto ended_scalar;
1082  case '#':
// '#' only terminates the scalar at the start of the line or after a space
1083  if(!i || s.str[i-1] == ' ')
1084  {
1085  _c4dbgpf("found terminating character: '{}'", c);
1086  goto ended_scalar;
1087  }
1088  break;
1089  default:
1090  ;
1091  }
1092  }
1093 
1094 ended_scalar:
1095 
1096  if(C4_LIKELY(i > 0))
1097  {
1098  _line_progressed(i);
1099  sc->scalar = s.first(i);
1100  sc->needs_filter = false;
1101  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1102  return true;
1103  }
1104 
1105  return false;
1106 }
1107 
1108 template<class EventHandler>
1109 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1110 {
1111  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1112  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1113  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1114  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1115  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1116 
1117  substr s = m_evt_handler->m_curr->line_contents.rem;
1118  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1119 
1120  if(!s.len)
1121  return false;
1122 
1123  _c4dbgp("scanning scalar...");
1124 
1125  {
1126  const size_t len = _is_special_json_scalar(s);
1127  if(len)
1128  {
1129  sc->scalar = s.first(len);
1130  sc->needs_filter = false;
1131  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1132  _line_progressed(len);
1133  return true;
1134  }
1135  }
1136 
1137  // must be a number
1138  size_t i = 0;
1139  for( ; i < s.len; ++i)
1140  {
1141  const char c = s.str[i];
1142  switch(c)
1143  {
1144  case ',':
1145  case '}':
1146  case ' ':
1147  case '\t':
1148  _c4dbgpf("found terminating character: '{}'", c);
1149  goto ended_scalar;
1150  case '#':
1151  if(!i || s.str[i-1] == ' ')
1152  {
1153  _c4dbgpf("found terminating character: '{}'", c);
1154  goto ended_scalar;
1155  }
1156  break;
1157  default:
1158  ;
1159  }
1160  }
1161 
1162 ended_scalar:
1163 
1164  if(C4_LIKELY(i > 0))
1165  {
1166  _line_progressed(i);
1167  sc->scalar = s.first(i);
1168  sc->needs_filter = false;
1169  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1170  return true;
1171  }
1172 
1173  return false;
1174 }
1175 
1176 template<class EventHandler>
1177 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1178 {
1179  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1180  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1181 }
1182 
1183 template<class EventHandler>
1184 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1185 {
1186  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1187  return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1188 }
1189 
1190 template<class EventHandler>
1191 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1192 {
1193  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1194  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1195  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
1196 
1197  substr s = m_evt_handler->m_curr->line_contents.rem;
1198  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1199 
1200  if(!s.len)
1201  return false;
1202 
1203  switch(s.str[0])
1204  {
1205  case '-':
1206  if(_is_blck_token(s))
1207  {
1208  return false;
1209  }
1210  else if(_is_doc_begin(s))
1211  {
1212  _c4dbgp("token is doc start");
1213  return false;
1214  }
1215  break;
1216  case ':':
1217  case '?':
1218  if(_is_blck_token(s))
1219  return false;
1220  break;
1221  case '[':
1222  case '{':
1223  case '&':
1224  case '*':
1225  case '!':
1226  _RYML_WITH_TAB_TOKENS(case '\t':)
1227  return false;
1228  case '.':
1229  if(_is_doc_end(s))
1230  {
1231  _c4dbgp("token is doc end");
1232  return false;
1233  }
1234  break;
1235  }
1236 
1237  _c4dbgpf("plain scalar! indentation={}", indentation);
1238 
1239  const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1240  const size_t start_line = m_evt_handler->m_curr->pos.line;
1241 
1242  bool needs_filter = false;
1243  while(true)
1244  {
1245  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1246  for(size_t i = 0; i < s.len; ++i)
1247  {
1248  const char curr = s.str[i];
1249  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1250  switch(curr)
1251  {
1252  case ':':
1253  _c4dbgpf("[{}]: got suspicious ':'", i);
1254  // are there more characters?
1255  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1256  {
1257  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1258  _line_progressed(i);
1259  // ': ' is accepted only on the first line
1260  if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1261  {
1262  _c4dbgp("start line. scalar ends here");
1263  goto ended_scalar;
1264  }
1265  else
1266  {
1267  _c4err("parse error");
1268  }
1269  }
1270  else
1271  {
1272  size_t j = i;
1273  while(j + 1 < s.len && s.str[j+1] == ':')
1274  {
1275  _c4dbgp("skip colon");
1276  ++j;
1277  }
1278  i = j > i ? j-1 : i;
1279  _c4dbgp("nothing to see here");
1280  }
1281  break;
1282  case '#':
1283  _c4dbgp("got suspicious '#'");
1284  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1285  {
1286  _c4dbgp("comment! scalar ends here");
1287  _line_progressed(i);
1288  goto ended_scalar;
1289  }
1290  else
1291  {
1292  _c4dbgp("nothing to see here");
1293  }
1294  break;
1295  }
1296  }
1297  _line_progressed(s.len);
1298  csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1299  next_peeked = next_peeked.trimr("\n\r");
1300  const size_t next_indentation = next_peeked.first_not_of(' ');
1301  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1302  if(next_indentation < indentation)
1303  {
1304  _c4dbgp("smaller indentation! scalar ended");
1305  goto ended_scalar;
1306  }
1307  else if(next_indentation == 0 && next_peeked.len > 0)
1308  {
1309  const char first = next_peeked.str[0];
1310  switch(first)
1311  {
1312  case '-':
1313  next_peeked = next_peeked.trimr("\n\r");
1314  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1315  if(_is_doc_begin_token(next_peeked))
1316  {
1317  _c4dbgp("doc begin! scalar ended");
1318  goto ended_scalar;
1319  }
1320  break;
1321  case '.':
1322  next_peeked = next_peeked.trimr("\n\r");
1323  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1324  if(_is_doc_end_token(next_peeked))
1325  {
1326  _c4dbgp("doc end! scalar ended");
1327  goto ended_scalar;
1328  }
1329  break;
1330  }
1331  }
1332  // load with next line
1333  _c4dbgp("next line!");
1334  if(!_finished_file())
1335  {
1336  _c4dbgp("next line!");
1337  _line_ended();
1338  _scan_line();
1339  }
1340  else
1341  {
1342  _c4dbgp("file finished!");
1343  goto ended_scalar;
1344  }
1345  s = m_evt_handler->m_curr->line_contents.rem;
1346  needs_filter = true;
1347  }
1348 
1349 ended_scalar:
1350 
1351  sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1352  sc->needs_filter = needs_filter;
1353 
1354  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1355 
1356  return true;
1357 }
1358 
1359 template<class EventHandler>
1360 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1361 {
1362  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1363  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1364  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1365  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1366  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1367  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1368  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1369 }
1370 
1371 template<class EventHandler>
1372 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1373 {
1374  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1375  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1376  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1377  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1378  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1379  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1380 }
1381 
1382 template<class EventHandler>
1383 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1384 {
1385  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1386  return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1387 }
1388 
1389 
1390 //-----------------------------------------------------------------------------
1391 
1392 template<class EventHandler>
1393 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1394 {
1395  substr rem{}; // declare here because of the goto
1396  size_t nlpos{}; // declare here because of the goto
1397  pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1398  if(pos >= m_buf.len)
1399  goto next_is_empty;
1400 
1401  // look for the next newline chars, and jump to the right of those
1402  rem = from_next_line(m_buf.sub(pos));
1403  if(rem.empty())
1404  goto next_is_empty;
1405 
1406  // now get everything up to and including the following newline chars
1407  nlpos = rem.first_of("\r\n");
1408  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1409  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1410  rem = rem.left_of(nlpos, /*include_pos*/true);
1411 
1412  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1413  return rem;
1414 
1415 next_is_empty:
1416  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1417  return {};
1418 }
1419 
1420 //-----------------------------------------------------------------------------
1421 
1422 template<class EventHandler>
1423 void ParseEngine<EventHandler>::_scan_line()
1424 {
1425  if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1426  m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1427  else
1428  m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
1429 }
1430 
1431 template<class EventHandler>
1432 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1433 {
1434  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1435  m_evt_handler->m_curr->pos.offset += ahead;
1436  m_evt_handler->m_curr->pos.col += ahead;
1437  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1438  m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1439 }
1440 
1441 template<class EventHandler>
1442 void ParseEngine<EventHandler>::_line_ended()
1443 {
1444  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1445  m_evt_handler->m_curr->pos.line,
1446  m_evt_handler->m_curr->line_contents.full.len,
1447  m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1448  m_evt_handler->m_curr->pos.col, 1);
1449  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1450  m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1451  ++m_evt_handler->m_curr->pos.line;
1452  m_evt_handler->m_curr->pos.col = 1;
1453 }
1454 
1455 template<class EventHandler>
1456 void ParseEngine<EventHandler>::_line_ended_undo()
1457 {
1458  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1459  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1460  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1461  const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1462  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1463  m_evt_handler->m_curr->pos.offset -= delta;
1464  --m_evt_handler->m_curr->pos.line;
1465  m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1466  // don't forget to undo also the changes to the remainder of the line
1467  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
1468  m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1469 }
1470 
1471 
1472 //-----------------------------------------------------------------------------
1473 template<class EventHandler>
1474 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1475 {
1476  m_evt_handler->m_curr->indref = indentation;
1477  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1478 }
1479 
1480 template<class EventHandler>
1481 void ParseEngine<EventHandler>::_save_indentation()
1482 {
1483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1484  m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1485  _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1486 }
1487 
1488 
1489 //-----------------------------------------------------------------------------
1490 
1491 template<class EventHandler>
1492 void ParseEngine<EventHandler>::_end_map_blck()
1493 {
1494  _c4dbgp("mapblck: end");
1495  if(has_any(RKCL|RVAL))
1496  {
1497  _c4dbgp("mapblck: set missing val");
1498  _handle_annotations_before_blck_val_scalar();
1499  m_evt_handler->set_val_scalar_plain({});
1500  }
1501  else if(has_any(QMRK))
1502  {
1503  _c4dbgp("mapblck: set missing keyval");
1504  _handle_annotations_before_blck_key_scalar();
1505  m_evt_handler->set_key_scalar_plain({});
1506  _handle_annotations_before_blck_val_scalar();
1507  m_evt_handler->set_val_scalar_plain({});
1508  }
1509  m_evt_handler->end_map();
1510 }
1511 
1512 template<class EventHandler>
1513 void ParseEngine<EventHandler>::_end_seq_blck()
1514 {
1515  if(has_any(RVAL))
1516  {
1517  _c4dbgp("seqblck: set missing val");
1518  _handle_annotations_before_blck_val_scalar();
1519  m_evt_handler->set_val_scalar_plain({});
1520  }
1521  m_evt_handler->end_seq();
1522 }
1523 
1524 template<class EventHandler>
1525 void ParseEngine<EventHandler>::_end2_map()
1526 {
1527  _c4dbgp("map: end");
1528  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1529  if(has_any(BLCK))
1530  {
1531  _end_map_blck();
1532  }
1533  else
1534  {
1535  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1536  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1537  m_evt_handler->_pop();
1538  }
1539 }
1540 
1541 template<class EventHandler>
1542 void ParseEngine<EventHandler>::_end2_seq()
1543 {
1544  _c4dbgp("seq: end");
1545  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1546  if(has_any(BLCK))
1547  {
1548  _end_seq_blck();
1549  }
1550  else
1551  {
1552  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1553  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1554  m_evt_handler->_pop();
1555  }
1556 }
1557 
1558 template<class EventHandler>
1559 void ParseEngine<EventHandler>::_begin2_doc()
1560 {
1561  m_doc_empty = true;
1562  add_flags(RDOC);
1563  m_evt_handler->begin_doc();
1564  m_evt_handler->m_curr->indref = 0; // ?
1565 }
1566 
1567 template<class EventHandler>
1568 void ParseEngine<EventHandler>::_begin2_doc_expl()
1569 {
1570  m_doc_empty = true;
1571  add_flags(RDOC);
1572  m_evt_handler->begin_doc_expl();
1573  m_evt_handler->m_curr->indref = 0; // ?
1574 }
1575 
1576 template<class EventHandler>
1577 void ParseEngine<EventHandler>::_end2_doc()
1578 {
1579  _c4dbgp("doc: end");
1580  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1581  if(m_doc_empty)
1582  {
1583  _c4dbgp("doc was empty; add empty val");
1584  m_evt_handler->set_val_scalar_plain({});
1585  }
1586  m_evt_handler->end_doc();
1587 }
1588 
1589 template<class EventHandler>
1590 void ParseEngine<EventHandler>::_end2_doc_expl()
1591 {
1592  _c4dbgp("doc: end");
1593  if(m_doc_empty)
1594  {
1595  _c4dbgp("doc: no children; add empty val");
1596  m_evt_handler->set_val_scalar_plain({});
1597  }
1598  m_evt_handler->end_doc_expl();
1599 }
1600 
1601 template<class EventHandler>
1602 void ParseEngine<EventHandler>::_maybe_begin_doc()
1603 {
1604  if(has_none(RDOC))
1605  {
1606  _c4dbgp("doc must be started");
1607  _begin2_doc();
1608  }
1609 }
1610 template<class EventHandler>
1611 void ParseEngine<EventHandler>::_maybe_end_doc()
1612 {
1613  if(has_any(RDOC))
1614  {
1615  _c4dbgp("doc must be finished");
1616  _end2_doc();
1617  }
1618 }
1619 
1620 template<class EventHandler>
1621 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1622 {
1623  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1624  if(m_evt_handler->m_stack[0].flags & RDOC)
1625  {
1626  _c4dbgp("root is RDOC");
1627  if(m_evt_handler->m_curr->level != 0)
1628  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1629  }
1630  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1631  {
1632  _c4dbgp("root is STREAM");
1633  if(m_evt_handler->m_curr->level != 1)
1634  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1635  }
1636  else
1637  {
1638  _c4err("internal error");
1639  }
1640  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1641 }
1642 
1643 template<class EventHandler>
1644 void ParseEngine<EventHandler>::_end_doc_suddenly()
1645 {
1646  _c4dbgp("end doc suddenly");
1647  _end_doc_suddenly__pop();
1648  _end2_doc_expl();
1649  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1650 }
1651 
1652 template<class EventHandler>
1653 void ParseEngine<EventHandler>::_start_doc_suddenly()
1654 {
1655  _c4dbgp("start doc suddenly");
1656  _end_doc_suddenly__pop();
1657  _end2_doc();
1658  _begin2_doc_expl();
1659 }
1660 
1661 template<class EventHandler>
1662 void ParseEngine<EventHandler>::_end_stream()
1663 {
1664  _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1665  if(has_all(RSEQ|FLOW))
1666  _c4err("missing terminating ]");
1667  else if(has_all(RMAP|FLOW))
1668  _c4err("missing terminating }");
1669  if(m_evt_handler->m_stack.size() > 1)
1670  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1671  if(has_all(RDOC))
1672  {
1673  _end2_doc();
1674  }
1675  else if(has_all(RTOP|RUNK))
1676  {
1677  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1678  {
1679  if(m_doc_empty)
1680  {
1681  m_evt_handler->begin_doc();
1682  _handle_annotations_before_blck_val_scalar();
1683  m_evt_handler->set_val_scalar_plain({});
1684  m_evt_handler->end_doc();
1685  }
1686  }
1687  }
1688  m_evt_handler->end_stream();
1689 }
1690 
1691 
1692 template<class EventHandler>
1693 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1694 {
1695  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1696  while(m_evt_handler->m_curr != popto)
1697  {
1698  if(has_any(RSEQ))
1699  {
1700  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1701  _end2_seq();
1702  }
1703  else if(has_any(RMAP))
1704  {
1705  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1706  _end2_map();
1707  }
1708  else
1709  {
1710  break;
1711  }
1712  }
1713  _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1714 }
1715 
1716 template<class EventHandler>
1717 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1718 {
1719  // search the stack frame to jump to based on its indentation
1720  using state_type = typename EventHandler::state;
1721  state_type const* popto = nullptr;
1722  auto &stack = m_evt_handler->m_stack;
1723  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1724  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1725  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1726  #ifdef RYML_DBG
1727  if(_dbg_enabled())
1728  {
1729  char flagbuf_[128];
1730  for(state_type const& s : stack)
1731  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1732  }
1733  #endif
1734  for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1735  {
1736  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1737  if(s->indref == ind)
1738  {
1739  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1740  popto = s;
1741  break;
1742  }
1743  }
1744  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1745  {
1746  _c4err("parse error: incorrect indentation?");
1747  }
1748  _handle_indentation_pop(popto);
1749 }
1750 
1751 template<class EventHandler>
1752 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1753 {
1754  // search the stack frame to jump to based on its indentation
1755  using state_type = typename EventHandler::state;
1756  auto &stack = m_evt_handler->m_stack;
1757  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1758  _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1759  const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1760  state_type const* popto = nullptr;
1761  #ifdef RYML_DBG
1762  char flagbuf_[128];
1763  if(_dbg_enabled())
1764  {
1765  for(state_type const& s : stack)
1766  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1767  }
1768  #endif
1769  for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1770  {
1771  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1772  if(s->indref < ind)
1773  {
1774  break;
1775  }
1776  else if(s->indref == ind)
1777  {
1778  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
1779  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1780  {
1781  break;
1782  }
1783  popto = s;
1784  if(has_all(RSEQ|BLCK, s))
1785  {
1786  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1787  const size_t first = rem.first_not_of(' ');
1788  _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
1789  rem = rem.right_of(first, true);
1790  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1791  if(rem.begins_with('-') && _is_blck_token(rem))
1792  {
1793  _c4dbgp("parent was indentless seq");
1794  break;
1795  }
1796  }
1797  }
1798  }
1799  if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1800  {
1801  _c4err("parse error: incorrect indentation?");
1802  }
1803  _handle_indentation_pop(popto);
1804 }
1805 
1806 
1807 //-----------------------------------------------------------------------------
1808 template<class EventHandler>
1809 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1810 {
1811  // quoted scalars can spread over multiple lines!
1812  // nice explanation here: http://yaml-multiline.info/
1813 
1814  // a span to the end of the file
1815  size_t b = m_evt_handler->m_curr->pos.offset;
1816  substr s = m_buf.sub(b);
1817  if(s.begins_with(' '))
1818  {
1819  s = s.triml(' ');
1820  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1821  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1822  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1823  }
1824  b = m_evt_handler->m_curr->pos.offset; // take this into account
1825  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1826 
1827  // skip the opening quote
1828  _line_progressed(1);
1829  s = s.sub(1);
1830 
1831  bool needs_filter = false;
1832 
1833  size_t numlines = 1; // we already have one line
1834  size_t pos = npos; // find the pos of the matching quote
1835  while( ! _finished_file())
1836  {
1837  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1838  bool line_is_blank = true;
1839  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1840  for(size_t i = 0; i < line.len; ++i)
1841  {
1842  const char curr = line.str[i];
1843  if(curr == '\'') // single quotes are escaped with two single quotes
1844  {
1845  const char next = i+1 < line.len ? line.str[i+1] : '~';
1846  if(next != '\'') // so just look for the first quote
1847  { // without another after it
1848  pos = i;
1849  break;
1850  }
1851  else
1852  {
1853  needs_filter = true; // needs filter to remove escaped quotes
1854  ++i; // skip the escaped quote
1855  }
1856  }
1857  else if(curr != ' ')
1858  {
1859  line_is_blank = false;
1860  }
1861  }
1862 
1863  // leading whitespace also needs filtering
1864  needs_filter = needs_filter
1865  || (numlines > 1)
1866  || line_is_blank
1867  || (_at_line_begin() && line.begins_with(' '));
1868 
1869  if(pos == npos)
1870  {
1871  _line_progressed(line.len);
1872  ++numlines;
1873  }
1874  else
1875  {
1876  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1877  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
1878  _line_progressed(pos + 1); // progress beyond the quote
1879  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1880  break;
1881  }
1882 
1883  _line_ended();
1884  _scan_line();
1885  }
1886 
1887  if(pos == npos)
1888  {
1889  _c4err("reached end of file while looking for closing quote");
1890  }
1891  else
1892  {
1893  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1894  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1895  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
1896  s = s.sub(0, pos-1);
1897  }
1898 
1899  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1900 
1901  return ScannedScalar { s, needs_filter };
1902 }
1903 
1904 
1905 //-----------------------------------------------------------------------------
1906 template<class EventHandler>
1907 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1908 {
1909  // quoted scalars can spread over multiple lines!
1910  // nice explanation here: http://yaml-multiline.info/
1911 
1912  // a span to the end of the file
1913  size_t b = m_evt_handler->m_curr->pos.offset;
1914  substr s = m_buf.sub(b);
1915  if(s.begins_with(' '))
1916  {
1917  s = s.triml(' ');
1918  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1919  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1920  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1921  }
1922  b = m_evt_handler->m_curr->pos.offset; // take this into account
1923  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1924 
1925  // skip the opening quote
1926  _line_progressed(1);
1927  s = s.sub(1);
1928 
1929  bool needs_filter = false;
1930 
1931  size_t numlines = 1; // we already have one line
1932  size_t pos = npos; // find the pos of the matching quote
1933  while( ! _finished_file())
1934  {
1935  const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1936  bool line_is_blank = true;
1937  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1938  for(size_t i = 0; i < line.len; ++i)
1939  {
1940  const char curr = line.str[i];
1941  if(curr != ' ')
1942  line_is_blank = false;
1943  // every \ is an escape
1944  if(curr == '\\')
1945  {
1946  const char next = i+1 < line.len ? line.str[i+1] : '~';
1947  needs_filter = true;
1948  if(next == '"' || next == '\\')
1949  ++i;
1950  }
1951  else if(curr == '"')
1952  {
1953  pos = i;
1954  break;
1955  }
1956  }
1957 
1958  // leading whitespace also needs filtering
1959  needs_filter = needs_filter
1960  || (numlines > 1)
1961  || line_is_blank
1962  || (_at_line_begin() && line.begins_with(' '));
1963 
1964  if(pos == npos)
1965  {
1966  _line_progressed(line.len);
1967  ++numlines;
1968  }
1969  else
1970  {
1971  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1972  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
1973  _line_progressed(pos + 1); // progress beyond the quote
1974  pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
1975  break;
1976  }
1977 
1978  _line_ended();
1979  _scan_line();
1980  }
1981 
1982  if(pos == npos)
1983  {
1984  _c4err("reached end of file looking for closing quote");
1985  }
1986  else
1987  {
1988  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
1990  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1991  s = s.sub(0, pos-1);
1992  }
1993 
1994  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
1995 
1996  return ScannedScalar { s, needs_filter };
1997 }
1998 
1999 
2000 //-----------------------------------------------------------------------------
2001 template<class EventHandler>
2002 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2003 {
2004  _c4dbgpf("blck: indref={}", indref);
2005  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
2006 
2007  // nice explanation here: http://yaml-multiline.info/
2008  csubstr s = m_evt_handler->m_curr->line_contents.rem;
2009  csubstr trimmed = s.triml(' ');
2010  if(trimmed.str > s.str)
2011  {
2012  _c4dbgp("skipping whitespace");
2013  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2014  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2015  s = trimmed;
2016  }
2017  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2018 
2019  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2020 
2021  // parse the spec
2022  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2023  size_t indentation = npos; // have to find out if no spec is given
2024  csubstr digits;
2025  if(s.len > 1)
2026  {
2027  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2028  csubstr t = s.sub(1);
2029  _c4dbgpf("blck: spec is multichar: '{}'", t);
2030  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2031  size_t pos = t.first_of("-+");
2032  _c4dbgpf("blck: spec chomp char at {}", pos);
2033  if(pos != npos)
2034  {
2035  if(t[pos] == '-')
2036  chomp = CHOMP_STRIP;
2037  else if(t[pos] == '+')
2038  chomp = CHOMP_KEEP;
2039  if(pos == 0)
2040  t = t.sub(1);
2041  else
2042  t = t.first(pos);
2043  }
2044  // from here to the end, only digits are considered
2045  digits = t.left_of(t.first_not_of("0123456789"));
2046  if( ! digits.empty())
2047  {
2048  if(C4_UNLIKELY(digits.len > 1))
2049  _c4err("parse error: invalid indentation");
2050  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2051  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2052  _c4err("parse error: could not read indentation as decimal");
2053  if(C4_UNLIKELY( ! indentation))
2054  _c4err("parse error: null indentation");
2055  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2056  indentation += m_evt_handler->m_curr->indref;
2057  }
2058  }
2059 
2060  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2061 
2062  // finish the current line
2063  _line_progressed(s.len);
2064  _line_ended();
2065  _scan_line();
2066 
2067  // start with a zero-length block, already pointing at the right place
2068  substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
2069  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2070 
2071  // read every full line into a raw block,
2072  // from which newlines are to be stripped as needed.
2073  //
2074  // If no explicit indentation was given, pick it from the first
2075  // non-empty line. See
2076  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2077  size_t num_lines = 0;
2078  size_t first = m_evt_handler->m_curr->pos.line;
2079  size_t provisional_indentation = npos;
2080  LineContents lc;
2081  while(( ! _finished_file()))
2082  {
2083  // peek next line, but do not advance immediately
2084  lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2085  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2086  // evaluate termination conditions
2087  if(indentation != npos)
2088  {
2089  _c4dbgpf("blck: indentation={}", indentation);
2090  // stop when the line is deindented and not empty
2091  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2092  {
2093  if(raw_block.len)
2094  {
2095  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2096  }
2097  else
2098  {
2099  _c4err("indentation decreased without any scalar");
2100  }
2101  break;
2102  }
2103  else if(indentation == 0)
2104  {
2105  _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2106  if(_is_doc_token(lc.rem))
2107  {
2108  _c4dbgp("blck: stop. indentation=0 and doc ended");
2109  break;
2110  }
2111  }
2112  }
2113  else
2114  {
2115  const size_t fns = lc.stripped.first_not_of(' ');
2116  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2117  if(fns != npos) // non-empty line
2118  {
2120  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2121  _c4err("parse error");
2122  )
2123  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2124  if(provisional_indentation == npos)
2125  {
2126  if(lc.indentation < indref)
2127  {
2128  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2129  if(raw_block.len == 0)
2130  {
2131  _c4dbgp("blck: was empty, undo next line");
2132  _line_ended_undo();
2133  }
2134  break;
2135  }
2136  else if(lc.indentation == m_evt_handler->m_curr->indref)
2137  {
2138  if(has_any(RSEQ|RMAP))
2139  {
2140  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2141  break;
2142  }
2143  }
2144  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2145  indentation = lc.indentation;
2146  }
2147  else
2148  {
2149  if(lc.indentation >= provisional_indentation)
2150  {
2151  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2152  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2153  indentation = lc.indentation;
2154  }
2155  else
2156  {
2157  break;
2158  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2159  }
2160  }
2161  }
2162  else // empty line
2163  {
2164  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2165  if(provisional_indentation != npos)
2166  {
2167  if(lc.stripped.len >= provisional_indentation)
2168  {
2169  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2170  provisional_indentation = lc.stripped.len;
2171  }
2172  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2173  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2174  {
2175  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2176  provisional_indentation = lc.indentation;
2177  }
2178  #endif
2179  }
2180  else
2181  {
2182  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2183  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2184  if(provisional_indentation == npos)
2185  {
2186  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2187  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2188  }
2189  if(provisional_indentation < indref)
2190  {
2191  provisional_indentation = indref;
2192  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2193  }
2194  }
2195  }
2196  }
2197  // advance now that we know the folded scalar continues
2198  m_evt_handler->m_curr->line_contents = lc;
2199  _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2200  raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2201  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2202  _line_ended();
2203  ++num_lines;
2204  }
2205  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2206  C4_UNUSED(num_lines);
2207  C4_UNUSED(first);
2208 
2209  if(indentation == npos)
2210  {
2211  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2212  indentation = provisional_indentation;
2213  }
2214 
2215  if(num_lines)
2216  _line_ended_undo();
2217 
2218  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2219 
2220  sb->scalar = raw_block;
2221  sb->indentation = indentation;
2222  sb->chomp = chomp;
2223 }
2224 
2225 
2226 //-----------------------------------------------------------------------------
2227 //-----------------------------------------------------------------------------
2228 //-----------------------------------------------------------------------------
2229 
2230 // a debugging scaffold:
// Change the `#if 0` below to `#if 1` to make _c4dbgfws() forward to
// _c4dbgpf(), prefixing each message with the processor's read and
// write positions (this requires a variable named `proc` in scope).
// As written, _c4dbgfws() expands to nothing.
2231 #if 0
2232 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2233 #else
2234 #define _c4dbgfws(...)
2235 #endif
2236 
2237 template<class EventHandler>
2238 template<class FilterProcessor>
2239 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2240 {
2241  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2242  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2243 
2244  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2245  if(first_pos != npos)
2246  {
2247  const char first_char = proc.src[first_pos];
2248  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2249  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2250  {
2251  _c4dbgfws("whitespace is trailing on line", "");
2252  proc.skip(first_pos - proc.rpos);
2253  }
2254  else // a legit whitespace
2255  {
2256  proc.copy();
2257  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2258  }
2259  return true;
2260  }
2261  _c4dbgfws("whitespace is trailing on line", "");
2262  return false;
2263 }
2264 
2265 template<class EventHandler>
2266 template<class FilterProcessor>
2267 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2268 {
2269  if(!_filter_ws_handle_to_first_non_space(proc))
2270  {
2271  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2272  proc.copy(proc.src.len - proc.rpos);
2273  }
2274 }
2275 
2276 template<class EventHandler>
2277 template<class FilterProcessor>
2278 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2279 {
2280  if(!_filter_ws_handle_to_first_non_space(proc))
2281  {
2282  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2283  proc.skip(proc.src.len - proc.rpos);
2284  }
2285 }
2286 
2287 #undef _c4dbgfws
2288 
2289 
2290 //-----------------------------------------------------------------------------
2291 //-----------------------------------------------------------------------------
2292 //-----------------------------------------------------------------------------
2293 /* plain scalars */
2294 
2295 // a debugging scaffold:
// Change the `#if 0` below to `#if 1` to make _c4dbgfps() forward to
// _c4dbgpf(), prefixing each message with the processor's read and
// write positions (this requires a variable named `proc` in scope).
// As written, _c4dbgfps() expands to nothing.
2296 #if 0
2297 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2298 #else
2299 #define _c4dbgfps(fmt, ...)
2300 #endif
2301 
2302 template<class EventHandler>
2303 template<class FilterProcessor>
2304 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2305 {
2306  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2307 
2308  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2309  size_t ii = proc.rpos;
2310  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2311  if(numnl_following)
2312  {
2313  proc.set('\n', numnl_following);
2314  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2315  }
2316  else
2317  {
2318  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2319  if(ret != npos)
2320  {
2321  proc.set(' ');
2322  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2323  }
2324  else
2325  {
2326  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2327  ii = proc.src.len;
2328  }
2329  }
2330  proc.rpos = ii;
2331 }
2332 
2333 template<class EventHandler>
2334 template<class FilterProcessor>
2335 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2336 {
2337  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2338  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2339 
2340  while(proc.has_more_chars())
2341  {
2342  const char curr = proc.curr();
2343  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2344  switch(curr)
2345  {
2346  case ' ':
2347  _RYML_WITH_TAB_TOKENS(case '\t':)
2348  _c4dbgfps("whitespace", curr);
2349  _filter_ws_skip_trailing(proc);
2350  break;
2351  case '\n':
2352  _c4dbgfps("newline", curr);
2353  _filter_nl_plain(proc, /*indentation*/indentation);
2354  break;
2355  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2356  _c4dbgfps("carriage return, ignore", curr);
2357  proc.skip();
2358  break;
2359  default:
2360  proc.copy();
2361  break;
2362  }
2363  }
2364 
2365  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2366 
2367  return proc.result();
2368 }
2369 
2370 #undef _c4dbgfps
2371 
2372 
2373 template<class EventHandler>
2374 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2375 {
2376  FilterProcessorSrcDst proc(scalar, dst);
2377  return _filter_plain(proc, indentation);
2378 }
2379 
2380 template<class EventHandler>
2381 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2382 {
2383  FilterProcessorInplaceEndExtending proc(dst, cap);
2384  return _filter_plain(proc, indentation);
2385 }
2386 
2387 
2388 //-----------------------------------------------------------------------------
2389 //-----------------------------------------------------------------------------
2390 //-----------------------------------------------------------------------------
2391 /* single quoted */
2392 
2393 // a debugging scaffold:
// Change the `#if 0` below to `#if 1` to make _c4dbgfsq() forward to
// _c4dbgpf(), prefixing each message with the processor's read and
// write positions (this requires a variable named `proc` in scope).
// As written, _c4dbgfsq() expands to nothing.
2394 #if 0
2395 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2396 #else
2397 #define _c4dbgfsq(fmt, ...)
2398 #endif
2399 
2400 template<class EventHandler>
2401 template<class FilterProcessor>
2402 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2403 {
2404  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2405 
2406  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2407  size_t ii = proc.rpos;
2408  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2409  if(numnl_following)
2410  {
2411  proc.set('\n', numnl_following);
2412  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2413  }
2414  else
2415  {
2416  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2417  if(ret != npos)
2418  {
2419  proc.set(' ');
2420  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2421  }
2422  else
2423  {
2424  proc.set(' ');
2425  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2426  }
2427  }
2428  proc.rpos = ii;
2429 }
2430 
2431 template<class EventHandler>
2432 template<class FilterProcessor>
2433 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2434 {
2435  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2436 
2437  // from the YAML spec for double-quoted scalars:
2438  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2439  while(proc.has_more_chars())
2440  {
2441  const char curr = proc.curr();
2442  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2443  switch(curr)
2444  {
2445  case ' ':
2446  case '\t':
2447  _c4dbgfsq("whitespace", curr);
2448  _filter_ws_copy_trailing(proc);
2449  break;
2450  case '\n':
2451  _c4dbgfsq("newline", curr);
2452  _filter_nl_squoted(proc);
2453  break;
2454  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2455  _c4dbgfsq("skip cr", curr);
2456  proc.skip();
2457  break;
2458  case '\'':
2459  _c4dbgfsq("squote", curr);
2460  if(proc.next() == '\'')
2461  {
2462  _c4dbgfsq("two consecutive squotes", curr);
2463  proc.skip();
2464  proc.copy();
2465  }
2466  else
2467  {
2468  _c4err("filter error");
2469  }
2470  break;
2471  default:
2472  proc.copy();
2473  break;
2474  }
2475  }
2476 
2477  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2478 
2479  return proc.result();
2480 }
2481 
2482 #undef _c4dbgfsq
2483 
2484 template<class EventHandler>
2485 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2486 {
2487  FilterProcessorSrcDst proc(scalar, dst);
2488  return _filter_squoted(proc);
2489 }
2490 
2491 template<class EventHandler>
2493 {
2494  FilterProcessorInplaceEndExtending proc(dst, cap);
2495  return _filter_squoted(proc);
2496 }
2497 
2498 
2499 //-----------------------------------------------------------------------------
2500 //-----------------------------------------------------------------------------
2501 //-----------------------------------------------------------------------------
2502 /* double quoted */
2503 
2504 // a debugging scaffold:
// Change the `#if 0` below to `#if 1` to make _c4dbgfdq() forward to
// _c4dbgpf(), prefixing each message with the processor's read and
// write positions (this requires a variable named `proc` in scope).
// As written, _c4dbgfdq() expands to nothing.
2505 #if 0
2506 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2507 #else
2508 #define _c4dbgfdq(...)
2509 #endif
2510 
2511 template<class EventHandler>
2512 template<class FilterProcessor>
2513 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2514 {
2515  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2516 
2517  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2518  size_t ii = proc.rpos;
2519  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2520  if(numnl_following)
2521  {
2522  proc.set('\n', numnl_following);
2523  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2524  }
2525  else
2526  {
2527  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2528  if(ret != npos)
2529  {
2530  proc.set(' ');
2531  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2532  }
2533  else
2534  {
2535  proc.set(' ');
2536  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2537  }
2538  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2539  {
2540  _c4dbgfdq("backslash at [{}]", ii);
2541  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2542  if(next == ' ' || next == '\t')
2543  {
2544  _c4dbgfdq("extend skip to backslash", "");
2545  ++ii;
2546  }
2547  }
2548  }
2549  proc.rpos = ii;
2550 }
2551 
2552 template<class EventHandler>
2553 template<class FilterProcessor>
2554 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2555 {
2556  char next = proc.next();
2557  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2558  if(next == '\r')
2559  {
2560  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2561  {
2562  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2563  next = '\n';
2564  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2565  }
2566  }
2567 
2568  if(next == '\n')
2569  {
2570  size_t ii = proc.rpos + 2;
2571  for( ; ii < proc.src.len; ++ii)
2572  {
2573  // skip leading whitespace
2574  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2575  ;
2576  else
2577  break;
2578  }
2579  proc.skip(ii - proc.rpos);
2580  }
2581  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2582  {
2583  // escapes for json compatibility
2584  proc.translate_esc(next);
2585  _c4dbgfdq("here, used '{}'", _c4prc(next));
2586  }
2587  else if(next == '\r')
2588  {
2589  proc.skip();
2590  }
2591  else if(next == 'n')
2592  {
2593  proc.translate_esc('\n');
2594  }
2595  else if(next == 'r')
2596  {
2597  proc.translate_esc('\r');
2598  }
2599  else if(next == 't')
2600  {
2601  proc.translate_esc('\t');
2602  }
2603  else if(next == '\\')
2604  {
2605  proc.translate_esc('\\');
2606  }
2607  else if(next == 'x') // UTF8
2608  {
2609  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2610  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2611  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2612  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2613  uint8_t byteval = {};
2614  if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
2615  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2616  proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
2617  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2618  }
2619  else if(next == 'u') // UTF16
2620  {
2621  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2622  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2623  char readbuf[8];
2624  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2625  uint32_t codepoint_val = {};
2626  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2627  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2628  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2629  if(C4_UNLIKELY(numbytes == 0))
2630  _c4err("failed to decode code point={}", proc.rpos);
2631  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2632  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2633  }
2634  else if(next == 'U') // UTF32
2635  {
2636  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2637  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2638  char readbuf[8];
2639  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2640  uint32_t codepoint_val = {};
2641  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2642  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2643  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2644  if(C4_UNLIKELY(numbytes == 0))
2645  _c4err("failed to decode code point={}", proc.rpos);
2646  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2647  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2648  }
2649  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2650  else if(next == '0')
2651  {
2652  proc.translate_esc('\0');
2653  }
2654  else if(next == 'b') // backspace
2655  {
2656  proc.translate_esc('\b');
2657  }
2658  else if(next == 'f') // form feed
2659  {
2660  proc.translate_esc('\f');
2661  }
2662  else if(next == 'a') // bell character
2663  {
2664  proc.translate_esc('\a');
2665  }
2666  else if(next == 'v') // vertical tab
2667  {
2668  proc.translate_esc('\v');
2669  }
2670  else if(next == 'e') // escape character
2671  {
2672  proc.translate_esc('\x1b');
2673  }
2674  else if(next == '_') // unicode non breaking space \u00a0
2675  {
2676  // https://www.compart.com/en/unicode/U+00a0
2677  const char payload[] = {
2678  _RYML_CHCONST(-0x3e, 0xc2),
2679  _RYML_CHCONST(-0x60, 0xa0),
2680  };
2681  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2682  }
2683  else if(next == 'N') // unicode next line \u0085
2684  {
2685  // https://www.compart.com/en/unicode/U+0085
2686  const char payload[] = {
2687  _RYML_CHCONST(-0x3e, 0xc2),
2688  _RYML_CHCONST(-0x7b, 0x85),
2689  };
2690  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2691  }
2692  else if(next == 'L') // unicode line separator \u2028
2693  {
2694  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2695  const char payload[] = {
2696  _RYML_CHCONST(-0x1e, 0xe2),
2697  _RYML_CHCONST(-0x80, 0x80),
2698  _RYML_CHCONST(-0x58, 0xa8),
2699  };
2700  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2701  }
2702  else if(next == 'P') // unicode paragraph separator \u2029
2703  {
2704  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2705  const char payload[] = {
2706  _RYML_CHCONST(-0x1e, 0xe2),
2707  _RYML_CHCONST(-0x80, 0x80),
2708  _RYML_CHCONST(-0x57, 0xa9),
2709  };
2710  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2711  }
2712  else if(next == '\0')
2713  {
2714  proc.skip();
2715  }
2716  else
2717  {
2718  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2719  }
2720  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2721 }
2722 
2723 
2724 template<class EventHandler>
2725 template<class FilterProcessor>
2726 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2727 {
2728  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2729  // from the YAML spec for double-quoted scalars:
2730  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2731  while(proc.has_more_chars())
2732  {
2733  const char curr = proc.curr();
2734  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2735  switch(curr)
2736  {
2737  case ' ':
2738  case '\t':
2739  {
2740  _c4dbgfdq("whitespace", curr);
2741  _filter_ws_copy_trailing(proc);
2742  break;
2743  }
2744  case '\n':
2745  {
2746  _c4dbgfdq("newline", curr);
2747  _filter_nl_dquoted(proc);
2748  break;
2749  }
2750  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2751  {
2752  _c4dbgfdq("carriage return, ignore", curr);
2753  proc.skip();
2754  break;
2755  }
2756  case '\\':
2757  {
2758  _filter_dquoted_backslash(proc);
2759  break;
2760  }
2761  default:
2762  {
2763  proc.copy();
2764  break;
2765  }
2766  }
2767  }
2768  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2769  return proc.result();
2770 }
2771 
2772 #undef _c4dbgfdq
2773 
2774 
2775 template<class EventHandler>
2776 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2777 {
2778  FilterProcessorSrcDst proc(scalar, dst);
2779  return _filter_dquoted(proc);
2780 }
2781 
2782 template<class EventHandler>
2783 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2784 {
2785  FilterProcessorInplaceMidExtending proc(dst, cap);
2786  return _filter_dquoted(proc);
2787 }
2788 
2789 
2790 //-----------------------------------------------------------------------------
2791 //-----------------------------------------------------------------------------
2792 //-----------------------------------------------------------------------------
2793 // block filtering helpers
2794 
2795 template<class EventHandler>
2796 template<class FilterProcessor>
2797 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2798 {
2799  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2800  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2801 
2802  // a debugging scaffold:
2803  #if 0
2804  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2805  #else
2806  #define _c4dbgchomp(...)
2807  #endif
2808 
2809  // advance to the last line having spaces beyond the indentation
2810  {
2811  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2812  if(last != npos)
2813  {
2814  _c4dbgchomp("found newline and larger indentation. last={}", last);
2815  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2816  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2817  // remove indentation spaces, copy the rest
2818  while((proc.rpos < last) && proc.has_more_chars())
2819  {
2820  const char curr = proc.curr();
2821  _c4dbgchomp("curr='{}'", _c4prc(curr));
2822  switch(curr)
2823  {
2824  case '\n':
2825  {
2826  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2827  proc.copy();
2828  // are there spaces after the newline?
2829  csubstr at_next_line = proc.rem();
2830  if(at_next_line.begins_with(' '))
2831  {
2832  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2833  // there are spaces.
2834  size_t first_non_space = at_next_line.first_not_of(' ');
2835  _c4dbgchomp("first_non_space={}", first_non_space);
2836  if(first_non_space == npos)
2837  {
2838  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2839  first_non_space = at_next_line.len;
2840  }
2841  if(first_non_space <= indentation)
2842  {
2843  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2844  proc.skip(first_non_space);
2845  }
2846  else
2847  {
2848  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2849  proc.skip(indentation);
2850  // copy the spaces after the indentation
2851  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2852  proc.copy(first_non_space - indentation);
2853  }
2854  }
2855  break;
2856  }
2857  case '\r':
2858  proc.skip();
2859  break;
2860  default:
2861  _c4err("parse error");
2862  break;
2863  }
2864  }
2865  }
2866  }
2867 
2868  // from now on, we only have line ends (or indentation spaces)
2869  switch(chomp)
2870  {
2871  case CHOMP_CLIP:
2872  {
2873  bool had_one = false;
2874  while(proc.has_more_chars())
2875  {
2876  const char curr = proc.curr();
2877  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2878  switch(curr)
2879  {
2880  case '\n':
2881  {
2882  _c4dbgchomp("copy newline!", curr);
2883  proc.copy();
2884  proc.set_at_end();
2885  had_one = true;
2886  break;
2887  }
2888  case ' ':
2889  case '\r':
2890  _c4dbgchomp("skip!", curr);
2891  proc.skip();
2892  break;
2893  }
2894  }
2895  if(!had_one) // there were no newline characters. add one.
2896  {
2897  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2898  proc.set('\n');
2899  }
2900  break;
2901  }
2902  case CHOMP_KEEP:
2903  {
2904  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2905  while(proc.has_more_chars())
2906  {
2907  const char curr = proc.curr();
2908  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2909  switch(curr)
2910  {
2911  case '\n':
2912  _c4dbgchomp("copy newline!", curr);
2913  proc.copy();
2914  break;
2915  case ' ':
2916  case '\r':
2917  _c4dbgchomp("skip!", curr);
2918  proc.skip();
2919  break;
2920  }
2921  }
2922  break;
2923  }
2924  case CHOMP_STRIP:
2925  {
2926  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
2927  // nothing to do!
2928  break;
2929  }
2930  }
2931 
2932  #undef _c4dbgchomp
2933 }
2934 
2935 
2936 // a debugging scaffold:
2937 #if 0
2938 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2939 #else
2940 #define _c4dbgfb(...)
2941 #endif
2942 
2943 template<class EventHandler>
2944 template<class FilterProcessor>
2945 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2946 {
2947  csubstr rem = proc.rem(); // remaining
2948  if(rem.len)
2949  {
2950  size_t first = rem.first_not_of(' ');
2951  if(first != npos)
2952  {
2953  _c4dbgfb("{} spaces follow before next nonws character", first);
2954  if(first < indentation)
2955  {
2956  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
2957  proc.skip(first);
2958  }
2959  else
2960  {
2961  _c4dbgfb("skip {} spaces from indentation", indentation);
2962  proc.skip(indentation);
2963  }
2964  }
2965  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2966  else
2967  {
2968  _c4dbgfb("all spaces to the end: {} spaces", first);
2969  first = rem.len;
2970  if(first)
2971  {
2972  if(first < indentation)
2973  {
2974  _c4dbgfb("skip everything", first);
2975  proc.skip(proc.src.len - proc.rpos);
2976  }
2977  else
2978  {
2979  _c4dbgfb("skip {} spaces from indentation", indentation);
2980  proc.skip(indentation);
2981  }
2982  }
2983  }
2984  #endif
2985  }
2986 }
2987 
2988 template<class EventHandler>
2989 template<class FilterProcessor>
2990 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
2991 {
2992  csubstr contents = proc.src.trimr(" \n\r");
2993  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
2994  if(!contents.len)
2995  {
2996  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
2997  if(chomp == CHOMP_KEEP && proc.src.len)
2998  {
2999  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3000  while(proc.has_more_chars())
3001  {
3002  const char curr = proc.curr();
3003  if(curr == '\n')
3004  proc.copy();
3005  else
3006  proc.skip();
3007  }
3008  if(!proc.wpos)
3009  {
3010  proc.set('\n');
3011  }
3012  }
3013  }
3014  return contents.len;
3015 }
3016 
3017 template<class EventHandler>
3018 template<class FilterProcessor>
3019 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3020 {
3021  _c4dbgfb("contents_len={}", contents_len);
3022 
3023  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3024 
3025  // extend contents to just before the first newline at the end,
3026  // in case it is preceded by spaces
3027  size_t firstnewl = proc.src.first_of('\n', contents_len);
3028  if(firstnewl != npos)
3029  {
3030  contents_len = firstnewl;
3031  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3032  }
3033  else
3034  {
3035  contents_len = proc.src.len;
3036  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3037  }
3038 
3039  return contents_len;
3040 }
3041 
3042 #undef _c4dbgfb
3043 
3044 
3045 //-----------------------------------------------------------------------------
3046 //-----------------------------------------------------------------------------
3047 //-----------------------------------------------------------------------------
3048 
3049 // a debugging scaffold:
3050 #if 0
3051 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3052 #else
3053 #define _c4dbgfbl(...)
3054 #endif
3055 
3056 template<class EventHandler>
3057 template<class FilterProcessor>
3058 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3059 {
3060  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3061 
3062  size_t contents_len = _handle_all_whitespace(proc, chomp);
3063  if(!contents_len)
3064  return proc.result();
3065 
3066  contents_len = _extend_to_chomp(proc, contents_len);
3067 
3068  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3069 
3070  _filter_block_indentation(proc, indentation);
3071 
3072  // now filter the bulk
3073  while(proc.has_more_chars(/*maxpos*/contents_len))
3074  {
3075  const char curr = proc.curr();
3076  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3077  switch(curr)
3078  {
3079  case '\n':
3080  {
3081  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3082  proc.copy(); // copy the newline
3083  _filter_block_indentation(proc, indentation);
3084  break;
3085  }
3086  case '\r':
3087  proc.skip();
3088  break;
3089  default:
3090  proc.copy();
3091  break;
3092  }
3093  }
3094 
3095  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3096 
3097  _filter_chomp(proc, chomp, indentation);
3098 
3099  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3100 
3101  return proc.result();
3102 }
3103 
3104 #undef _c4dbgfbl
3105 
3106 template<class EventHandler>
3107 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3108 {
3109  FilterProcessorSrcDst proc(scalar, dst);
3110  return _filter_block_literal(proc, indentation, chomp);
3111 }
3112 
3113 template<class EventHandler>
3114 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3115 {
3116  FilterProcessorInplaceEndExtending proc(scalar, cap);
3117  return _filter_block_literal(proc, indentation, chomp);
3118 }
3119 
3120 
3121 //-----------------------------------------------------------------------------
3122 //-----------------------------------------------------------------------------
3123 //-----------------------------------------------------------------------------
3124 
3125 // a debugging scaffold:
3126 #if 0
3127 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3128 #else
3129 #define _c4dbgfbf(...)
3130 #endif
3131 
3132 
3133 template<class EventHandler>
3134 template<class FilterProcessor>
3135 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3136 {
3137  _filter_block_indentation(proc, indentation);
3138  while(proc.has_more_chars(len))
3139  {
3140  const char curr = proc.curr();
3141  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3142  switch(curr)
3143  {
3144  case '\n':
3145  _c4dbgfbf("newline.", curr);
3146  proc.copy();
3147  _filter_block_indentation(proc, indentation);
3148  break;
3149  case '\r':
3150  proc.skip();
3151  break;
3152  case ' ':
3153  case '\t':
3154  {
3155  size_t first = proc.rem().first_not_of(" \t");
3156  _c4dbgfbf("space. first={}", first);
3157  if(first == npos)
3158  first = proc.rem().len;
3159  _c4dbgfbf("... indentation increased to {}", first);
3160  _filter_block_folded_indented_block(proc, indentation, len, first);
3161  break;
3162  }
3163  default:
3164  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3165  return;
3166  }
3167  }
3168 }
3169 
// Handle one '\n' met while folding a block scalar. num_newl is the count
// of this newline within the current run of newlines (the caller,
// _filter_block_folded_newlines(), passes ++num_newl starting from 0):
//  - 1: record the current write position, call proc.skip() and set a
//       ' ' in place of the newline;
//  - 2: call proc.skip() and overwrite the ' ' that was set for the
//       first newline (at wpos_at_first_newl) with '\n';
//  - otherwise: copy the newline as-is.
// Returns the write position recorded for the first newline of the run:
// proc.wpos on entry when num_newl==1, else the incoming
// wpos_at_first_newl unchanged.
//
// NOTE: this switch lives in its own function on purpose; see the note
// in _filter_block_folded_newlines() before inlining or reshaping it.
3170 template<class EventHandler>
3171 template<class FilterProcessor>
3172 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3173 {
3174  switch(num_newl)
3175  {
3176  case 1u:
3177  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
 // remember where the ' ' goes, so that a second newline can replace it
3178  wpos_at_first_newl = proc.wpos;
3179  proc.skip();
3180  proc.set(' ');
3181  break;
3182  case 2u:
3183  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
 // the ' ' set for the first newline must be the last character written
3184  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3185  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3186  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3187  proc.skip();
3188  proc.set_at(wpos_at_first_newl, '\n');
3189  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3190  break;
3191  default:
3192  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3193  proc.copy();
3194  break;
3195  }
3196  return wpos_at_first_newl;
3197 }
3198 
// Process a run of newlines in a folded block scalar, starting at a '\n'
// (asserted) and going no further than position len.
//  - each '\n' is passed to _filter_block_folded_newlines_compress() with
//    its count within the run, and the indentation that follows it is
//    then removed;
//  - a ' ' or '\t' starts a more-indented block: the ' ' set for the
//    first newline of the run (if any) is turned back into '\n', one
//    extra '\n' is set when the run had more than one newline, the block
//    is handed to _filter_block_folded_indented_block(), and the newline
//    count restarts;
//  - '\r' is skipped;
//  - any other character ends the run and returns.
3199 template<class EventHandler>
3200 template<class FilterProcessor>
3201 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3202 {
3203  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
 // number of newlines seen so far in the current run
3204  size_t num_newl = 0;
 // write position of the ' ' set for the first newline of the run
3205  size_t wpos_at_first_newl = npos;
3206  while(proc.has_more_chars(len))
3207  {
3208  const char curr = proc.curr();
3209  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3210  switch(curr)
3211  {
3212  case '\n':
3213  {
3214  _c4dbgfbf("newline. sofar={}", num_newl);
3215  // NOTE: vs2022-32bit-release builds were giving wrong
3216  // results in this block, if it was written either as a
3217  // switch(num_newl) or as its equivalent if-form.
3218  //
3219  // For this reason, we're using a dedicated function
3220  // (**_compress), which seems to work around the issue.
3221  //
3222  // The manifested problem was that somewhere between the
3223  // assignment to curr and this point, proc.wpos (the
3224  // write-position of the processor) jumped to npos, which
3225  // made the write wrap around! To make things worse,
3226  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3227  // problem go away!
3228  //
3229  // The only way to make the problem appear with prints
3230  // enabled was by disabling all prints in this function
3231  // (including in the block which was moved to the compress
3232  // function) and then selectively enabling only some of
3233  // those prints.
3234  //
3235  // This may be due to some bug in the cl-x86 optimizer; or
3236  // it may be triggered by some UB which may be
3237  // inadvertently present in this function or in the filter
3238  // processor. This is despite our best efforts to weed out
3239  // any such UB problem: neither clang-tidy, nor any of the
3240  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3241  // in this code.
3242  //
3243  // In the end, moving this block to a separate function
3244  // was the only way to bury the problem. But it may
3245  // resurface again, as The Undead, rising from the
3246  // grave to haunt us with his terrible presence.
3247  //
3248  // We may have to revisit this. With a stake, and lots of
3249  // garlic.
3250  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3251  _filter_block_indentation(proc, indentation);
3252  break;
3253  }
3254  case ' ':
3255  case '\t':
3256  {
 // length of the whitespace run; all that remains if nothing else follows
3257  size_t first = proc.rem().first_not_of(" \t");
3258  _c4dbgfbf("space. first={}", first);
3259  if(first == npos)
3260  first = proc.rem().len;
3261  _c4dbgfbf("... indentation increased to {}", first);
3262  if(num_newl)
3263  {
 // a more-indented line follows: the first newline of the run is
 // not folded into a space after all
3264  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3265  proc.set_at(wpos_at_first_newl, '\n');
3266  }
3267  if(num_newl > 1u)
3268  {
3269  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3270  proc.set('\n');
3271  }
3272  _filter_block_folded_indented_block(proc, indentation, len, first);
 // the run of newlines is over; start counting afresh
3273  num_newl = 0;
3274  wpos_at_first_newl = npos;
3275  break;
3276  }
3277  case '\r':
3278  proc.skip();
3279  break;
3280  default:
3281  _c4dbgfbf("not space, not newline. stop.", 0);
3282  return;
3283  }
3284  }
3285 }
3286 
3287 
3288 template<class EventHandler>
3289 template<class FilterProcessor>
3290 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3291 {
3292  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3293  if(curr_indentation)
3294  proc.copy(curr_indentation);
3295  while(proc.has_more_chars(len))
3296  {
3297  const char curr = proc.curr();
3298  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3299  switch(curr)
3300  {
3301  case '\n':
3302  {
3303  proc.copy();
3304  _filter_block_indentation(proc, indentation);
3305  csubstr rem = proc.rem();
3306  const size_t first = rem.first_not_of(' ');
3307  _c4dbgfbf("newline. firstns={}", first);
3308  if(first == 0)
3309  {
3310  const char c = rem[first];
3311  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3312  if(c == '\n' || c == '\r')
3313  {
3314  ;
3315  }
3316  else
3317  {
3318  _c4dbgfbf("done with indented block", first);
3319  goto endloop;
3320  }
3321  }
3322  else if(first != npos)
3323  {
3324  proc.copy(first);
3325  _c4dbgfbf("copy all {} spaces", first);
3326  }
3327  break;
3328  }
3329  break;
3330  case '\r':
3331  proc.skip();
3332  break;
3333  default:
3334  proc.copy();
3335  break;
3336  }
3337  }
3338  endloop:
3339  return;
3340 }
3341 
3342 
3343 template<class EventHandler>
3344 template<class FilterProcessor>
3345 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3346 {
3347  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3348 
3349  size_t contents_len = _handle_all_whitespace(proc, chomp);
3350  if(!contents_len)
3351  return proc.result();
3352 
3353  contents_len = _extend_to_chomp(proc, contents_len);
3354 
3355  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3356 
3357  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3358 
3359  // now filter the bulk
3360  while(proc.has_more_chars(/*maxpos*/contents_len))
3361  {
3362  const char curr = proc.curr();
3363  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3364  switch(curr)
3365  {
3366  case '\n':
3367  {
3368  _c4dbgfbf("found newline", curr);
3369  _filter_block_folded_newlines(proc, indentation, contents_len);
3370  break;
3371  }
3372  case '\r':
3373  proc.skip();
3374  break;
3375  default:
3376  proc.copy();
3377  break;
3378  }
3379  }
3380 
3381  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3382 
3383  _filter_chomp(proc, chomp, indentation);
3384 
3385  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3386 
3387  return proc.result();
3388 }
3389 
3390 #undef _c4dbgfbf
3391 
3392 template<class EventHandler>
3393 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3394 {
3395  FilterProcessorSrcDst proc(scalar, dst);
3396  return _filter_block_folded(proc, indentation, chomp);
3397 }
3398 
3399 template<class EventHandler>
3400 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3401 {
3402  FilterProcessorInplaceEndExtending proc(scalar, cap);
3403  return _filter_block_folded(proc, indentation, chomp);
3404 }
3405 
3406 
3407 //-----------------------------------------------------------------------------
3408 //-----------------------------------------------------------------------------
3409 //-----------------------------------------------------------------------------
3410 
3411 template<class EventHandler>
3412 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3413 {
3414  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3415  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3416  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3417  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3418  return r.get();
3419 }
3420 
3421 //-----------------------------------------------------------------------------
3422 
3423 template<class EventHandler>
3424 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3425 {
3426  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3427  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3428  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3429  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3430  return r.get();
3431 }
3432 
3433 
3434 //-----------------------------------------------------------------------------
3435 
3436 template<class EventHandler>
3437 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3438 {
3439  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3440  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3441  if(C4_LIKELY(r.valid()))
3442  {
3443  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3444  return r.get();
3445  }
3446  else
3447  {
3448  const size_t len = r.required_len();
3449  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3450  substr dst = m_evt_handler->alloc_arena(len, &s);
3451  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3452  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3453  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3454  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3455  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3456  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3457  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3458  return rsd.get();
3459  }
3460 }
3461 
3462 
3463 //-----------------------------------------------------------------------------
3464 template<class EventHandler>
3465 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3466 {
3467  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3468  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3469  if(C4_LIKELY(r.valid()))
3470  {
3471  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3472  return r.get();
3473  }
3474  else
3475  {
3476  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3477  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3478  FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3479  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3480  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3481  return rsd.get();
3482  }
3483 }
3484 
3485 
3486 //-----------------------------------------------------------------------------
3487 template<class EventHandler>
3488 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3489 {
3490  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3491  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3492  if(C4_LIKELY(r.valid()))
3493  {
3494  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3495  return r.get();
3496  }
3497  else
3498  {
3499  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3500  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3501  FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3502  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3503  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3504  return rsd.get();
3505  }
3506 }
3507 
3508 
3509 //-----------------------------------------------------------------------------
3510 
3511 template<class EventHandler>
3512 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3513 {
3514  if(sc.needs_filter)
3515  {
3516  if(m_options.scalar_filtering())
3517  {
3518  return _filter_scalar_plain(sc.scalar, indentation);
3519  }
3520  else
3521  {
3522  _c4dbgp("plain scalar left unfiltered");
3523  m_evt_handler->mark_key_scalar_unfiltered();
3524  }
3525  }
3526  else
3527  {
3528  _c4dbgp("plain scalar doesn't need filtering");
3529  }
3530  return sc.scalar;
3531 }
3532 
3533 template<class EventHandler>
3534 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3535 {
3536  if(sc.needs_filter)
3537  {
3538  if(m_options.scalar_filtering())
3539  {
3540  return _filter_scalar_plain(sc.scalar, indentation);
3541  }
3542  else
3543  {
3544  _c4dbgp("plain scalar left unfiltered");
3545  m_evt_handler->mark_val_scalar_unfiltered();
3546  }
3547  }
3548  else
3549  {
3550  _c4dbgp("plain scalar doesn't need filtering");
3551  }
3552  return sc.scalar;
3553 }
3554 
3555 
3556 //-----------------------------------------------------------------------------
3557 
3558 template<class EventHandler>
3559 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3560 {
3561  if(sc.needs_filter)
3562  {
3563  if(m_options.scalar_filtering())
3564  {
3565  return _filter_scalar_squot(sc.scalar);
3566  }
3567  else
3568  {
3569  _c4dbgp("squo key scalar left unfiltered");
3570  m_evt_handler->mark_key_scalar_unfiltered();
3571  }
3572  }
3573  else
3574  {
3575  _c4dbgp("squo key scalar doesn't need filtering");
3576  }
3577  return sc.scalar;
3578 }
3579 
3580 template<class EventHandler>
3581 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3582 {
3583  if(sc.needs_filter)
3584  {
3585  if(m_options.scalar_filtering())
3586  {
3587  return _filter_scalar_squot(sc.scalar);
3588  }
3589  else
3590  {
3591  _c4dbgp("squo val scalar left unfiltered");
3592  m_evt_handler->mark_val_scalar_unfiltered();
3593  }
3594  }
3595  else
3596  {
3597  _c4dbgp("squo val scalar doesn't need filtering");
3598  }
3599  return sc.scalar;
3600 }
3601 
3602 
3603 //-----------------------------------------------------------------------------
3604 
3605 template<class EventHandler>
3606 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3607 {
3608  if(sc.needs_filter)
3609  {
3610  if(m_options.scalar_filtering())
3611  {
3612  return _filter_scalar_dquot(sc.scalar);
3613  }
3614  else
3615  {
3616  _c4dbgp("dquo scalar left unfiltered");
3617  m_evt_handler->mark_key_scalar_unfiltered();
3618  }
3619  }
3620  else
3621  {
3622  _c4dbgp("dquo scalar doesn't need filtering");
3623  }
3624  return sc.scalar;
3625 }
3626 
3627 template<class EventHandler>
3628 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3629 {
3630  if(sc.needs_filter)
3631  {
3632  if(m_options.scalar_filtering())
3633  {
3634  return _filter_scalar_dquot(sc.scalar);
3635  }
3636  else
3637  {
3638  _c4dbgp("dquo scalar left unfiltered");
3639  m_evt_handler->mark_val_scalar_unfiltered();
3640  }
3641  }
3642  else
3643  {
3644  _c4dbgp("dquo scalar doesn't need filtering");
3645  }
3646  return sc.scalar;
3647 }
3648 
3649 
3650 //-----------------------------------------------------------------------------
3651 
3652 template<class EventHandler>
3653 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3654 {
3655  if(m_options.scalar_filtering())
3656  {
3657  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3658  }
3659  else
3660  {
3661  _c4dbgp("literal scalar left unfiltered");
3662  m_evt_handler->mark_key_scalar_unfiltered();
3663  }
3664  return sb.scalar;
3665 }
3666 
3667 template<class EventHandler>
3668 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3669 {
3670  if(m_options.scalar_filtering())
3671  {
3672  return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3673  }
3674  else
3675  {
3676  _c4dbgp("literal scalar left unfiltered");
3677  m_evt_handler->mark_val_scalar_unfiltered();
3678  }
3679  return sb.scalar;
3680 }
3681 
3682 
3683 //-----------------------------------------------------------------------------
3684 
3685 template<class EventHandler>
3686 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3687 {
3688  if(m_options.scalar_filtering())
3689  {
3690  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3691  }
3692  else
3693  {
3694  _c4dbgp("folded scalar left unfiltered");
3695  m_evt_handler->mark_key_scalar_unfiltered();
3696  }
3697  return sb.scalar;
3698 }
3699 
3700 template<class EventHandler>
3701 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3702 {
3703  if(m_options.scalar_filtering())
3704  {
3705  return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3706  }
3707  else
3708  {
3709  _c4dbgp("folded scalar left unfiltered");
3710  m_evt_handler->mark_val_scalar_unfiltered();
3711  }
3712  return sb.scalar;
3713 }
3714 
3715 
3716 //-----------------------------------------------------------------------------
3717 //-----------------------------------------------------------------------------
3718 //-----------------------------------------------------------------------------
3719 
3720 #ifdef RYML_DBG // !!! <----------------------------------
3721 
3722 template<class EventHandler>
3723 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3724 {
3725  char buf1_[64], buf2_[64], buf3_[64];
3726  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3727  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3728  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3729  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3730  s->flags |= on;
3731 }
3732 
3733 template<class EventHandler>
3734 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3735 {
3736  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3737  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3738  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3739  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3740  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3741  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3742  s->flags |= on;
3743  s->flags &= ~off;
3744 }
3745 
3746 template<class EventHandler>
3747 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3748 {
3749  char buf1_[64], buf2_[64], buf3_[64];
3750  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3751  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3752  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3753  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3754  s->flags &= ~off;
3755 }
3756 
3757 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3758 {
3759  size_t pos = 0;
3760  bool gotone = false;
3761 
3762  #define _prflag(fl) \
3763  if((flags & fl) == (fl)) \
3764  { \
3765  if(gotone) \
3766  { \
3767  if(pos + 1 < buf.len) \
3768  buf[pos] = '|'; \
3769  ++pos; \
3770  } \
3771  csubstr fltxt = #fl; \
3772  if(pos + fltxt.len <= buf.len) \
3773  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3774  pos += fltxt.len; \
3775  gotone = true; \
3776  }
3777 
3778  _prflag(RTOP);
3779  _prflag(RUNK);
3780  _prflag(RMAP);
3781  _prflag(RSEQ);
3782  _prflag(FLOW);
3783  _prflag(BLCK);
3784  _prflag(QMRK);
3785  _prflag(RKEY);
3786  _prflag(RVAL);
3787  _prflag(RKCL);
3788  _prflag(RNXT);
3789  _prflag(SSCL);
3790  _prflag(QSCL);
3791  _prflag(RSET);
3792  _prflag(RDOC);
3793  _prflag(NDOC);
3794  _prflag(USTY);
3795  _prflag(RSEQIMAP);
3796 
3797  #undef _prflag
3798 
3799  if(pos == 0)
3800  if(buf.len > 0)
3801  buf[pos++] = '0';
3802 
3803  RYML_CHECK(pos <= buf.len);
3804 
3805  return buf.first(pos);
3806 }
3807 
3808 #endif // RYML_DBG !!! <----------------------------------
3809 
3810 
3811 //-----------------------------------------------------------------------------
3812 //-----------------------------------------------------------------------------
3813 //-----------------------------------------------------------------------------
3814 
3815 template<class EventHandler>
3817 {
3818  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3819  return m_buf.sub(loc.offset);
3820 }
3821 
3822 template<class EventHandler>
3824 {
3825  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3826  return location(*node.tree(), node.id());
3827 }
3828 
3829 template<class EventHandler>
3831 {
3832  // try hard to avoid getting the location from a null string.
3833  Location loc;
3834  if(_location_from_node(tree, node, &loc, 0))
3835  return loc;
3836  return val_location(m_buf.str);
3837 }
3838 
3839 template<class EventHandler>
3840 bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
3841 {
3842  if(tree.has_key(node))
3843  {
3844  csubstr k = tree.key(node);
3845  if(C4_LIKELY(k.str != nullptr))
3846  {
3847  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3848  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3849  *loc = val_location(k.str);
3850  return true;
3851  }
3852  }
3853 
3854  if(tree.has_val(node))
3855  {
3856  csubstr v = tree.val(node);
3857  if(C4_LIKELY(v.str != nullptr))
3858  {
3859  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3860  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3861  *loc = val_location(v.str);
3862  return true;
3863  }
3864  }
3865 
3866  if(tree.is_container(node))
3867  {
3868  if(_location_from_cont(tree, node, loc))
3869  return true;
3870  }
3871 
3872  if(tree.type(node) != NOTYPE && level == 0)
3873  {
3874  // try the prev sibling
3875  {
3876  const id_type prev = tree.prev_sibling(node);
3877  if(prev != NONE)
3878  {
3879  if(_location_from_node(tree, prev, loc, level+1))
3880  return true;
3881  }
3882  }
3883  // try the next sibling
3884  {
3885  const id_type next = tree.next_sibling(node);
3886  if(next != NONE)
3887  {
3888  if(_location_from_node(tree, next, loc, level+1))
3889  return true;
3890  }
3891  }
3892  // try the parent
3893  {
3894  const id_type parent = tree.parent(node);
3895  if(parent != NONE)
3896  {
3897  if(_location_from_node(tree, parent, loc, level+1))
3898  return true;
3899  }
3900  }
3901  }
3902 
3903  return false;
3904 }
3905 
3906 template<class EventHandler>
3907 bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
3908 {
3909  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3910  if(!tree.is_stream(node))
3911  {
3912  const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
3913  if(tree.has_children(node))
3914  {
3915  id_type child = tree.first_child(node);
3916  if(tree.has_key(child))
3917  {
3918  // when a map starts, the container was set after the key
3919  csubstr k = tree.key(child);
3920  if(k.str && node_start > k.str)
3921  node_start = k.str;
3922  }
3923  }
3924  *loc = val_location(node_start);
3925  return true;
3926  }
3927  else // it's a stream
3928  {
3929  *loc = val_location(m_buf.str); // just return the front of the buffer
3930  }
3931  return true;
3932 }
3933 
3934 
3935 template<class EventHandler>
3937 {
3938  if(C4_UNLIKELY(val == nullptr))
3939  return {m_file, 0, 0, 0};
3940  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3941  // NOTE: if any of these checks fails, the parser needs to be
3942  // instantiated with locations enabled.
3943  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3944  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3945  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3946  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3947  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3948  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3949  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3950  csubstr src = m_buf;
3951  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3952  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3953  // ok. search the first stored newline after the given ptr
3954  using lineptr_type = size_t const* C4_RESTRICT;
3955  lineptr_type lineptr = nullptr;
3956  size_t offset = (size_t)(val - src.begin());
3957  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3958  {
3959  // just do a linear search if the size is small.
3960  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3961  {
3962  if(*curr > offset)
3963  {
3964  lineptr = curr;
3965  break;
3966  }
3967  }
3968  }
3969  else
3970  {
3971  // do a bisection search if the size is not small.
3972  //
3973  // We could use std::lower_bound but this is simple enough and
3974  // spares the costly include of <algorithm>.
3975  size_t count = m_newline_offsets_size;
3976  size_t step;
3977  lineptr_type it;
3978  lineptr = m_newline_offsets;
3979  while(count)
3980  {
3981  step = count >> 1;
3982  it = lineptr + step;
3983  if(*it < offset)
3984  {
3985  lineptr = ++it;
3986  count -= step + 1;
3987  }
3988  else
3989  {
3990  count = step;
3991  }
3992  }
3993  }
3994  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
3995  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
3996  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
3997  Location loc;
3998  loc.name = m_file;
3999  loc.offset = offset;
4000  loc.line = (size_t)(lineptr - m_newline_offsets);
4001  if(lineptr > m_newline_offsets)
4002  loc.col = (offset - *(lineptr-1) - 1u);
4003  else
4004  loc.col = offset;
4005  return loc;
4006 }
4007 
4008 template<class EventHandler>
4010 {
4011  m_newline_offsets_buf = m_buf;
4012  size_t numnewlines = 1u + m_buf.count('\n');
4013  _resize_locations(numnewlines);
4014  m_newline_offsets_size = 0;
4015  for(size_t i = 0; i < m_buf.len; i++)
4016  if(m_buf[i] == '\n')
4017  m_newline_offsets[m_newline_offsets_size++] = i;
4018  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4019  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4020 }
4021 
4022 template<class EventHandler>
4023 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4024 {
4025  if(numnewlines > m_newline_offsets_capacity)
4026  {
4027  if(m_newline_offsets)
4028  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4029  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4030  m_newline_offsets_capacity = numnewlines;
4031  }
4032 }
4033 
4034 template<class EventHandler>
4035 bool ParseEngine<EventHandler>::_locations_dirty() const
4036 {
4037  return !m_newline_offsets_size;
4038 }
4039 
4040 
4041 //-----------------------------------------------------------------------------
4042 //-----------------------------------------------------------------------------
4043 //-----------------------------------------------------------------------------
4044 
4045 template<class EventHandler>
4046 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4047 {
4048  if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4049  {
4050  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4051  if(rem.str[0] == ' ' || rem.str[0] == '\t')
4052  {
4053  _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0]));
4054  _skipchars(" \t");
4055  rem = m_evt_handler->m_curr->line_contents.rem;
4056  }
4057  // comments
4058  if(rem.begins_with('#'))
4059  {
4060  _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4061  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4062  }
4063  }
4064 }
4065 
4066 
4067 //-----------------------------------------------------------------------------
4068 
4069 
4070 template<class EventHandler>
4071 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4072 {
4073  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4074  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4075  _c4err("too many annotations");
4076  dst->annotations[dst->num_entries].str = str;
4077  dst->annotations[dst->num_entries].indentation = indentation;
4078  dst->annotations[dst->num_entries].line = line;
4079  ++dst->num_entries;
4080 }
4081 
4082 template<class EventHandler>
4083 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4084 {
4085  dst->num_entries = 0;
4086 }
4087 
4088 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4089 template<class EventHandler>
4090 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4091 {
4092  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4093  {
4094  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4095  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4096  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4097  size_t to_skip = m_evt_handler->m_curr->indref;
4098  if(m_pending_anchors.num_entries)
4099  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4100  if(m_pending_tags.num_entries)
4101  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4102  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4103  _maybe_skipchars_up_to(' ', to_skip);
4104  return true;
4105  }
4106  return false;
4107 }
4108 #endif
4109 
4110 template<class EventHandler>
4111 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4112 {
4113  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4114 }
4115 
4116 template<class EventHandler>
4117 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4118 {
4119  if(!tag.begins_with("!<"))
4120  {
4121  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4122  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4123  }
4124  else
4125  {
4126  if(C4_UNLIKELY(!tag.ends_with('>')))
4127  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
4128  }
4129 }
4130 
4131 template<class EventHandler>
4132 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4133 {
4134  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4135  if(m_pending_tags.num_entries)
4136  {
4137  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4138  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4139  {
4140  _check_tag(m_pending_tags.annotations[0].str);
4141  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4142  _clear_annotations(&m_pending_tags);
4143  }
4144  else
4145  {
4146  _c4err("too many tags");
4147  }
4148  }
4149  if(m_pending_anchors.num_entries)
4150  {
4151  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4152  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4153  {
4154  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4155  _clear_annotations(&m_pending_anchors);
4156  }
4157  else
4158  {
4159  _c4err("too many anchors");
4160  }
4161  }
4162 }
4163 
4164 template<class EventHandler>
4165 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4166 {
4167  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4168  if(m_pending_tags.num_entries)
4169  {
4170  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4171  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4172  {
4173  _check_tag(m_pending_tags.annotations[0].str);
4174  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4175  _clear_annotations(&m_pending_tags);
4176  }
4177  else
4178  {
4179  _c4err("too many tags");
4180  }
4181  }
4182  if(m_pending_anchors.num_entries)
4183  {
4184  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4185  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4186  {
4187  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4188  _clear_annotations(&m_pending_anchors);
4189  }
4190  else
4191  {
4192  _c4err("too many anchors");
4193  }
4194  }
4195 }
4196 
4197 template<class EventHandler>
4198 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4199 {
4200  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4201  if(m_pending_tags.num_entries == 2)
4202  {
4203  _c4dbgp("2 tags, setting entry 0");
4204  _check_tag(m_pending_tags.annotations[0].str);
4205  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4206  }
4207  else if(m_pending_tags.num_entries == 1)
4208  {
4209  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4210  if(m_pending_tags.annotations[0].line < current_line)
4211  {
4212  _c4dbgp("...tag is for the map. setting it.");
4213  _check_tag(m_pending_tags.annotations[0].str);
4214  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4215  _clear_annotations(&m_pending_tags);
4216  }
4217  }
4218  //
4219  if(m_pending_anchors.num_entries == 2)
4220  {
4221  _c4dbgp("2 anchors, setting entry 0");
4222  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4223  }
4224  else if(m_pending_anchors.num_entries == 1)
4225  {
4226  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4227  if(m_pending_anchors.annotations[0].line < current_line)
4228  {
4229  _c4dbgp("...anchor is for the map. setting it.");
4230  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4231  _clear_annotations(&m_pending_anchors);
4232  }
4233  }
4234 }
4235 
4236 template<class EventHandler>
4237 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4238 {
4239  _c4dbgp("annotations_before_start_mapblck_as_key");
4240  if(m_pending_tags.num_entries == 2)
4241  {
4242  _check_tag(m_pending_tags.annotations[0].str);
4243  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4244  }
4245  if(m_pending_anchors.num_entries == 2)
4246  {
4247  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4248  }
4249 }
4250 
4251 template<class EventHandler>
4252 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4253 {
4254  _c4dbgp("annotations_after_start_mapblck");
4255  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4256  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4257  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4258  {
4259  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4260  switch(m_pending_tags.num_entries)
4261  {
4262  case 1u:
4263  _check_tag(m_pending_tags.annotations[0].str);
4264  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4265  _clear_annotations(&m_pending_tags);
4266  break;
4267  case 2u:
4268  _check_tag(m_pending_tags.annotations[1].str);
4269  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4270  _clear_annotations(&m_pending_tags);
4271  break;
4272  }
4273  switch(m_pending_anchors.num_entries)
4274  {
4275  case 1u:
4276  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4277  _clear_annotations(&m_pending_anchors);
4278  break;
4279  case 2u:
4280  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4281  _clear_annotations(&m_pending_anchors);
4282  break;
4283  }
4284  }
4285  _set_indentation(key_indentation);
4286 }
4287 
4288 template<class EventHandler>
4289 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4290 {
4291  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4292  // select the left-most annotation on the max line
4293  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4294  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4295  {
4296  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4297  if(ann.line > curr->line)
4298  curr = &ann;
4299  else if(ann.indentation < curr->indentation)
4300  curr = &ann;
4301  }
4302  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4303  {
4304  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4305  if(ann.line > curr->line)
4306  curr = &ann;
4307  else if(ann.indentation < curr->indentation)
4308  curr = &ann;
4309  }
4310  return curr->line < val_line ? val_indentation : curr->indentation;
4311 }
4312 
4313 template<class EventHandler>
4314 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4315 {
4316  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4317  const size_t pos = rem.find('#');
4318  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4319  if(pos == npos) // no comments
4320  {
4321  m_evt_handler->add_directive(rem);
4322  _line_progressed(rem.len);
4323  }
4324  else
4325  {
4326  csubstr to_comment = rem.first(pos);
4327  csubstr trimmed = to_comment.trimr(" \t");
4328  m_evt_handler->add_directive(trimmed);
4329  _line_progressed(pos);
4330  _skip_comment();
4331  }
4332 }
4333 
4334 
4335 //-----------------------------------------------------------------------------
4336 
4337 template<class EventHandler>
4338 void ParseEngine<EventHandler>::_handle_seq_json()
4339 {
4340 seqjson_start:
4341  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4342 
4343  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4344  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4345  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4346  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4347  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4348 
4349  _handle_flow_skip_whitespace();
4350  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4351  if(!rem.len)
4352  goto seqjson_again;
4353 
4354  if(has_any(RVAL))
4355  {
4356  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4357  const char first = rem.str[0];
4358  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4359  switch(first)
4360  {
4361  case '"':
4362  {
4363  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4364  ScannedScalar sc = _scan_scalar_dquot();
4365  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4366  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4367  addrem_flags(RNXT, RVAL);
4368  break;
4369  }
4370  case '[':
4371  {
4372  _c4dbgp("seqjson[RVAL]: start child seqjson");
4373  addrem_flags(RNXT, RVAL);
4374  m_evt_handler->begin_seq_val_flow();
4375  addrem_flags(RVAL, RNXT);
4376  _line_progressed(1);
4377  break;
4378  }
4379  case '{':
4380  {
4381  _c4dbgp("seqjson[RVAL]: start child mapjson");
4382  addrem_flags(RNXT, RVAL);
4383  m_evt_handler->begin_map_val_flow();
4384  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4385  _line_progressed(1);
4386  goto seqjson_finish;
4387  }
4388  case ']': // this happens on a trailing comma like ", ]"
4389  {
4390  _c4dbgp("seqjson[RVAL]: end!");
4391  rem_flags(RSEQ);
4392  m_evt_handler->end_seq();
4393  _line_progressed(1);
4394  if(!has_all(RSEQ|FLOW))
4395  goto seqjson_finish;
4396  break;
4397  }
4398  default:
4399  {
4400  ScannedScalar sc;
4401  if(_scan_scalar_seq_json(&sc))
4402  {
4403  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4404  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4405  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4406  addrem_flags(RNXT, RVAL);
4407  }
4408  else
4409  {
4410  _c4err("parse error");
4411  }
4412  }
4413  }
4414  }
4415  else // RNXT
4416  {
4417  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4418  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4419  const char first = rem.str[0];
4420  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4421  switch(first)
4422  {
4423  case ',':
4424  {
4425  _c4dbgp("seqjson[RNXT]: expect next val");
4426  addrem_flags(RVAL, RNXT);
4427  m_evt_handler->add_sibling();
4428  _line_progressed(1);
4429  break;
4430  }
4431  case ']':
4432  {
4433  _c4dbgp("seqjson[RNXT]: end!");
4434  m_evt_handler->end_seq();
4435  _line_progressed(1);
4436  goto seqjson_finish;
4437  }
4438  default:
4439  _c4err("parse error");
4440  }
4441  }
4442 
4443  seqjson_again:
4444  _c4dbgt("seqjson: go again", 0);
4445  if(_finished_line())
4446  {
4447  if(C4_LIKELY(!_finished_file()))
4448  {
4449  _line_ended();
4450  _scan_line();
4451  _c4dbgnextline();
4452  }
4453  else
4454  {
4455  _c4err("missing terminating ]");
4456  }
4457  }
4458  goto seqjson_start;
4459 
4460  seqjson_finish:
4461  _c4dbgp("seqjson: finish");
4462 }
4463 
4464 
4465 //-----------------------------------------------------------------------------
4466 
4467 template<class EventHandler>
4468 void ParseEngine<EventHandler>::_handle_map_json()
4469 {
4470 mapjson_start:
4471  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4472 
4473  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4474  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4475  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4476  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
4477  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4478 
4479  _handle_flow_skip_whitespace();
4480  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4481  if(!rem.len)
4482  goto mapjson_again;
4483 
4484  if(has_any(RKEY))
4485  {
4486  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4487  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4488  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4489  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4490  const char first = rem.str[0];
4491  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4492  switch(first)
4493  {
4494  case '"':
4495  {
4496  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4497  ScannedScalar sc = _scan_scalar_dquot();
4498  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4499  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4500  addrem_flags(RKCL, RKEY);
4501  break;
4502  }
4503  case '}': // this happens on a trailing comma like ", }"
4504  {
4505  _c4dbgp("mapjson[RKEY]: end!");
4506  m_evt_handler->end_map();
4507  _line_progressed(1);
4508  goto mapjson_finish;
4509  }
4510  default:
4511  _c4err("parse error");
4512  }
4513  }
4514  else if(has_any(RVAL))
4515  {
4516  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4517  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4518  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4519  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4520  const char first = rem.str[0];
4521  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4522  switch(first)
4523  {
4524  case '"':
4525  {
4526  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4527  ScannedScalar sc = _scan_scalar_dquot();
4528  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4529  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4530  addrem_flags(RNXT, RVAL);
4531  break;
4532  }
4533  case '[':
4534  {
4535  _c4dbgp("mapjson[RVAL]: start val seqjson");
4536  addrem_flags(RNXT, RVAL);
4537  m_evt_handler->begin_seq_val_flow();
4538  _set_indentation(m_evt_handler->m_parent->indref);
4539  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4540  _line_progressed(1);
4541  goto mapjson_finish;
4542  }
4543  case '{':
4544  {
4545  _c4dbgp("mapjson[RVAL]: start val mapjson");
4546  addrem_flags(RNXT, RVAL);
4547  m_evt_handler->begin_map_val_flow();
4548  _set_indentation(m_evt_handler->m_parent->indref);
4549  addrem_flags(RKEY, RNXT);
4550  _line_progressed(1);
4551  // keep going in this function
4552  break;
4553  }
4554  default:
4555  {
4556  ScannedScalar sc;
4557  if(_scan_scalar_map_json(&sc))
4558  {
4559  _c4dbgp("mapjson[RVAL]: plain scalar.");
4560  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4561  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4562  addrem_flags(RNXT, RVAL);
4563  }
4564  else
4565  {
4566  _c4err("parse error");
4567  }
4568  break;
4569  }
4570  }
4571  }
4572  else if(has_any(RKCL)) // read the key colon
4573  {
4574  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4575  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4576  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4577  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4578  const char first = rem.str[0];
4579  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4580  if(first == ':')
4581  {
4582  _c4dbgp("mapjson[RKCL]: found the colon");
4583  addrem_flags(RVAL, RKCL);
4584  _line_progressed(1);
4585  }
4586  else
4587  {
4588  _c4err("parse error");
4589  }
4590  }
4591  else if(has_any(RNXT))
4592  {
4593  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4594  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4595  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4596  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4597  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4598  if(rem.begins_with(','))
4599  {
4600  _c4dbgp("mapjson[RNXT]: expect next keyval");
4601  m_evt_handler->add_sibling();
4602  addrem_flags(RKEY, RNXT);
4603  _line_progressed(1);
4604  }
4605  else if(rem.begins_with('}'))
4606  {
4607  _c4dbgp("mapjson[RNXT]: end!");
4608  m_evt_handler->end_map();
4609  _line_progressed(1);
4610  goto mapjson_finish;
4611  }
4612  else
4613  {
4614  _c4err("parse error");
4615  }
4616  }
4617 
4618  mapjson_again:
4619  _c4dbgt("mapjson: go again", 0);
4620  if(_finished_line())
4621  {
4622  if(C4_LIKELY(!_finished_file()))
4623  {
4624  _line_ended();
4625  _scan_line();
4626  _c4dbgnextline();
4627  }
4628  else
4629  {
4630  _c4err("missing terminating }");
4631  }
4632  }
4633  goto mapjson_start;
4634 
4635  mapjson_finish:
4636  _c4dbgp("mapjson: finish");
4637 }
4638 
4639 
4640 //-----------------------------------------------------------------------------
4641 
4642 template<class EventHandler>
4643 void ParseEngine<EventHandler>::_handle_seq_imap()
4644 {
4645 seqimap_start:
4646  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4647 
4648  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4649  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4650  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
4651  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4652  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4653 
4654  _handle_flow_skip_whitespace();
4655  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4656  if(!rem.len)
4657  goto seqimap_again;
4658 
4659  if(has_any(RVAL))
4660  {
4661  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4662  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4663  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4664  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4665  const char first = rem.str[0];
4666  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4667  ScannedScalar sc;
4668  if(first == '\'')
4669  {
4670  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4671  sc = _scan_scalar_squot();
4672  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4673  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4674  m_evt_handler->end_map();
4675  goto seqimap_finish;
4676  }
4677  else if(first == '"')
4678  {
4679  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4680  sc = _scan_scalar_dquot();
4681  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4682  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4683  m_evt_handler->end_map();
4684  goto seqimap_finish;
4685  }
4686  // block scalars (ie | and >) cannot appear in flow containers
4687  else if(_scan_scalar_plain_map_flow(&sc))
4688  {
4689  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4690  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4691  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4692  m_evt_handler->end_map();
4693  goto seqimap_finish;
4694  }
4695  else if(first == '[')
4696  {
4697  _c4dbgp("seqimap[RVAL]: start child seqflow");
4698  addrem_flags(RNXT, RVAL);
4699  m_evt_handler->begin_seq_val_flow();
4700  addrem_flags(RVAL, RNXT|RSEQIMAP);
4701  _set_indentation(m_evt_handler->m_parent->indref);
4702  _line_progressed(1);
4703  goto seqimap_finish;
4704  }
4705  else if(first == '{')
4706  {
4707  _c4dbgp("seqimap[RVAL]: start child mapflow");
4708  addrem_flags(RNXT, RVAL);
4709  m_evt_handler->begin_map_val_flow();
4710  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4711  _set_indentation(m_evt_handler->m_parent->indref);
4712  _line_progressed(1);
4713  goto seqimap_finish;
4714  }
4715  else if(first == ',' || first == ']')
4716  {
4717  _c4dbgp("seqimap[RVAL]: finish without val.");
4718  m_evt_handler->set_val_scalar_plain({});
4719  m_evt_handler->end_map();
4720  goto seqimap_finish;
4721  }
4722  else if(first == '&')
4723  {
4724  csubstr anchor = _scan_anchor();
4725  _c4dbgp("seqimap[RVAL]: anchor!");
4726  m_evt_handler->set_val_anchor(anchor);
4727  }
4728  else if(first == '*')
4729  {
4730  csubstr ref = _scan_ref_seq();
4731  _c4dbgp("seqimap[RVAL]: ref!");
4732  m_evt_handler->set_val_ref(ref);
4733  addrem_flags(RNXT, RVAL);
4734  }
4735  else
4736  {
4737  _c4err("parse error");
4738  }
4739  }
4740  else if(has_any(RNXT))
4741  {
4742  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4743  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4744  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4745  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4746  const char first = rem.str[0];
4747  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4748  if(first == ',' || first == ']')
4749  {
4750  // we may get here because a map or a seq started and we
4751  // return later
4752  _c4dbgp("seqimap: done");
4753  m_evt_handler->end_map();
4754  goto seqimap_finish;
4755  }
4756  else
4757  {
4758  _c4err("parse error");
4759  }
4760  }
4761  else if(has_any(QMRK))
4762  {
4763  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4764  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4765  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4766  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4767  const char first = rem.str[0];
4768  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4769  ScannedScalar sc;
4770  if(first == '\'')
4771  {
4772  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4773  sc = _scan_scalar_squot();
4774  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4775  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4776  addrem_flags(RKCL, QMRK);
4777  goto seqimap_again;
4778  }
4779  else if(first == '"')
4780  {
4781  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4782  sc = _scan_scalar_dquot();
4783  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4784  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4785  addrem_flags(RKCL, QMRK);
4786  goto seqimap_again;
4787  }
4788  // block scalars (ie | and >) cannot appear in flow containers
4789  else if(_scan_scalar_plain_map_flow(&sc))
4790  {
4791  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4792  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4793  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4794  addrem_flags(RKCL, QMRK);
4795  goto seqimap_again;
4796  }
4797  else if(first == '[')
4798  {
4799  _c4dbgp("seqimap[QMRK]: start child seqflow");
4800  addrem_flags(RKCL, QMRK);
4801  m_evt_handler->begin_seq_key_flow();
4802  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4803  _set_indentation(m_evt_handler->m_parent->indref);
4804  _line_progressed(1);
4805  goto seqimap_finish;
4806  }
4807  else if(first == '{')
4808  {
4809  _c4dbgp("seqimap[QMRK]: start child mapflow");
4810  addrem_flags(RKCL, QMRK);
4811  m_evt_handler->begin_map_key_flow();
4812  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4813  _set_indentation(m_evt_handler->m_parent->indref);
4814  _line_progressed(1);
4815  goto seqimap_finish;
4816  }
4817  else if(first == ',' || first == ']')
4818  {
4819  _c4dbgp("seqimap[QMRK]: finish without key.");
4820  m_evt_handler->set_key_scalar_plain({});
4821  m_evt_handler->set_val_scalar_plain({});
4822  m_evt_handler->end_map();
4823  goto seqimap_finish;
4824  }
4825  else if(first == '&')
4826  {
4827  csubstr anchor = _scan_anchor();
4828  _c4dbgp("seqimap[QMRK]: anchor!");
4829  m_evt_handler->set_key_anchor(anchor);
4830  }
4831  else if(first == '*')
4832  {
4833  csubstr ref = _scan_ref_seq();
4834  _c4dbgp("seqimap[QMRK]: ref!");
4835  m_evt_handler->set_key_ref(ref);
4836  addrem_flags(RKCL, QMRK);
4837  }
4838  else
4839  {
4840  _c4err("parse error");
4841  }
4842  }
4843  else if(has_any(RKCL))
4844  {
4845  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4846  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4847  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4848  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4849  const char first = rem.str[0];
4850  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4851  if(first == ':')
4852  {
4853  _c4dbgp("seqimap[RKCL]: found ':'");
4854  addrem_flags(RVAL, RKCL);
4855  _line_progressed(1);
4856  goto seqimap_again;
4857  }
4858  else if(first == ',' || first == ']')
4859  {
4860  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4861  m_evt_handler->set_val_scalar_plain({});
4862  m_evt_handler->end_map();
4863  goto seqimap_finish;
4864  }
4865  else
4866  {
4867  _c4err("parse error");
4868  }
4869  }
4870 
4871  seqimap_again:
4872  _c4dbgt("seqimap: go again", 0);
4873  if(_finished_line())
4874  {
4875  if(C4_LIKELY(!_finished_file()))
4876  {
4877  _line_ended();
4878  _scan_line();
4879  _c4dbgnextline();
4880  }
4881  else
4882  {
4883  _c4err("parse error");
4884  }
4885  }
4886  goto seqimap_start;
4887 
4888  seqimap_finish:
4889  _c4dbgp("seqimap: finish");
4890 }
4891 
4892 
4893 //-----------------------------------------------------------------------------
4894 
// Parse the items of a flow sequence ("[ ... ]") at the current level.
//
// The function is a small goto-driven state machine. Each pass through
// seqflow_start looks at the first unconsumed character of the current
// line and acts on it according to which state flag is set. The
// assertions at the top of each pass require RSEQ and FLOW to be set,
// RKEY to be clear, and exactly one of:
//   - RVAL: a value is expected next
//   - RNXT: a value was just read; ',' or ']' or ':' is expected next
//
// Control leaves the loop through seqflow_finish when the sequence is
// closed with ']' or when the flags were switched to something this
// function does not handle (a child flow map, or the RSEQIMAP state).
// Presumably the caller then dispatches on the updated flags -- confirm
// against the caller, which is not visible from here.
//
// Errors are reported through _c4err(): "parse error" for an unexpected
// character, "missing terminating ]" when the file ends first.
4895 template<class EventHandler>
4896 void ParseEngine<EventHandler>::_handle_seq_flow()
4897 {
4898 seqflow_start:
4899  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4900
      // invariants checked on every pass (see the header comment)
4901  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4902  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4903  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4904  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4905  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4906  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
4907
4908  _handle_flow_skip_whitespace();
      // rem: what is left of the current line. When nothing is left,
      // jump to seqflow_again, which moves on to the next line.
4909  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4910  if(!rem.len)
4911  goto seqflow_again;
4912
4913  if(has_any(RVAL))
4914  {
4915  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4916  const char first = rem.str[0];
4917  ScannedScalar sc;
      // NOTE: the order of this else-if chain matters: quoted scalars
      // are tried first, then the plain-scalar scan, and only when that
      // scan reports no scalar are the indicator characters examined.
4918  if(first == '\'')
4919  {
4920  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
4921  sc = _scan_scalar_squot();
4922  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4923  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
      // value read: switch RVAL -> RNXT
4924  addrem_flags(RNXT, RVAL);
4925  }
4926  else if(first == '"')
4927  {
4928  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
4929  sc = _scan_scalar_dquot();
4930  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4931  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4932  addrem_flags(RNXT, RVAL);
4933  }
4934  // block scalars (ie | and >) cannot appear in flow containers
4935  else if(_scan_scalar_plain_seq_flow(&sc))
4936  {
4937  _c4dbgp("seqflow[RVAL]: it's a scalar.");
4938  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4939  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4940  addrem_flags(RNXT, RVAL);
4941  }
4942  else if(first == '[')
4943  {
      // Nested flow sequence. RNXT is set before begin_seq_val_flow()
      // and RVAL restored after it -- presumably so that the enclosing
      // level is left in RNXT while the new level starts in RVAL
      // (confirm against the handler's push logic). Since the flags
      // are again RSEQ|RVAL there is no jump to seqflow_finish: the
      // child's items are handled by the next passes of this function.
4944  _c4dbgp("seqflow[RVAL]: start child seqflow");
4945  addrem_flags(RNXT, RVAL);
4946  m_evt_handler->begin_seq_val_flow();
4947  _set_indentation(m_evt_handler->m_parent->indref);
4948  addrem_flags(RVAL, RNXT);
4949  _line_progressed(1);
4950  }
4951  else if(first == '{')
4952  {
      // Nested flow map: the flags become RMAP|RKEY, which this
      // function does not handle (it asserts RSEQ), so leave.
4953  _c4dbgp("seqflow[RVAL]: start child mapflow");
4954  addrem_flags(RNXT, RVAL);
4955  m_evt_handler->begin_map_val_flow();
4956  _set_indentation(m_evt_handler->m_parent->indref);
4957  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4958  _line_progressed(1);
4959  goto seqflow_finish;
4960  }
4961  else if(first == ']') // this happens on a trailing comma like ", ]"
4962  {
4963  _c4dbgp("seqflow[RVAL]: end!");
4964  _line_progressed(1);
4965  m_evt_handler->end_seq();
4966  goto seqflow_finish;
4967  }
4968  else if(first == '*')
4969  {
      // a reference used as the value: counts as a complete value
4970  csubstr ref = _scan_ref_seq();
4971  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
4972  m_evt_handler->set_val_ref(ref);
4973  addrem_flags(RNXT, RVAL);
4974  }
4975  else if(first == '&')
4976  {
      // The anchor is attached to the upcoming value, so the state
      // stays RVAL. If _maybe_scan_following_comma() reports a comma,
      // the anchored value is emitted as an empty plain scalar and a
      // sibling is added for the next item.
4977  csubstr anchor = _scan_anchor();
4978  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
4979  m_evt_handler->set_val_anchor(anchor);
4980  if(_maybe_scan_following_comma())
4981  {
4982  _c4dbgp("seqflow[RVAL]: empty scalar!");
4983  m_evt_handler->set_val_scalar_plain({});
4984  m_evt_handler->add_sibling();
4985  }
4986  }
4987  else if(first == '!')
4988  {
      // Tag: same treatment as the anchor branch above, with the
      // scanned tag additionally passed through _check_tag().
4989  csubstr tag = _scan_tag();
4990  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
4991  _check_tag(tag);
4992  m_evt_handler->set_val_tag(tag);
4993  if(_maybe_scan_following_comma())
4994  {
4995  _c4dbgp("seqflow[RVAL]: empty scalar!");
4996  m_evt_handler->set_val_scalar_plain({});
4997  m_evt_handler->add_sibling();
4998  }
4999  }
5000  else if(first == ':')
5001  {
      // ':' where a value was expected: a map is started as the value,
      // its key is set to an empty plain scalar, and the flags switch
      // to RSEQIMAP|RVAL, which is handled outside this function.
5002  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5003  addrem_flags(RNXT, RVAL);
5004  m_evt_handler->begin_map_val_flow();
5005  _set_indentation(m_evt_handler->m_parent->indref);
5006  m_evt_handler->set_key_scalar_plain({});
5007  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5008  _line_progressed(1);
5009  goto seqflow_finish;
5010  }
5011  else if(first == '?')
5012  {
      // '?' (explicit key): a map is started as the value and the
      // flags switch to RSEQIMAP|QMRK. m_was_inside_qmrk is set here;
      // where it is consumed is not visible from this function.
5013  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5014  addrem_flags(RNXT, RVAL);
5015  m_was_inside_qmrk = true;
5016  m_evt_handler->begin_map_val_flow();
5017  _set_indentation(m_evt_handler->m_parent->indref);
5018  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5019  _line_progressed(1);
5020  _maybe_skip_whitespace_tokens();
5021  goto seqflow_finish;
5022  }
5023  else
5024  {
5025  _c4err("parse error");
5026  }
5027  }
5028  else // RNXT
5029  {
5030  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5031  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5032  const char first = rem.str[0];
5033  if(first == ',')
5034  {
      // separator: add a sibling and go back to expecting a value
5035  _c4dbgp("seqflow[RNXT]: expect next val");
5036  addrem_flags(RVAL, RNXT);
5037  m_evt_handler->add_sibling();
5038  _line_progressed(1);
5039  }
5040  else if(first == ']')
5041  {
5042  _c4dbgp("seqflow[RNXT]: end!");
5043  m_evt_handler->end_seq();
5044  _line_progressed(1);
5045  goto seqflow_finish;
5046  }
5047  else if(first == ':')
5048  {
      // ':' after a value: the handler is told that the value just
      // read is really the first key of a new flow map, and the flags
      // switch to RSEQIMAP|RVAL to read that key's value elsewhere.
5049  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5050  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5051  _set_indentation(m_evt_handler->m_parent->indref);
5052  _line_progressed(1);
5053  addrem_flags(RSEQIMAP|RVAL, RNXT);
5054  goto seqflow_finish;
5055  }
5056  else
5057  {
5058  _c4err("parse error");
5059  }
5060  }
5061
      // Reached after every branch that did not jump to seqflow_finish.
      // If the current line is used up, scan the next one; running out
      // of file here means the sequence was never closed.
5062  seqflow_again:
5063  _c4dbgt("seqflow: go again", 0);
5064  if(_finished_line())
5065  {
5066  if(C4_LIKELY(!_finished_file()))
5067  {
5068  _line_ended();
5069  _scan_line();
5070  _c4dbgnextline();
5071  }
5072  else
5073  {
5074  _c4err("missing terminating ]");
5075  }
5076  }
5077  goto seqflow_start;
5078
5079  seqflow_finish:
5080  _c4dbgp("seqflow: finish");
5081 }
5082 
5083 
5084 //-----------------------------------------------------------------------------
5085 
// Parse the contents of a flow map ("{ ... }") at the current level.
//
// Like the flow-sequence handler, this is a goto-driven state machine.
// Each pass through mapflow_start looks at the first unconsumed
// character of the current line. The assertions at the top require
// RMAP and FLOW to be set, plus exactly one of these state flags:
//   - RKEY: a key is expected
//   - RKCL: a key was read; ':' is expected (',' or '}' mean the value
//           is missing and an empty plain scalar is set instead)
//   - RVAL: a value is expected
//   - RNXT: a value was read; ',' or '}' is expected
//   - QMRK: a '?' was read; the explicit key is expected
//
// Control leaves through mapflow_finish when a map is closed with '}'
// or when a child flow sequence is started (the flags then contain
// RSEQ, which this function does not handle). A child flow *map*, on
// the other hand, keeps the RMAP flag and is parsed by the following
// passes of this same function.
//
// Errors are reported through _c4err(): "parse error" for an unexpected
// character, "missing terminating }" when the file ends first.
5086 template<class EventHandler>
5087 void ParseEngine<EventHandler>::_handle_map_flow()
5088 {
5089 mapflow_start:
5090  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5091
5092  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5093  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5094  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
      // exactly one of the five state flags may be set
5095  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5096
5097  _handle_flow_skip_whitespace();
      // rem: what is left of the current line. When nothing is left,
      // jump to mapflow_again, which moves on to the next line.
5098  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5099  if(!rem.len)
5100  goto mapflow_again;
5101
      // ---- RKEY: expecting a key ----
5102  if(has_any(RKEY))
5103  {
5104  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5105  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5106  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5107  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5108  const char first = rem.str[0];
5109  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5110  ScannedScalar sc;
      // NOTE: the order of this else-if chain matters: quoted scalars
      // are tried first, then the plain-scalar scan, and only when that
      // scan reports no scalar are the indicator characters examined.
5111  if(first == '\'')
5112  {
5113  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5114  sc = _scan_scalar_squot();
5115  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5116  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
      // key read: next comes the colon. (QMRK is asserted clear
      // above, so removing it here has no effect in this state.)
5117  addrem_flags(RKCL, RKEY|QMRK);
5118  }
5119  else if(first == '"')
5120  {
5121  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5122  sc = _scan_scalar_dquot();
5123  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5124  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5125  addrem_flags(RKCL, RKEY|QMRK);
5126  }
5127  // block scalars (ie | and >) cannot appear in flow containers
5128  else if(_scan_scalar_plain_map_flow(&sc))
5129  {
5130  _c4dbgp("mapflow[RKEY]: plain scalar");
5131  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5132  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5133  addrem_flags(RKCL, RKEY|QMRK);
5134  }
5135  else if(first == '?')
5136  {
      // explicit key indicator: the key itself is read in QMRK state
5137  _c4dbgp("mapflow[RKEY]: explicit key");
5138  _line_progressed(1);
5139  addrem_flags(QMRK, RKEY);
5140  _maybe_skip_whitespace_tokens();
5141  }
5142  else if(first == ':')
5143  {
      // ':' with no key before it: the key is an empty plain scalar
      // and the state goes straight to RVAL
5144  _c4dbgp("mapflow[RKEY]: setting empty key");
5145  m_evt_handler->set_key_scalar_plain({});
5146  addrem_flags(RVAL, RKEY|QMRK);
5147  _line_progressed(1);
5148  _maybe_skip_whitespace_tokens();
5149  }
5150  else if(first == '}') // this happens on a trailing comma like ", }"
5151  {
5152  _c4dbgp("mapflow[RKEY]: end!");
5153  m_evt_handler->end_map();
5154  _line_progressed(1);
5155  goto mapflow_finish;
5156  }
5157  else if(first == '&')
5158  {
      // anchor for the upcoming key: the state stays RKEY
5159  csubstr anchor = _scan_anchor();
5160  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5161  m_evt_handler->set_key_anchor(anchor);
5162  }
5163  else if(first == '*')
5164  {
      // a reference used as the key: counts as a complete key
5165  csubstr ref = _scan_ref_map();
5166  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5167  m_evt_handler->set_key_ref(ref);
5168  addrem_flags(RKCL, RKEY);
5169  }
5170  else if(first == '[')
5171  {
5172  // RYML's tree cannot store container keys, but that's
5173  // handled inside the tree sink. Other sink types may be
5174  // able to handle it.
      // RKCL is set before begin_seq_key_flow() -- presumably so the
      // enclosing map expects ':' once the key container closes
      // (confirm against the handler's push logic). The new level is
      // then switched to RSEQ|RVAL, which this function does not
      // handle, hence the jump to mapflow_finish.
5175  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5176  addrem_flags(RKCL, RKEY);
5177  m_evt_handler->begin_seq_key_flow();
5178  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5179  _set_indentation(m_evt_handler->m_parent->indref);
5180  _line_progressed(1);
5181  goto mapflow_finish;
5182  }
5183  else if(first == '{')
5184  {
5185  // RYML's tree cannot store container keys, but that's
5186  // handled inside the tree sink. Other sink types may be
5187  // able to handle it.
      // Same pattern as the '[' branch, but the new level is a map
      // in RKEY state, so parsing continues in this function.
5188  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5189  addrem_flags(RKCL, RKEY);
5190  m_evt_handler->begin_map_key_flow();
5191  addrem_flags(RKEY, RVAL|RKCL);
5192  _set_indentation(m_evt_handler->m_parent->indref);
5193  _line_progressed(1);
5194  // keep going in this function
5195  }
5196  else if(first == '!')
5197  {
      // tag for the upcoming key: the state stays RKEY
5198  csubstr tag = _scan_tag();
5199  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5200  _check_tag(tag);
5201  m_evt_handler->set_key_tag(tag);
5202  }
5203  else
5204  {
5205  _c4err("parse error");
5206  }
5207  }
      // ---- RKCL: a key was read, expecting the colon ----
5208  else if(has_any(RKCL)) // read the key colon
5209  {
5210  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5211  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5212  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5213  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5214  const char first = rem.str[0];
5215  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5216  if(first == ':')
5217  {
5218  _c4dbgp("mapflow[RKCL]: found the colon");
5219  addrem_flags(RVAL, RKCL);
5220  _line_progressed(1);
5221  }
5222  else if(first == '}')
5223  {
      // the map closes right after a key: the missing value is set
      // to an empty plain scalar before ending the map
5224  _c4dbgp("mapflow[RKCL]: end with missing val!");
5225  addrem_flags(RVAL, RKCL);
5226  m_evt_handler->set_val_scalar_plain({});
5227  m_evt_handler->end_map();
5228  _line_progressed(1);
5229  goto mapflow_finish;
5230  }
5231  else if(first == ',')
5232  {
      // separator right after a key: empty value, then on to the
      // next key
5233  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5234  m_evt_handler->set_val_scalar_plain({});
5235  m_evt_handler->add_sibling();
5236  addrem_flags(RKEY, RKCL);
5237  _line_progressed(1);
5238  }
5239  else
5240  {
5241  _c4err("parse error");
5242  }
5243  }
      // ---- RVAL: expecting a value ----
5244  else if(has_any(RVAL))
5245  {
5246  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5247  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5248  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5249  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5250  const char first = rem.str[0];
5251  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5252  ScannedScalar sc;
5253  if(first == '\'')
5254  {
5255  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5256  sc = _scan_scalar_squot();
5257  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5258  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
      // value read: switch RVAL -> RNXT
5259  addrem_flags(RNXT, RVAL);
5260  }
5261  else if(first == '"')
5262  {
5263  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5264  sc = _scan_scalar_dquot();
5265  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5266  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5267  addrem_flags(RNXT, RVAL);
5268  }
5269  // block scalars (ie | and >) cannot appear in flow containers
5270  else if(_scan_scalar_plain_map_flow(&sc))
5271  {
5272  _c4dbgp("mapflow[RVAL]: plain scalar.");
5273  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5274  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5275  addrem_flags(RNXT, RVAL);
5276  }
5277  else if(first == '[')
5278  {
      // Child flow sequence as the value. RNXT is set before the
      // begin call -- presumably for the enclosing map -- and the new
      // level is switched to RSEQ|RVAL, which this function does not
      // handle, hence the jump to mapflow_finish.
5279  _c4dbgp("mapflow[RVAL]: start val seqflow");
5280  addrem_flags(RNXT, RVAL);
5281  m_evt_handler->begin_seq_val_flow();
5282  _set_indentation(m_evt_handler->m_parent->indref);
5283  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5284  _line_progressed(1);
5285  goto mapflow_finish;
5286  }
5287  else if(first == '{')
5288  {
      // Child flow map as the value: the new level starts in RKEY
      // state and is parsed by the next passes of this function.
5289  _c4dbgp("mapflow[RVAL]: start val mapflow");
5290  addrem_flags(RNXT, RVAL);
5291  m_evt_handler->begin_map_val_flow();
5292  _set_indentation(m_evt_handler->m_parent->indref);
5293  addrem_flags(RKEY, RNXT);
5294  _line_progressed(1);
5295  // keep going in this function
5296  }
5297  else if(first == '}')
5298  {
      // the map closes where a value was expected: the value is set
      // to an empty plain scalar before ending the map
5299  _c4dbgp("mapflow[RVAL]: end!");
5300  m_evt_handler->set_val_scalar_plain({});
5301  m_evt_handler->end_map();
5302  _line_progressed(1);
5303  goto mapflow_finish;
5304  }
5305  else if(first == '*')
5306  {
      // NOTE(review): the debug text below says "key ref", but this
      // branch sets the *val* ref (set_val_ref).
5307  csubstr ref = _scan_ref_map();
5308  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5309  m_evt_handler->set_val_ref(ref);
5310  addrem_flags(RNXT, RVAL);
5311  }
5312  else if(first == '&')
5313  {
      // Anchor for the upcoming value: the state stays RVAL.
      // NOTE(review): the debug text below says "key anchor", but
      // this branch sets the *val* anchor (set_val_anchor).
5314  csubstr anchor = _scan_anchor();
5315  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5316  m_evt_handler->set_val_anchor(anchor);
5317  }
5318  else if(first == '!')
5319  {
      // tag for the upcoming value: the state stays RVAL
5320  csubstr tag = _scan_tag();
5321  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5322  _check_tag(tag);
5323  m_evt_handler->set_val_tag(tag);
5324  }
5325  else
5326  {
5327  _c4err("parse error");
5328  }
5329  }
      // ---- RNXT: a value was read, expecting ',' or '}' ----
5330  else if(has_any(RNXT))
5331  {
5332  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5333  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5334  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5335  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5336  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5337  if(rem.begins_with(','))
5338  {
5339  _c4dbgp("mapflow[RNXT]: expect next keyval");
5340  m_evt_handler->add_sibling();
5341  addrem_flags(RKEY, RNXT);
5342  _line_progressed(1);
5343  }
5344  else if(rem.begins_with('}'))
5345  {
5346  _c4dbgp("mapflow[RNXT]: end!");
5347  m_evt_handler->end_map();
5348  _line_progressed(1);
5349  goto mapflow_finish;
5350  }
5351  else
5352  {
5353  _c4err("parse error");
5354  }
5355  }
      // ---- QMRK: after '?', expecting the explicit key ----
      // Mirrors the RKEY state, except that there is no '?' branch and
      // a closing '}' sets both an empty key and an empty value.
5356  else if(has_any(QMRK))
5357  {
5358  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5359  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5360  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5361  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5362  const char first = rem.str[0];
5363  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5364  ScannedScalar sc;
5365  if(first == '\'')
5366  {
5367  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5368  sc = _scan_scalar_squot();
5369  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5370  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5371  addrem_flags(RKCL, QMRK);
5372  }
5373  else if(first == '"')
5374  {
5375  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5376  sc = _scan_scalar_dquot();
5377  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5378  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5379  addrem_flags(RKCL, QMRK);
5380  }
5381  // block scalars (ie | and >) cannot appear in flow containers
5382  else if(_scan_scalar_plain_map_flow(&sc))
5383  {
5384  _c4dbgp("mapflow[QMRK]: plain scalar");
5385  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5386  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5387  addrem_flags(RKCL, QMRK);
5388  }
5389  else if(first == ':')
5390  {
5391  _c4dbgp("mapflow[QMRK]: setting empty key");
5392  m_evt_handler->set_key_scalar_plain({});
5393  addrem_flags(RVAL, QMRK);
5394  _line_progressed(1);
5395  _maybe_skip_whitespace_tokens();
5396  }
5397  else if(first == '}') // this happens on a trailing comma like ", }"
5398  {
5399  _c4dbgp("mapflow[QMRK]: end!");
5400  m_evt_handler->set_key_scalar_plain({});
5401  m_evt_handler->set_val_scalar_plain({});
5402  m_evt_handler->end_map();
5403  _line_progressed(1);
5404  goto mapflow_finish;
5405  }
5406  else if(first == '&')
5407  {
      // anchor for the upcoming key: the state stays QMRK
5408  csubstr anchor = _scan_anchor();
5409  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5410  m_evt_handler->set_key_anchor(anchor);
5411  }
5412  else if(first == '*')
5413  {
5414  csubstr ref = _scan_ref_map();
5415  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5416  m_evt_handler->set_key_ref(ref);
5417  addrem_flags(RKCL, QMRK);
5418  }
5419  else if(first == '[')
5420  {
5421  // RYML's tree cannot store container keys, but that's
5422  // handled inside the tree sink. Other sink types may be
5423  // able to handle it.
5424  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5425  addrem_flags(RKCL, QMRK);
5426  m_evt_handler->begin_seq_key_flow();
5427  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5428  _set_indentation(m_evt_handler->m_parent->indref);
5429  _line_progressed(1);
5430  goto mapflow_finish;
5431  }
5432  else if(first == '{')
5433  {
5434  // RYML's tree cannot store container keys, but that's
5435  // handled inside the tree sink. Other sink types may be
5436  // able to handle it.
5437  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5438  addrem_flags(RKCL, QMRK);
5439  m_evt_handler->begin_map_key_flow();
5440  _set_indentation(m_evt_handler->m_parent->indref);
5441  addrem_flags(RKEY, RKCL);
5442  _line_progressed(1);
5443  // keep going in this function
5444  }
5445  else if(first == '!')
5446  {
      // tag for the upcoming key: the state stays QMRK
5447  csubstr tag = _scan_tag();
5448  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5449  _check_tag(tag);
5450  m_evt_handler->set_key_tag(tag);
5451  }
5452  else
5453  {
5454  _c4err("parse error");
5455  }
5456  }
5457
      // Reached after every branch that did not jump to mapflow_finish.
      // If the current line is used up, scan the next one; running out
      // of file here means the map was never closed.
5458  mapflow_again:
5459  _c4dbgt("mapflow: go again", 0);
5460  if(_finished_line())
5461  {
5462  if(C4_LIKELY(!_finished_file()))
5463  {
5464  _line_ended();
5465  _scan_line();
5466  _c4dbgnextline();
5467  }
5468  else
5469  {
5470  _c4err("missing terminating }");
5471  }
5472  }
5473  goto mapflow_start;
5474
5475  mapflow_finish:
5476  _c4dbgp("mapflow: finish");
5477 }
5478 
5479 
5480 //-----------------------------------------------------------------------------
5481 
5482 template<class EventHandler>
5483 void ParseEngine<EventHandler>::_handle_seq_block()
5484 {
5485 seqblck_start:
5486  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5487 
5488  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5489  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5490  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5491  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5492 
5493  _maybe_skip_comment();
5494  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5495  if(!rem.len)
5496  goto seqblck_again;
5497 
5498  if(has_any(RVAL))
5499  {
5500  _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5501  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5502  if(m_evt_handler->m_curr->at_line_beginning())
5503  {
5504  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5505  if(m_evt_handler->m_curr->indentation_ge())
5506  {
5507  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5508  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5509  rem = m_evt_handler->m_curr->line_contents.rem;
5510  if(!rem.len)
5511  goto seqblck_again;
5512  }
5513  else if(m_evt_handler->m_curr->indentation_lt())
5514  {
5515  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5516  _handle_indentation_pop_from_block_seq();
5517  goto seqblck_finish;
5518  }
5519  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5520  {
5521  _c4dbgp("seqblck[RVAL]: empty line!");
5522  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5523  goto seqblck_again;
5524  }
5525  }
5526  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5527  else
5528  {
5529  // accomodate annotation on the previous line. eg:
5530  // - &elm
5531  // foo # <-- on this line
5532  // - &elm
5533  // &foo foo: bar # <-- on this line
5534  if(rem.str[0] == ' ')
5535  {
5536  if(_handle_indentation_from_annotations())
5537  {
5538  _c4dbgp("seqblck[RVAL]: annotations!");
5539  rem = m_evt_handler->m_curr->line_contents.rem;
5540  if(!rem.len)
5541  goto seqblck_again;
5542  }
5543  }
5544  }
5545  #endif
5546  _RYML_CB_ASSERT(callbacks(), rem.len);
5547  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5548  const char first = rem.str[0];
5549  const size_t startline = m_evt_handler->m_curr->pos.line;
5550  // warning: the gcc optimizer on x86 builds is brittle with
5551  // this function:
5552  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
5553  ScannedScalar sc;
5554  if(first == '\'')
5555  {
5556  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5557  sc = _scan_scalar_squot();
5558  if(!_maybe_scan_following_colon())
5559  {
5560  _c4dbgp("seqblck[RVAL]: set as val");
5561  _handle_annotations_before_blck_val_scalar();
5562  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5563  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5564  addrem_flags(RNXT, RVAL);
5565  }
5566  else
5567  {
5568  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5569  addrem_flags(RNXT, RVAL);
5570  _handle_annotations_before_start_mapblck(startline);
5571  m_evt_handler->begin_map_val_block();
5572  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5573  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5574  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5575  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5576  _maybe_skip_whitespace_tokens();
5577  goto seqblck_finish;
5578  }
5579  }
5580  else if(first == '"')
5581  {
5582  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5583  sc = _scan_scalar_dquot();
5584  if(!_maybe_scan_following_colon())
5585  {
5586  _c4dbgp("seqblck[RVAL]: set as val");
5587  _handle_annotations_before_blck_val_scalar();
5588  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5589  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5590  addrem_flags(RNXT, RVAL);
5591  }
5592  else
5593  {
5594  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5595  addrem_flags(RNXT, RVAL);
5596  _handle_annotations_before_start_mapblck(startline);
5597  m_evt_handler->begin_map_val_block();
5598  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5599  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5600  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5601  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5602  _maybe_skip_whitespace_tokens();
5603  goto seqblck_finish;
5604  }
5605  }
5606  // block scalars can only appear as keys when in QMRK scope
5607  // (ie, after ? tokens), so no need to scan following colon in
5608  // here.
5609  else if(first == '|')
5610  {
5611  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5612  ScannedBlock sb;
5613  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5614  _handle_annotations_before_blck_val_scalar();
5615  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5616  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5617  addrem_flags(RNXT, RVAL);
5618  }
5619  else if(first == '>')
5620  {
5621  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5622  ScannedBlock sb;
5623  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5624  _handle_annotations_before_blck_val_scalar();
5625  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5626  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5627  addrem_flags(RNXT, RVAL);
5628  }
5629  else if(_scan_scalar_plain_seq_blck(&sc))
5630  {
5631  _c4dbgp("seqblck[RVAL]: plain scalar.");
5632  if(!_maybe_scan_following_colon())
5633  {
5634  _c4dbgp("seqblck[RVAL]: set as val");
5635  _handle_annotations_before_blck_val_scalar();
5636  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
5637  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5638  addrem_flags(RNXT, RVAL);
5639  }
5640  else
5641  {
5642  if(startindent > m_evt_handler->m_curr->indref)
5643  {
5644  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5645  addrem_flags(RNXT, RVAL);
5646  _handle_annotations_before_start_mapblck(startline);
5647  m_evt_handler->begin_map_val_block();
5648  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5649  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5650  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5651  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5652  _maybe_skip_whitespace_tokens();
5653  goto seqblck_finish;
5654  }
5655  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5656  {
5657  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5658  m_evt_handler->set_val_scalar_plain({});
5659  m_evt_handler->end_seq();
5660  m_evt_handler->add_sibling();
5661  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
5662  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5663  addrem_flags(RVAL, RNXT|RKEY);
5664  _maybe_skip_whitespace_tokens();
5665  goto seqblck_finish;
5666  }
5667  else
5668  {
5669  _c4err("parse error");
5670  }
5671  }
5672  }
5673  else if(first == '[')
5674  {
5675  _c4dbgp("seqblck[RVAL]: start child seqflow");
5676  addrem_flags(RNXT, RVAL);
5677  m_evt_handler->begin_seq_val_flow();
5678  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5679  _line_progressed(1);
5680  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5681  goto seqblck_finish;
5682  }
5683  else if(first == '{')
5684  {
5685  _c4dbgp("seqblck[RVAL]: start child mapflow");
5686  addrem_flags(RNXT, RVAL);
5687  _handle_annotations_before_blck_val_scalar();
5688  m_evt_handler->begin_map_val_flow();
5689  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5690  _line_progressed(1);
5691  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5692  goto seqblck_finish;
5693  }
5694  else if(first == '-')
5695  {
5696  if(startindent == m_evt_handler->m_curr->indref)
5697  {
5698  _c4dbgp("seqblck[RVAL]: prev val was empty");
5699  _handle_annotations_before_blck_val_scalar();
5700  m_evt_handler->set_val_scalar_plain({});
5701  // keep in RVAL, but for the next sibling
5702  m_evt_handler->add_sibling();
5703  }
5704  else
5705  {
5706  _c4dbgp("seqblck[RVAL]: start child seqblck");
5707  _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5708  addrem_flags(RNXT, RVAL);
5709  _handle_annotations_before_blck_val_scalar();
5710  m_evt_handler->begin_seq_val_block();
5711  addrem_flags(RVAL, RNXT);
5712  _save_indentation();
5713  // keep going on inside this function
5714  }
5715  _line_progressed(1);
5716  _maybe_skip_whitespace_tokens();
5717  }
5718  else if(first == ':')
5719  {
5720  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5721  addrem_flags(RNXT, RVAL);
5722  _handle_annotations_before_start_mapblck(startline);
5723  m_evt_handler->begin_map_val_block();
5724  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5725  m_evt_handler->set_key_scalar_plain({});
5726  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5727  _line_progressed(1);
5728  _maybe_skip_whitespace_tokens();
5729  goto seqblck_finish;
5730  }
5731  else if(first == '&')
5732  {
5733  const csubstr anchor = _scan_anchor();
5734  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5735  // we need to buffer the anchors, as there may be two
5736  // consecutive anchors in here
5737  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5738  }
5739  else if(first == '*')
5740  {
5741  csubstr ref = _scan_ref_seq();
5742  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5743  if(!_maybe_scan_following_colon())
5744  {
5745  _c4dbgp("seqblck[RVAL]: set ref as val!");
5746  _handle_annotations_before_blck_val_scalar();
5747  m_evt_handler->set_val_ref(ref);
5748  addrem_flags(RNXT, RVAL);
5749  }
5750  else
5751  {
5752  _c4dbgp("seqblck[RVAL]: ref is key of map");
5753  addrem_flags(RNXT, RVAL);
5754  _handle_annotations_before_start_mapblck(startline);
5755  m_evt_handler->begin_map_val_block();
5756  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5757  m_evt_handler->set_key_ref(ref);
5758  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5759  _set_indentation(startindent);
5760  _maybe_skip_whitespace_tokens();
5761  goto seqblck_finish;
5762  }
5763  }
5764  else if(first == '!')
5765  {
5766  csubstr tag = _scan_tag();
5767  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5768  // we need to buffer the tags, as there may be two
5769  // consecutive tags in here
5770  _add_annotation(&m_pending_tags, tag, startindent, startline);
5771  }
5772  else if(first == '?')
5773  {
5774  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5775  addrem_flags(RNXT, RVAL);
5776  m_was_inside_qmrk = true;
5777  m_evt_handler->begin_map_val_block();
5778  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5779  _save_indentation();
5780  _line_progressed(1);
5781  _maybe_skip_whitespace_tokens();
5782  goto seqblck_finish;
5783  }
5784  else
5785  {
5786  _c4err("parse error");
5787  }
5788  }
5789  else // RNXT
5790  {
5791  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5792  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5793  //
5794  // handle indentation
5795  //
5796  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5797  if(C4_UNLIKELY(!_at_line_begin()))
5798  _c4err("parse error");
5799  if(m_evt_handler->m_curr->indentation_ge())
5800  {
5801  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5802  _line_progressed(m_evt_handler->m_curr->indref);
5803  _maybe_skip_whitespace_tokens();
5804  rem = m_evt_handler->m_curr->line_contents.rem;
5805  if(!rem.len)
5806  goto seqblck_again;
5807  }
5808  else if(m_evt_handler->m_curr->indentation_lt())
5809  {
5810  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5811  _handle_indentation_pop_from_block_seq();
5812  if(has_all(RSEQ|BLCK))
5813  {
5814  _c4dbgp("seqblck[RNXT]: still seqblck!");
5815  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5816  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5817  rem = m_evt_handler->m_curr->line_contents.rem;
5818  if(!rem.len)
5819  goto seqblck_again;
5820  }
5821  else
5822  {
5823  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5824  goto seqblck_finish;
5825  }
5826  }
5827  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
5828  {
5829  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5830  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5831  rem = m_evt_handler->m_curr->line_contents.rem;
5832  if(!rem.len)
5833  goto seqblck_again;
5834  }
5835  //
5836  // now handle the tokens
5837  //
5838  const char first = rem.str[0];
5839  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
5840  if(first == '-')
5841  {
5842  if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5843  {
5844  _c4dbgp("seqblck[RNXT]: expect next val");
5845  addrem_flags(RVAL, RNXT);
5846  m_evt_handler->add_sibling();
5847  _line_progressed(1);
5848  _maybe_skip_whitespace_tokens();
5849  }
5850  else
5851  {
5852  _c4dbgp("seqblck[RNXT]: start doc");
5853  _start_doc_suddenly();
5854  _line_progressed(3);
5855  _maybe_skip_whitespace_tokens();
5856  goto seqblck_finish;
5857  }
5858  }
5859  else if(first == ':')
5860  {
5861  // This happens for example in `- [a: b]: c` (after
5862  // terminating the seq, ie, after `]`). All other cases
5863  // (ie colon after scalars) are caught elsewhere (ie, in
5864  // RVAL state).
5865  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5866  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
5867  {
5868  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
5869  m_evt_handler->end_seq();
5870  goto seqblck_finish;
5871  }
5872  else
5873  {
5874  _c4err("parse error");
5875  }
5876  }
5877  else if(first == '.')
5878  {
5879  _c4dbgp("seqblck[RNXT]: maybe doc?");
5880  csubstr rs = rem.sub(1);
5881  if(rs == ".." || rs.begins_with(".. "))
5882  {
5883  _c4dbgp("seqblck[RNXT]: end+start doc");
5884  _end_doc_suddenly();
5885  _line_progressed(3);
5886  _maybe_skip_whitespace_tokens();
5887  goto seqblck_finish;
5888  }
5889  else
5890  {
5891  _c4err("parse error");
5892  }
5893  }
5894  else
5895  {
5896  // may be an indentless sequence nested in a map...
5897  //if(m_evt_handler->m_stack.size() >= 2)
5898  #ifdef RYML_DBG
5899  char flagbuf_[128];
5900  for(auto const& s : m_evt_handler->m_stack)
5901  {
5902  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
5903  }
5904  #endif
5905  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
5906  {
5907  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
5908  _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
5909  _handle_indentation_pop(m_evt_handler->m_parent);
5910  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
5911  m_evt_handler->add_sibling();
5912  addrem_flags(RKEY, RNXT);
5913  goto seqblck_finish;
5914  }
5915  else //if(first != '*')
5916  {
5917  _c4err("parse error");
5918  }
5919  }
5920  }
5921 
5922  seqblck_again:
5923  _c4dbgt("seqblck: go again", 0);
5924  if(_finished_line())
5925  {
5926  _line_ended();
5927  _scan_line();
5928  if(_finished_file())
5929  {
5930  _c4dbgp("seqblck: finish!");
5931  _end_seq_blck();
5932  goto seqblck_finish;
5933  }
5934  _c4dbgnextline();
5935  }
5936  goto seqblck_start;
5937 
5938  seqblck_finish:
5939  _c4dbgp("seqblck: finish");
5940 }
5941 
5942 
5943 //-----------------------------------------------------------------------------
5944 
5945 template<class EventHandler>
5946 void ParseEngine<EventHandler>::_handle_map_block()
5947 {
5948 mapblck_start:
5949  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5950 
5951  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
5952  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5953  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5954  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5955  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5956 
5957  _maybe_skip_comment();
5958  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5959  if(!rem.len)
5960  goto mapblck_again;
5961 
5962  if(has_any(RKEY))
5963  {
5964  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5965  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5966  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5967  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5968  //
5969  // handle indentation
5970  //
5971  if(m_evt_handler->m_curr->at_line_beginning())
5972  {
5973  if(m_evt_handler->m_curr->indentation_eq())
5974  {
5975  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
5976  _line_progressed(m_evt_handler->m_curr->indref);
5977  rem = m_evt_handler->m_curr->line_contents.rem;
5978  if(!rem.len)
5979  goto mapblck_again;
5980  }
5981  else if(m_evt_handler->m_curr->indentation_lt())
5982  {
5983  _c4dbgp("mapblck[RKEY]: smaller indentation!");
5984  _handle_indentation_pop_from_block_map();
5985  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5986  if(has_all(RMAP|BLCK))
5987  {
5988  _c4dbgp("mapblck[RKEY]: still mapblck!");
5989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
5990  rem = m_evt_handler->m_curr->line_contents.rem;
5991  if(!rem.len)
5992  goto mapblck_again;
5993  }
5994  else
5995  {
5996  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
5997  goto mapblck_finish;
5998  }
5999  }
6000  else
6001  {
6002  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6003  _c4err("invalid indentation");
6004  }
6005  }
6006  //
6007  // now handle the tokens
6008  //
6009  const char first = rem.str[0];
6010  const size_t startline = m_evt_handler->m_curr->pos.line;
6011  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6012  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6013  ScannedScalar sc;
6014  if(first == '\'')
6015  {
6016  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6017  sc = _scan_scalar_squot();
6018  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6019  _handle_annotations_before_blck_key_scalar();
6020  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6021  addrem_flags(RVAL, RKEY);
6022  if(!_maybe_scan_following_colon())
6023  _c4err("could not find ':' colon after key");
6024  _maybe_skip_whitespace_tokens();
6025  }
6026  else if(first == '"')
6027  {
6028  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6029  sc = _scan_scalar_dquot();
6030  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6031  _handle_annotations_before_blck_key_scalar();
6032  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6033  addrem_flags(RVAL, RKEY);
6034  if(!_maybe_scan_following_colon())
6035  _c4err("could not find ':' colon after key");
6036  _maybe_skip_whitespace_tokens();
6037  }
6038  // block scalars (| and >) can not be used as keys unless they
6039  // appear in an explicit QMRK scope (ie, after the ? token),
6040  else if(C4_UNLIKELY(first == '|'))
6041  {
6042  _c4err("block literal keys must be enclosed in '?'");
6043  }
6044  else if(C4_UNLIKELY(first == '>'))
6045  {
6046  _c4err("block literal keys must be enclosed in '?'");
6047  }
6048  else if(_scan_scalar_plain_map_blck(&sc))
6049  {
6050  _c4dbgp("mapblck[RKEY]: plain scalar");
6051  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6052  _handle_annotations_before_blck_key_scalar();
6053  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6054  addrem_flags(RVAL, RKEY);
6055  if(!_maybe_scan_following_colon())
6056  _c4err("could not find ':' colon after key");
6057  _maybe_skip_whitespace_tokens();
6058  }
6059  else if(first == '?')
6060  {
6061  _c4dbgp("mapblck[RKEY]: key token!");
6062  addrem_flags(QMRK, RKEY);
6063  _line_progressed(1);
6064  _maybe_skip_whitespace_tokens();
6065  m_was_inside_qmrk = true;
6066  goto mapblck_again;
6067  }
6068  else if(first == ':')
6069  {
6070  _c4dbgp("mapblck[RKEY]: setting empty key");
6071  _handle_annotations_before_blck_key_scalar();
6072  m_evt_handler->set_key_scalar_plain({});
6073  addrem_flags(RVAL, RKEY);
6074  _line_progressed(1);
6075  _maybe_skip_whitespace_tokens();
6076  }
6077  else if(first == '*')
6078  {
6079  csubstr ref = _scan_ref_map();
6080  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6081  _handle_annotations_before_blck_key_scalar();
6082  m_evt_handler->set_key_ref(ref);
6083  addrem_flags(RVAL, RKEY);
6084  if(!_maybe_scan_following_colon())
6085  _c4err("could not find ':' colon after key");
6086  _maybe_skip_whitespace_tokens();
6087  }
6088  else if(first == '&')
6089  {
6090  csubstr anchor = _scan_anchor();
6091  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6092  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6093  }
6094  else if(first == '!')
6095  {
6096  csubstr tag = _scan_tag();
6097  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6098  _add_annotation(&m_pending_tags, tag, startindent, startline);
6099  }
6100  else if(first == '[')
6101  {
6102  // RYML's tree cannot store container keys, but that's
6103  // handled inside the tree handler. Other handlers may be
6104  // able to handle it.
6105  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6106  addrem_flags(RKCL, RKEY);
6107  _handle_annotations_before_blck_key_scalar();
6108  m_evt_handler->begin_seq_key_flow();
6109  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6110  _line_progressed(1);
6111  _set_indentation(startindent);
6112  goto mapblck_finish;
6113  }
6114  else if(first == '{')
6115  {
6116  // RYML's tree cannot store container keys, but that's
6117  // handled inside the tree handler. Other handlers may be
6118  // able to handle it.
6119  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6120  addrem_flags(RKCL, RKEY);
6121  _handle_annotations_before_blck_key_scalar();
6122  m_evt_handler->begin_map_key_flow();
6123  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6124  _line_progressed(1);
6125  _set_indentation(startindent);
6126  goto mapblck_finish;
6127  }
6128  else if(first == '-')
6129  {
6130  _c4dbgp("mapblck[RKEY]: maybe doc?");
6131  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6132  {
6133  _c4dbgp("mapblck[RKEY]: end+start doc");
6134  _start_doc_suddenly();
6135  _line_progressed(3);
6136  _maybe_skip_whitespace_tokens();
6137  goto mapblck_finish;
6138  }
6139  else
6140  {
6141  _c4err("parse error");
6142  }
6143  }
6144  else if(first == '.')
6145  {
6146  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6147  if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6148  {
6149  _c4dbgp("mapblck[RKEY]: end doc");
6150  _end_doc_suddenly();
6151  _line_progressed(3);
6152  _maybe_skip_whitespace_tokens();
6153  goto mapblck_finish;
6154  }
6155  else
6156  {
6157  _c4err("parse error");
6158  }
6159  }
6161  else if(first == '\t')
6162  {
6163  _c4dbgp("mapblck[RKEY]: skip tabs");
6164  _maybe_skipchars('\t');
6165  })
6166  else
6167  {
6168  _c4err("parse error");
6169  }
6170  }
6171  else if(has_any(RKCL)) // read the key colon
6172  {
6173  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6174  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6175  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6176  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6177  //
6178  // handle indentation
6179  //
6180  if(m_evt_handler->m_curr->at_line_beginning())
6181  {
6182  if(m_evt_handler->m_curr->indentation_eq())
6183  {
6184  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6185  _line_progressed(m_evt_handler->m_curr->indref);
6186  rem = m_evt_handler->m_curr->line_contents.rem;
6187  if(!rem.len)
6188  goto mapblck_again;
6189  }
6190  else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6191  {
6192  _c4err("invalid indentation");
6193  }
6194  }
6195  const char first = rem.str[0];
6196  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6197  if(first == ':')
6198  {
6199  _c4dbgp("mapblck[RKCL]: found the colon");
6200  addrem_flags(RVAL, RKCL);
6201  _line_progressed(1);
6202  _maybe_skip_whitespace_tokens();
6203  }
6204  else if(first == '?')
6205  {
6206  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6207  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6208  m_evt_handler->set_val_scalar_plain({});
6209  m_evt_handler->add_sibling();
6210  addrem_flags(QMRK, RKCL);
6211  _line_progressed(1);
6212  _maybe_skip_whitespace_tokens();
6213  }
6214  else if(first == '-')
6215  {
6216  if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6217  {
6218  _c4dbgp("mapblck[RKCL]: end+start doc");
6219  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6220  _start_doc_suddenly();
6221  _line_progressed(3);
6222  _maybe_skip_whitespace_tokens();
6223  goto mapblck_finish;
6224  }
6225  else
6226  {
6227  _c4err("parse error");
6228  }
6229  }
6230  else if(first == '.')
6231  {
6232  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6233  csubstr rs = rem.sub(1);
6234  if(rs == ".." || rs.begins_with(".. "))
6235  {
6236  _c4dbgp("mapblck[RKCL]: end+start doc");
6237  _end_doc_suddenly();
6238  _line_progressed(3);
6239  goto mapblck_finish;
6240  }
6241  else
6242  {
6243  _c4err("parse error");
6244  }
6245  }
6246  else if(m_was_inside_qmrk)
6247  {
6248  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6249  _c4dbgp("mapblck[RKCL]: missing :");
6250  m_evt_handler->set_val_scalar_plain({});
6251  m_evt_handler->add_sibling();
6252  m_was_inside_qmrk = false;
6253  addrem_flags(RKEY, RKCL);
6254  }
6255  else
6256  {
6257  _c4err("parse error");
6258  }
6259  }
6260  else if(has_any(RVAL))
6261  {
6262  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6263  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6264  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6265  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6266  //
6267  // handle indentation
6268  //
6269  if(m_evt_handler->m_curr->at_line_beginning())
6270  {
6271  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6272  m_evt_handler->m_curr->more_indented = false;
6273  if(m_evt_handler->m_curr->indref == npos)
6274  {
6275  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6276  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6277  _line_progressed(m_evt_handler->m_curr->indref);
6278  rem = m_evt_handler->m_curr->line_contents.rem;
6279  if(!rem.len)
6280  goto mapblck_again;
6281  }
6282  else if(m_evt_handler->m_curr->indentation_eq())
6283  {
6284  _c4dbgp("mapblck[RVAL]: skip indentation!");
6285  _line_progressed(m_evt_handler->m_curr->indref);
6286  rem = m_evt_handler->m_curr->line_contents.rem;
6287  if(!rem.len)
6288  goto mapblck_again;
6289  // TODO: this is valid:
6290  //
6291  // ```yaml
6292  // a:
6293  // b:
6294  // ---
6295  // a:
6296  // b
6297  // ---
6298  // a:
6299  // b: c
6300  // ```
6301  //
6302  // ... but this is not:
6303  //
6304  // ```yaml
6305  // a:
6306  // v
6307  // ---
6308  // a: b: c
6309  // ```
6310  //
6311  // here, we probably need to set a boolean on the state
6312  // to disambiguate between these cases.
6313  }
6314  else if(m_evt_handler->m_curr->indentation_gt())
6315  {
6316  _c4dbgp("mapblck[RVAL]: more indented!");
6317  m_evt_handler->m_curr->more_indented = true;
6318  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6319  rem = m_evt_handler->m_curr->line_contents.rem;
6320  if(!rem.len)
6321  goto mapblck_again;
6322  }
6323  else if(m_evt_handler->m_curr->indentation_lt())
6324  {
6325  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6326  _handle_indentation_pop_from_block_map();
6327  if(has_all(RMAP|BLCK))
6328  {
6329  _c4dbgp("mapblck[RVAL]: still mapblck!");
6330  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6331  if(has_any(RNXT))
6332  {
6333  _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6334  m_evt_handler->add_sibling();
6335  addrem_flags(RKEY, RNXT);
6336  }
6337  goto mapblck_again;
6338  }
6339  else
6340  {
6341  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6342  goto mapblck_finish;
6343  }
6344  }
6345  else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6346  {
6347  _c4dbgp("mapblck[RVAL]: empty line!");
6348  _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6349  goto mapblck_again;
6350  }
6351  }
6352  //
6353  // now handle the tokens
6354  //
6355  const char first = rem.str[0];
6356  const size_t startline = m_evt_handler->m_curr->pos.line;
6357  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6358  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6359  ScannedScalar sc;
6360  if(first == '\'')
6361  {
6362  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6363  sc = _scan_scalar_squot();
6364  if(!_maybe_scan_following_colon())
6365  {
6366  _c4dbgp("mapblck[RVAL]: set as val");
6367  _handle_annotations_before_blck_val_scalar();
6368  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6369  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6370  addrem_flags(RNXT, RVAL);
6371  }
6372  else
6373  {
6374  if(startindent != m_evt_handler->m_curr->indref)
6375  {
6376  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6377  _handle_annotations_before_start_mapblck(startline);
6378  addrem_flags(RNXT, RVAL);
6379  m_evt_handler->begin_map_val_block();
6380  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6381  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6382  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6383  _maybe_skip_whitespace_tokens();
6384  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6385  // keep the child state on RVAL
6386  addrem_flags(RVAL, RNXT);
6387  }
6388  else
6389  {
6390  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6391  m_evt_handler->set_val_scalar_plain({});
6392  m_evt_handler->add_sibling();
6393  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6394  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6395  // keep going on RVAL
6396  _maybe_skip_whitespace_tokens();
6397  }
6398  }
6399  }
6400  else if(first == '"')
6401  {
6402  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6403  sc = _scan_scalar_dquot();
6404  if(!_maybe_scan_following_colon())
6405  {
6406  _c4dbgp("mapblck[RVAL]: set as val");
6407  _handle_annotations_before_blck_val_scalar();
6408  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6409  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6410  addrem_flags(RNXT, RVAL);
6411  }
6412  else
6413  {
6414  if(startindent != m_evt_handler->m_curr->indref)
6415  {
6416  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6417  _handle_annotations_before_start_mapblck(startline);
6418  addrem_flags(RNXT, RVAL);
6419  m_evt_handler->begin_map_val_block();
6420  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6421  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6422  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6423  _maybe_skip_whitespace_tokens();
6424  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6425  // keep the child state on RVAL
6426  addrem_flags(RVAL, RNXT);
6427  }
6428  else
6429  {
6430  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6431  m_evt_handler->set_val_scalar_plain({});
6432  m_evt_handler->add_sibling();
6433  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6434  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6435  // keep going on RVAL
6436  _maybe_skip_whitespace_tokens();
6437  }
6438  }
6439  }
6440  // block scalars can only appear as keys when in QMRK scope
6441  // (ie, after ? tokens), so no need to scan following colon
6442  else if(first == '|')
6443  {
6444  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6445  ScannedBlock sb;
6446  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6447  _handle_annotations_before_blck_val_scalar();
6448  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6449  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6450  addrem_flags(RNXT, RVAL);
6451  }
6452  else if(first == '>')
6453  {
6454  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6455  ScannedBlock sb;
6456  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6457  _handle_annotations_before_blck_val_scalar();
6458  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6459  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6460  addrem_flags(RNXT, RVAL);
6461  }
6462  else if(_scan_scalar_plain_map_blck(&sc))
6463  {
6464  _c4dbgp("mapblck[RVAL]: plain scalar.");
6465  if(!_maybe_scan_following_colon())
6466  {
6467  _c4dbgp("mapblck[RVAL]: set as val");
6468  _handle_annotations_before_blck_val_scalar();
6469  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6470  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6471  addrem_flags(RNXT, RVAL);
6472  }
6473  else
6474  {
6475  if(startindent != m_evt_handler->m_curr->indref)
6476  {
6477  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6478  addrem_flags(RNXT, RVAL);
6479  _handle_annotations_before_start_mapblck(startline);
6480  m_evt_handler->begin_map_val_block();
6481  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6482  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6483  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6484  _maybe_skip_whitespace_tokens();
6485  _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6486  // keep the child state on RVAL
6487  addrem_flags(RVAL, RNXT);
6488  }
6489  else
6490  {
6491  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6492  _handle_annotations_before_blck_val_scalar();
6493  m_evt_handler->set_val_scalar_plain({});
6494  m_evt_handler->add_sibling();
6495  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6496  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6497  // keep going on RVAL
6498  _maybe_skip_whitespace_tokens();
6499  }
6500  }
6501  }
6502  else if(first == '-')
6503  {
6504  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6505  {
6506  _c4dbgp("mapblck[RVAL]: start val seqblck");
6507  addrem_flags(RNXT, RVAL);
6508  _handle_annotations_before_blck_val_scalar();
6509  m_evt_handler->begin_seq_val_block();
6510  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6511  _set_indentation(startindent);
6512  _line_progressed(1);
6513  _maybe_skip_whitespace_tokens();
6514  goto mapblck_finish;
6515  }
6516  else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6517  {
6518  _c4dbgp("mapblck[RVAL]: end+start doc");
6519  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6520  _start_doc_suddenly();
6521  _line_progressed(3);
6522  _maybe_skip_whitespace_tokens();
6523  goto mapblck_finish;
6524  }
6525  else
6526  {
6527  _c4err("parse error");
6528  }
6529  }
6530  else if(first == '[')
6531  {
6532  _c4dbgp("mapblck[RVAL]: start val seqflow");
6533  addrem_flags(RNXT, RVAL);
6534  _handle_annotations_before_blck_val_scalar();
6535  m_evt_handler->begin_seq_val_flow();
6536  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK);
6537  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6538  _line_progressed(1);
6539  goto mapblck_finish;
6540  }
6541  else if(first == '{')
6542  {
6543  _c4dbgp("mapblck[RVAL]: start val mapflow");
6544  addrem_flags(RNXT, RVAL);
6545  _handle_annotations_before_blck_val_scalar();
6546  m_evt_handler->begin_map_val_flow();
6547  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6548  m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6549  _set_indentation(m_evt_handler->m_curr->indref + 1u);
6550  _line_progressed(1);
6551  goto mapblck_finish;
6552  }
6553  else if(first == '*')
6554  {
6555  csubstr ref = _scan_ref_map();
6556  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6557  if(startindent == m_evt_handler->m_curr->indref)
6558  {
6559  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6560  m_evt_handler->set_val_ref(ref);
6561  addrem_flags(RNXT, RVAL);
6562  }
6563  else
6564  {
6565  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6566  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6567  if(_maybe_scan_following_colon())
6568  {
6569  _c4dbgp("mapblck[RVAL]: start child map, block");
6570  addrem_flags(RNXT, RVAL);
6571  _handle_annotations_before_blck_val_scalar();
6572  m_evt_handler->begin_map_val_block();
6573  m_evt_handler->set_key_ref(ref);
6574  _set_indentation(startindent);
6575  // keep going in RVAL
6576  addrem_flags(RVAL, RNXT);
6577  }
6578  else
6579  {
6580  _c4dbgp("mapblck[RVAL]: was val ref");
6581  _handle_annotations_before_blck_val_scalar();
6582  m_evt_handler->set_val_ref(ref);
6583  addrem_flags(RNXT, RVAL);
6584  }
6585  }
6586  _maybe_skip_whitespace_tokens();
6587  }
6588  else if(first == '&')
6589  {
6590  csubstr anchor = _scan_anchor();
6591  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6592  if(startindent == m_evt_handler->m_curr->indref)
6593  {
6594  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6595  m_evt_handler->set_val_scalar_plain({});
6596  m_evt_handler->add_sibling();
6597  addrem_flags(RKEY, RVAL);
6598  }
6599  // we need to buffer the anchors, as there may be two
6600  // consecutive anchors in here
6601  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6602  }
6603  else if(first == '!')
6604  {
6605  csubstr tag = _scan_tag();
6606  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6607  if(startindent == m_evt_handler->m_curr->indref)
6608  {
6609  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6610  _handle_annotations_before_blck_val_scalar();
6611  m_evt_handler->set_val_scalar_plain({});
6612  m_evt_handler->add_sibling();
6613  addrem_flags(RKEY, RVAL);
6614  }
6615  // we need to buffer the tags, as there may be two
6616  // consecutive tags in here
6617  _add_annotation(&m_pending_tags, tag, startindent, startline);
6618  }
6619  else if(first == '?')
6620  {
6621  if(startindent == m_evt_handler->m_curr->indref)
6622  {
6623  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6624  _handle_annotations_before_blck_val_scalar();
6625  m_evt_handler->set_val_scalar_plain({});
6626  m_evt_handler->add_sibling();
6627  addrem_flags(QMRK, RVAL);
6628  }
6629  else if(startindent > m_evt_handler->m_curr->indref)
6630  {
6631  _c4dbgp("mapblck[RVAL]: start val mapblck");
6632  addrem_flags(RNXT, RVAL);
6633  _handle_annotations_before_blck_val_scalar();
6634  m_evt_handler->begin_map_val_block();
6635  addrem_flags(QMRK|BLCK, RNXT);
6636  _set_indentation(startindent);
6637  }
6638  else
6639  {
6640  _c4err("parse error");
6641  }
6642  m_was_inside_qmrk = true;
6643  _line_progressed(1);
6644  _maybe_skip_whitespace_tokens();
6645  goto mapblck_again;
6646  }
6647  else if(first == ':')
6648  {
6649  if(startindent == m_evt_handler->m_curr->indref)
6650  {
6651  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6652  m_evt_handler->set_val_scalar_plain({});
6653  m_evt_handler->add_sibling();
6654  m_evt_handler->set_key_scalar_plain({});
6655  _line_progressed(1);
6656  _maybe_skip_whitespace_tokens();
6657  goto mapblck_again;
6658  }
6659  else
6660  {
6661  _c4err("parse error");
6662  }
6663  }
6664  else if(first == '.')
6665  {
6666  _c4dbgp("mapblck[RVAL]: maybe doc?");
6667  csubstr rs = rem.sub(1);
6668  if(rs == ".." || rs.begins_with(".. "))
6669  {
6670  _c4dbgp("seqblck[RVAL]: end doc expl");
6671  _end_doc_suddenly();
6672  _line_progressed(3);
6673  _maybe_skip_whitespace_tokens();
6674  goto mapblck_finish;
6675  }
6676  else
6677  {
6678  _c4err("parse error");
6679  }
6680  }
6682  else if(first == '\t')
6683  {
6684  _c4dbgp("mapblck[RVAL]: skip tabs");
6685  _maybe_skipchars('\t');
6686  })
6687  else
6688  {
6689  _c4err("parse error");
6690  }
6691  }
6692  else if(has_any(RNXT))
6693  {
6694  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6695  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6696  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6697  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6698  //
6699  // handle indentation
6700  //
6701  if(m_evt_handler->m_curr->at_line_beginning())
6702  {
6703  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6704  if(m_evt_handler->m_curr->indentation_eq())
6705  {
6706  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6707  _line_progressed(m_evt_handler->m_curr->indref);
6708  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6709  m_evt_handler->add_sibling();
6710  addrem_flags(RKEY, RNXT);
6711  goto mapblck_again;
6712  }
6713  else if(m_evt_handler->m_curr->indentation_lt())
6714  {
6715  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6716  _handle_indentation_pop_from_block_map();
6717  if(has_all(RMAP|BLCK))
6718  {
6719  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6720  if(!has_any(RKCL))
6721  {
6722  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6723  m_evt_handler->add_sibling();
6724  addrem_flags(RKEY, RNXT);
6725  }
6726  goto mapblck_again;
6727  }
6728  else
6729  {
6730  goto mapblck_finish;
6731  }
6732  }
6733  }
6734  //
6735  // handle tokens
6736  //
6737  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6738  const char first = rem.str[0];
6739  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6740  if(first == ':')
6741  {
6742  if(m_evt_handler->m_curr->more_indented)
6743  {
6744  _c4dbgp("mapblck[RNXT]: start child block map");
6745  C4_NOT_IMPLEMENTED();
6746  //m_evt_handler->actually_as_block_map();
6747  _line_progressed(1);
6748  _set_indentation(m_evt_handler->m_curr->scalar_col);
6749  m_evt_handler->m_curr->more_indented = false;
6750  goto mapblck_again;
6751  }
6752  else
6753  {
6754  _c4err("parse error");
6755  }
6756  }
6757  else if(first == ' ')
6758  {
6759  _c4dbgp("mapblck[RNXT]: skip spaces");
6760  _maybe_skip_whitespace_tokens();
6761  }
6762  else
6763  {
6764  _c4err("parse error");
6765  }
6766  }
6767  else if(has_any(QMRK))
6768  {
6769  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6770  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6771  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6772  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6773  //
6774  // handle indentation
6775  //
6776  if(m_evt_handler->m_curr->at_line_beginning())
6777  {
6778  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
6779  if(m_evt_handler->m_curr->indentation_eq())
6780  {
6781  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6782  _line_progressed(m_evt_handler->m_curr->indref);
6783  rem = m_evt_handler->m_curr->line_contents.rem;
6784  if(!rem.len)
6785  goto mapblck_again;
6786  }
6787  else if(m_evt_handler->m_curr->indentation_lt())
6788  {
6789  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6790  _handle_indentation_pop_from_block_map();
6791  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6792  if(has_all(RMAP|BLCK))
6793  {
6794  _c4dbgp("mapblck[QMRK]: still mapblck!");
6795  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6796  rem = m_evt_handler->m_curr->line_contents.rem;
6797  if(!rem.len)
6798  goto mapblck_again;
6799  }
6800  else
6801  {
6802  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6803  goto mapblck_finish;
6804  }
6805  }
6806  // indentation can be larger in QMRK state
6807  else
6808  {
6809  _c4dbgp("mapblck[QMRK]: larger indentation !");
6810  _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6811  rem = m_evt_handler->m_curr->line_contents.rem;
6812  if(!rem.len)
6813  goto mapblck_again;
6814  }
6815  }
6816  //
6817  // now handle the tokens
6818  //
6819  const char first = rem.str[0];
6820  const size_t startline = m_evt_handler->m_curr->pos.line;
6821  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6822  _c4dbgpf("mapblck[QMRK]: '{}'", first);
6823  ScannedScalar sc;
6824  if(first == '\'')
6825  {
6826  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
6827  sc = _scan_scalar_squot();
6828  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6829  if(!_maybe_scan_following_colon())
6830  {
6831  _c4dbgp("mapblck[QMRK]: set as key");
6832  _handle_annotations_before_blck_key_scalar();
6833  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6834  addrem_flags(RKCL, QMRK);
6835  }
6836  else
6837  {
6838  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6839  addrem_flags(RKCL, QMRK);
6840  _handle_annotations_before_start_mapblck_as_key();
6841  m_evt_handler->begin_map_key_block();
6842  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6843  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6844  _maybe_skip_whitespace_tokens();
6845  _set_indentation(startindent);
6846  // keep the child state on RVAL
6847  addrem_flags(RVAL, RKCL|QMRK);
6848  }
6849  }
6850  else if(first == '"')
6851  {
6852  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
6853  sc = _scan_scalar_dquot();
6854  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6855  if(!_maybe_scan_following_colon())
6856  {
6857  _c4dbgp("mapblck[QMRK]: set as key");
6858  _handle_annotations_before_blck_key_scalar();
6859  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6860  addrem_flags(RKCL, QMRK);
6861  }
6862  else
6863  {
6864  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6865  addrem_flags(RKCL, QMRK);
6866  _handle_annotations_before_start_mapblck_as_key();
6867  m_evt_handler->begin_map_key_block();
6868  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6869  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6870  _maybe_skip_whitespace_tokens();
6871  _set_indentation(startindent);
6872  // keep the child state on RVAL
6873  addrem_flags(RVAL, RKCL|QMRK);
6874  }
6875  }
6876  else if(first == '|')
6877  {
6878  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6879  ScannedBlock sb;
6880  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6881  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
6882  _handle_annotations_before_blck_key_scalar();
6883  m_evt_handler->set_key_scalar_literal(maybe_filtered);
6884  addrem_flags(RKCL, QMRK);
6885  }
6886  else if(first == '>')
6887  {
6888  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6889  ScannedBlock sb;
6890  _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6891  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
6892  _handle_annotations_before_blck_key_scalar();
6893  m_evt_handler->set_key_scalar_folded(maybe_filtered);
6894  addrem_flags(RKCL, QMRK);
6895  }
6896  else if(_scan_scalar_plain_map_blck(&sc))
6897  {
6898  _c4dbgp("mapblck[QMRK]: plain scalar");
6899  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6900  if(!_maybe_scan_following_colon())
6901  {
6902  _c4dbgp("mapblck[QMRK]: set as key");
6903  _handle_annotations_before_blck_key_scalar();
6904  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6905  addrem_flags(RKCL, QMRK);
6906  }
6907  else
6908  {
6909  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6910  addrem_flags(RKCL, QMRK);
6911  _handle_annotations_before_start_mapblck_as_key();
6912  m_evt_handler->begin_map_key_block();
6913  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6914  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6915  _maybe_skip_whitespace_tokens();
6916  _set_indentation(startindent);
6917  // keep the child state on RVAL
6918  addrem_flags(RVAL, RKCL|QMRK);
6919  }
6920  }
6921  else if(first == ':')
6922  {
6923  if(startindent == m_evt_handler->m_curr->indref)
6924  {
6925  _c4dbgp("mapblck[QMRK]: empty key");
6926  addrem_flags(RVAL, QMRK);
6927  _handle_annotations_before_blck_key_scalar();
6928  m_evt_handler->set_key_scalar_plain({});
6929  _line_progressed(1);
6930  _maybe_skip_whitespace_tokens();
6931  }
6932  else
6933  {
6934  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
6935  addrem_flags(RKCL, QMRK);
6936  _handle_annotations_before_start_mapblck_as_key();
6937  m_evt_handler->begin_map_key_block();
6938  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6939  m_evt_handler->set_key_scalar_plain({});
6940  _line_progressed(1);
6941  _maybe_skip_whitespace_tokens();
6942  _set_indentation(startindent);
6943  // keep the child state on RVAL
6944  addrem_flags(RVAL, RKCL|QMRK);
6945  }
6946  }
6947  else if(first == '*')
6948  {
6949  csubstr ref = _scan_ref_map();
6950  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
6951  if(!_maybe_scan_following_colon())
6952  {
6953  _c4dbgp("mapblck[QMRK]: set ref as key");
6954  _handle_annotations_before_blck_key_scalar();
6955  m_evt_handler->set_key_ref(ref);
6956  addrem_flags(RKCL, QMRK);
6957  }
6958  else
6959  {
6960  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
6961  addrem_flags(RKCL, QMRK);
6962  _handle_annotations_before_blck_key_scalar();
6963  m_evt_handler->begin_map_key_block();
6964  m_evt_handler->set_key_ref(ref);
6965  _set_indentation(startindent);
6966  // keep the child state on RVAL
6967  addrem_flags(RVAL, RKCL|QMRK);
6968  }
6969  _maybe_skip_whitespace_tokens();
6970  }
6971  else if(first == '&')
6972  {
6973  csubstr anchor = _scan_anchor();
6974  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6975  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6976  }
6977  else if(first == '!')
6978  {
6979  csubstr tag = _scan_tag();
6980  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
6981  _add_annotation(&m_pending_tags, tag, startindent, startline);
6982  }
6983  else if(first == '-')
6984  {
6985  _c4dbgp("mapblck[QMRK]: maybe doc?");
6986  csubstr rs = rem.sub(1);
6987  if(rs == "--" || rs.begins_with("-- "))
6988  {
6989  _c4dbgp("mapblck[QMRK]: end+start doc");
6990  _start_doc_suddenly();
6991  _line_progressed(3);
6992  }
6993  else
6994  {
6995  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
6996  addrem_flags(RKCL, RKEY|QMRK);
6997  m_evt_handler->begin_seq_key_block();
6998  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
6999  _set_indentation(startindent);
7000  _line_progressed(1);
7001  }
7002  _maybe_skip_whitespace_tokens();
7003  goto mapblck_finish;
7004  }
7005  else if(first == '[')
7006  {
7007  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7008  addrem_flags(RKCL, RKEY|QMRK);
7009  m_evt_handler->begin_seq_key_flow();
7010  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
7011  _set_indentation(m_evt_handler->m_parent->indref);
7012  _line_progressed(1);
7013  goto mapblck_finish;
7014  }
7015  else if(first == '{')
7016  {
7017  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7018  addrem_flags(RKCL, RKEY|QMRK);
7019  m_evt_handler->begin_map_key_flow();
7020  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7021  _set_indentation(m_evt_handler->m_parent->indref);
7022  _line_progressed(1);
7023  goto mapblck_finish;
7024  }
7025  else if(first == '?')
7026  {
7027  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7028  m_evt_handler->set_key_scalar_plain({});
7029  m_evt_handler->set_val_scalar_plain({});
7030  m_evt_handler->add_sibling();
7031  _line_progressed(1);
7032  }
7033  else if(first == '.')
7034  {
7035  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7036  csubstr rs = rem.sub(1);
7037  if(rs == ".." || rs.begins_with(".. "))
7038  {
7039  _c4dbgp("mapblck[QMRK]: end+start doc");
7040  _end_doc_suddenly();
7041  _line_progressed(3);
7042  goto mapblck_finish;
7043  }
7044  else
7045  {
7046  _c4err("parse error");
7047  }
7048  }
7049  else
7050  {
7051  _c4err("parse error");
7052  }
7053  }
7054 
7055  mapblck_again:
7056  _c4dbgt("mapblck: again", 0);
7057  if(_finished_line())
7058  {
7059  _line_ended();
7060  _scan_line();
7061  if(_finished_file())
7062  {
7063  _c4dbgp("mapblck: file finished!");
7064  _end_map_blck();
7065  goto mapblck_finish;
7066  }
7067  _c4dbgnextline();
7068  }
7069  goto mapblck_start;
7070 
7071  mapblck_finish:
7072  _c4dbgp("mapblck: finish");
7073 }
7074 
7075 
7076 //-----------------------------------------------------------------------------
7077 
7078 template<class EventHandler>
7079 void ParseEngine<EventHandler>::_handle_unk_json()
7080 {
7081  _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7082 
7083  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7084  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7085 
7086  _maybe_skip_comment();
7087  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7088  if(!rem.len)
7089  return;
7090 
7091  size_t pos = rem.first_not_of(" \t");
7092  if(pos)
7093  {
7094  pos = pos != npos ? pos : rem.len;
7095  _c4dbgpf("skipping indentation of {}", pos);
7096  _line_progressed(pos);
7097  rem = m_evt_handler->m_curr->line_contents.rem;
7098  if(!rem.len)
7099  return;
7100  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7101  }
7102 
7103  if(rem.begins_with('['))
7104  {
7105  _c4dbgp("it's a seq");
7106  m_evt_handler->check_trailing_doc_token();
7107  _maybe_begin_doc();
7108  m_evt_handler->begin_seq_val_flow();
7109  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7110  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7111  m_doc_empty = false;
7112  _line_progressed(1);
7113  }
7114  else if(rem.begins_with('{'))
7115  {
7116  _c4dbgp("it's a map");
7117  m_evt_handler->check_trailing_doc_token();
7118  _maybe_begin_doc();
7119  m_evt_handler->begin_map_val_flow();
7120  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7121  m_doc_empty = false;
7122  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7123  _line_progressed(1);
7124  }
7125  else
7126  {
7127  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7128  _maybe_skip_whitespace_tokens();
7129  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7130  if(!s.len)
7131  return;
7132  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7133  const char first = s.str[0];
7134  ScannedScalar sc;
7135  if(first == '"')
7136  {
7137  _c4dbgp("runk_json: scanning double-quoted scalar");
7138  m_evt_handler->check_trailing_doc_token();
7139  _maybe_begin_doc();
7140  add_flags(RDOC);
7141  m_doc_empty = false;
7142  sc = _scan_scalar_dquot();
7143  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7144  if(!_maybe_scan_following_colon())
7145  {
7146  _c4dbgp("runk_json: set as val");
7147  _handle_annotations_before_blck_val_scalar();
7148  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7149  }
7150  else
7151  {
7152  _c4err("parse error");
7153  }
7154  }
7155  else if(_scan_scalar_plain_unk(&sc))
7156  {
7157  _c4dbgp("runk_json: got a plain scalar");
7158  m_evt_handler->check_trailing_doc_token();
7159  _maybe_begin_doc();
7160  add_flags(RDOC);
7161  m_doc_empty = false;
7162  if(!_maybe_scan_following_colon())
7163  {
7164  _c4dbgp("runk_json: set as val");
7165  _handle_annotations_before_blck_val_scalar();
7166  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7167  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7168  }
7169  else
7170  {
7171  _c4err("parse error");
7172  }
7173  }
7174  else
7175  {
7176  _c4err("parse error");
7177  }
7178  }
7179 }
7180 
7181 
7182 //-----------------------------------------------------------------------------
7183 
/** Handle the remainder of the current line while the parser is at the top
 * level (RTOP is asserted) and has not yet entered a sequence or a map
 * (RNXT|RSEQ|RMAP are asserted absent).
 *
 * After skipping any comment and leading spaces/tabs, the first remaining
 * character selects the action:
 *  - at zero indentation and line begin: a document-begin token, a
 *    document-end token, or a '%' directive; when one of these is
 *    recognized the function returns early
 *  - '[' / '{': begin a flow seq / flow map; when the pending annotations
 *    require a key container, a block map is begun first and the flow
 *    container becomes its key
 *  - '-' / '?' / ':' recognized as block tokens: begin a block seq, a block
 *    map with a complex key, or a block map with an empty key (or turn the
 *    previous val into the first key of a new block map)
 *  - '&' / '!': buffer the anchor / tag as a pending annotation
 *  - '*': set a reference as val, or as key of a new block map when a
 *    colon follows
 *  - anything else: scan a single-quoted, double-quoted, literal ('|'),
 *    folded ('>') or plain scalar; it becomes the val, or the first key of
 *    a new block map when a colon follows (an error for '|' and '>').
 *    If no scalar can be scanned, nothing further is done here.
 */
7184 template<class EventHandler>
7185 void ParseEngine<EventHandler>::_handle_unk()
7186 {
7187  _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7188 
7189  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7190  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7191 
7192  _maybe_skip_comment();
7193  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7194  if(!rem.len)
7195  return;
7196 
      // skip leading spaces/tabs; when the line holds nothing else
      // (first_not_of() gave npos) the whole remainder is consumed
7197  size_t pos = rem.first_not_of(" \t");
7198  if(pos)
7199  {
7200  pos = pos != npos ? pos : rem.len;
7201  _c4dbgpf("skipping {} whitespace characters", pos);
7202  _line_progressed(pos);
7203  rem = m_evt_handler->m_curr->line_contents.rem;
7204  if(!rem.len)
7205  return;
7206  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7207  }
7208 
      // document markers and directives are only looked for when the line
      // has zero indentation and we are at its beginning
7209  if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7210  {
7211  const char first = rem.str[0];
7212  _c4dbgp("rtop: zero indent + at line begin");
7213  if(first == '-')
7214  {
7215  _c4dbgp("rtop: suspecting doc");
      // when this is not a doc-begin token, nothing is consumed and the
      // generic dispatch below sees the same '-'
7216  if(_is_doc_begin_token(rem))
7217  {
7218  _c4dbgp("rtop: begin doc");
7219  _maybe_end_doc();
7220  _begin2_doc_expl();
7221  _set_indentation(0);
7222  addrem_flags(RDOC|RUNK, NDOC);
7223  _line_progressed(3u);
7224  _maybe_skip_whitespace_tokens();
7225  return;
7226  }
7227  }
7228  else if(first == '.')
7229  {
7230  _c4dbgp("rtop: suspecting doc end");
7231  if(_is_doc_end_token(rem))
7232  {
7233  _c4dbgp("rtop: end doc");
      // the explicit doc end is only emitted when RDOC is set; otherwise
      // the token is consumed without emitting it
7234  if(has_any(RDOC))
7235  {
7236  _end2_doc_expl();
7237  }
7238  else
7239  {
7240  _c4dbgp("rtop: ignore end doc");
7241  }
7242  addrem_flags(NDOC|RUNK, RDOC);
7243  _line_progressed(3u);
7244  _maybe_skip_whitespace_tokens();
7245  return;
7246  }
7247  }
7248  else if(first == '%')
7249  {
7250  _c4dbgpf("directive: {}", rem);
      // a directive is rejected when m_doc_empty is false and NDOC is not set
7251  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7252  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7253  _handle_directive(rem);
7254  return;
7255  }
7256  }
7257 
7258  /* no else-if! */
7259  char first = rem.str[0];
7260 
7261  if(first == '[')
7262  {
7263  m_evt_handler->check_trailing_doc_token();
7264  _maybe_begin_doc();
7265  m_doc_empty = false;
      // computed from rem before the line is progressed past the '['
7266  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7267  if(C4_LIKELY( ! _annotations_require_key_container()))
7268  {
7269  _c4dbgp("it's a seq, flow");
7270  _handle_annotations_before_blck_val_scalar();
7271  m_evt_handler->begin_seq_val_flow();
7272  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7273  _set_indentation(startindent);
7274  }
7275  else
7276  {
      // begin a block map first, then begin the flow seq as its key
7277  _c4dbgp("start new block map, set flow seq as key (!)");
7278  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7279  m_evt_handler->begin_map_val_block();
7280  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7281  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7282  m_evt_handler->begin_seq_key_flow();
7283  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7284  _set_indentation(startindent);
7285  }
7286  _line_progressed(1);
7287  }
7288  else if(first == '{')
7289  {
7290  m_evt_handler->check_trailing_doc_token();
7291  _maybe_begin_doc();
7292  m_doc_empty = false;
      // computed from rem before the line is progressed past the '{'
7293  const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7294  if(C4_LIKELY( ! _annotations_require_key_container()))
7295  {
7296  _c4dbgp("it's a map, flow");
7297  _handle_annotations_before_blck_val_scalar();
7298  m_evt_handler->begin_map_val_flow();
7299  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7300  _set_indentation(startindent);
7301  }
7302  else
7303  {
      // begin a block map first, then begin the flow map as its key
7304  _c4dbgp("start new block map, set flow map as key (!)");
7305  _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7306  m_evt_handler->begin_map_val_block();
7307  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7308  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7309  m_evt_handler->begin_map_key_flow();
7310  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7311  _set_indentation(startindent);
7312  }
7313  _line_progressed(1);
7314  }
7315  else if(first == '-' && _is_blck_token(rem))
7316  {
7317  _c4dbgp("it's a seq, block");
7318  m_evt_handler->check_trailing_doc_token();
7319  _maybe_begin_doc();
7320  _handle_annotations_before_blck_val_scalar();
7321  m_evt_handler->begin_seq_val_block();
7322  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7323  m_doc_empty = false;
7324  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7325  _line_progressed(1);
7326  _maybe_skip_whitespace_tokens();
7327  }
7328  else if(first == '?' && _is_blck_token(rem))
7329  {
7330  _c4dbgp("it's a map + this key is complex");
7331  m_evt_handler->check_trailing_doc_token();
7332  _maybe_begin_doc();
7333  _handle_annotations_before_blck_val_scalar();
7334  m_evt_handler->begin_map_val_block();
      // the new block map starts in the QMRK state
7335  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7336  m_doc_empty = false;
7337  m_was_inside_qmrk = true;
7338  _save_indentation();
7339  _line_progressed(1);
7340  _maybe_skip_whitespace_tokens();
7341  }
7342  else if(first == ':' && _is_blck_token(rem))
7343  {
7344  if(m_doc_empty)
7345  {
7346  _c4dbgp("it's a map with an empty key");
7347  m_evt_handler->check_trailing_doc_token();
7348  _maybe_begin_doc();
7349  _handle_annotations_before_blck_val_scalar();
7350  m_evt_handler->begin_map_val_block();
7351  m_evt_handler->set_key_scalar_plain({});
7352  m_doc_empty = false;
7353  _save_indentation();
7354  }
7355  else
7356  {
7357  _c4dbgp("actually prev val is a key!");
      // indref is read before the handler call and re-applied afterwards
7358  size_t prev_indentation = m_evt_handler->m_curr->indref;
7359  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7360  _set_indentation(prev_indentation);
7361  }
      // in both cases the parser continues in a block map, on RVAL
7362  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7363  _line_progressed(1);
7364  _maybe_skip_whitespace_tokens();
7365  }
7366  else if(first == '&')
7367  {
7368  csubstr anchor = _scan_anchor();
7369  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7370  m_evt_handler->check_trailing_doc_token();
7371  _maybe_begin_doc();
      // buffer the anchor in m_pending_anchors together with its column and line
7372  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7373  const size_t line = m_evt_handler->m_curr->pos.line;
7374  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7375  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7376  m_doc_empty = false;
7377  }
7378  else if(first == '*')
7379  {
7380  csubstr ref = _scan_ref_map();
7381  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7382  m_evt_handler->check_trailing_doc_token();
7383  _maybe_begin_doc();
7384  m_doc_empty = false;
7385  if(!_maybe_scan_following_colon())
7386  {
7387  _c4dbgp("runk: set val ref");
7388  _handle_annotations_before_blck_val_scalar();
7389  m_evt_handler->set_val_ref(ref);
7390  }
7391  else
7392  {
7393  _c4dbgp("runk: start new block map, set ref as key");
7394  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7395  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7396  _handle_annotations_before_start_mapblck(startline);
7397  m_evt_handler->begin_map_val_block();
7398  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7399  m_evt_handler->set_key_ref(ref);
7400  _maybe_skip_whitespace_tokens();
7401  _set_indentation(startindent);
7402  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7403  }
7404  }
7405  else if(first == '!')
7406  {
7407  csubstr tag = _scan_tag();
7408  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7409  // we need to buffer the tags, as there may be two
7410  // consecutive tags in here
7411  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7412  const size_t line = m_evt_handler->m_curr->pos.line;
7413  _add_annotation(&m_pending_tags, tag, indentation, line);
7414  }
7415  else
7416  {
      // scalars: each branch below sets the scalar as val, unless a colon
      // follows it
7417  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7418  _maybe_skip_whitespace_tokens();
7419  csubstr s = m_evt_handler->m_curr->line_contents.rem;
7420  if(!s.len)
7421  return;
7422  const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7423  const size_t startline = m_evt_handler->m_curr->pos.line; // save
      // re-read the first character after whitespace tokens were skipped
7424  first = s.str[0];
7425  ScannedScalar sc;
7426  if(first == '\'')
7427  {
7428  _c4dbgp("runk: scanning single-quoted scalar");
7429  m_evt_handler->check_trailing_doc_token();
7430  _maybe_begin_doc();
7431  add_flags(RDOC);
7432  m_doc_empty = false;
7433  sc = _scan_scalar_squot();
7434  if(!_maybe_scan_following_colon())
7435  {
7436  _c4dbgp("runk: set as val");
7437  _handle_annotations_before_blck_val_scalar();
7438  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7439  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7440  }
7441  else
7442  {
7443  _c4dbgp("runk: start new block map, set scalar as key");
7444  _handle_annotations_before_start_mapblck(startline);
7445  m_evt_handler->begin_map_val_block();
7446  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7447  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7448  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7449  _maybe_skip_whitespace_tokens();
7450  _set_indentation(startindent);
7451  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7452  }
7453  }
7454  else if(first == '"')
7455  {
7456  _c4dbgp("runk: scanning double-quoted scalar");
7457  m_evt_handler->check_trailing_doc_token();
7458  _maybe_begin_doc();
7459  add_flags(RDOC);
7460  m_doc_empty = false;
7461  sc = _scan_scalar_dquot();
7462  if(!_maybe_scan_following_colon())
7463  {
7464  _c4dbgp("runk: set as val");
7465  _handle_annotations_before_blck_val_scalar();
7466  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7467  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7468  }
7469  else
7470  {
7471  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7472  _handle_annotations_before_start_mapblck(startline);
7473  m_evt_handler->begin_map_val_block();
7474  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7475  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7476  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7477  _maybe_skip_whitespace_tokens();
7478  _set_indentation(startindent);
7479  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7480  }
7481  }
7482  else if(first == '|')
7483  {
7484  _c4dbgp("runk: scanning block-literal scalar");
7485  m_evt_handler->check_trailing_doc_token();
7486  _maybe_begin_doc();
7487  add_flags(RDOC);
7488  m_doc_empty = false;
7489  ScannedBlock sb;
7490  _scan_block(&sb, startindent);
      // a block literal can only become a val here; a following colon is an error
7491  if(C4_LIKELY(!_maybe_scan_following_colon()))
7492  {
7493  _c4dbgp("runk: set as val");
7494  _handle_annotations_before_blck_val_scalar();
7495  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7496  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7497  }
7498  else
7499  {
7500  _c4err("block literal keys must be enclosed in '?'");
7501  }
7502  }
7503  else if(first == '>')
7504  {
7505  _c4dbgp("runk: scanning block-folded scalar");
7506  m_evt_handler->check_trailing_doc_token();
7507  _maybe_begin_doc();
7508  add_flags(RDOC);
7509  m_doc_empty = false;
7510  ScannedBlock sb;
7511  _scan_block(&sb, startindent);
      // a folded block can only become a val here; a following colon is an error
7512  if(C4_LIKELY(!_maybe_scan_following_colon()))
7513  {
7514  _c4dbgp("runk: set as val");
7515  _handle_annotations_before_blck_val_scalar();
7516  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7517  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7518  }
7519  else
7520  {
7521  _c4err("block folded keys must be enclosed in '?'");
7522  }
7523  }
      // last resort: plain scalar; there is no else branch, so when the
      // scan fails nothing more happens in this call
7524  else if(_scan_scalar_plain_unk(&sc))
7525  {
7526  _c4dbgp("runk: got a plain scalar");
7527  m_evt_handler->check_trailing_doc_token();
7528  _maybe_begin_doc();
7529  add_flags(RDOC);
7530  m_doc_empty = false;
7531  if(!_maybe_scan_following_colon())
7532  {
7533  _c4dbgp("runk: set as val");
7534  _handle_annotations_before_blck_val_scalar();
7535  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7536  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7537  }
7538  else
7539  {
7540  _c4dbgp("runk: start new block map, set scalar as key");
7541  _handle_annotations_before_start_mapblck(startline);
7542  m_evt_handler->begin_map_val_block();
7543  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7544  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7545  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7546  _maybe_skip_whitespace_tokens();
7547  _set_indentation(startindent);
7548  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7549  }
7550  }
7551  }
7552 }
7553 
7554 
7555 //-----------------------------------------------------------------------------
7556 
7557 template<class EventHandler>
7558 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7559 {
7560  _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7561 
7562  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7563 
7564  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7565  if(has_any(RNXT))
7566  {
7567  _c4dbgp("usty[RNXT]: finishing!");
7568  _end_stream();
7569  }
7570  #endif
7571 
7572  _maybe_skip_comment();
7573  csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7574  if(!rem.len)
7575  return;
7576 
7577  size_t pos = rem.first_not_of(" \t");
7578  if(pos)
7579  {
7580  pos = pos != npos ? pos : rem.len;
7581  _c4dbgpf("skipping indentation of {}", pos);
7582  _line_progressed(pos);
7583  rem = m_evt_handler->m_curr->line_contents.rem;
7584  if(!rem.len)
7585  return;
7586  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7587  }
7588 
7589  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7590  size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7591  char first = rem.str[0];
7592  if(has_any(RSEQ)) // destination is a sequence
7593  {
7594  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7595  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7596  if(first == '[')
7597  {
7598  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7599  add_flags(RNXT);
7600  m_evt_handler->_push();
7601  addrem_flags(FLOW|RVAL, RNXT|USTY);
7602  _set_indentation(startindent);
7603  _line_progressed(1);
7604  _maybe_skip_whitespace_tokens();
7605  }
7606  else if(first == '-' && _is_blck_token(rem))
7607  {
7608  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7609  add_flags(RNXT);
7610  m_evt_handler->_push();
7611  addrem_flags(BLCK|RVAL, RNXT|USTY);
7612  _set_indentation(startindent);
7613  _line_progressed(1);
7614  _maybe_skip_whitespace_tokens();
7615  }
7616  else
7617  {
7618  _c4err("can only parse a seq into an existing seq");
7619  }
7620  }
7621  else if(has_any(RMAP)) // destination is a map
7622  {
7623  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7624  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7625  if(first == '{')
7626  {
7627  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7628  add_flags(RNXT);
7629  _handle_annotations_before_blck_val_scalar();
7630  m_evt_handler->_push();
7631  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7632  _set_indentation(startindent);
7633  _line_progressed(1);
7634  _maybe_skip_whitespace_tokens();
7635  }
7636  else if(first == '?' && _is_blck_token(rem))
7637  {
7638  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7639  add_flags(RNXT);
7640  _handle_annotations_before_blck_val_scalar();
7641  m_evt_handler->_push();
7642  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7643  m_was_inside_qmrk = true;
7644  _save_indentation();
7645  _line_progressed(1);
7646  _maybe_skip_whitespace_tokens();
7647  }
7648  else if(first == ':' && _is_blck_token(rem))
7649  {
7650  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7651  add_flags(RNXT);
7652  _handle_annotations_before_blck_val_scalar();
7653  m_evt_handler->_push();
7654  m_evt_handler->set_key_scalar_plain({});
7655  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7656  _save_indentation();
7657  _line_progressed(1);
7658  _maybe_skip_whitespace_tokens();
7659  }
7660  else if(rem.begins_with('&'))
7661  {
7662  csubstr anchor = _scan_anchor();
7663  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7664  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7665  const size_t line = m_evt_handler->m_curr->pos.line;
7666  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7667  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7668  }
7669  else if(first == '*')
7670  {
7671  csubstr ref = _scan_ref_map();
7672  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7673  if(!_maybe_scan_following_colon())
7674  {
7675  _c4err("cannot read a VAL to a map");
7676  }
7677  else
7678  {
7679  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7680  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7681  add_flags(RNXT);
7682  _handle_annotations_before_start_mapblck(startline);
7683  m_evt_handler->_push();
7684  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7685  m_evt_handler->set_key_ref(ref);
7686  _maybe_skip_whitespace_tokens();
7687  _set_indentation(startindent);
7688  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7689  }
7690  }
7691  else if(first == '!')
7692  {
7693  csubstr tag = _scan_tag();
7694  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7695  // we need to buffer the tags, as there may be two
7696  // consecutive tags in here
7697  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7698  const size_t line = m_evt_handler->m_curr->pos.line;
7699  _add_annotation(&m_pending_tags, tag, indentation, line);
7700  }
7701  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7702  {
7703  _c4err("cannot parse a seq into an existing map");
7704  }
7705  else
7706  {
7707  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7708  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7709  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7710  ScannedScalar sc;
7711  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7712  if(first == '\'')
7713  {
7714  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7715  sc = _scan_scalar_squot();
7716  if(!_maybe_scan_following_colon())
7717  {
7718  _c4err("cannot read a VAL to a map");
7719  }
7720  else
7721  {
7722  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7723  add_flags(RNXT);
7724  _handle_annotations_before_start_mapblck(startline);
7725  m_evt_handler->_push();
7726  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7727  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7728  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7729  _set_indentation(startindent);
7730  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7731  _maybe_skip_whitespace_tokens();
7732  }
7733  }
7734  else if(first == '"')
7735  {
7736  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7737  sc = _scan_scalar_dquot();
7738  if(!_maybe_scan_following_colon())
7739  {
7740  _c4err("cannot read a VAL to a map");
7741  }
7742  else
7743  {
7744  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7745  add_flags(RNXT);
7746  _handle_annotations_before_start_mapblck(startline);
7747  m_evt_handler->_push();
7748  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7749  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7750  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7751  _set_indentation(startindent);
7752  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7753  _maybe_skip_whitespace_tokens();
7754  }
7755  }
7756  else if(first == '|')
7757  {
7758  _c4err("block literal keys must be enclosed in '?'");
7759  }
7760  else if(first == '>')
7761  {
7762  _c4err("block literal keys must be enclosed in '?'");
7763  }
7764  else if(_scan_scalar_plain_unk(&sc))
7765  {
7766  _c4dbgp("usty[RMAP]: got a plain scalar");
7767  if(!_maybe_scan_following_colon())
7768  {
7769  _c4err("cannot read a VAL to a map");
7770  }
7771  else
7772  {
7773  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7774  add_flags(RNXT);
7775  _handle_annotations_before_start_mapblck(startline);
7776  m_evt_handler->_push();
7777  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7778  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7779  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7780  _set_indentation(startindent);
7781  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7782  _maybe_skip_whitespace_tokens();
7783  }
7784  }
7785  else
7786  {
7787  _c4err("parse error");
7788  }
7789  }
7790  }
7791  else // destination is unknown
7792  {
7793  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7794  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
7795  if(first == '[')
7796  {
7797  _c4dbgp("usty[UNK]: it's a flow seq");
7798  add_flags(RNXT);
7799  _handle_annotations_before_blck_val_scalar();
7800  m_evt_handler->begin_seq_val_flow();
7801  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
7802  _set_indentation(startindent);
7803  _line_progressed(1);
7804  _maybe_skip_whitespace_tokens();
7805  }
7806  else if(first == '-' && _is_blck_token(rem))
7807  {
7808  _c4dbgp("usty[UNK]: it's a block seq");
7809  add_flags(RNXT);
7810  _handle_annotations_before_blck_val_scalar();
7811  m_evt_handler->begin_seq_val_block();
7812  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
7813  _set_indentation(startindent);
7814  _line_progressed(1);
7815  _maybe_skip_whitespace_tokens();
7816  }
7817  else if(first == '{')
7818  {
7819  _c4dbgp("usty[UNK]: it's a flow map");
7820  add_flags(RNXT);
7821  _handle_annotations_before_blck_val_scalar();
7822  m_evt_handler->begin_map_val_flow();
7823  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7824  _set_indentation(startindent);
7825  _line_progressed(1);
7826  _maybe_skip_whitespace_tokens();
7827  }
7828  else if(first == '?' && _is_blck_token(rem))
7829  {
7830  _c4dbgp("usty[UNK]: it's a map + this key is complex");
7831  add_flags(RNXT);
7832  _handle_annotations_before_blck_val_scalar();
7833  m_evt_handler->begin_map_val_block();
7834  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7835  m_was_inside_qmrk = true;
7836  _save_indentation();
7837  _line_progressed(1);
7838  _maybe_skip_whitespace_tokens();
7839  }
7840  else if(first == ':' && _is_blck_token(rem))
7841  {
7842  _c4dbgp("usty[UNK]: it's a map with an empty key");
7843  add_flags(RNXT);
7844  _handle_annotations_before_blck_val_scalar();
7845  m_evt_handler->begin_map_val_block();
7846  m_evt_handler->set_key_scalar_plain({});
7847  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7848  _save_indentation();
7849  _line_progressed(1);
7850  _maybe_skip_whitespace_tokens();
7851  }
7852  else if(first == '&')
7853  {
7854  csubstr anchor = _scan_anchor();
7855  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7856  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7857  const size_t line = m_evt_handler->m_curr->pos.line;
7858  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7859  _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7860  }
7861  else if(first == '*')
7862  {
7863  csubstr ref = _scan_ref_map();
7864  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
7865  if(!_maybe_scan_following_colon())
7866  {
7867  _c4dbgp("usty[UNK]: set val ref");
7868  _handle_annotations_before_blck_val_scalar();
7869  m_evt_handler->set_val_ref(ref);
7870  }
7871  else
7872  {
7873  _c4dbgp("usty[UNK]: start new block map, set ref as key");
7874  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7875  add_flags(RNXT);
7876  _handle_annotations_before_start_mapblck(startline);
7877  m_evt_handler->begin_map_val_block();
7878  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7879  m_evt_handler->set_key_ref(ref);
7880  _maybe_skip_whitespace_tokens();
7881  _set_indentation(startindent);
7882  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7883  }
7884  }
7885  else if(first == '!')
7886  {
7887  csubstr tag = _scan_tag();
7888  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
7889  // we need to buffer the tags, as there may be two
7890  // consecutive tags in here
7891  const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7892  const size_t line = m_evt_handler->m_curr->pos.line;
7893  _add_annotation(&m_pending_tags, tag, indentation, line);
7894  }
7895  else
7896  {
7897  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7898  startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7899  const size_t startline = m_evt_handler->m_curr->pos.line; // save
7900  first = rem.str[0];
7901  ScannedScalar sc;
7902  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
7903  if(first == '\'')
7904  {
7905  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
7906  sc = _scan_scalar_squot();
7907  if(!_maybe_scan_following_colon())
7908  {
7909  _c4dbgp("usty[UNK]: set as val");
7910  _handle_annotations_before_blck_val_scalar();
7911  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7912  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7913  _end_stream();
7914  }
7915  else
7916  {
7917  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7918  add_flags(RNXT);
7919  _handle_annotations_before_start_mapblck(startline);
7920  m_evt_handler->begin_map_val_block();
7921  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7922  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7923  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7924  _set_indentation(startindent);
7925  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7926  _maybe_skip_whitespace_tokens();
7927  }
7928  }
7929  else if(first == '"')
7930  {
7931  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
7932  sc = _scan_scalar_dquot();
7933  if(!_maybe_scan_following_colon())
7934  {
7935  _c4dbgp("usty[UNK]: set as val");
7936  _handle_annotations_before_blck_val_scalar();
7937  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7938  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7939  _end_stream();
7940  }
7941  else
7942  {
7943  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
7944  add_flags(RNXT);
7945  _handle_annotations_before_start_mapblck(startline);
7946  m_evt_handler->begin_map_val_block();
7947  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7948  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7949  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7950  _set_indentation(startindent);
7951  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7952  _maybe_skip_whitespace_tokens();
7953  }
7954  }
7955  else if(first == '|')
7956  {
7957  _c4dbgp("usty[UNK]: scanning block-literal scalar");
7958  ScannedBlock sb;
7959  _scan_block(&sb, startindent);
7960  _c4dbgp("usty[UNK]: set as val");
7961  _handle_annotations_before_blck_val_scalar();
7962  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7963  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7964  _end_stream();
7965  }
7966  else if(first == '>')
7967  {
7968  _c4dbgp("usty[UNK]: scanning block-folded scalar");
7969  ScannedBlock sb;
7970  _scan_block(&sb, startindent);
7971  _c4dbgp("usty[UNK]: set as val");
7972  _handle_annotations_before_blck_val_scalar();
7973  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7974  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7975  _end_stream();
7976  }
7977  else if(_scan_scalar_plain_unk(&sc))
7978  {
7979  _c4dbgp("usty[UNK]: got a plain scalar");
7980  if(!_maybe_scan_following_colon())
7981  {
7982  _c4dbgp("usty[UNK]: set as val");
7983  _handle_annotations_before_blck_val_scalar();
7984  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7985  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7986  _end_stream();
7987  }
7988  else
7989  {
7990  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7991  add_flags(RNXT);
7992  _handle_annotations_before_start_mapblck(startline);
7993  m_evt_handler->begin_map_val_block();
7994  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7995  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7996  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7997  _set_indentation(startindent);
7998  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7999  _maybe_skip_whitespace_tokens();
8000  }
8001  }
8002  else
8003  {
8004  _c4err("parse error");
8005  }
8006  }
8007  }
8008 }
8009 
8010 
8011 //-----------------------------------------------------------------------------
8012 
8013 template<class EventHandler>
8014 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8015 {
8016  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8017  m_file = filename;
8018  m_buf = src;
8019  _reset();
8020  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8021  m_evt_handler->begin_stream();
8022  while( ! _finished_file())
8023  {
8024  _scan_line();
8025  while( ! _finished_line())
8026  {
8027  _c4dbgnextline();
8028  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8029  if(has_any(RSEQ))
8030  {
8031  _handle_seq_json();
8032  }
8033  else if(has_any(RMAP))
8034  {
8035  _handle_map_json();
8036  }
8037  else if(has_any(RUNK))
8038  {
8039  _handle_unk_json();
8040  }
8041  else
8042  {
8043  _c4err("internal error");
8044  }
8045  }
8046  if(_finished_file())
8047  break; // it may have finished because of multiline blocks
8048  _line_ended();
8049  }
8050  _end_stream();
8051  m_evt_handler->finish_parse();
8052 }
8053 
8054 
8055 //-----------------------------------------------------------------------------
8056 
8057 template<class EventHandler>
8058 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8059 {
8060  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8061  m_file = filename;
8062  m_buf = src;
8063  _reset();
8064  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8065  m_evt_handler->begin_stream();
8066  while( ! _finished_file())
8067  {
8068  _scan_line();
8069  while( ! _finished_line())
8070  {
8071  _c4dbgnextline();
8072  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8073  if(has_any(FLOW))
8074  {
8075  if(has_none(RSEQIMAP))
8076  {
8077  if(has_any(RSEQ))
8078  {
8079  _handle_seq_flow();
8080  }
8081  else
8082  {
8083  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8084  _handle_map_flow();
8085  }
8086  }
8087  else
8088  {
8089  _handle_seq_imap();
8090  }
8091  }
8092  else if(has_any(BLCK))
8093  {
8094  if(has_any(RSEQ))
8095  {
8096  _handle_seq_block();
8097  }
8098  else
8099  {
8100  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8101  _handle_map_block();
8102  }
8103  }
8104  else if(has_any(RUNK))
8105  {
8106  _handle_unk();
8107  }
8108  else if(has_any(USTY))
8109  {
8110  _handle_usty();
8111  }
8112  else
8113  {
8114  _c4err("internal error");
8115  }
8116  }
8117  if(_finished_file())
8118  break; // it may have finished because of multiline blocks
8119  _line_ended();
8120  }
8121  _end_stream();
8122  m_evt_handler->finish_parse();
8123 }
8124 
8125 } // namespace yml
8126 } // namespace c4
8127 
8128 #undef _c4dbgnextline
8129 
8130 #if defined(_MSC_VER)
8131 # pragma warning(pop)
8132 #elif defined(__clang__)
8133 # pragma clang diagnostic pop
8134 #elif defined(__GNUC__)
8135 # pragma GCC diagnostic pop
8136 #endif
8137 
8138 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
Holds a pointer to an existing tree, and a node id.
Definition: node.hpp:836
Tree const * tree() const noexcept
Definition: node.hpp:908
id_type id() const noexcept
Definition: node.hpp:909
bool readable() const noexcept
because a ConstNodeRef cannot be used to write to the tree, readable() has the same meaning as !...
Definition: node.hpp:894
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
NodeType type(id_type node) const
Definition: tree.hpp:378
id_type prev_sibling(id_type node) const
Definition: tree.hpp:497
bool has_key(id_type node) const
Definition: tree.hpp:409
id_type parent(id_type node) const
Definition: tree.hpp:495
id_type next_sibling(id_type node) const
Definition: tree.hpp:498
csubstr const & key(id_type node) const
Definition: tree.hpp:381
bool has_val(id_type node) const
Definition: tree.hpp:410
csubstr const & val(id_type node) const
Definition: tree.hpp:387
bool is_container(id_type node) const
Definition: tree.hpp:406
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:23
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:48
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1548
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:893
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:252
@ npos
a null string position
Definition: common.hpp:266
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
Definition: parse.cpp:132
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _c4dbgfbf(...)
#define _c4dbgchomp(...)
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4dbgfsq(fmt,...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _c4dbgfdq(...)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4dbgfws(...)
#define _c4dbgfps(fmt,...)
#define _c4dbgfbl(...)
#define _c4dbgfb(...)
Filters an input string into a different output string.
a source file position
Definition: common.hpp:296
size_t col
column
Definition: common.hpp:302
size_t line
line
Definition: common.hpp:300
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:298
csubstr name
file name
Definition: common.hpp:304
Options to give to the parser to control its behavior.