rapidyaml  0.7.0
parse and emit YAML, and do it fast
parse_engine.def.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
3 
5 #include "c4/error.hpp"
6 #include "c4/charconv.hpp"
7 #include "c4/utf.hpp"
8 #include <c4/dump.hpp>
9 
10 #include <ctype.h>
11 
12 #include "c4/yml/detail/parser_dbg.hpp"
14 #ifdef RYML_DBG
15 #include "c4/yml/detail/print.hpp"
16 #endif
17 
18 
// Compile-time switches used by the token checks below to optionally add
// '\t' alternatives next to ' '.
// When RYML_WITH_TAB_TOKENS is defined:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments,
//  - _RYML_WITHOUT_TAB_TOKENS(...) expands to nothing,
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to `with`.
// Otherwise the first two swap roles and the third expands to `without`.
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
23 #else
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
27 #endif
28 
29 
30 // scaffold:
// Shorthands forwarding to flag-manipulation members of the parser.
// NOTE(review): _set_flags2 and _add_flags2 expand with a trailing `()`,
// i.e. they call whatever set_flags2(f)/add_flags2(f) returns, and
// _addrem_flags2 forwards to addrem_flags (no `2` suffix). None of these
// four macros is used in the visible part of this file -- confirm whether
// they are still needed.
31 #define _set_flags2(f) this->set_flags2(f)()
32 #define _add_flags2(f) this->add_flags2(f)()
33 #define _addrem_flags2(on, off) this->addrem_flags(on, off)
34 #define _rem_flags2(off) this->rem_flags2(off)
// m_state is an alias for the event handler's current state.
35 #define m_state (m_evt_handler->m_curr) // FIXME REMOVE
// Debug helper: emits a separator, then the current line number and byte offset.
36 #define _c4dbgnextline() \
37  do { \
38  _c4dbgq("\n-----------"); \
39  _c4dbgt("handling line={}, offset={}B", \
40  m_state->pos.line, \
41  m_state->pos.offset); \
42  } while(0)
43 
44 
45 #if defined(_MSC_VER)
46 # pragma warning(push)
47 # pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
48 # pragma warning(disable: 4702/*unreachable code*/)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
59 # if __GNUC__ >= 7
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
61 # endif
62 #endif
63 
64 namespace c4 {
65 namespace yml {
66 
67 namespace {
68 
69 C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
70 {
71  RYML_ASSERT(s.len > 0);
72  RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
73  return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
74 }
75 
76 inline bool _is_doc_begin_token(csubstr s)
77 {
78  RYML_ASSERT(s.begins_with('-'));
79  RYML_ASSERT(!s.ends_with("\n"));
80  RYML_ASSERT(!s.ends_with("\r"));
81  return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
82  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
83 }
84 
85 inline bool _is_doc_end_token(csubstr s)
86 {
87  RYML_ASSERT(s.begins_with('.'));
88  RYML_ASSERT(!s.ends_with("\n"));
89  RYML_ASSERT(!s.ends_with("\r"));
90  return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
91  && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
92 }
93 
94 inline bool _is_doc_token(csubstr s)
95 {
96  if(s.len >= 3)
97  {
98  if(s.str[0] == '-')
99  return _is_doc_begin_token(s);
100  else if(s.str[0] == '.')
101  return _is_doc_end_token(s);
102  }
103  return false;
104 }
105 
106 inline size_t _is_special_json_scalar(csubstr s)
107 {
108  RYML_ASSERT(s.len);
109  switch(s.str[0])
110  {
111  case 'f':
112  if(s.len >= 5 && s.begins_with("false"))
113  return 5u;
114  break;
115  case 't':
116  if(s.len >= 4 && s.begins_with("true"))
117  return 4u;
118  break;
119  case 'n':
120  if(s.len >= 4 && s.begins_with("null"))
121  return 4u;
122  break;
123  }
124  return 0u;
125 }
126 
127 
128 //-----------------------------------------------------------------------------
129 
130 C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
131 {
132  return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
133 }
134 
135 //! look for the next newline chars, and jump to the right of those
136 inline substr from_next_line(substr rem)
137 {
138  size_t nlpos = rem.first_of("\r\n");
139  if(nlpos == csubstr::npos)
140  return {};
141  const char nl = rem[nlpos];
142  rem = rem.right_of(nlpos);
143  if(rem.empty())
144  return {};
145  if(_extend_from_combined_newline(nl, rem.front()))
146  rem = rem.sub(1);
147  return rem;
148 }
149 
150 
151 //-----------------------------------------------------------------------------
152 
153 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
154 {
155  RYML_ASSERT(r[*i] == '\n');
156  size_t numnl_following = 0;
157  ++(*i);
158  for( ; *i < r.len; ++(*i))
159  {
160  if(r.str[*i] == '\n')
161  ++numnl_following;
162  // skip leading whitespace
163  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
164  ;
165  else
166  break;
167  }
168  return numnl_following;
169 }
170 
171 /** @p i is set to the first non whitespace character after the line
172  * @return the number of empty lines after the initial position */
173 inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
174 {
175  RYML_ASSERT(r[*i] == '\n');
176  size_t numnl_following = 0;
177  ++(*i);
178  if(indentation == 0)
179  {
180  for( ; *i < r.len; ++(*i))
181  {
182  if(r.str[*i] == '\n')
183  ++numnl_following;
184  // skip leading whitespace
185  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
186  ;
187  else
188  break;
189  }
190  }
191  else
192  {
193  for( ; *i < r.len; ++(*i))
194  {
195  if(r.str[*i] == '\n')
196  {
197  ++numnl_following;
198  // skip the indentation after the newline
199  size_t stop = *i + indentation;
200  for( ; *i < r.len; ++(*i))
201  {
202  if(r.str[*i] != ' ' && r.str[*i] != '\r')
203  break;
204  RYML_ASSERT(*i < stop);
205  }
206  C4_UNUSED(stop);
207  }
208  // skip leading whitespace
209  else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
210  ;
211  else
212  break;
213  }
214  }
215  return numnl_following;
216 }
217 
218 } // anon namespace
219 
220 
221 //-----------------------------------------------------------------------------
222 //-----------------------------------------------------------------------------
223 //-----------------------------------------------------------------------------
224 
225 template<class EventHandler>
227 {
228  _free();
229  _clr();
230 }
231 
232 template<class EventHandler>
234  : m_options(opts)
235  , m_file()
236  , m_buf()
237  , m_evt_handler(evt_handler)
238  , m_pending_anchors()
239  , m_pending_tags()
240  , m_newline_offsets()
241  , m_newline_offsets_size(0)
242  , m_newline_offsets_capacity(0)
243  , m_newline_offsets_buf()
244 {
245  RYML_CHECK(evt_handler);
246 }
247 
248 template<class EventHandler>
250  : m_options(that.m_options)
251  , m_file(that.m_file)
252  , m_buf(that.m_buf)
253  , m_evt_handler(that.m_evt_handler)
254  , m_pending_anchors(that.m_pending_anchors)
255  , m_pending_tags(that.m_pending_tags)
256  , m_newline_offsets(that.m_newline_offsets)
257  , m_newline_offsets_size(that.m_newline_offsets_size)
258  , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
259  , m_newline_offsets_buf(that.m_newline_offsets_buf)
260 {
261  that._clr();
262 }
263 
264 template<class EventHandler>
266  : m_options(that.m_options)
267  , m_file(that.m_file)
268  , m_buf(that.m_buf)
269  , m_evt_handler(that.m_evt_handler)
270  , m_pending_anchors(that.m_pending_anchors)
271  , m_pending_tags(that.m_pending_tags)
272  , m_newline_offsets()
273  , m_newline_offsets_size()
274  , m_newline_offsets_capacity()
275  , m_newline_offsets_buf()
276 {
277  if(that.m_newline_offsets_capacity)
278  {
279  _resize_locations(that.m_newline_offsets_capacity);
280  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
281  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
282  m_newline_offsets_size = that.m_newline_offsets_size;
283  }
284 }
285 
286 template<class EventHandler>
288 {
289  _free();
290  m_options = (that.m_options);
291  m_file = (that.m_file);
292  m_buf = (that.m_buf);
293  m_evt_handler = that.m_evt_handler;
294  m_pending_anchors = that.m_pending_anchors;
295  m_pending_tags = that.m_pending_tags;
296  m_newline_offsets = (that.m_newline_offsets);
297  m_newline_offsets_size = (that.m_newline_offsets_size);
298  m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
299  m_newline_offsets_buf = (that.m_newline_offsets_buf);
300  that._clr();
301  return *this;
302 }
303 
304 template<class EventHandler>
306 {
307  _free();
308  m_options = (that.m_options);
309  m_file = (that.m_file);
310  m_buf = (that.m_buf);
311  m_evt_handler = that.m_evt_handler;
312  m_pending_anchors = that.m_pending_anchors;
313  m_pending_tags = that.m_pending_tags;
314  if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
315  _resize_locations(that.m_newline_offsets_capacity);
316  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
317  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
318  memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
319  m_newline_offsets_size = that.m_newline_offsets_size;
320  m_newline_offsets_buf = that.m_newline_offsets_buf;
321  return *this;
322 }
323 
324 template<class EventHandler>
326 {
327  m_options = {};
328  m_file = {};
329  m_buf = {};
330  m_evt_handler = {};
331  m_pending_anchors = {};
332  m_pending_tags = {};
333  m_newline_offsets = {};
334  m_newline_offsets_size = {};
335  m_newline_offsets_capacity = {};
336  m_newline_offsets_buf = {};
337 }
338 
339 template<class EventHandler>
340 void ParseEngine<EventHandler>::_free()
341 {
342  if(m_newline_offsets)
343  {
344  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
345  m_newline_offsets = nullptr;
346  m_newline_offsets_size = 0u;
347  m_newline_offsets_capacity = 0u;
348  m_newline_offsets_buf = 0u;
349  }
350 }
351 
352 
353 //-----------------------------------------------------------------------------
354 
355 template<class EventHandler>
356 void ParseEngine<EventHandler>::_reset()
357 {
358  m_pending_anchors = {};
359  m_pending_tags = {};
360  if(m_options.locations())
361  {
362  _prepare_locations();
363  }
364  m_was_inside_qmrk = false;
365 }
366 
367 
368 //-----------------------------------------------------------------------------
369 
370 template<class EventHandler>
371 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
372 {
373  #define _ryml_relocate(s) \
374  if(s.is_sub(prev_arena)) \
375  { \
376  s.str = next_arena.str + (s.str - prev_arena.str); \
377  }
378  _ryml_relocate(m_buf);
379  _ryml_relocate(m_newline_offsets_buf);
380  for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
381  _ryml_relocate(m_pending_tags.annotations[i].str);
382  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
383  _ryml_relocate(m_pending_anchors.annotations[i].str);
384  #undef _ryml_relocate
385 }
386 
387 template<class EventHandler>
388 void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
389 {
390  ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
391 }
392 
393 
394 //-----------------------------------------------------------------------------
395 
396 template<class EventHandler>
397 template<class DumpFn>
398 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
399 {
400  auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
401  auto const& lc = st->line_contents;
402  csubstr contents = lc.stripped;
403  if(contents.len)
404  {
405  // print the yaml src line
406  size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
407  if(m_file.len)
408  {
409  detail::_dump(dumpfn, "{}:", m_file);
410  offs += m_file.len + 1;
411  }
412  detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col);
413  csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
414  csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
415  detail::_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
416  // highlight the remaining portion of the previous line
417  size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
418  size_t lastcol = firstcol + lc.rem.len;
419  for(size_t i = 0; i < offs + firstcol; ++i)
420  dumpfn(" ");
421  dumpfn("^");
422  for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
423  dumpfn("~");
424  detail::_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
425  }
426  else
427  {
428  dumpfn("\n");
429  }
430 
431 #ifdef RYML_DBG
432  // next line: print the state flags
433  {
434  char flagbuf_[128];
435  detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_state->flags));
436  }
437 #endif
438 }
439 
440 
441 //-----------------------------------------------------------------------------
442 
443 template<class EventHandler>
444 template<class ...Args>
445 void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
446 {
447  char errmsg[RYML_ERRMSG_SIZE];
448  detail::_SubstrWriter writer(errmsg);
449  auto dumpfn = [&writer](csubstr s){ writer.append(s); };
450  detail::_dump(dumpfn, fmt, args...);
451  writer.append('\n');
452  _fmt_msg(dumpfn);
453  size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
454  m_evt_handler->cancel_parse();
455  m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_state->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
456 }
457 
458 
459 //-----------------------------------------------------------------------------
460 #ifdef RYML_DBG
461 template<class EventHandler>
462 template<class ...Args>
463 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
464 {
465  if(_dbg_enabled())
466  {
467  auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
468  detail::_dump(dumpfn, fmt, args...);
469  dumpfn("\n");
470  _fmt_msg(dumpfn);
471  }
472 }
473 #endif
474 
475 
476 //-----------------------------------------------------------------------------
477 template<class EventHandler>
478 bool ParseEngine<EventHandler>::_finished_file() const
479 {
480  bool ret = m_state->pos.offset >= m_buf.len;
481  if(ret)
482  {
483  _c4dbgp("finished file!!!");
484  }
485  return ret;
486 }
487 
488 template<class EventHandler>
489 C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
490 {
491  return m_state->line_contents.rem.empty();
492 }
493 
494 
495 //-----------------------------------------------------------------------------
496 
497 template<class EventHandler>
498 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
499 {
500  csubstr rem = m_state->line_contents.rem;
501  if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
502  {
503  size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
504  if(pos == npos)
505  pos = rem.len; // maybe the line is just all whitespace
506  _c4dbgpf("skip {} whitespace characters", pos);
507  _line_progressed(pos);
508  }
509 }
510 
511 template<class EventHandler>
512 void ParseEngine<EventHandler>::_maybe_skipchars(char c)
513 {
514  csubstr rem = m_state->line_contents.rem;
515  if(rem.len && rem.str[0] == c)
516  {
517  size_t pos = rem.first_not_of(c);
518  if(pos == npos)
519  pos = rem.len; // maybe the line is just all c
520  _c4dbgpf("skip {}x'{}'", pos, c);
521  _line_progressed(pos);
522  }
523 }
524 
525 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
526 template<class EventHandler>
527 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
528 {
529  csubstr rem = m_state->line_contents.rem;
530  if(rem.len && rem.str[0] == c)
531  {
532  size_t pos = rem.first_not_of(c);
533  if(pos == npos)
534  pos = rem.len; // maybe the line is just all c
535  if(pos > max_to_skip)
536  pos = max_to_skip;
537  _c4dbgpf("skip {}x'{}'", pos, c);
538  _line_progressed(pos);
539  }
540 }
541 #endif
542 
543 template<class EventHandler>
544 template<size_t N>
545 void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
546 {
547  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars));
548  size_t pos = m_state->line_contents.rem.first_not_of(chars);
549  if(pos == npos)
550  pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
551  _c4dbgpf("skip {} characters", pos);
552  _line_progressed(pos);
553 }
554 
555 template<class EventHandler>
556 void ParseEngine<EventHandler>::_skip_comment()
557 {
558  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->line_contents.rem.begins_with('#'));
559  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->line_contents.rem.is_sub(m_state->line_contents.full));
560  csubstr rem = m_state->line_contents.rem;
561  csubstr full = m_state->line_contents.full;
562  // raise an error if the comment is not preceded by whitespace
563  if(!full.begins_with('#'))
564  {
565  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
566  const char c = full[(size_t)(rem.str - full.str - 1)];
567  if(C4_UNLIKELY(c != ' ' && c != '\t'))
568  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
569  }
570  else
571  {
572  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
573  }
574  _c4dbgpf("comment was '{}'", rem);
575  _line_progressed(rem.len);
576 }
577 
578 template<class EventHandler>
579 void ParseEngine<EventHandler>::_maybe_skip_comment()
580 {
581  csubstr s = m_state->line_contents.rem.triml(' ');
582  if(s.begins_with('#'))
583  {
584  _line_progressed((size_t)(s.str - m_state->line_contents.rem.str));
585  _skip_comment();
586  }
587 }
588 
589 template<class EventHandler>
590 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
591 {
592  if(m_state->line_contents.rem.len)
593  {
594  if(m_state->line_contents.rem.str[0] == ' ' || m_state->line_contents.rem.str[0] == '\t')
595  {
596  size_t pos = m_state->line_contents.rem.first_not_of(" \t");
597  if(pos == npos)
598  pos = m_state->line_contents.rem.len; // maybe the line has only spaces
599  _c4dbgpf("skip {}x'{}'", pos, ' ');
600  _line_progressed(pos);
601  }
602  if(m_state->line_contents.rem.len && (m_state->line_contents.rem.str[0] == ':'))
603  {
604  _c4dbgp("found ':' colon next");
605  _line_progressed(1);
606  return true;
607  }
608  }
609  return false;
610 }
611 
612 template<class EventHandler>
613 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
614 {
615  if(m_state->line_contents.rem.len)
616  {
617  if(m_state->line_contents.rem.str[0] == ' ' || m_state->line_contents.rem.str[0] == '\t')
618  {
619  size_t pos = m_state->line_contents.rem.first_not_of(" \t");
620  if(pos == npos)
621  pos = m_state->line_contents.rem.len; // maybe the line has only spaces
622  _c4dbgpf("skip {}x'{}'", pos, ' ');
623  _line_progressed(pos);
624  }
625  if(m_state->line_contents.rem.len && (m_state->line_contents.rem.str[0] == ','))
626  {
627  _c4dbgp("found ',' comma next");
628  _line_progressed(1);
629  return true;
630  }
631  }
632  return false;
633 }
634 
635 
636 //-----------------------------------------------------------------------------
637 
638 template<class EventHandler>
639 csubstr ParseEngine<EventHandler>::_scan_anchor()
640 {
641  csubstr s = m_evt_handler->m_curr->line_contents.rem;
642  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
643  csubstr anchor = s.range(1, s.first_of(' '));
644  _line_progressed(1u + anchor.len);
645  _maybe_skipchars(' ');
646  return anchor;
647 }
648 
649 template<class EventHandler>
650 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
651 {
652  csubstr s = m_evt_handler->m_curr->line_contents.rem;
653  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
654  csubstr ref = s.first(s.first_of(",] :"));
655  _line_progressed(ref.len);
656  return ref;
657 }
658 
659 template<class EventHandler>
660 csubstr ParseEngine<EventHandler>::_scan_ref_map()
661 {
662  csubstr s = m_evt_handler->m_curr->line_contents.rem;
663  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
664  csubstr ref = s.first(s.first_of(",} "));
665  _line_progressed(ref.len);
666  return ref;
667 }
668 
669 template<class EventHandler>
670 csubstr ParseEngine<EventHandler>::_scan_tag()
671 {
672  csubstr rem = m_state->line_contents.rem.triml(' ');
673  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
674  csubstr t;
675  if(rem.begins_with("!!"))
676  {
677  _c4dbgp("begins with '!!'");
678  if(has_any(FLOW))
679  t = rem.left_of(rem.first_of(" ,"));
680  else
681  t = rem.left_of(rem.first_of(' '));
682  }
683  else if(rem.begins_with("!<"))
684  {
685  _c4dbgp("begins with '!<'");
686  t = rem.left_of(rem.first_of('>'), true);
687  }
688  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
689  else if(rem.begins_with("!h!"))
690  {
691  _c4dbgp("begins with '!h!'");
692  t = rem.left_of(rem.first_of(' '));
693  }
694  #endif
695  else
696  {
697  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
698  _c4dbgp("begins with '!'");
699  if(has_any(FLOW))
700  t = rem.left_of(rem.first_of(" ,"));
701  else
702  t = rem.left_of(rem.first_of(' '));
703  }
704  _line_progressed(t.len);
705  _maybe_skip_whitespace_tokens();
706  return t;
707 }
708 
709 
710 //-----------------------------------------------------------------------------
711 
712 template<class EventHandler>
713 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
714 {
715  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
716 
717  // it's not a scalar if it starts with any of these characters:
718  switch(s.str[0])
719  {
720  // these are all legal tokens which mean no scalar is starting:
721  case '[':
722  case ']':
723  case '{':
724  case '}':
725  case '!':
726  case '&':
727  case '*':
728  case '|':
729  case '>':
730  case '#':
731  _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
732  return false;
733  // '-' and ':' are illegal at the beginning if not followed by a scalar character
734  case '-':
735  case ':':
736  if(s.len > 1)
737  {
738  switch(s.str[1])
739  {
740  case '\n':
741  case '\r':
742  case '{':
743  case '[':
744  //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
745  _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
746  break;
747  case ' ':
748  case '}':
749  case ']':
750  if(s.str[0] == ':')
751  {
752  _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
753  return false;
754  }
755  break;
756  default:
757  break;
758  }
759  }
760  else
761  {
762  return false;
763  }
764  break;
765  case '?':
766  if(s.len > 1)
767  {
768  switch(s.str[1])
769  {
770  case ' ':
771  case '\n':
772  case '\r':
773  _RYML_WITHOUT_TAB_TOKENS(case '\t':)
774  _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
775  return false;
776  case '{':
777  case '}':
778  case '[':
779  case ']':
780  _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
781  break;
782  default:
783  break;
784  }
785  }
786  else
787  {
788  return false;
789  }
790  break;
791  // everything else is a legal starting character
792  default:
793  break;
794  }
795 
796  return true;
797 }
798 
799 template<class EventHandler>
800 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
801 {
802  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
803  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
804  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
805  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
806  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
807 
808  substr s = m_state->line_contents.rem;
809  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
810  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
811 
812  if(!s.len)
813  return false;
814 
815  if(!_is_valid_start_scalar_plain_flow(s))
816  return false;
817 
818  _c4dbgp("scanning seqflow scalar...");
819 
820  const size_t start_offset = m_state->pos.offset;
821  bool needs_filter = false;
822  while(true)
823  {
824  _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
825  for(size_t i = 0; i < s.len; ++i)
826  {
827  const char c = s.str[i];
828  switch(c)
829  {
830  case ',':
831  _c4dbgpf("found terminating character at {}: '{}'", i, c);
832  _line_progressed(i);
833  if(m_state->pos.offset + i > start_offset)
834  {
835  goto ended_scalar;
836  }
837  else
838  {
839  _c4dbgp("at the beginning. no scalar here.");
840  return false;
841  }
842  break;
843  case ']':
844  _c4dbgpf("found terminating character at {}: '{}'", i, c);
845  _line_progressed(i);
846  goto ended_scalar;
847  break;
848  case '#':
849  _c4dbgp("found suspicious '#'");
850  if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
851  {
852  _c4dbgpf("found terminating character at {}: '{}'", i, c);
853  _line_progressed(i);
854  goto ended_scalar;
855  }
856  break;
857  case ':':
858  _c4dbgp("found suspicious ':'");
859  if(s.len > i+1)
860  {
861  const char next = s.str[i+1];
862  _c4dbgpf("next char is '{}'", _c4prc(next));
863  if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
864  {
865  _c4dbgp("map starting!");
866  if(m_state->pos.offset + i > start_offset)
867  {
868  _c4dbgp("scalar finished!");
869  _line_progressed(i);
870  goto ended_scalar;
871  }
872  else
873  {
874  _c4dbgp("at the beginning. no scalar here.");
875  return false;
876  }
877  }
878  else
879  {
880  _c4dbgp("it's a scalar indeed.");
881  ++i; // skip the next char
882  }
883  }
884  else if(s.len == i+1)
885  {
886  _c4dbgp("':' at line end. map starting!");
887  return false;
888  }
889  break;
890  case '[':
891  case '{':
892  case '}':
893  _line_progressed(i);
894  _c4err("invalid character: '{}'", c); // noreturn
895  default:
896  ;
897  }
898  }
899  _line_progressed(s.len);
900  if(!_finished_file())
901  {
902  _c4dbgp("next line!");
903  _line_ended();
904  _scan_line();
905  }
906  else
907  {
908  _c4dbgp("file finished!");
909  goto ended_scalar;
910  }
911  s = m_state->line_contents.rem;
912  needs_filter = true;
913  }
914 
915 ended_scalar:
916 
917  sc->scalar = m_buf.range(start_offset, m_state->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
918  sc->needs_filter = needs_filter;
919 
920  _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
921 
922  return true;
923 }
924 
925 template<class EventHandler>
926 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
927 {
928  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
929  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
930  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
931  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
932  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
933 
934  substr s = m_state->line_contents.rem;
935  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
936 
937  if(!s.len)
938  return false;
939 
940  if(!_is_valid_start_scalar_plain_flow(s))
941  return false;
942 
943  _c4dbgp("scanning scalar...");
944 
945  const size_t start_offset = m_state->pos.offset;
946  bool needs_filter = false;
947  while(true)
948  {
949  for(size_t i = 0; i < s.len; ++i)
950  {
951  const char c = s.str[i];
952  switch(c)
953  {
954  case ',':
955  case '}':
956  _line_progressed(i);
957  _c4dbgpf("found terminating character: '{}'", c);
958  goto ended_scalar;
959  case ':':
960  if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
961  {
962  _line_progressed(i);
963  _c4dbgpf("found terminating character: '{}'", c);
964  goto ended_scalar;
965  }
966  break;
967  case '{':
968  case '[':
969  _line_progressed(i);
970  _c4err("invalid character: '{}'", c); // noreturn
971  break;
972  case ']':
973  _line_progressed(i);
974  if(has_any(RSEQIMAP))
975  goto ended_scalar;
976  else
977  _c4err("invalid character: '{}'", c); // noreturn
978  break;
979  case '#':
980  if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
981  {
982  _line_progressed(i);
983  _c4dbgpf("found terminating character: '{}'", c);
984  goto ended_scalar;
985  }
986  break;
987  default:
988  ;
989  }
990  }
991  _c4dbgp("next line!");
992  _line_progressed(s.len);
993  if(!_finished_file())
994  {
995  _c4dbgp("next line!");
996  _line_ended();
997  _scan_line();
998  }
999  else
1000  {
1001  _c4dbgp("file finished!");
1002  goto ended_scalar;
1003  }
1004  s = m_state->line_contents.rem;
1005  needs_filter = true;
1006  }
1007 
1008 ended_scalar:
1009 
1010  sc->scalar = m_buf.range(start_offset, m_state->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
1011  sc->needs_filter = needs_filter;
1012 
1013  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1014 
1015  return true;
1016 }
1017 
1018 template<class EventHandler>
1019 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1020 {
1021  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1022  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1023  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1024  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1025 
1026  substr s = m_state->line_contents.rem;
1027  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1028 
1029  if(!s.len)
1030  return false;
1031 
1032  _c4dbgp("scanning scalar...");
1033 
1034  switch(s.str[0])
1035  {
1036  case ']':
1037  case '{':
1038  case ',':
1039  _c4dbgp("not a scalar.");
1040  return false;
1041  }
1042 
1043  {
1044  const size_t len = _is_special_json_scalar(s);
1045  if(len)
1046  {
1047  sc->scalar = s.first(len);
1048  sc->needs_filter = false;
1049  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1050  _line_progressed(len);
1051  return true;
1052  }
1053  }
1054 
1055  // must be a number
1056  size_t i = 0;
1057  for( ; i < s.len; ++i)
1058  {
1059  const char c = s.str[i];
1060  switch(c)
1061  {
1062  case ',':
1063  case ']':
1064  case ' ':
1065  case '\t':
1066  _c4dbgpf("found terminating character: '{}'", c);
1067  goto ended_scalar;
1068  case '#':
1069  if(!i || s.str[i-1] == ' ')
1070  {
1071  _c4dbgpf("found terminating character: '{}'", c);
1072  goto ended_scalar;
1073  }
1074  break;
1075  default:
1076  ;
1077  }
1078  }
1079 
1080 ended_scalar:
1081 
1082  if(C4_LIKELY(i > 0))
1083  {
1084  _line_progressed(i);
1085  sc->scalar = s.first(i);
1086  sc->needs_filter = false;
1087  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1088  return true;
1089  }
1090 
1091  return false;
1092 }
1093 
1094 template<class EventHandler>
1095 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1096 {
1097  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1098  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
1099  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1100  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
1101  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
1102 
1103  substr s = m_state->line_contents.rem;
1104  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1105 
1106  if(!s.len)
1107  return false;
1108 
1109  _c4dbgp("scanning scalar...");
1110 
1111  {
1112  const size_t len = _is_special_json_scalar(s);
1113  if(len)
1114  {
1115  sc->scalar = s.first(len);
1116  sc->needs_filter = false;
1117  _c4dbgpf("special json scalar: '{}'", sc->scalar);
1118  _line_progressed(len);
1119  return true;
1120  }
1121  }
1122 
1123  // must be a number
1124  size_t i = 0;
1125  for( ; i < s.len; ++i)
1126  {
1127  const char c = s.str[i];
1128  switch(c)
1129  {
1130  case ',':
1131  case '}':
1132  case ' ':
1133  case '\t':
1134  _c4dbgpf("found terminating character: '{}'", c);
1135  goto ended_scalar;
1136  case '#':
1137  if(!i || s.str[i-1] == ' ')
1138  {
1139  _c4dbgpf("found terminating character: '{}'", c);
1140  goto ended_scalar;
1141  }
1142  break;
1143  default:
1144  ;
1145  }
1146  }
1147 
1148 ended_scalar:
1149 
1150  if(C4_LIKELY(i > 0))
1151  {
1152  _line_progressed(i);
1153  sc->scalar = s.first(i);
1154  sc->needs_filter = false;
1155  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1156  return true;
1157  }
1158 
1159  return false;
1160 }
1161 
1162 template<class EventHandler>
1163 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1164 {
1165  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
1166  return (m_state->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1167 }
1168 
1169 template<class EventHandler>
1170 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1171 {
1172  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
1173  return (m_state->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1174 }
1175 
// Scan a plain (unquoted) scalar in block context; the scalar may span
// several lines.
//
// @param sc receives the scanned span (with trailing " \n\r\t" trimmed) and
//        the needs_filter flag, which is set once the scan continues onto a
//        following line
// @param indentation the scan stops when the next line has fewer leading
//        spaces than this
// @return false, without advancing the line, when the remainder of the line
//         is empty or starts with a token that is rejected by the switch on
//         the first character; true otherwise
1176 template<class EventHandler>
1177 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1178 {
1179  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1180  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1181  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
1182 
1183  substr s = m_state->line_contents.rem;
1184  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
1185 
1186  if(!s.len)
1187  return false;
1188 
      // reject first characters that start something other than a plain scalar
1189  switch(s.str[0])
1190  {
1191  case '-':
1192  if(_is_blck_token(s))
1193  {
1194  return false;
1195  }
1196  else if(_is_doc_begin(s))
1197  {
1198  _c4dbgp("token is doc start");
1199  return false;
1200  }
1201  break;
1202  case ':':
1203  case '?':
1204  if(_is_blck_token(s))
1205  return false;
1206  break;
1207  case '[':
1208  case '{':
1209  case '&':
1210  case '*':
1211  case '!':
1212  _RYML_WITH_TAB_TOKENS(case '\t':)
1213  return false;
1214  case '.':
1215  if(_is_doc_end(s))
1216  {
1217  _c4dbgp("token is doc end");
1218  return false;
1219  }
1220  break;
1221  }
1222 
1223  _c4dbgpf("plain scalar! indentation={}", indentation);
1224 
      // remember where the scalar starts: the offset delimits the result,
      // and the line number tells whether we are still on the first line
1225  const size_t start_offset = m_state->pos.offset;
1226  const size_t start_line = m_state->pos.line;
1227 
1228  bool needs_filter = false;
      // one iteration per line of the scalar
1229  while(true)
1230  {
1231  _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
1232  for(size_t i = 0; i < s.len; ++i)
1233  {
1234  const char curr = s.str[i];
1235  //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1236  switch(curr)
1237  {
1238  case ':':
1239  _c4dbgpf("[{}]: got suspicious ':'", i);
1240  // are there more characters?
      // the ':' is significant only at the end of the line or when
      // followed by a space (or by a tab, when tab tokens are enabled)
1241  if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1242  {
1243  _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1244  _line_progressed(i);
1245  // ': ' is accepted only on the first line
1246  if(C4_LIKELY(m_state->pos.line == start_line))
1247  {
1248  _c4dbgp("start line. scalar ends here");
1249  goto ended_scalar;
1250  }
1251  else
1252  {
1253  _c4err("parse error");
1254  }
1255  }
1256  else
1257  {
      // advance over a run of consecutive ':' so that the last colon
      // of the run is the next character examined by the for loop
1258  size_t j = i;
1259  while(j + 1 < s.len && s.str[j+1] == ':')
1260  {
1261  _c4dbgp("skip colon");
1262  ++j;
1263  }
1264  i = j > i ? j-1 : i;
1265  _c4dbgp("nothing to see here");
1266  }
1267  break;
1268  case '#':
1269  _c4dbgp("got suspicious '#'");
      // '#' starts a comment only as the first character or when
      // preceded by a space or tab
1270  if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1271  {
1272  _c4dbgp("comment! scalar ends here");
1273  _line_progressed(i);
1274  goto ended_scalar;
1275  }
1276  else
1277  {
1278  _c4dbgp("nothing to see here");
1279  }
1280  break;
1281  }
1282  }
      // no terminator on this line: consume all of it, then peek at the
      // next line to decide whether the scalar continues there
1283  _line_progressed(s.len);
1284  csubstr next_peeked = _peek_next_line(m_state->pos.offset);
1285  next_peeked = next_peeked.trimr("\n\r");
      // NOTE(review): presumably npos when the peeked line is empty or all
      // spaces, in which case neither branch below is taken -- confirm
1286  const size_t next_indentation = next_peeked.first_not_of(' ');
1287  _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1288  if(next_indentation < indentation)
1289  {
1290  _c4dbgp("smaller indentation! scalar ended");
1291  goto ended_scalar;
1292  }
1293  else if(next_indentation == 0 && next_peeked.len > 0)
1294  {
      // an unindented next line may be a document marker, which ends the scalar
1295  const char first = next_peeked.str[0];
1296  switch(first)
1297  {
1298  case '-':
1299  next_peeked = next_peeked.trimr("\n\r");
1300  _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1301  if(_is_doc_begin_token(next_peeked))
1302  {
1303  _c4dbgp("doc begin! scalar ended");
1304  goto ended_scalar;
1305  }
1306  break;
1307  case '.':
1308  next_peeked = next_peeked.trimr("\n\r");
1309  _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
1310  if(_is_doc_end_token(next_peeked))
1311  {
1312  _c4dbgp("doc end! scalar ended");
1313  goto ended_scalar;
1314  }
1315  break;
1316  }
1317  }
1318  // load with next line
1319  _c4dbgp("next line!");
1320  if(!_finished_file())
1321  {
1322  _c4dbgp("next line!");
1323  _line_ended();
1324  _scan_line();
1325  }
1326  else
1327  {
1328  _c4dbgp("file finished!");
1329  goto ended_scalar;
1330  }
1331  s = m_state->line_contents.rem;
      // the scalar now spans more than one line
1332  needs_filter = true;
1333  }
1334 
1335 ended_scalar:
1336 
      // the scalar is everything from the starting offset up to the current
      // position, minus trailing whitespace and newlines
1337  sc->scalar = m_buf.range(start_offset, m_state->pos.offset).trimr(" \n\r\t");
1338  sc->needs_filter = needs_filter;
1339 
1340  _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1341 
1342  return true;
1343 }
1344 
1345 template<class EventHandler>
1346 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1347 {
1348  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
1349  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1350  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
1351  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1352  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1353  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
1354  return _scan_scalar_plain_blck(sc, m_state->indref + 1u);
1355 }
1356 
1357 template<class EventHandler>
1358 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1359 {
1360  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
1361  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1362  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1363  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
1364  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
1365  return _scan_scalar_plain_blck(sc, m_state->indref + 1u);
1366 }
1367 
1368 template<class EventHandler>
1369 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1370 {
1371  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
1372  return _scan_scalar_plain_blck(sc, m_state->indref);
1373 }
1374 
1375 
1376 //-----------------------------------------------------------------------------
1377 
1378 template<class EventHandler>
1379 substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1380 {
1381  substr rem{}; // declare here because of the goto
1382  size_t nlpos{}; // declare here because of the goto
1383  pos = pos == npos ? m_state->pos.offset : pos;
1384  if(pos >= m_buf.len)
1385  goto next_is_empty;
1386 
1387  // look for the next newline chars, and jump to the right of those
1388  rem = from_next_line(m_buf.sub(pos));
1389  if(rem.empty())
1390  goto next_is_empty;
1391 
1392  // now get everything up to and including the following newline chars
1393  nlpos = rem.first_of("\r\n");
1394  if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1395  nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1396  rem = rem.left_of(nlpos, /*include_pos*/true);
1397 
1398  _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1399  return rem;
1400 
1401 next_is_empty:
1402  _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1403  return {};
1404 }
1405 
1406 //-----------------------------------------------------------------------------
1407 
1408 template<class EventHandler>
1409 void ParseEngine<EventHandler>::_scan_line()
1410 {
1411  if(C4_LIKELY(m_state->pos.offset < m_buf.len))
1412  m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset);
1413  else
1414  m_state->line_contents.reset(m_buf.last(0), m_buf.last(0));
1415 }
1416 
1417 template<class EventHandler>
1418 void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1419 {
1420  _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead);
1421  m_state->pos.offset += ahead;
1422  m_state->pos.col += ahead;
1423  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1);
1424  m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead);
1425 }
1426 
1427 template<class EventHandler>
1428 void ParseEngine<EventHandler>::_line_ended()
1429 {
1430  _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1431  m_state->pos.line,
1432  m_state->line_contents.full.len,
1433  m_state->pos.offset, m_state->pos.offset + m_state->line_contents.full.len - m_state->line_contents.stripped.len,
1434  m_state->pos.col, 1);
1435  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len + 1);
1436  m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len;
1437  ++m_state->pos.line;
1438  m_state->pos.col = 1;
1439 }
1440 
1441 template<class EventHandler>
1442 void ParseEngine<EventHandler>::_line_ended_undo()
1443 {
1444  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.col == 1u);
1445  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.line > 0u);
1446  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len);
1447  const size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len;
1448  _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta);
1449  m_state->pos.offset -= delta;
1450  --m_state->pos.line;
1451  m_state->pos.col = m_state->line_contents.stripped.len + 1u;
1452  // don't forget to undo also the changes to the remainder of the line
1453  //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r');
1454  m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0);
1455 }
1456 
1457 
1458 //-----------------------------------------------------------------------------
1459 template<class EventHandler>
1460 void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1461 {
1462  m_state->indref = indentation;
1463  _c4dbgpf("state[{}]: saving indentation: {}", m_state->level, m_state->indref);
1464 }
1465 
1466 template<class EventHandler>
1467 void ParseEngine<EventHandler>::_save_indentation()
1468 {
1469  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin());
1470  m_state->indref = m_state->line_contents.current_col();
1471  _c4dbgpf("state[{}]: saving indentation: {}", m_state->level, m_state->indref);
1472 }
1473 
1474 
1475 //-----------------------------------------------------------------------------
1476 
1477 template<class EventHandler>
1478 void ParseEngine<EventHandler>::_end_map_blck()
1479 {
1480  _c4dbgp("mapblck: end");
1481  if(has_any(RKCL|RVAL))
1482  {
1483  _c4dbgp("mapblck: set missing val");
1484  _handle_annotations_before_blck_val_scalar();
1485  m_evt_handler->set_val_scalar_plain({});
1486  }
1487  else if(has_any(QMRK))
1488  {
1489  _c4dbgp("mapblck: set missing keyval");
1490  _handle_annotations_before_blck_key_scalar();
1491  m_evt_handler->set_key_scalar_plain({});
1492  _handle_annotations_before_blck_val_scalar();
1493  m_evt_handler->set_val_scalar_plain({});
1494  }
1495  m_evt_handler->end_map();
1496 }
1497 
1498 template<class EventHandler>
1499 void ParseEngine<EventHandler>::_end_seq_blck()
1500 {
1501  if(has_any(RVAL))
1502  {
1503  _c4dbgp("seqblck: set missing val");
1504  _handle_annotations_before_blck_val_scalar();
1505  m_evt_handler->set_val_scalar_plain({});
1506  }
1507  m_evt_handler->end_seq();
1508 }
1509 
1510 template<class EventHandler>
1511 void ParseEngine<EventHandler>::_end2_map()
1512 {
1513  _c4dbgp("map: end");
1514  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
1515  if(has_any(BLCK))
1516  {
1517  _end_map_blck();
1518  }
1519  else
1520  {
1521  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1522  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1523  m_evt_handler->_pop();
1524  }
1525 }
1526 
1527 template<class EventHandler>
1528 void ParseEngine<EventHandler>::_end2_seq()
1529 {
1530  _c4dbgp("seq: end");
1531  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
1532  if(has_any(BLCK))
1533  {
1534  _end_seq_blck();
1535  }
1536  else
1537  {
1538  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
1539  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
1540  m_evt_handler->_pop();
1541  }
1542 }
1543 
1544 template<class EventHandler>
1545 void ParseEngine<EventHandler>::_begin2_doc()
1546 {
1547  m_doc_empty = true;
1548  add_flags(RDOC);
1549  m_evt_handler->begin_doc();
1550  m_evt_handler->m_curr->indref = 0; // ?
1551 }
1552 
1553 template<class EventHandler>
1554 void ParseEngine<EventHandler>::_begin2_doc_expl()
1555 {
1556  m_doc_empty = true;
1557  add_flags(RDOC);
1558  m_evt_handler->begin_doc_expl();
1559  m_evt_handler->m_curr->indref = 0; // ?
1560 }
1561 
1562 template<class EventHandler>
1563 void ParseEngine<EventHandler>::_end2_doc()
1564 {
1565  _c4dbgp("doc: end");
1566  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1567  if(m_doc_empty)
1568  {
1569  _c4dbgp("doc was empty; add empty val");
1570  m_evt_handler->set_val_scalar_plain({});
1571  }
1572  m_evt_handler->end_doc();
1573 }
1574 
1575 template<class EventHandler>
1576 void ParseEngine<EventHandler>::_end2_doc_expl()
1577 {
1578  _c4dbgp("doc: end");
1579  if(m_doc_empty)
1580  {
1581  _c4dbgp("doc: no children; add empty val");
1582  m_evt_handler->set_val_scalar_plain({});
1583  }
1584  m_evt_handler->end_doc_expl();
1585 }
1586 
1587 template<class EventHandler>
1588 void ParseEngine<EventHandler>::_maybe_begin_doc()
1589 {
1590  if(has_none(RDOC))
1591  {
1592  _c4dbgp("doc must be started");
1593  _begin2_doc();
1594  }
1595 }
1596 template<class EventHandler>
1597 void ParseEngine<EventHandler>::_maybe_end_doc()
1598 {
1599  if(has_any(RDOC))
1600  {
1601  _c4dbgp("doc must be finished");
1602  _end2_doc();
1603  }
1604 }
1605 
1606 template<class EventHandler>
1607 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1608 {
1609  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1610  if(m_evt_handler->m_stack[0].flags & RDOC)
1611  {
1612  _c4dbgp("root is RDOC");
1613  if(m_state->level != 0)
1614  _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1615  }
1616  else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1617  {
1618  _c4dbgp("root is STREAM");
1619  if(m_state->level != 1)
1620  _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1621  }
1622  else
1623  {
1624  _c4err("internal error");
1625  }
1626  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
1627 }
1628 
1629 template<class EventHandler>
1630 void ParseEngine<EventHandler>::_end_doc_suddenly()
1631 {
1632  _c4dbgp("end doc suddenly");
1633  _end_doc_suddenly__pop();
1634  _end2_doc_expl();
1635  addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1636 }
1637 
1638 template<class EventHandler>
1639 void ParseEngine<EventHandler>::_start_doc_suddenly()
1640 {
1641  _c4dbgp("start doc suddenly");
1642  _end_doc_suddenly__pop();
1643  _end2_doc();
1644  _begin2_doc_expl();
1645 }
1646 
1647 template<class EventHandler>
1648 void ParseEngine<EventHandler>::_end_stream()
1649 {
1650  _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id);
1651  if(has_all(RSEQ|FLOW))
1652  _c4err("missing terminating ]");
1653  else if(has_all(RMAP|FLOW))
1654  _c4err("missing terminating }");
1655  if(m_evt_handler->m_stack.size() > 1)
1656  _handle_indentation_pop(m_evt_handler->m_stack.begin());
1657  if(has_all(RDOC))
1658  {
1659  _end2_doc();
1660  }
1661  else if(has_all(RTOP|RUNK))
1662  {
1663  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1664  {
1665  if(m_doc_empty)
1666  {
1667  m_evt_handler->begin_doc();
1668  _handle_annotations_before_blck_val_scalar();
1669  m_evt_handler->set_val_scalar_plain({});
1670  m_evt_handler->end_doc();
1671  }
1672  }
1673  }
1674  m_evt_handler->end_stream();
1675 }
1676 
1677 
1678 template<class EventHandler>
1679 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
1680 {
1681  _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_state->level - popto->level, (((m_state->level - popto->level) > 1) ? "s" : ""), m_state->level, m_state->indref, popto->level, popto->indref);
1682  while(m_state != popto)
1683  {
1684  if(has_any(RSEQ))
1685  {
1686  _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_state->level, m_state->indref, m_state);
1687  _end2_seq();
1688  }
1689  else if(has_any(RMAP))
1690  {
1691  _c4dbgpf("popping map at level {} (indentation={},addr={})", m_state->level, m_state->indref, m_state);
1692  _end2_map();
1693  }
1694  else
1695  {
1696  break;
1697  }
1698  }
1699  _c4dbgpf("current level is {} (indentation={})", m_state->level, m_state->indref);
1700 }
1701 
1702 template<class EventHandler>
1703 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1704 {
1705  // search the stack frame to jump to based on its indentation
1706  using state_type = typename EventHandler::state;
1707  state_type const* popto = nullptr;
1708  auto &stack = m_evt_handler->m_stack;
1709  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1710  _RYML_CB_ASSERT(stack.m_callbacks, m_state >= stack.begin() && m_state < stack.end());
1711  const size_t ind = m_state->line_contents.indentation;
1712  #ifdef RYML_DBG
1713  if(_dbg_enabled())
1714  {
1715  char flagbuf_[128];
1716  for(state_type const& s : stack)
1717  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1718  }
1719  #endif
1720  for(state_type const* s = m_state-1; s >= stack.begin(); --s)
1721  {
1722  _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1723  if(s->indref == ind)
1724  {
1725  _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
1726  popto = s;
1727  break;
1728  }
1729  }
1730  if(!popto || popto >= m_state || popto->level >= m_state->level)
1731  {
1732  _c4err("parse error: incorrect indentation?");
1733  }
1734  _handle_indentation_pop(popto);
1735 }
1736 
// Pop out of the current block map after the indentation decreased.
//
// Searches the state stack, from the parent of the current state towards
// the bottom (the bottom entry itself is never visited), for a state whose
// reference indentation (indref) equals the indentation of the current
// line, and pops to it with _handle_indentation_pop(). The search stops at
// the first state with a smaller indref. Among several states with an equal
// indref the search keeps going, so a state closer to the bottom replaces
// the previous candidate, with two exceptions commented below. Reports a
// parse error when no suitable state is found.
1737 template<class EventHandler>
1738 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1739 {
1740  // search the stack frame to jump to based on its indentation
1741  using state_type = typename EventHandler::state;
1742  auto &stack = m_evt_handler->m_stack;
1743  _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
1744  _RYML_CB_ASSERT(stack.m_callbacks, m_state >= stack.begin() && m_state < stack.end());
1745  const size_t ind = m_state->line_contents.indentation;
1746  state_type const* popto = nullptr;
1747  #ifdef RYML_DBG
      // flagbuf_ exists only in debug builds; outside this #ifdef it is
      // referenced only from the arguments of a _c4dbgpf() call
1748  char flagbuf_[128];
1749  if(_dbg_enabled())
1750  {
1751  for(state_type const& s : stack)
1752  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1753  }
1754  #endif
1755  for(state_type const* s = m_state-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
1756  {
1757  _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1758  if(s->indref < ind)
1759  {
      // this state is less indented than the line: stop searching
1760  break;
1761  }
1762  else if(s->indref == ind)
1763  {
1764  _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
      // exception 1: once a candidate exists, do not replace it with a
      // RTOP state that is neither a map nor a seq
1765  if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
1766  {
1767  break;
1768  }
1769  popto = s;
      // exception 2: a block seq at this indentation is the final target
      // when the rest of the line starts with a '-' block token
1770  if(has_all(RSEQ|BLCK, s))
1771  {
1772  csubstr rem = m_state->line_contents.rem;
1773  const size_t first = rem.first_not_of(' ');
1774  _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
1775  rem = rem.right_of(first, true);
1776  _c4dbgpf("indentless? rem='{}' first={}", rem, first);
1777  if(rem.begins_with('-') && _is_blck_token(rem))
1778  {
1779  _c4dbgp("parent was indentless seq");
1780  break;
1781  }
1782  }
1783  }
1784  }
      // the target must exist and be strictly below the current state
1785  if(!popto || popto >= m_state || popto->level >= m_state->level)
1786  {
1787  _c4err("parse error: incorrect indentation?");
1788  }
1789  _handle_indentation_pop(popto);
1790 }
1791 
1792 
1793 //-----------------------------------------------------------------------------
// Scan a single-quoted scalar starting at the current position; the scalar
// may span several lines.
//
// Leading spaces before the opening quote are skipped. The parser position
// is advanced past the closing quote. Reports an error through _c4err()
// when the file ends before a closing quote is found.
//
// @return the span between the quotes (quotes excluded), together with a
//         flag telling whether the span needs filtering: set when it
//         contains an escaped quote (''), spans more than one line, has a
//         line consisting only of spaces, or has a line with leading spaces
1794 template<class EventHandler>
1795 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1796 {
1797  // quoted scalars can spread over multiple lines!
1798  // nice explanation here: http://yaml-multiline.info/
1799 
1800  // a span to the end of the file
1801  size_t b = m_state->pos.offset;
1802  substr s = m_buf.sub(b);
1803  if(s.begins_with(' '))
1804  {
1805  s = s.triml(' ');
1806  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1807  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1808  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1809  }
      // from here on, b is the offset of the opening quote
1810  b = m_state->pos.offset; // take this into account
1811  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
1812 
1813  // skip the opening quote
1814  _line_progressed(1);
1815  s = s.sub(1);
1816 
1817  bool needs_filter = false;
1818 
1819  size_t numlines = 1; // we already have one line
1820  size_t pos = npos; // find the pos of the matching quote
      // one iteration per line, until the closing quote is found
1821  while( ! _finished_file())
1822  {
1823  const csubstr line = m_state->line_contents.rem;
1824  bool line_is_blank = true;
1825  _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line);
1826  for(size_t i = 0; i < line.len; ++i)
1827  {
1828  const char curr = line.str[i];
1829  if(curr == '\'') // single quotes are escaped with two single quotes
1830  {
      // '~' is a stand-in (any char other than a quote would do)
      // for "no next character on this line"
1831  const char next = i+1 < line.len ? line.str[i+1] : '~';
1832  if(next != '\'') // so just look for the first quote
1833  { // without another after it
1834  pos = i;
1835  break;
1836  }
1837  else
1838  {
1839  needs_filter = true; // needs filter to remove escaped quotes
1840  ++i; // skip the escaped quote
1841  }
1842  }
1843  else if(curr != ' ')
1844  {
1845  line_is_blank = false;
1846  }
1847  }
1848 
1849  // leading whitespace also needs filtering
1850  needs_filter = needs_filter
1851  || (numlines > 1)
1852  || line_is_blank
1853  || (_at_line_begin() && line.begins_with(' '));
1854 
1855  if(pos == npos)
1856  {
      // no closing quote on this line: consume it whole
1857  _line_progressed(line.len);
1858  ++numlines;
1859  }
1860  else
1861  {
1862  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1863  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\'');
1864  _line_progressed(pos + 1); // progress beyond the quote
      // pos changes meaning here: it was an index into the line; it
      // becomes the distance from the opening quote to the closing one
1865  pos = m_state->pos.offset - b - 1; // but we stop before it
1866  break;
1867  }
1868 
1869  _line_ended();
1870  _scan_line();
1871  }
1872 
1873  if(pos == npos)
1874  {
1875  _c4err("reached end of file while looking for closing quote");
1876  }
1877  else
1878  {
1879  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1880  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1881  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
      // s starts right after the opening quote, so the contents between
      // the quotes have length pos-1
1882  s = s.sub(0, pos-1);
1883  }
1884 
1885  _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
1886 
1887  return ScannedScalar { s, needs_filter };
1888 }
1889 
1890 
1891 //-----------------------------------------------------------------------------
// Scan a double-quoted scalar starting at the current position; the scalar
// may span several lines.
//
// Leading spaces before the opening quote are skipped. The parser position
// is advanced past the closing quote. Reports an error through _c4err()
// when the file ends before a closing quote is found.
//
// @return the span between the quotes (quotes excluded), together with a
//         flag telling whether the span needs filtering: set when it
//         contains a backslash, spans more than one line, has a line
//         consisting only of spaces, or has a line with leading spaces
1892 template<class EventHandler>
1893 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1894 {
1895  // quoted scalars can spread over multiple lines!
1896  // nice explanation here: http://yaml-multiline.info/
1897 
1898  // a span to the end of the file
1899  size_t b = m_state->pos.offset;
1900  substr s = m_buf.sub(b);
1901  if(s.begins_with(' '))
1902  {
1903  s = s.triml(' ');
1904  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1905  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1906  _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
1907  }
      // from here on, b is the offset of the opening quote
1908  b = m_state->pos.offset; // take this into account
1909  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
1910 
1911  // skip the opening quote
1912  _line_progressed(1);
1913  s = s.sub(1);
1914 
1915  bool needs_filter = false;
1916 
1917  size_t numlines = 1; // we already have one line
1918  size_t pos = npos; // find the pos of the matching quote
      // one iteration per line, until the closing quote is found
1919  while( ! _finished_file())
1920  {
1921  const csubstr line = m_state->line_contents.rem;
1922  bool line_is_blank = true;
1923  _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line);
1924  for(size_t i = 0; i < line.len; ++i)
1925  {
1926  const char curr = line.str[i];
1927  if(curr != ' ')
1928  line_is_blank = false;
1929  // every \ is an escape
1930  if(curr == '\\')
1931  {
      // '~' is a stand-in for "no next character on this line"
1932  const char next = i+1 < line.len ? line.str[i+1] : '~';
1933  needs_filter = true;
      // jump over an escaped quote or backslash, so that it is not
      // taken for the closing quote or for the start of an escape
1934  if(next == '"' || next == '\\')
1935  ++i;
1936  }
1937  else if(curr == '"')
1938  {
1939  pos = i;
1940  break;
1941  }
1942  }
1943 
1944  // leading whitespace also needs filtering
1945  needs_filter = needs_filter
1946  || (numlines > 1)
1947  || line_is_blank
1948  || (_at_line_begin() && line.begins_with(' '));
1949 
1950  if(pos == npos)
1951  {
      // no closing quote on this line: consume it whole
1952  _line_progressed(line.len);
1953  ++numlines;
1954  }
1955  else
1956  {
1957  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1958  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"');
1959  _line_progressed(pos + 1); // progress beyond the quote
      // pos changes meaning here: it was an index into the line; it
      // becomes the distance from the opening quote to the closing one
1960  pos = m_state->pos.offset - b - 1; // but we stop before it
1961  break;
1962  }
1963 
1964  _line_ended();
1965  _scan_line();
1966  }
1967 
1968  if(pos == npos)
1969  {
1970  _c4err("reached end of file looking for closing quote");
1971  }
1972  else
1973  {
1974  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1975  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
1976  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
      // s starts right after the opening quote, so the contents between
      // the quotes have length pos-1
1977  s = s.sub(0, pos-1);
1978  }
1979 
1980  _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
1981 
1982  return ScannedScalar { s, needs_filter };
1983 }
1984 
1985 
1986 //-----------------------------------------------------------------------------
1987 template<class EventHandler>
1988 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
1989 {
1990  _c4dbgpf("blck: indref={}", indref);
1991  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
1992 
1993  // nice explanation here: http://yaml-multiline.info/
1994  csubstr s = m_state->line_contents.rem;
1995  csubstr trimmed = s.triml(' ');
1996  if(trimmed.str > s.str)
1997  {
1998  _c4dbgp("skipping whitespace");
1999  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2000  _line_progressed(static_cast<size_t>(trimmed.str - s.str));
2001  s = trimmed;
2002  }
2003  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
2004 
2005  _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
2006 
2007  // parse the spec
2008  BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2009  size_t indentation = npos; // have to find out if no spec is given
2010  csubstr digits;
2011  if(s.len > 1)
2012  {
2013  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
2014  csubstr t = s.sub(1);
2015  _c4dbgpf("blck: spec is multichar: '{}'", t);
2016  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2017  size_t pos = t.first_of("-+");
2018  _c4dbgpf("blck: spec chomp char at {}", pos);
2019  if(pos != npos)
2020  {
2021  if(t[pos] == '-')
2022  chomp = CHOMP_STRIP;
2023  else if(t[pos] == '+')
2024  chomp = CHOMP_KEEP;
2025  if(pos == 0)
2026  t = t.sub(1);
2027  else
2028  t = t.first(pos);
2029  }
2030  // from here to the end, only digits are considered
2031  digits = t.left_of(t.first_not_of("0123456789"));
2032  if( ! digits.empty())
2033  {
2034  if(digits.len > 1)
2035  _c4err("parse error: invalid indentation");
2036  _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2037  if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
2038  _c4err("parse error: could not read indentation as decimal");
2039  if(C4_UNLIKELY( ! indentation))
2040  _c4err("parse error: null indentation");
2041  _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+indref);
2042  indentation += m_state->indref;
2043  }
2044  }
2045 
2046  _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2047 
2048  // finish the current line
2049  _line_progressed(s.len);
2050  _line_ended();
2051  _scan_line();
2052 
2053  // start with a zero-length block, already pointing at the right place
2054  substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0);
2055  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin());
2056 
2057  // read every full line into a raw block,
2058  // from which newlines are to be stripped as needed.
2059  //
2060  // If no explicit indentation was given, pick it from the first
2061  // non-empty line. See
2062  // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2063  size_t num_lines = 0;
2064  size_t first = m_state->pos.line;
2065  size_t provisional_indentation = npos;
2066  LineContents lc;
2067  while(( ! _finished_file()))
2068  {
2069  // peek next line, but do not advance immediately
2070  lc.reset_with_next_line(m_buf, m_state->pos.offset);
2071  _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2072  // evaluate termination conditions
2073  if(indentation != npos)
2074  {
2075  // stop when the line is deindented and not empty
2076  if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
2077  {
2078  if(raw_block.len)
2079  {
2080  _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2081  }
2082  else
2083  {
2084  _c4err("indentation decreased without any scalar");
2085  }
2086  break;
2087  }
2088  else if(indentation == 0)
2089  {
2090  if(_is_doc_token(lc.rem))
2091  {
2092  _c4dbgp("blck: stop. indentation=0 and doc ended");
2093  break;
2094  }
2095  }
2096  }
2097  else
2098  {
2099  const size_t fns = lc.stripped.first_not_of(' ');
2100  _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2101  if(fns != npos) // non-empty line
2102  {
2104  if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
2105  _c4err("parse error");
2106  )
2107  _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2108  if(provisional_indentation == npos)
2109  {
2110  if(lc.indentation < indref)
2111  {
2112  _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2113  if(raw_block.len == 0)
2114  {
2115  _c4dbgp("blck: was empty, undo next line");
2116  _line_ended_undo();
2117  }
2118  break;
2119  }
2120  else if(lc.indentation == m_state->indref)
2121  {
2122  if(has_any(RSEQ|RMAP))
2123  {
2124  _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref);
2125  break;
2126  }
2127  }
2128  _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2129  indentation = lc.indentation;
2130  }
2131  else
2132  {
2133  if(lc.indentation >= provisional_indentation)
2134  {
2135  _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2136  //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2137  indentation = lc.indentation;
2138  }
2139  else
2140  {
2141  break;
2142  //_c4err("parse error: first non-empty block line should have at least the original indentation");
2143  }
2144  }
2145  }
2146  else // empty line
2147  {
2148  _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2149  if(provisional_indentation != npos)
2150  {
2151  if(lc.stripped.len >= provisional_indentation)
2152  {
2153  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2154  provisional_indentation = lc.stripped.len;
2155  }
2156  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2157  else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
2158  {
2159  _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2160  provisional_indentation = lc.indentation;
2161  }
2162  #endif
2163  }
2164  else
2165  {
2166  provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2167  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2168  if(provisional_indentation == npos)
2169  {
2170  provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
2171  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2172  }
2173  if(provisional_indentation < indref)
2174  {
2175  provisional_indentation = indref;
2176  _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2177  }
2178  }
2179  }
2180  }
2181  // advance now that we know the folded scalar continues
2182  m_state->line_contents = lc;
2183  _c4dbgpf("blck: append '{}'", m_state->line_contents.rem);
2184  raw_block.len += m_state->line_contents.full.len;
2185  _line_progressed(m_state->line_contents.rem.len);
2186  _line_ended();
2187  ++num_lines;
2188  }
2189  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0));
2190  C4_UNUSED(num_lines);
2191  C4_UNUSED(first);
2192 
2193  if(indentation == npos)
2194  {
2195  _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2196  indentation = provisional_indentation;
2197  }
2198 
2199  if(num_lines)
2200  _line_ended_undo();
2201 
2202  _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2203 
2204  sb->scalar = raw_block;
2205  sb->indentation = indentation;
2206  sb->chomp = chomp;
2207 }
2208 
2209 
2210 //-----------------------------------------------------------------------------
2211 //-----------------------------------------------------------------------------
2212 //-----------------------------------------------------------------------------
2213 
2214 // a debugging scaffold:
2215 #if 0
2216 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2217 #else
2218 #define _c4dbgfws(...)
2219 #endif
2220 
2221 template<class EventHandler>
2222 template<class FilterProcessor>
2223 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2224 {
2225  _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2226  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
2227 
2228  const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2229  if(first_pos != npos)
2230  {
2231  const char first_char = proc.src[first_pos];
2232  _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2233  if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2234  {
2235  _c4dbgfws("whitespace is trailing on line", "");
2236  proc.skip(first_pos - proc.rpos);
2237  }
2238  else // a legit whitespace
2239  {
2240  proc.copy();
2241  _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2242  }
2243  return true;
2244  }
2245  _c4dbgfws("whitespace is trailing on line", "");
2246  return false;
2247 }
2248 
2249 template<class EventHandler>
2250 template<class FilterProcessor>
2251 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2252 {
2253  if(!_filter_ws_handle_to_first_non_space(proc))
2254  {
2255  _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2256  proc.copy(proc.src.len - proc.rpos);
2257  }
2258 }
2259 
2260 template<class EventHandler>
2261 template<class FilterProcessor>
2262 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2263 {
2264  if(!_filter_ws_handle_to_first_non_space(proc))
2265  {
2266  _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2267  proc.skip(proc.src.len - proc.rpos);
2268  }
2269 }
2270 
2271 #undef _c4dbgfws
2272 
2273 
2274 //-----------------------------------------------------------------------------
2275 //-----------------------------------------------------------------------------
2276 //-----------------------------------------------------------------------------
2277 /* plain scalars */
2278 
2279 // a debugging scaffold:
2280 #if 0
2281 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2282 #else
2283 #define _c4dbgfps(fmt, ...)
2284 #endif
2285 
2286 template<class EventHandler>
2287 template<class FilterProcessor>
2288 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2289 {
2290  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2291 
2292  _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2293  size_t ii = proc.rpos;
2294  const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2295  if(numnl_following)
2296  {
2297  proc.set('\n', numnl_following);
2298  _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2299  }
2300  else
2301  {
2302  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2303  if(ret != npos)
2304  {
2305  proc.set(' ');
2306  _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2307  }
2308  else
2309  {
2310  _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2311  ii = proc.src.len;
2312  }
2313  }
2314  proc.rpos = ii;
2315 }
2316 
2317 template<class EventHandler>
2318 template<class FilterProcessor>
2319 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2320 {
2321  _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
2322  _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2323 
2324  while(proc.has_more_chars())
2325  {
2326  const char curr = proc.curr();
2327  _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2328  switch(curr)
2329  {
2330  case ' ':
2331  _RYML_WITH_TAB_TOKENS(case '\t':)
2332  _c4dbgfps("whitespace", curr);
2333  _filter_ws_skip_trailing(proc);
2334  break;
2335  case '\n':
2336  _c4dbgfps("newline", curr);
2337  _filter_nl_plain(proc, /*indentation*/indentation);
2338  break;
2339  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2340  _c4dbgfps("carriage return, ignore", curr);
2341  proc.skip();
2342  break;
2343  default:
2344  proc.copy();
2345  break;
2346  }
2347  }
2348 
2349  _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2350 
2351  return proc.result();
2352 }
2353 
2354 #undef _c4dbgfps
2355 
2356 
2357 template<class EventHandler>
2358 FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
2359 {
2360  FilterProcessorSrcDst proc(scalar, dst);
2361  return _filter_plain(proc, indentation);
2362 }
2363 
2364 template<class EventHandler>
2365 FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
2366 {
2367  FilterProcessorInplaceEndExtending proc(dst, cap);
2368  return _filter_plain(proc, indentation);
2369 }
2370 
2371 
2372 //-----------------------------------------------------------------------------
2373 //-----------------------------------------------------------------------------
2374 //-----------------------------------------------------------------------------
2375 /* single quoted */
2376 
2377 // a debugging scaffold:
2378 #if 0
2379 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2380 #else
2381 #define _c4dbgfsq(fmt, ...)
2382 #endif
2383 
2384 template<class EventHandler>
2385 template<class FilterProcessor>
2386 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2387 {
2388  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2389 
2390  _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2391  size_t ii = proc.rpos;
2392  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2393  if(numnl_following)
2394  {
2395  proc.set('\n', numnl_following);
2396  _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2397  }
2398  else
2399  {
2400  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2401  if(ret != npos)
2402  {
2403  proc.set(' ');
2404  _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2405  }
2406  else
2407  {
2408  proc.set(' ');
2409  _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2410  }
2411  }
2412  proc.rpos = ii;
2413 }
2414 
2415 template<class EventHandler>
2416 template<class FilterProcessor>
2417 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2418 {
2419  _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2420 
2421  // from the YAML spec for double-quoted scalars:
2422  // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2423  while(proc.has_more_chars())
2424  {
2425  const char curr = proc.curr();
2426  _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2427  switch(curr)
2428  {
2429  case ' ':
2430  case '\t':
2431  _c4dbgfsq("whitespace", curr);
2432  _filter_ws_copy_trailing(proc);
2433  break;
2434  case '\n':
2435  _c4dbgfsq("newline", curr);
2436  _filter_nl_squoted(proc);
2437  break;
2438  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2439  _c4dbgfsq("skip cr", curr);
2440  proc.skip();
2441  break;
2442  case '\'':
2443  _c4dbgfsq("squote", curr);
2444  if(proc.next() == '\'')
2445  {
2446  _c4dbgfsq("two consecutive squotes", curr);
2447  proc.skip();
2448  proc.copy();
2449  }
2450  else
2451  {
2452  _c4err("filter error");
2453  }
2454  break;
2455  default:
2456  proc.copy();
2457  break;
2458  }
2459  }
2460 
2461  _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2462 
2463  return proc.result();
2464 }
2465 
2466 #undef _c4dbgfsq
2467 
2468 template<class EventHandler>
2469 FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
2470 {
2471  FilterProcessorSrcDst proc(scalar, dst);
2472  return _filter_squoted(proc);
2473 }
2474 
2475 template<class EventHandler>
2477 {
2478  FilterProcessorInplaceEndExtending proc(dst, cap);
2479  return _filter_squoted(proc);
2480 }
2481 
2482 
2483 //-----------------------------------------------------------------------------
2484 //-----------------------------------------------------------------------------
2485 //-----------------------------------------------------------------------------
2486 /* double quoted */
2487 
2488 // a debugging scaffold:
2489 #if 0
2490 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2491 #else
2492 #define _c4dbgfdq(...)
2493 #endif
2494 
2495 template<class EventHandler>
2496 template<class FilterProcessor>
2497 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2498 {
2499  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
2500 
2501  _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2502  size_t ii = proc.rpos;
2503  const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2504  if(numnl_following)
2505  {
2506  proc.set('\n', numnl_following);
2507  _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2508  }
2509  else
2510  {
2511  const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2512  if(ret != npos)
2513  {
2514  proc.set(' ');
2515  _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2516  }
2517  else
2518  {
2519  proc.set(' ');
2520  _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2521  }
2522  if(ii < proc.src.len && proc.src.str[ii] == '\\')
2523  {
2524  _c4dbgfdq("backslash at [{}]", ii);
2525  const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2526  if(next == ' ' || next == '\t')
2527  {
2528  _c4dbgfdq("extend skip to backslash", "");
2529  ++ii;
2530  }
2531  }
2532  }
2533  proc.rpos = ii;
2534 }
2535 
2536 template<class EventHandler>
2537 template<class FilterProcessor>
2538 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2539 {
2540  char next = proc.next();
2541  _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2542  if(next == '\r')
2543  {
2544  if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2545  {
2546  proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2547  next = '\n';
2548  _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2549  }
2550  }
2551 
2552  if(next == '\n')
2553  {
2554  size_t ii = proc.rpos + 2;
2555  for( ; ii < proc.src.len; ++ii)
2556  {
2557  // skip leading whitespace
2558  if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2559  ;
2560  else
2561  break;
2562  }
2563  proc.skip(ii - proc.rpos);
2564  }
2565  else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2566  {
2567  // escapes for json compatibility
2568  proc.translate_esc(next);
2569  _c4dbgfdq("here, used '{}'", _c4prc(next));
2570  }
2571  else if(next == '\r')
2572  {
2573  proc.skip();
2574  }
2575  else if(next == 'n')
2576  {
2577  proc.translate_esc('\n');
2578  }
2579  else if(next == 'r')
2580  {
2581  proc.translate_esc('\r');
2582  }
2583  else if(next == 't')
2584  {
2585  proc.translate_esc('\t');
2586  }
2587  else if(next == '\\')
2588  {
2589  proc.translate_esc('\\');
2590  }
2591  else if(next == 'x') // UTF8
2592  {
2593  if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2594  _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2595  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2596  _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2597  uint8_t byteval = {};
2598  if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
2599  _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
2600  proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
2601  _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2602  }
2603  else if(next == 'u') // UTF16
2604  {
2605  if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2606  _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2607  char readbuf[8];
2608  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2609  uint32_t codepoint_val = {};
2610  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2611  _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2612  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2613  if(C4_UNLIKELY(numbytes == 0))
2614  _c4err("failed to decode code point={}", proc.rpos);
2615  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2616  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
2617  }
2618  else if(next == 'U') // UTF32
2619  {
2620  if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2621  _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2622  char readbuf[8];
2623  csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2624  uint32_t codepoint_val = {};
2625  if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2626  _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2627  const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2628  if(C4_UNLIKELY(numbytes == 0))
2629  _c4err("failed to decode code point={}", proc.rpos);
2630  _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2631  proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
2632  }
2633  // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2634  else if(next == '0')
2635  {
2636  proc.translate_esc('\0');
2637  }
2638  else if(next == 'b') // backspace
2639  {
2640  proc.translate_esc('\b');
2641  }
2642  else if(next == 'f') // form feed
2643  {
2644  proc.translate_esc('\f');
2645  }
2646  else if(next == 'a') // bell character
2647  {
2648  proc.translate_esc('\a');
2649  }
2650  else if(next == 'v') // vertical tab
2651  {
2652  proc.translate_esc('\v');
2653  }
2654  else if(next == 'e') // escape character
2655  {
2656  proc.translate_esc('\x1b');
2657  }
2658  else if(next == '_') // unicode non breaking space \u00a0
2659  {
2660  // https://www.compart.com/en/unicode/U+00a0
2661  const char payload[] = {
2662  _RYML_CHCONST(-0x3e, 0xc2),
2663  _RYML_CHCONST(-0x60, 0xa0),
2664  };
2665  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2666  }
2667  else if(next == 'N') // unicode next line \u0085
2668  {
2669  // https://www.compart.com/en/unicode/U+0085
2670  const char payload[] = {
2671  _RYML_CHCONST(-0x3e, 0xc2),
2672  _RYML_CHCONST(-0x7b, 0x85),
2673  };
2674  proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2675  }
2676  else if(next == 'L') // unicode line separator \u2028
2677  {
2678  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2679  const char payload[] = {
2680  _RYML_CHCONST(-0x1e, 0xe2),
2681  _RYML_CHCONST(-0x80, 0x80),
2682  _RYML_CHCONST(-0x58, 0xa8),
2683  };
2684  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2685  }
2686  else if(next == 'P') // unicode paragraph separator \u2029
2687  {
2688  // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2689  const char payload[] = {
2690  _RYML_CHCONST(-0x1e, 0xe2),
2691  _RYML_CHCONST(-0x80, 0x80),
2692  _RYML_CHCONST(-0x57, 0xa9),
2693  };
2694  proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2695  }
2696  else if(next == '\0')
2697  {
2698  proc.skip();
2699  }
2700  else
2701  {
2702  _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2703  }
2704  _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2705 }
2706 
2707 
2708 template<class EventHandler>
2709 template<class FilterProcessor>
2710 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2711 {
2712  _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
2713  // from the YAML spec for double-quoted scalars:
2714  // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
2715  while(proc.has_more_chars())
2716  {
2717  const char curr = proc.curr();
2718  _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2719  switch(curr)
2720  {
2721  case ' ':
2722  case '\t':
2723  {
2724  _c4dbgfdq("whitespace", curr);
2725  _filter_ws_copy_trailing(proc);
2726  break;
2727  }
2728  case '\n':
2729  {
2730  _c4dbgfdq("newline", curr);
2731  _filter_nl_dquoted(proc);
2732  break;
2733  }
2734  case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2735  {
2736  _c4dbgfdq("carriage return, ignore", curr);
2737  proc.skip();
2738  break;
2739  }
2740  case '\\':
2741  {
2742  _filter_dquoted_backslash(proc);
2743  break;
2744  }
2745  default:
2746  {
2747  proc.copy();
2748  break;
2749  }
2750  }
2751  }
2752  _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2753  return proc.result();
2754 }
2755 
2756 #undef _c4dbgfdq
2757 
2758 
2759 template<class EventHandler>
2760 FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
2761 {
2762  FilterProcessorSrcDst proc(scalar, dst);
2763  return _filter_dquoted(proc);
2764 }
2765 
2766 template<class EventHandler>
2767 FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
2768 {
2769  FilterProcessorInplaceMidExtending proc(dst, cap);
2770  return _filter_dquoted(proc);
2771 }
2772 
2773 
2774 //-----------------------------------------------------------------------------
2775 //-----------------------------------------------------------------------------
2776 //-----------------------------------------------------------------------------
2777 // block filtering helpers
2778 
2779 template<class EventHandler>
2780 template<class FilterProcessor>
2781 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
2782 {
2783  _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2784  _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
2785 
2786  // a debugging scaffold:
2787  #if 0
2788  #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2789  #else
2790  #define _c4dbgchomp(...)
2791  #endif
2792 
2793  // advance to the last line having spaces beyond the indentation
2794  {
2795  size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2796  if(last != npos)
2797  {
2798  _c4dbgchomp("found newline and larger indentation. last={}", last);
2799  last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
2800  _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2801  // remove indentation spaces, copy the rest
2802  while((proc.rpos < last) && proc.has_more_chars())
2803  {
2804  const char curr = proc.curr();
2805  _c4dbgchomp("curr='{}'", _c4prc(curr));
2806  switch(curr)
2807  {
2808  case '\n':
2809  {
2810  _c4dbgchomp("newline! remlen={}", proc.rem().len);
2811  proc.copy();
2812  // are there spaces after the newline?
2813  csubstr at_next_line = proc.rem();
2814  if(at_next_line.begins_with(' '))
2815  {
2816  _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
2817  // there are spaces.
2818  size_t first_non_space = at_next_line.first_not_of(' ');
2819  _c4dbgchomp("first_non_space={}", first_non_space);
2820  if(first_non_space == npos)
2821  {
2822  _c4dbgchomp("{} spaces, to the end", at_next_line.len);
2823  first_non_space = at_next_line.len;
2824  }
2825  if(first_non_space <= indentation)
2826  {
2827  _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
2828  proc.skip(first_non_space);
2829  }
2830  else
2831  {
2832  _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
2833  proc.skip(indentation);
2834  // copy the spaces after the indentation
2835  _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2836  proc.copy(first_non_space - indentation);
2837  }
2838  }
2839  break;
2840  }
2841  case '\r':
2842  proc.skip();
2843  break;
2844  default:
2845  _c4err("parse error");
2846  break;
2847  }
2848  }
2849  }
2850  }
2851 
2852  // from now on, we only have line ends (or indentation spaces)
2853  switch(chomp)
2854  {
2855  case CHOMP_CLIP:
2856  {
2857  bool had_one = false;
2858  while(proc.has_more_chars())
2859  {
2860  const char curr = proc.curr();
2861  _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
2862  switch(curr)
2863  {
2864  case '\n':
2865  {
2866  _c4dbgchomp("copy newline!", curr);
2867  proc.copy();
2868  proc.set_at_end();
2869  had_one = true;
2870  break;
2871  }
2872  case ' ':
2873  case '\r':
2874  _c4dbgchomp("skip!", curr);
2875  proc.skip();
2876  break;
2877  }
2878  }
2879  if(!had_one) // there were no newline characters. add one.
2880  {
2881  _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
2882  proc.set('\n');
2883  }
2884  break;
2885  }
2886  case CHOMP_KEEP:
2887  {
2888  _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2889  while(proc.has_more_chars())
2890  {
2891  const char curr = proc.curr();
2892  _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
2893  switch(curr)
2894  {
2895  case '\n':
2896  _c4dbgchomp("copy newline!", curr);
2897  proc.copy();
2898  break;
2899  case ' ':
2900  case '\r':
2901  _c4dbgchomp("skip!", curr);
2902  proc.skip();
2903  break;
2904  }
2905  }
2906  break;
2907  }
2908  case CHOMP_STRIP:
2909  {
2910  _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
2911  // nothing to do!
2912  break;
2913  }
2914  }
2915 
2916  #undef _c4dbgchomp
2917 }
2918 
2919 
2920 // a debugging scaffold:
2921 #if 0
2922 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2923 #else
2924 #define _c4dbgfb(...)
2925 #endif
2926 
2927 template<class EventHandler>
2928 template<class FilterProcessor>
2929 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2930 {
2931  csubstr rem = proc.rem(); // remaining
2932  if(rem.len)
2933  {
2934  size_t first = rem.first_not_of(' ');
2935  if(first != npos)
2936  {
2937  _c4dbgfb("{} spaces follow before next nonws character", first);
2938  if(first < indentation)
2939  {
2940  _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
2941  proc.skip(first);
2942  }
2943  else
2944  {
2945  _c4dbgfb("skip {} spaces from indentation", indentation);
2946  proc.skip(indentation);
2947  }
2948  }
2949  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2950  else
2951  {
2952  _c4dbgfb("all spaces to the end: {} spaces", first);
2953  first = rem.len;
2954  if(first)
2955  {
2956  if(first < indentation)
2957  {
2958  _c4dbgfb("skip everything", first);
2959  proc.skip(proc.src.len - proc.rpos);
2960  }
2961  else
2962  {
2963  _c4dbgfb("skip {} spaces from indentation", indentation);
2964  proc.skip(indentation);
2965  }
2966  }
2967  }
2968  #endif
2969  }
2970 }
2971 
2972 template<class EventHandler>
2973 template<class FilterProcessor>
2974 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
2975 {
2976  csubstr contents = proc.src.trimr(" \n\r");
2977  _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
2978  if(!contents.len)
2979  {
2980  _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
2981  if(chomp == CHOMP_KEEP && proc.src.len)
2982  {
2983  _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
2984  while(proc.has_more_chars())
2985  {
2986  const char curr = proc.curr();
2987  if(curr == '\n')
2988  proc.copy();
2989  else
2990  proc.skip();
2991  }
2992  if(!proc.wpos)
2993  {
2994  proc.set('\n');
2995  }
2996  }
2997  }
2998  return contents.len;
2999 }
3000 
3001 template<class EventHandler>
3002 template<class FilterProcessor>
3003 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3004 {
3005  _c4dbgfb("contents_len={}", contents_len);
3006 
3007  _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3008 
3009  // extend contents to just before the first newline at the end,
3010  // in case it is preceded by spaces
3011  size_t firstnewl = proc.src.first_of('\n', contents_len);
3012  if(firstnewl != npos)
3013  {
3014  contents_len = firstnewl;
3015  _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3016  }
3017  else
3018  {
3019  contents_len = proc.src.len;
3020  _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3021  }
3022 
3023  return contents_len;
3024 }
3025 
3026 #undef _c4dbgfb
3027 
3028 
3029 //-----------------------------------------------------------------------------
3030 //-----------------------------------------------------------------------------
3031 //-----------------------------------------------------------------------------
3032 
3033 // a debugging scaffold for the block-literal filter:
// change the `#if 0` below to `#if 1` to have _c4dbgfbl() forward to
// _c4dbgpf(), prefixing each message with the read and write positions
// of the variable `proc` in the calling scope. Otherwise the macro
// expands to nothing.
3034 #if 0
3035 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3036 #else
3037 #define _c4dbgfbl(...)
3038 #endif
3039 
3040 template<class EventHandler>
3041 template<class FilterProcessor>
3042 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3043 {
3044  _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3045 
3046  size_t contents_len = _handle_all_whitespace(proc, chomp);
3047  if(!contents_len)
3048  return proc.result();
3049 
3050  contents_len = _extend_to_chomp(proc, contents_len);
3051 
3052  _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3053 
3054  _filter_block_indentation(proc, indentation);
3055 
3056  // now filter the bulk
3057  while(proc.has_more_chars(/*maxpos*/contents_len))
3058  {
3059  const char curr = proc.curr();
3060  _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3061  switch(curr)
3062  {
3063  case '\n':
3064  {
3065  _c4dbgfbl("found newline. skip indentation on the next line", curr);
3066  proc.copy(); // copy the newline
3067  _filter_block_indentation(proc, indentation);
3068  break;
3069  }
3070  case '\r':
3071  proc.skip();
3072  break;
3073  default:
3074  proc.copy();
3075  break;
3076  }
3077  }
3078 
3079  _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3080 
3081  _filter_chomp(proc, chomp, indentation);
3082 
3083  _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3084 
3085  return proc.result();
3086 }
3087 
3088 #undef _c4dbgfbl
3089 
3090 template<class EventHandler>
3091 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3092 {
3093  FilterProcessorSrcDst proc(scalar, dst);
3094  return _filter_block_literal(proc, indentation, chomp);
3095 }
3096 
3097 template<class EventHandler>
3098 FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3099 {
3100  FilterProcessorInplaceEndExtending proc(scalar, cap);
3101  return _filter_block_literal(proc, indentation, chomp);
3102 }
3103 
3104 
3105 //-----------------------------------------------------------------------------
3106 //-----------------------------------------------------------------------------
3107 //-----------------------------------------------------------------------------
3108 
3109 // a debugging scaffold for the block-folded filter:
// change the `#if 0` below to `#if 1` to have _c4dbgfbf() forward to
// _c4dbgpf(), prefixing each message with the read and write positions
// of the variable `proc` in the calling scope. Otherwise the macro
// expands to nothing.
3110 #if 0
3111 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3112 #else
3113 #define _c4dbgfbf(...)
3114 #endif
3115 
3116 
3117 template<class EventHandler>
3118 template<class FilterProcessor>
3119 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3120 {
3121  _filter_block_indentation(proc, indentation);
3122  while(proc.has_more_chars(len))
3123  {
3124  const char curr = proc.curr();
3125  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3126  switch(curr)
3127  {
3128  case '\n':
3129  _c4dbgfbf("newline.", curr);
3130  proc.copy();
3131  _filter_block_indentation(proc, indentation);
3132  break;
3133  case '\r':
3134  proc.skip();
3135  break;
3136  case ' ':
3137  case '\t':
3138  {
3139  size_t first = proc.rem().first_not_of(" \t");
3140  _c4dbgfbf("space. first={}", first);
3141  if(first == npos)
3142  first = proc.rem().len;
3143  _c4dbgfbf("... indentation increased to {}", first);
3144  _filter_block_folded_indented_block(proc, indentation, len, first);
3145  break;
3146  }
3147  default:
3148  _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3149  return;
3150  }
3151  }
3152 }
3153 
// Process the newline at the current read position of a folded block
// scalar, according to the number of consecutive newlines seen so far
// (num_newl, which already counts the current one):
//  - first newline: it is skipped in the input and a space is set in
//    the output; the write position held before that write is
//    remembered and returned
//  - second newline: it is skipped in the input, and the space which
//    was written for the first newline (at wpos_at_first_newl) is
//    overwritten with '\n'
//  - any further newline is copied
//
// Returns the (possibly updated) wpos_at_first_newl, to be passed
// back in on the next call.
//
// NOTE: this logic lives in its own function on purpose; see the
// explanation at the call site in _filter_block_folded_newlines().
3154 template<class EventHandler>
3155 template<class FilterProcessor>
3156 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3157 {
3158  switch(num_newl)
3159  {
3160  case 1u:
3161  _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space is written, so that it can be turned
// into a newline if a second newline follows
3162  wpos_at_first_newl = proc.wpos;
3163  proc.skip();
3164  proc.set(' ');
3165  break;
3166  case 2u:
3167  _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// the space from the first newline must be the last thing written
3168  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
3169  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
3170  _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3171  proc.skip();
3172  proc.set_at(wpos_at_first_newl, '\n');
3173  _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
3174  break;
3175  default:
3176  _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3177  proc.copy();
3178  break;
3179  }
3180  return wpos_at_first_newl;
3181 }
3182 
// Filter a run of newlines in a folded block scalar, up to position
// len. The current character must be '\n' on entry (asserted).
//
// While in the run:
//  - each '\n' is counted and handed to
//    _filter_block_folded_newlines_compress(), and then the
//    indentation of the next line is filtered
//  - ' ' or '\t' starts a more-indented block: if newlines were seen,
//    the character written for the first one is set to '\n'; if more
//    than one was seen, another '\n' is set; then the block is
//    delegated to _filter_block_folded_indented_block() and the
//    newline count is reset
//  - '\r' is skipped
// Any other character ends this function.
3183 template<class EventHandler>
3184 template<class FilterProcessor>
3185 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3186 {
3187  _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
// number of consecutive newlines seen in the current run
3188  size_t num_newl = 0;
// output position of the character written for the first newline of the run
3189  size_t wpos_at_first_newl = npos;
3190  while(proc.has_more_chars(len))
3191  {
3192  const char curr = proc.curr();
3193  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3194  switch(curr)
3195  {
3196  case '\n':
3197  {
3198  _c4dbgfbf("newline. sofar={}", num_newl);
3199  // NOTE: vs2022-32bit-release builds were giving wrong
3200  // results in this block, if it was written either as a
3201  // switch(num_newl) or as its equivalent if-form.
3202  //
3203  // For this reason, we're using a dedicated function
3204  // (**_compress), which seems to work around the issue.
3205  //
3206  // The manifested problem was that somewhere between the
3207  // assignment to curr and this point, proc.wpos (the
3208  // write-position of the processor) jumped to npos, which
3209  // made the write wrap-around! To make things worse,
3210  // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3211  // problem go away!
3212  //
3213  // The only way to make the problem appear with prints
3214  // enabled was by disabling all prints in this function
3215  // (including in the block which was moved to the compress
3216  // function) and then selectively enabling only some of
3217  // those prints.
3218  //
3219  // This may be due to some bug in the cl-x86 optimizer; or
3220  // it may be triggered by some UB which may be
3221  // inadvertently present in this function or in the filter
3222  // processor. This is despite our best efforts to weed out
3223  // any such UB problem: neither clang-tidy, nor any of the
3224  // sanitizers, nor gcc's -fanalyzer pointed to any problems
3225  // in this code.
3226  //
3227  // In the end, moving this block to a separate function
3228  // was the only way to bury the problem. But it may
3229  // resurface again, as The Undead, rising from the
3230  // grave to haunt us.
3231  //
3232  // We may have to revisit this. With a stake, and lots of
3233  // garlic.
3234  wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3235  _filter_block_indentation(proc, indentation);
3236  break;
3237  }
3238  case ' ':
3239  case '\t':
3240  {
3241  size_t first = proc.rem().first_not_of(" \t");
3242  _c4dbgfbf("space. first={}", first);
3243  if(first == npos)
3244  first = proc.rem().len;
3245  _c4dbgfbf("... indentation increased to {}", first);
3246  if(num_newl)
3247  {
3248  _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3249  proc.set_at(wpos_at_first_newl, '\n');
3250  }
3251  if(num_newl > 1u)
3252  {
3253  _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3254  proc.set('\n');
3255  }
3256  _filter_block_folded_indented_block(proc, indentation, len, first);
// the run of newlines was consumed by the indented block: start over
3257  num_newl = 0;
3258  wpos_at_first_newl = npos;
3259  break;
3260  }
3261  case '\r':
3262  proc.skip();
3263  break;
3264  default:
3265  _c4dbgfbf("not space, not newline. stop.", 0);
3266  return;
3267  }
3268  }
3269 }
3270 
3271 
3272 template<class EventHandler>
3273 template<class FilterProcessor>
3274 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3275 {
3276  _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
3277  if(curr_indentation)
3278  proc.copy(curr_indentation);
3279  while(proc.has_more_chars(len))
3280  {
3281  const char curr = proc.curr();
3282  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3283  switch(curr)
3284  {
3285  case '\n':
3286  {
3287  proc.copy();
3288  _filter_block_indentation(proc, indentation);
3289  csubstr rem = proc.rem();
3290  const size_t first = rem.first_not_of(' ');
3291  _c4dbgfbf("newline. firstns={}", first);
3292  if(first == 0)
3293  {
3294  const char c = rem[first];
3295  _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3296  if(c == '\n' || c == '\r')
3297  {
3298  ;
3299  }
3300  else
3301  {
3302  _c4dbgfbf("done with indented block", first);
3303  goto endloop;
3304  }
3305  }
3306  else if(first != npos)
3307  {
3308  proc.copy(first);
3309  _c4dbgfbf("copy all {} spaces", first);
3310  }
3311  break;
3312  }
3313  break;
3314  case '\r':
3315  proc.skip();
3316  break;
3317  default:
3318  proc.copy();
3319  break;
3320  }
3321  }
3322  endloop:
3323  return;
3324 }
3325 
3326 
3327 template<class EventHandler>
3328 template<class FilterProcessor>
3329 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3330 {
3331  _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3332 
3333  size_t contents_len = _handle_all_whitespace(proc, chomp);
3334  if(!contents_len)
3335  return proc.result();
3336 
3337  contents_len = _extend_to_chomp(proc, contents_len);
3338 
3339  _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3340 
3341  _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3342 
3343  // now filter the bulk
3344  while(proc.has_more_chars(/*maxpos*/contents_len))
3345  {
3346  const char curr = proc.curr();
3347  _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3348  switch(curr)
3349  {
3350  case '\n':
3351  {
3352  _c4dbgfbf("found newline", curr);
3353  _filter_block_folded_newlines(proc, indentation, contents_len);
3354  break;
3355  }
3356  case '\r':
3357  proc.skip();
3358  break;
3359  default:
3360  proc.copy();
3361  break;
3362  }
3363  }
3364 
3365  _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3366 
3367  _filter_chomp(proc, chomp, indentation);
3368 
3369  _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3370 
3371  return proc.result();
3372 }
3373 
3374 #undef _c4dbgfbf
3375 
3376 template<class EventHandler>
3377 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3378 {
3379  FilterProcessorSrcDst proc(scalar, dst);
3380  return _filter_block_folded(proc, indentation, chomp);
3381 }
3382 
3383 template<class EventHandler>
3384 FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3385 {
3386  FilterProcessorInplaceEndExtending proc(scalar, cap);
3387  return _filter_block_folded(proc, indentation, chomp);
3388 }
3389 
3390 
3391 //-----------------------------------------------------------------------------
3392 //-----------------------------------------------------------------------------
3393 //-----------------------------------------------------------------------------
3394 
3395 template<class EventHandler>
3396 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
3397 {
3398  _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3399  FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3400  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3401  _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3402  return r.get();
3403 }
3404 
3405 //-----------------------------------------------------------------------------
3406 
3407 template<class EventHandler>
3408 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3409 {
3410  _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3411  FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3412  _RYML_CB_ASSERT(this->callbacks(), r.valid());
3413  _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3414  return r.get();
3415 }
3416 
3417 
3418 //-----------------------------------------------------------------------------
3419 
3420 template<class EventHandler>
3421 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3422 {
3423  _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3424  FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3425  if(C4_LIKELY(r.valid()))
3426  {
3427  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3428  return r.get();
3429  }
3430  else
3431  {
3432  const size_t len = r.required_len();
3433  _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3434  substr dst = m_evt_handler->alloc_arena(len, &s);
3435  _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3436  _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3437  FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3438  _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3439  _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
3440  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3441  _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3442  return rsd.get();
3443  }
3444 }
3445 
3446 
3447 //-----------------------------------------------------------------------------
3448 template<class EventHandler>
3449 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3450 {
3451  _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3452  FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3453  if(C4_LIKELY(r.valid()))
3454  {
3455  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3456  return r.get();
3457  }
3458  else
3459  {
3460  _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3461  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3462  FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3463  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3464  _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3465  return rsd.get();
3466  }
3467 }
3468 
3469 
3470 //-----------------------------------------------------------------------------
3471 template<class EventHandler>
3472 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3473 {
3474  _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3475  FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3476  if(C4_LIKELY(r.valid()))
3477  {
3478  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3479  return r.get();
3480  }
3481  else
3482  {
3483  _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3484  substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3485  FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3486  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3487  _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3488  return rsd.get();
3489  }
3490 }
3491 
3492 
3493 //-----------------------------------------------------------------------------
3494 
3495 template<class EventHandler>
3496 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3497 {
3498  csubstr maybe_filtered = sc.scalar;
3499  if(sc.needs_filter)
3500  {
3501  if(m_options.scalar_filtering())
3502  {
3503  maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3504  }
3505  else
3506  {
3507  _c4dbgp("plain scalar left unfiltered");
3508  m_evt_handler->mark_key_scalar_unfiltered();
3509  }
3510  }
3511  else
3512  {
3513  _c4dbgp("plain scalar doesn't need filtering");
3514  }
3515  return maybe_filtered;
3516 }
3517 
3518 template<class EventHandler>
3519 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3520 {
3521  csubstr maybe_filtered = sc.scalar;
3522  if(sc.needs_filter)
3523  {
3524  if(m_options.scalar_filtering())
3525  {
3526  maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3527  }
3528  else
3529  {
3530  _c4dbgp("plain scalar left unfiltered");
3531  m_evt_handler->mark_val_scalar_unfiltered();
3532  }
3533  }
3534  else
3535  {
3536  _c4dbgp("plain scalar doesn't need filtering");
3537  }
3538  return maybe_filtered;
3539 }
3540 
3541 
3542 //-----------------------------------------------------------------------------
3543 
3544 template<class EventHandler>
3545 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3546 {
3547  csubstr maybe_filtered = sc.scalar;
3548  if(sc.needs_filter)
3549  {
3550  if(m_options.scalar_filtering())
3551  {
3552  maybe_filtered = _filter_scalar_squot(sc.scalar);
3553  }
3554  else
3555  {
3556  _c4dbgp("squo key scalar left unfiltered");
3557  m_evt_handler->mark_key_scalar_unfiltered();
3558  }
3559  }
3560  else
3561  {
3562  _c4dbgp("squo key scalar doesn't need filtering");
3563  }
3564  return maybe_filtered;
3565 }
3566 
3567 template<class EventHandler>
3568 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3569 {
3570  csubstr maybe_filtered = sc.scalar;
3571  if(sc.needs_filter)
3572  {
3573  if(m_options.scalar_filtering())
3574  {
3575  maybe_filtered = _filter_scalar_squot(sc.scalar);
3576  }
3577  else
3578  {
3579  _c4dbgp("squo val scalar left unfiltered");
3580  m_evt_handler->mark_val_scalar_unfiltered();
3581  }
3582  }
3583  else
3584  {
3585  _c4dbgp("squo val scalar doesn't need filtering");
3586  }
3587  return maybe_filtered;
3588 }
3589 
3590 
3591 //-----------------------------------------------------------------------------
3592 
3593 template<class EventHandler>
3594 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3595 {
3596  csubstr maybe_filtered = sc.scalar;
3597  if(sc.needs_filter)
3598  {
3599  if(m_options.scalar_filtering())
3600  {
3601  maybe_filtered = _filter_scalar_dquot(sc.scalar);
3602  }
3603  else
3604  {
3605  _c4dbgp("dquo scalar left unfiltered");
3606  m_evt_handler->mark_key_scalar_unfiltered();
3607  }
3608  }
3609  else
3610  {
3611  _c4dbgp("dquo scalar doesn't need filtering");
3612  }
3613  return maybe_filtered;
3614 }
3615 
3616 template<class EventHandler>
3617 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3618 {
3619  csubstr maybe_filtered = sc.scalar;
3620  if(sc.needs_filter)
3621  {
3622  if(m_options.scalar_filtering())
3623  {
3624  maybe_filtered = _filter_scalar_dquot(sc.scalar);
3625  }
3626  else
3627  {
3628  _c4dbgp("dquo scalar left unfiltered");
3629  m_evt_handler->mark_val_scalar_unfiltered();
3630  }
3631  }
3632  else
3633  {
3634  _c4dbgp("dquo scalar doesn't need filtering");
3635  }
3636  return maybe_filtered;
3637 }
3638 
3639 
3640 //-----------------------------------------------------------------------------
3641 
3642 template<class EventHandler>
3643 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3644 {
3645  csubstr maybe_filtered = sb.scalar;
3646  if(m_options.scalar_filtering())
3647  {
3648  maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3649  }
3650  else
3651  {
3652  _c4dbgp("literal scalar left unfiltered");
3653  m_evt_handler->mark_key_scalar_unfiltered();
3654  }
3655  return maybe_filtered;
3656 }
3657 
3658 template<class EventHandler>
3659 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
3660 {
3661  csubstr maybe_filtered = sb.scalar;
3662  if(m_options.scalar_filtering())
3663  {
3664  maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3665  }
3666  else
3667  {
3668  _c4dbgp("literal scalar left unfiltered");
3669  m_evt_handler->mark_val_scalar_unfiltered();
3670  }
3671  return maybe_filtered;
3672 }
3673 
3674 
3675 //-----------------------------------------------------------------------------
3676 
3677 template<class EventHandler>
3678 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3679 {
3680  csubstr maybe_filtered = sb.scalar;
3681  if(m_options.scalar_filtering())
3682  {
3683  maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3684  }
3685  else
3686  {
3687  _c4dbgp("folded scalar left unfiltered");
3688  m_evt_handler->mark_key_scalar_unfiltered();
3689  }
3690  return maybe_filtered;
3691 }
3692 
3693 template<class EventHandler>
3694 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
3695 {
3696  csubstr maybe_filtered = sb.scalar;
3697  if(m_options.scalar_filtering())
3698  {
3699  maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3700  }
3701  else
3702  {
3703  _c4dbgp("folded scalar left unfiltered");
3704  m_evt_handler->mark_val_scalar_unfiltered();
3705  }
3706  return maybe_filtered;
3707 }
3708 
3709 
3710 //-----------------------------------------------------------------------------
3711 //-----------------------------------------------------------------------------
3712 //-----------------------------------------------------------------------------
3713 
3714 #ifdef RYML_DBG // !!! <----------------------------------
3715 
3716 template<class EventHandler>
3717 void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
3718 {
3719  char buf1_[64], buf2_[64], buf3_[64];
3720  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3721  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3722  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3723  _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3724  s->flags |= on;
3725 }
3726 
3727 template<class EventHandler>
3728 void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
3729 {
3730  char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3731  csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3732  csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3733  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3734  csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3735  _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3736  s->flags |= on;
3737  s->flags &= ~off;
3738 }
3739 
3740 template<class EventHandler>
3741 void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
3742 {
3743  char buf1_[64], buf2_[64], buf3_[64];
3744  csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3745  csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3746  csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3747  _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3748  s->flags &= ~off;
3749 }
3750 
3751 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
3752 {
3753  size_t pos = 0;
3754  bool gotone = false;
3755 
3756  #define _prflag(fl) \
3757  if((flags & fl) == (fl)) \
3758  { \
3759  if(gotone) \
3760  { \
3761  if(pos + 1 < buf.len) \
3762  buf[pos] = '|'; \
3763  ++pos; \
3764  } \
3765  csubstr fltxt = #fl; \
3766  if(pos + fltxt.len <= buf.len) \
3767  memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3768  pos += fltxt.len; \
3769  gotone = true; \
3770  }
3771 
3772  _prflag(RTOP);
3773  _prflag(RUNK);
3774  _prflag(RMAP);
3775  _prflag(RSEQ);
3776  _prflag(FLOW);
3777  _prflag(BLCK);
3778  _prflag(QMRK);
3779  _prflag(RKEY);
3780  _prflag(RVAL);
3781  _prflag(RKCL);
3782  _prflag(RNXT);
3783  _prflag(SSCL);
3784  _prflag(QSCL);
3785  _prflag(RSET);
3786  _prflag(RDOC);
3787  _prflag(NDOC);
3788  _prflag(USTY);
3789  _prflag(RSEQIMAP);
3790 
3791  #undef _prflag
3792 
3793  if(pos == 0)
3794  if(buf.len > 0)
3795  buf[pos++] = '0';
3796 
3797  RYML_CHECK(pos <= buf.len);
3798 
3799  return buf.first(pos);
3800 }
3801 
3802 #endif // RYML_DBG !!! <----------------------------------
3803 
3804 
3805 //-----------------------------------------------------------------------------
3806 //-----------------------------------------------------------------------------
3807 //-----------------------------------------------------------------------------
3808 
// Returns the part of the parsed buffer (m_buf) which starts at
// loc.offset. The offset is asserted to be inside the buffer.
// NOTE(review): the signature line of this definition is not present
// in this listing; confirm it against the class declaration.
3809 template<class EventHandler>
3811 {
3812  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3813  return m_buf.sub(loc.offset);
3814 }
3815 
// Returns the location of the given node, by forwarding to the
// (tree, id) overload of location(). The node is asserted to be
// readable.
// NOTE(review): the signature line of this definition is not present
// in this listing; confirm it against the class declaration.
3816 template<class EventHandler>
3818 {
3819  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3820  return location(*node.tree(), node.id());
3821 }
3822 
// Returns the location of the given node in the given tree. When no
// location can be obtained through _location_from_node(), the
// location of the start of the parsed buffer is returned instead.
// NOTE(review): the signature line of this definition is not present
// in this listing; confirm it against the class declaration.
3823 template<class EventHandler>
3825 {
3826  // try hard to avoid getting the location from a null string.
3827  Location loc;
3828  if(_location_from_node(tree, node, &loc, 0))
3829  return loc;
// fall back to the start of the buffer
3830  return val_location(m_buf.str);
3831 }
3832 
3833 template<class EventHandler>
3834 bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
3835 {
3836  if(tree.has_key(node))
3837  {
3838  csubstr k = tree.key(node);
3839  if(C4_LIKELY(k.str != nullptr))
3840  {
3841  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3842  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3843  *loc = val_location(k.str);
3844  return true;
3845  }
3846  }
3847 
3848  if(tree.has_val(node))
3849  {
3850  csubstr v = tree.val(node);
3851  if(C4_LIKELY(v.str != nullptr))
3852  {
3853  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3854  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3855  *loc = val_location(v.str);
3856  return true;
3857  }
3858  }
3859 
3860  if(tree.is_container(node))
3861  {
3862  if(_location_from_cont(tree, node, loc))
3863  return true;
3864  }
3865 
3866  if(tree.type(node) != NOTYPE && level == 0)
3867  {
3868  // try the prev sibling
3869  {
3870  const id_type prev = tree.prev_sibling(node);
3871  if(prev != NONE)
3872  {
3873  if(_location_from_node(tree, prev, loc, level+1))
3874  return true;
3875  }
3876  }
3877  // try the next sibling
3878  {
3879  const id_type next = tree.next_sibling(node);
3880  if(next != NONE)
3881  {
3882  if(_location_from_node(tree, next, loc, level+1))
3883  return true;
3884  }
3885  }
3886  // try the parent
3887  {
3888  const id_type parent = tree.parent(node);
3889  if(parent != NONE)
3890  {
3891  if(_location_from_node(tree, parent, loc, level+1))
3892  return true;
3893  }
3894  }
3895  }
3896 
3897  return false;
3898 }
3899 
3900 template<class EventHandler>
3901 bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
3902 {
3903  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3904  if(!tree.is_stream(node))
3905  {
3906  const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
3907  if(tree.has_children(node))
3908  {
3909  id_type child = tree.first_child(node);
3910  if(tree.has_key(child))
3911  {
3912  // when a map starts, the container was set after the key
3913  csubstr k = tree.key(child);
3914  if(k.str && node_start > k.str)
3915  node_start = k.str;
3916  }
3917  }
3918  *loc = val_location(node_start);
3919  return true;
3920  }
3921  else // it's a stream
3922  {
3923  *loc = val_location(m_buf.str); // just return the front of the buffer
3924  }
3925  return true;
3926 }
3927 
3928 
// Returns the location (file name, offset, line and column) of a
// pointer into the parsed buffer.
//
// A null pointer yields the location {m_file, 0, 0, 0}. Otherwise
// locations must be enabled in the parser options (checked), and the
// pointer must lie inside the parsed buffer (checked). The line is
// found by searching the table of newline offsets: with a linear
// search when the table is smaller than
// RYML_LOCATIONS_SMALL_THRESHOLD, and with a bisection search
// otherwise. The column is the distance to the previous stored
// newline, or to the start of the buffer for the first line.
//
// NOTE(review): the signature line of this definition is not present
// in this listing; confirm it against the class declaration.
3929 template<class EventHandler>
3931 {
3932  if(C4_UNLIKELY(val == nullptr))
3933  return {m_file, 0, 0, 0};
3934  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3935  // NOTE: if any of these checks fails, the parser needs to be
3936  // instantiated with locations enabled.
3937  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3938  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3939  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3940  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3941  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
3942  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3943  // NOTE: the pointer needs to belong to the buffer that was used to parse.
3944  csubstr src = m_buf;
// NOTE(review): val cannot be null at this point (see the early
// return above), so the first of these two checks always passes.
3945  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
3946  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
3947  // ok. search the first stored newline after the given ptr
3948  using lineptr_type = size_t const* C4_RESTRICT;
3949  lineptr_type lineptr = nullptr;
3950  size_t offset = (size_t)(val - src.begin());
3951  if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
3952  {
3953  // just do a linear search if the size is small.
// NOTE(review): when no stored offset is greater than `offset`,
// lineptr is left null and is then dereferenced below. This looks
// reachable with val == src.end(), which the check above allows --
// confirm.
3954  for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3955  {
3956  if(*curr > offset)
3957  {
3958  lineptr = curr;
3959  break;
3960  }
3961  }
3962  }
3963  else
3964  {
3965  // do a bisection search if the size is not small.
3966  //
3967  // We could use std::lower_bound but this is simple enough and
3968  // spares the costly include of <algorithm>.
// NOTE(review): this finds the first stored offset which is not less
// than `offset`, whereas the linear search above finds the first one
// which is strictly greater. The two differ when `offset` is equal to
// a stored offset (i.e. val points at a newline), in which case the
// `*lineptr > offset` assertion below does not hold for this branch
// -- confirm which of the two is intended.
3969  size_t count = m_newline_offsets_size;
3970  size_t step;
3971  lineptr_type it;
3972  lineptr = m_newline_offsets;
3973  while(count)
3974  {
3975  step = count >> 1;
3976  it = lineptr + step;
3977  if(*it < offset)
3978  {
3979  lineptr = ++it;
3980  count -= step + 1;
3981  }
3982  else
3983  {
3984  count = step;
3985  }
3986  }
3987  }
3988  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
3989  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
3990  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
3991  Location loc;
3992  loc.name = m_file;
3993  loc.offset = offset;
// the line is the index, in the table, of the entry that was found
3994  loc.line = (size_t)(lineptr - m_newline_offsets);
// the column is counted from the character after the previous newline
3995  if(lineptr > m_newline_offsets)
3996  loc.col = (offset - *(lineptr-1) - 1u);
3997  else
3998  loc.col = offset;
3999  return loc;
4000 }
4001 
// Rebuilds the table of newline offsets for the current buffer.
//
// Remembers m_buf as the buffer the table refers to, makes room for one
// entry per '\n' plus one extra entry, stores the offset of every '\n'
// in increasing order, and finally stores m_buf.len as the last entry.
//
// NOTE(review): the declarator line between the template header and the
// opening brace is absent from this listing (presumably dropped when the
// listing was extracted) -- confirm the signature against the original
// source before relying on this block.
4002 template<class EventHandler>
4004 {
4005  m_newline_offsets_buf = m_buf;
     // one slot per newline, plus one for the end-of-buffer entry
4006  size_t numnewlines = 1u + m_buf.count('\n');
4007  _resize_locations(numnewlines);
4008  m_newline_offsets_size = 0;
4009  for(size_t i = 0; i < m_buf.len; i++)
4010  if(m_buf[i] == '\n')
4011  m_newline_offsets[m_newline_offsets_size++] = i;
     // terminating entry: the buffer length
4012  m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4013  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4014 }
4015 
4016 template<class EventHandler>
4017 void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4018 {
4019  if(numnewlines > m_newline_offsets_capacity)
4020  {
4021  if(m_newline_offsets)
4022  _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4023  m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4024  m_newline_offsets_capacity = numnewlines;
4025  }
4026 }
4027 
4028 template<class EventHandler>
4029 bool ParseEngine<EventHandler>::_locations_dirty() const
4030 {
4031  return !m_newline_offsets_size;
4032 }
4033 
4034 
4035 //-----------------------------------------------------------------------------
4036 //-----------------------------------------------------------------------------
4037 //-----------------------------------------------------------------------------
4038 
4039 template<class EventHandler>
4040 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4041 {
4042  if(m_state->line_contents.rem.len > 0)
4043  {
4044  csubstr rem = m_state->line_contents.rem;
4045  if(rem.str[0] == ' ' || rem.str[0] == '\t')
4046  {
4047  _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0]));
4048  _skipchars(" \t");
4049  rem = m_state->line_contents.rem;
4050  }
4051  // comments
4052  if(rem.begins_with('#'))
4053  {
4054  _c4dbgpf("it's a comment: {}", m_state->line_contents.rem);
4055  _line_progressed(m_state->line_contents.rem.len);
4056  }
4057  }
4058 }
4059 
4060 
4061 //-----------------------------------------------------------------------------
4062 
4063 
4064 template<class EventHandler>
4065 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4066 {
4067  _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4068  if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4069  _c4err("too many annotations");
4070  dst->annotations[dst->num_entries].str = str;
4071  dst->annotations[dst->num_entries].indentation = indentation;
4072  dst->annotations[dst->num_entries].line = line;
4073  ++dst->num_entries;
4074 }
4075 
4076 template<class EventHandler>
4077 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4078 {
4079  dst->num_entries = 0;
4080 }
4081 
4082 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4083 template<class EventHandler>
4084 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4085 {
4086  if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4087  {
4088  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4089  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_state->pos.line);
4090  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_state->pos.line);
4091  size_t to_skip = m_state->indref;
4092  if(m_pending_anchors.num_entries)
4093  to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4094  if(m_pending_tags.num_entries)
4095  to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4096  _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
4097  _maybe_skipchars_up_to(' ', to_skip);
4098  return true;
4099  }
4100  return false;
4101 }
4102 #endif
4103 
4104 template<class EventHandler>
4105 bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4106 {
4107  return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4108 }
4109 
4110 template<class EventHandler>
4111 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4112 {
4113  if(!tag.begins_with("!<"))
4114  {
4115  if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
4116  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_state->pos);
4117  }
4118  else
4119  {
4120  if(C4_UNLIKELY(!tag.ends_with('>')))
4121  _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_state->pos);
4122  }
4123 }
4124 
4125 template<class EventHandler>
4126 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4127 {
4128  _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_state->node_id);
4129  if(m_pending_tags.num_entries)
4130  {
4131  _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4132  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4133  {
4134  _check_tag(m_pending_tags.annotations[0].str);
4135  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4136  _clear_annotations(&m_pending_tags);
4137  }
4138  else
4139  {
4140  _c4err("too many tags");
4141  }
4142  }
4143  if(m_pending_anchors.num_entries)
4144  {
4145  _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4146  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4147  {
4148  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4149  _clear_annotations(&m_pending_anchors);
4150  }
4151  else
4152  {
4153  _c4err("too many anchors");
4154  }
4155  }
4156 }
4157 
4158 template<class EventHandler>
4159 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4160 {
4161  _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_state->node_id);
4162  if(m_pending_tags.num_entries)
4163  {
4164  _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4165  if(C4_LIKELY(m_pending_tags.num_entries == 1))
4166  {
4167  _check_tag(m_pending_tags.annotations[0].str);
4168  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4169  _clear_annotations(&m_pending_tags);
4170  }
4171  else
4172  {
4173  _c4err("too many tags");
4174  }
4175  }
4176  if(m_pending_anchors.num_entries)
4177  {
4178  _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4179  if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4180  {
4181  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4182  _clear_annotations(&m_pending_anchors);
4183  }
4184  else
4185  {
4186  _c4err("too many anchors");
4187  }
4188  }
4189 }
4190 
4191 template<class EventHandler>
4192 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4193 {
4194  _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4195  if(m_pending_tags.num_entries == 2)
4196  {
4197  _c4dbgp("2 tags, setting entry 0");
4198  _check_tag(m_pending_tags.annotations[0].str);
4199  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4200  }
4201  else if(m_pending_tags.num_entries == 1)
4202  {
4203  _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4204  if(m_pending_tags.annotations[0].line < current_line)
4205  {
4206  _c4dbgp("...tag is for the map. setting it.");
4207  _check_tag(m_pending_tags.annotations[0].str);
4208  m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4209  _clear_annotations(&m_pending_tags);
4210  }
4211  }
4212  //
4213  if(m_pending_anchors.num_entries == 2)
4214  {
4215  _c4dbgp("2 anchors, setting entry 0");
4216  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4217  }
4218  else if(m_pending_anchors.num_entries == 1)
4219  {
4220  _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4221  if(m_pending_anchors.annotations[0].line < current_line)
4222  {
4223  _c4dbgp("...anchor is for the map. setting it.");
4224  m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4225  _clear_annotations(&m_pending_anchors);
4226  }
4227  }
4228 }
4229 
4230 template<class EventHandler>
4231 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4232 {
4233  _c4dbgp("annotations_before_start_mapblck_as_key");
4234  if(m_pending_tags.num_entries == 2)
4235  {
4236  _check_tag(m_pending_tags.annotations[0].str);
4237  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4238  }
4239  if(m_pending_anchors.num_entries == 2)
4240  {
4241  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4242  }
4243 }
4244 
4245 template<class EventHandler>
4246 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4247 {
4248  _c4dbgp("annotations_after_start_mapblck");
4249  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4250  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4251  if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4252  {
4253  key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4254  switch(m_pending_tags.num_entries)
4255  {
4256  case 1u:
4257  _check_tag(m_pending_tags.annotations[0].str);
4258  m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4259  _clear_annotations(&m_pending_tags);
4260  break;
4261  case 2u:
4262  _check_tag(m_pending_tags.annotations[1].str);
4263  m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4264  _clear_annotations(&m_pending_tags);
4265  break;
4266  }
4267  switch(m_pending_anchors.num_entries)
4268  {
4269  case 1u:
4270  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4271  _clear_annotations(&m_pending_anchors);
4272  break;
4273  case 2u:
4274  m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4275  _clear_annotations(&m_pending_anchors);
4276  break;
4277  }
4278  }
4279  _set_indentation(key_indentation);
4280 }
4281 
4282 template<class EventHandler>
4283 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4284 {
4285  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4286  // select the left-most annotation on the max line
4287  auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4288  for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4289  {
4290  auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4291  if(ann.line > curr->line)
4292  curr = &ann;
4293  else if(ann.indentation < curr->indentation)
4294  curr = &ann;
4295  }
4296  for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4297  {
4298  auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4299  if(ann.line > curr->line)
4300  curr = &ann;
4301  else if(ann.indentation < curr->indentation)
4302  curr = &ann;
4303  }
4304  return curr->line < val_line ? val_indentation : curr->indentation;
4305 }
4306 
4307 template<class EventHandler>
4308 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4309 {
4310  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_state->line_contents.rem));
4311  const size_t pos = rem.find('#');
4312  _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
4313  if(pos == npos) // no comments
4314  {
4315  m_evt_handler->add_directive(rem);
4316  _line_progressed(rem.len);
4317  }
4318  else
4319  {
4320  csubstr to_comment = rem.first(pos);
4321  csubstr trimmed = to_comment.trimr(" \t");
4322  m_evt_handler->add_directive(trimmed);
4323  _line_progressed(pos);
4324  _skip_comment();
4325  }
4326 }
4327 
4328 
4329 //-----------------------------------------------------------------------------
4330 
4331 template<class EventHandler>
4332 void ParseEngine<EventHandler>::_handle_seq_json()
4333 {
4334 seqjson_start:
4335  _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_state->node_id, m_state->level, m_state->indref);
4336 
4337  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4338  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4339  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4340  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4341  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4342 
4343  _handle_flow_skip_whitespace();
4344  csubstr rem = m_state->line_contents.rem;
4345  if(!rem.len)
4346  goto seqjson_again;
4347 
4348  if(has_any(RVAL))
4349  {
4350  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4351  const char first = rem.str[0];
4352  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4353  switch(first)
4354  {
4355  case '"':
4356  {
4357  _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4358  ScannedScalar sc = _scan_scalar_dquot();
4359  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4360  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4361  addrem_flags(RNXT, RVAL);
4362  break;
4363  }
4364  case '[':
4365  {
4366  _c4dbgp("seqjson[RVAL]: start child seqjson");
4367  addrem_flags(RNXT, RVAL);
4368  m_evt_handler->begin_seq_val_flow();
4369  addrem_flags(RVAL, RNXT);
4370  _line_progressed(1);
4371  break;
4372  }
4373  case '{':
4374  {
4375  _c4dbgp("seqjson[RVAL]: start child mapjson");
4376  addrem_flags(RNXT, RVAL);
4377  m_evt_handler->begin_map_val_flow();
4378  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4379  _line_progressed(1);
4380  goto seqjson_finish;
4381  }
4382  case ']': // this happens on a trailing comma like ", ]"
4383  {
4384  _c4dbgp("seqjson[RVAL]: end!");
4385  rem_flags(RSEQ);
4386  m_evt_handler->end_seq();
4387  _line_progressed(1);
4388  if(!has_all(RSEQ|FLOW))
4389  goto seqjson_finish;
4390  break;
4391  }
4392  default:
4393  {
4394  ScannedScalar sc;
4395  if(_scan_scalar_seq_json(&sc))
4396  {
4397  _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4398  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
4399  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4400  addrem_flags(RNXT, RVAL);
4401  }
4402  else
4403  {
4404  _c4err("parse error");
4405  }
4406  }
4407  }
4408  }
4409  else // RNXT
4410  {
4411  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4412  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4413  const char first = rem.str[0];
4414  _c4dbgpf("mapjson[RNXT]: '{}'", first);
4415  switch(first)
4416  {
4417  case ',':
4418  {
4419  _c4dbgp("seqjson[RNXT]: expect next val");
4420  addrem_flags(RVAL, RNXT);
4421  m_evt_handler->add_sibling();
4422  _line_progressed(1);
4423  break;
4424  }
4425  case ']':
4426  {
4427  _c4dbgp("seqjson[RNXT]: end!");
4428  m_evt_handler->end_seq();
4429  _line_progressed(1);
4430  goto seqjson_finish;
4431  }
4432  default:
4433  _c4err("parse error");
4434  }
4435  }
4436 
4437  seqjson_again:
4438  _c4dbgt("seqjson: go again", 0);
4439  if(_finished_line())
4440  {
4441  if(C4_LIKELY(!_finished_file()))
4442  {
4443  _line_ended();
4444  _scan_line();
4445  _c4dbgnextline();
4446  }
4447  else
4448  {
4449  _c4err("missing terminating ]");
4450  }
4451  }
4452  goto seqjson_start;
4453 
4454  seqjson_finish:
4455  _c4dbgp("seqjson: finish");
4456 }
4457 
4458 
4459 //-----------------------------------------------------------------------------
4460 
// Handles the contents of a flow map using the JSON scanners.
//
// Runs as a loop (built with goto) over four mutually exclusive state
// flags:
//  - RKEY: a key is expected. Only a double-quoted scalar is accepted;
//    a '}' ends the map (this happens after a trailing comma).
//  - RKCL: the ':' after the key is expected.
//  - RVAL: a value is expected: a double-quoted scalar, a child
//    sequence '[' (the function returns after starting it), a child map
//    '{' (handled by this same loop), or a plain scalar.
//  - RNXT: a ',' (adds a sibling and goes back to RKEY) or a '}' (ends
//    the map and returns) is expected.
//
// Whenever the current line is used up, the next line is scanned; if the
// file ends first, the error "missing terminating }" is reported. Any
// unexpected character is reported as "parse error".
4461 template<class EventHandler>
4462 void ParseEngine<EventHandler>::_handle_map_json()
4463 {
4464 mapjson_start:
4465  _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_state->node_id, m_state->level, m_state->indref);
4466
4467  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
4468  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4469  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4470  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
     // exactly one of the four state flags must be set
4471  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
4472
4473  _handle_flow_skip_whitespace();
4474  csubstr rem = m_state->line_contents.rem;
     // nothing left on this line: move on to the next one
4475  if(!rem.len)
4476  goto mapjson_again;
4477
     // state RKEY: expecting a key
4478  if(has_any(RKEY))
4479  {
4480  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4481  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4482  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4484  const char first = rem.str[0];
4485  _c4dbgpf("mapjson[RKEY]: '{}'", first);
4486  switch(first)
4487  {
4488  case '"':
4489  {
4490  _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
4491  ScannedScalar sc = _scan_scalar_dquot();
4492  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4493  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
     // key done: the colon comes next
4494  addrem_flags(RKCL, RKEY);
4495  break;
4496  }
4497  case '}': // this happens on a trailing comma like ", }"
4498  {
4499  _c4dbgp("mapjson[RKEY]: end!");
4500  m_evt_handler->end_map();
4501  _line_progressed(1);
4502  goto mapjson_finish;
4503  }
4504  default:
4505  _c4err("parse error");
4506  }
4507  }
     // state RVAL: expecting a value
4508  else if(has_any(RVAL))
4509  {
4510  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4511  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4512  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4513  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4514  const char first = rem.str[0];
4515  _c4dbgpf("mapjson[RVAL]: '{}'", first);
4516  switch(first)
4517  {
4518  case '"':
4519  {
4520  _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
4521  ScannedScalar sc = _scan_scalar_dquot();
4522  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4523  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4524  addrem_flags(RNXT, RVAL);
4525  break;
4526  }
4527  case '[':
4528  {
4529  _c4dbgp("mapjson[RVAL]: start val seqjson");
     // set RNXT before beginning the child container
4530  addrem_flags(RNXT, RVAL);
4531  m_evt_handler->begin_seq_val_flow();
4532  _set_indentation(m_evt_handler->m_parent->indref);
4533  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
4534  _line_progressed(1);
     // the state is now a sequence: leave this function
4535  goto mapjson_finish;
4536  }
4537  case '{':
4538  {
4539  _c4dbgp("mapjson[RVAL]: start val mapjson");
     // set RNXT before beginning the child container
4540  addrem_flags(RNXT, RVAL);
4541  m_evt_handler->begin_map_val_flow();
4542  _set_indentation(m_evt_handler->m_parent->indref);
4543  addrem_flags(RKEY, RNXT);
4544  _line_progressed(1);
4545  // keep going in this function
4546  break;
4547  }
4548  default:
4549  {
4550  ScannedScalar sc;
4551  if(_scan_scalar_map_json(&sc))
4552  {
4553  _c4dbgp("mapjson[RVAL]: plain scalar.");
4554  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
4555  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4556  addrem_flags(RNXT, RVAL);
4557  }
4558  else
4559  {
4560  _c4err("parse error");
4561  }
4562  break;
4563  }
4564  }
4565  }
4566  else if(has_any(RKCL)) // read the key colon
4567  {
4568  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4569  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4570  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4571  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4572  const char first = rem.str[0];
4573  _c4dbgpf("mapjson[RKCL]: '{}'", first);
4574  if(first == ':')
4575  {
4576  _c4dbgp("mapjson[RKCL]: found the colon");
4577  addrem_flags(RVAL, RKCL);
4578  _line_progressed(1);
4579  }
4580  else
4581  {
4582  _c4err("parse error");
4583  }
4584  }
     // state RNXT: expecting ',' or '}'
4585  else if(has_any(RNXT))
4586  {
4587  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4588  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4589  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4590  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4591  _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
4592  if(rem.begins_with(','))
4593  {
4594  _c4dbgp("mapjson[RNXT]: expect next keyval");
4595  m_evt_handler->add_sibling();
4596  addrem_flags(RKEY, RNXT);
4597  _line_progressed(1);
4598  }
4599  else if(rem.begins_with('}'))
4600  {
4601  _c4dbgp("mapjson[RNXT]: end!");
4602  m_evt_handler->end_map();
4603  _line_progressed(1);
4604  goto mapjson_finish;
4605  }
4606  else
4607  {
4608  _c4err("parse error");
4609  }
4610  }
4611
     // loop tail: fetch the next line when this one is used up, then
     // run the state machine again
4612  mapjson_again:
4613  _c4dbgt("mapjson: go again", 0);
4614  if(_finished_line())
4615  {
4616  if(C4_LIKELY(!_finished_file()))
4617  {
4618  _line_ended();
4619  _scan_line();
4620  _c4dbgnextline();
4621  }
4622  else
4623  {
4624  _c4err("missing terminating }");
4625  }
4626  }
4627  goto mapjson_start;
4628
     // exit point
4629  mapjson_finish:
4630  _c4dbgp("mapjson: finish");
4631 }
4632 
4633 
4634 //-----------------------------------------------------------------------------
4635 
// Handles the RSEQIMAP state.
// NOTE(review): judging by the name and by the ',' / ']' terminators,
// this is presumably a single-pair map written directly as an entry of a
// flow sequence -- confirm against the callers.
//
// Runs as a loop (built with goto) over four mutually exclusive state
// flags:
//  - QMRK: a key is expected (scalar, child container, anchor or ref).
//    After a scalar or a ref the state becomes RKCL.
//  - RKCL: a ':' is expected (the state becomes RVAL); a ',' or ']'
//    instead sets an empty value and ends the map.
//  - RVAL: a value is expected (scalar, child container, anchor or ref).
//    A scalar ends the map at once; a ',' or ']' sets an empty value and
//    ends the map.
//  - RNXT: a ',' or ']' is expected, which ends the map.
//
// The terminating ',' or ']' is not consumed here. Starting a child
// container returns from this function. Whenever the current line is
// used up the next line is scanned; a file ending first, or any
// unexpected character, is reported as "parse error".
4636 template<class EventHandler>
4637 void ParseEngine<EventHandler>::_handle_seq_imap()
4638 {
4639 seqimap_start:
4640  _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_state->node_id, m_state->level, m_state->indref);
4641
4642  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
4643  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4644  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
     // exactly one of the four state flags must be set
4645  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
4646  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4647
4648  _handle_flow_skip_whitespace();
4649  csubstr rem = m_state->line_contents.rem;
     // nothing left on this line: move on to the next one
4650  if(!rem.len)
4651  goto seqimap_again;
4652
     // state RVAL: expecting the value
4653  if(has_any(RVAL))
4654  {
4655  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
4656  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4657  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4658  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4659  const char first = rem.str[0];
4660  _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
4661  ScannedScalar sc;
4662  if(first == '\'')
4663  {
4664  _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
4665  sc = _scan_scalar_squot();
4666  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4667  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
     // a scalar value completes the map
4668  m_evt_handler->end_map();
4669  goto seqimap_finish;
4670  }
4671  else if(first == '"')
4672  {
4673  _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
4674  sc = _scan_scalar_dquot();
4675  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4676  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4677  m_evt_handler->end_map();
4678  goto seqimap_finish;
4679  }
4680  // block scalars (ie | and >) cannot appear in flow containers
4681  else if(_scan_scalar_plain_map_flow(&sc))
4682  {
4683  _c4dbgp("seqimap[RVAL]: it's a scalar.");
4684  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
4685  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4686  m_evt_handler->end_map();
4687  goto seqimap_finish;
4688  }
4689  else if(first == '[')
4690  {
4691  _c4dbgp("seqimap[RVAL]: start child seqflow");
     // set RNXT before beginning the child container
4692  addrem_flags(RNXT, RVAL);
4693  m_evt_handler->begin_seq_val_flow();
4694  addrem_flags(RVAL, RNXT|RSEQIMAP);
4695  _set_indentation(m_evt_handler->m_parent->indref);
4696  _line_progressed(1);
4697  goto seqimap_finish;
4698  }
4699  else if(first == '{')
4700  {
4701  _c4dbgp("seqimap[RVAL]: start child mapflow");
     // set RNXT before beginning the child container
4702  addrem_flags(RNXT, RVAL);
4703  m_evt_handler->begin_map_val_flow();
4704  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
4705  _set_indentation(m_evt_handler->m_parent->indref);
4706  _line_progressed(1);
4707  goto seqimap_finish;
4708  }
4709  else if(first == ',' || first == ']')
4710  {
4711  _c4dbgp("seqimap[RVAL]: finish without val.");
     // the terminator is left on the line, not consumed here
4712  m_evt_handler->set_val_scalar_plain({});
4713  m_evt_handler->end_map();
4714  goto seqimap_finish;
4715  }
4716  else if(first == '&')
4717  {
     // the state flags are unchanged: the value itself is still expected
4718  csubstr anchor = _scan_anchor();
4719  _c4dbgp("seqimap[RVAL]: anchor!");
4720  m_evt_handler->set_val_anchor(anchor);
4721  }
4722  else if(first == '*')
4723  {
4724  csubstr ref = _scan_ref_seq();
4725  _c4dbgp("seqimap[RVAL]: ref!");
4726  m_evt_handler->set_val_ref(ref);
4727  addrem_flags(RNXT, RVAL);
4728  }
4729  else
4730  {
4731  _c4err("parse error");
4732  }
4733  }
     // state RNXT: expecting the terminating ',' or ']'
4734  else if(has_any(RNXT))
4735  {
4736  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
4737  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4738  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4739  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4740  const char first = rem.str[0];
4741  _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
4742  if(first == ',' || first == ']')
4743  {
4744  // we may get here because a map or a seq started and we
4745  // return later
4746  _c4dbgp("seqimap: done");
4747  m_evt_handler->end_map();
4748  goto seqimap_finish;
4749  }
4750  else
4751  {
4752  _c4err("parse error");
4753  }
4754  }
     // state QMRK: expecting the key
4755  else if(has_any(QMRK))
4756  {
4757  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
4758  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4759  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4760  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
4761  const char first = rem.str[0];
4762  _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
4763  ScannedScalar sc;
4764  if(first == '\'')
4765  {
4766  _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
4767  sc = _scan_scalar_squot();
4768  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4769  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
     // key done: the colon comes next
4770  addrem_flags(RKCL, QMRK);
4771  goto seqimap_again;
4772  }
4773  else if(first == '"')
4774  {
4775  _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
4776  sc = _scan_scalar_dquot();
4777  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4778  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4779  addrem_flags(RKCL, QMRK);
4780  goto seqimap_again;
4781  }
4782  // block scalars (ie | and >) cannot appear in flow containers
4783  else if(_scan_scalar_plain_map_flow(&sc))
4784  {
4785  _c4dbgp("seqimap[QMRK]: it's a scalar.");
4786  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref);
4787  m_evt_handler->set_key_scalar_plain(maybe_filtered);
4788  addrem_flags(RKCL, QMRK);
4789  goto seqimap_again;
4790  }
4791  else if(first == '[')
4792  {
4793  _c4dbgp("seqimap[QMRK]: start child seqflow");
     // set RKCL before beginning the child container
4794  addrem_flags(RKCL, QMRK);
4795  m_evt_handler->begin_seq_key_flow();
4796  addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
4797  _set_indentation(m_evt_handler->m_parent->indref);
4798  _line_progressed(1);
4799  goto seqimap_finish;
4800  }
4801  else if(first == '{')
4802  {
4803  _c4dbgp("seqimap[QMRK]: start child mapflow");
     // set RKCL before beginning the child container
4804  addrem_flags(RKCL, QMRK);
4805  m_evt_handler->begin_map_key_flow();
4806  addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
4807  _set_indentation(m_evt_handler->m_parent->indref);
4808  _line_progressed(1);
4809  goto seqimap_finish;
4810  }
4811  else if(first == ',' || first == ']')
4812  {
4813  _c4dbgp("seqimap[QMRK]: finish without key.");
     // neither key nor value present: set both empty and end the map
4814  m_evt_handler->set_key_scalar_plain({});
4815  m_evt_handler->set_val_scalar_plain({});
4816  m_evt_handler->end_map();
4817  goto seqimap_finish;
4818  }
4819  else if(first == '&')
4820  {
     // the state flags are unchanged: the key itself is still expected
4821  csubstr anchor = _scan_anchor();
4822  _c4dbgp("seqimap[QMRK]: anchor!");
4823  m_evt_handler->set_key_anchor(anchor);
4824  }
4825  else if(first == '*')
4826  {
4827  csubstr ref = _scan_ref_seq();
4828  _c4dbgp("seqimap[QMRK]: ref!");
4829  m_evt_handler->set_key_ref(ref);
4830  addrem_flags(RKCL, QMRK);
4831  }
4832  else
4833  {
4834  _c4err("parse error");
4835  }
4836  }
     // state RKCL: expecting the ':' after the key
4837  else if(has_any(RKCL))
4838  {
4839  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
4840  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4841  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
4842  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
4843  const char first = rem.str[0];
4844  _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
4845  if(first == ':')
4846  {
4847  _c4dbgp("seqimap[RKCL]: found ':'");
4848  addrem_flags(RVAL, RKCL);
4849  _line_progressed(1);
4850  goto seqimap_again;
4851  }
     // this branch is taken for ']' as well as for ','
4852  else if(first == ',' || first == ']')
4853  {
4854  _c4dbgp("seqimap[RKCL]: found ','. finish without val");
4855  m_evt_handler->set_val_scalar_plain({});
4856  m_evt_handler->end_map();
4857  goto seqimap_finish;
4858  }
4859  else
4860  {
4861  _c4err("parse error");
4862  }
4863  }
4864
     // loop tail: fetch the next line when this one is used up, then
     // run the state machine again
4865  seqimap_again:
4866  _c4dbgt("seqimap: go again", 0);
4867  if(_finished_line())
4868  {
4869  if(C4_LIKELY(!_finished_file()))
4870  {
4871  _line_ended();
4872  _scan_line();
4873  _c4dbgnextline();
4874  }
4875  else
4876  {
4877  _c4err("parse error");
4878  }
4879  }
4880  goto seqimap_start;
4881
     // exit point
4882  seqimap_finish:
4883  _c4dbgp("seqimap: finish");
4884 }
4885 
4886 
4887 //-----------------------------------------------------------------------------
4888 
// Parse the contents of a flow-style sequence, ie the state in which
// RSEQ and FLOW are set and RKEY is not (asserted below).
//
// The function is a goto-driven loop. Each pass:
//   1. calls _handle_flow_skip_whitespace() (defined elsewhere),
//   2. reads the first char remaining on the current line,
//   3. dispatches on that char according to the active sub-state,
//      which is exactly one of:
//        RVAL - a value is expected next
//        RNXT - a value was just set; only ',', ']' or ':' are accepted
//   4. falls through to seqflow_again, which scans the next line when
//      the current one is used up, and jumps back to seqflow_start.
//
// It returns (label seqflow_finish) when the sequence is closed with
// ']', or from the branches that hand over to another kind of state: a
// child flow map ('{'), or RSEQIMAP ("seqimap" in the debug traces) for
// a map started by ':' or '?' inside the sequence.
// NOTE(review): presumably the caller then dispatches on the new flags;
// the dispatcher is not visible from this function.
//
// Errors: an unexpected char raises "parse error" through _c4err();
// reaching the end of the file before the closing ']' raises
// "missing terminating ]".
4889 template<class EventHandler>
4890 void ParseEngine<EventHandler>::_handle_seq_flow()
4891 {
4892 seqflow_start:
4893  _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_state->node_id, m_state->level, m_state->indref);
4894
      // state invariants: in a flow seq, not reading a key, and exactly
      // one of RVAL / RNXT is set
4895  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
4896  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
4897  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
4898  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
4899  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
4900  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->indref != npos);
4901
4902  _handle_flow_skip_whitespace();
4903  csubstr rem = m_state->line_contents.rem;
      // nothing left on this line: go fetch the next one
4904  if(!rem.len)
4905  goto seqflow_again;
4906
      // ---- RVAL: a value is expected ----
4907  if(has_any(RVAL))
4908  {
4909  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
4910  const char first = rem.str[0];
4911  ScannedScalar sc;
      // quoted scalars: scan, filter, pass to the handler, then expect
      // a separator (RNXT)
4912  if(first == '\'')
4913  {
4914  _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
4915  sc = _scan_scalar_squot();
4916  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4917  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4918  addrem_flags(RNXT, RVAL);
4919  }
4920  else if(first == '"')
4921  {
4922  _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
4923  sc = _scan_scalar_dquot();
4924  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4925  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4926  addrem_flags(RNXT, RVAL);
4927  }
4928  // block scalars (ie | and >) cannot appear in flow containers
      // the plain-scalar scan is attempted before the single-char token
      // checks, so the branches below are only reached when it returns
      // false
4929  else if(_scan_scalar_plain_seq_flow(&sc))
4930  {
4931  _c4dbgp("seqflow[RVAL]: it's a scalar.");
4932  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
4933  m_evt_handler->set_val_scalar_plain(maybe_filtered);
4934  addrem_flags(RNXT, RVAL);
4935  }
4936  else if(first == '[')
4937  {
      // nested flow seq. The flags are set to RNXT before
      // begin_seq_val_flow() and back to RVAL after it.
      // NOTE(review): presumably the first call marks the enclosing
      // seq (so it expects ',' or ']' once the child closes) and the
      // second applies to the newly begun child state - confirm
      // against the event handler.
      // No goto here: the child is also a flow seq, so the loop
      // continues in this function.
4938  _c4dbgp("seqflow[RVAL]: start child seqflow");
4939  addrem_flags(RNXT, RVAL);
4940  m_evt_handler->begin_seq_val_flow();
4941  _set_indentation(m_evt_handler->m_parent->indref);
4942  addrem_flags(RVAL, RNXT);
4943  _line_progressed(1);
4944  }
4945  else if(first == '{')
4946  {
      // nested flow map: switch the flags to RMAP|RKEY and leave this
      // function, which only handles RSEQ
4947  _c4dbgp("seqflow[RVAL]: start child mapflow");
4948  addrem_flags(RNXT, RVAL);
4949  m_evt_handler->begin_map_val_flow();
4950  _set_indentation(m_evt_handler->m_parent->indref);
4951  addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4952  _line_progressed(1);
4953  goto seqflow_finish;
4954  }
4955  else if(first == ']') // this happens on a trailing comma like ", ]"
4956  {
4957  _c4dbgp("seqflow[RVAL]: end!");
4958  _line_progressed(1);
4959  m_evt_handler->end_seq();
4960  goto seqflow_finish;
4961  }
4962  else if(first == '*')
4963  {
      // the value is a reference
4964  csubstr ref = _scan_ref_seq();
4965  _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
4966  m_evt_handler->set_val_ref(ref);
4967  addrem_flags(RNXT, RVAL);
4968  }
4969  else if(first == '&')
4970  {
      // anchor for the upcoming value. The flags are left in RVAL so
      // the value itself is read on the next pass - unless
      // _maybe_scan_following_comma() reports a comma, in which case
      // the anchored value is set to an empty scalar and the next
      // sibling is started.
4971  csubstr anchor = _scan_anchor();
4972  _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
4973  m_evt_handler->set_val_anchor(anchor);
4974  if(_maybe_scan_following_comma())
4975  {
4976  _c4dbgp("seqflow[RVAL]: empty scalar!");
4977  m_evt_handler->set_val_scalar_plain({});
4978  m_evt_handler->add_sibling();
4979  }
4980  }
4981  else if(first == '!')
4982  {
      // tag for the upcoming value; same handling as the anchor branch
      // above, plus validation through _check_tag()
4983  csubstr tag = _scan_tag();
4984  _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
4985  _check_tag(tag);
4986  m_evt_handler->set_val_tag(tag);
4987  if(_maybe_scan_following_comma())
4988  {
4989  _c4dbgp("seqflow[RVAL]: empty scalar!");
4990  m_evt_handler->set_val_scalar_plain({});
4991  m_evt_handler->add_sibling();
4992  }
4993  }
4994  else if(first == ':')
4995  {
      // a ':' where a value was expected: begin a map as this element,
      // give it an empty key, and hand over to the RSEQIMAP state to
      // read the value
4996  _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_state->node_id);
4997  addrem_flags(RNXT, RVAL);
4998  m_evt_handler->begin_map_val_flow();
4999  _set_indentation(m_evt_handler->m_parent->indref);
5000  m_evt_handler->set_key_scalar_plain({});
5001  addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5002  _line_progressed(1);
5003  goto seqflow_finish;
5004  }
5005  else if(first == '?')
5006  {
      // explicit-key token: begin a map as this element and hand over
      // to the RSEQIMAP|QMRK state; m_was_inside_qmrk records that a
      // '?' was seen
5007  _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5008  addrem_flags(RNXT, RVAL);
5009  m_was_inside_qmrk = true;
5010  m_evt_handler->begin_map_val_flow();
5011  _set_indentation(m_evt_handler->m_parent->indref);
5012  addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5013  _line_progressed(1);
5014  _maybe_skip_whitespace_tokens();
5015  goto seqflow_finish;
5016  }
5017  else
5018  {
5019  _c4err("parse error");
5020  }
5021  }
      // ---- RNXT: a value was just set; expect ',' or ']' or ':' ----
5022  else // RNXT
5023  {
5024  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5025  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5026  const char first = rem.str[0];
5027  if(first == ',')
5028  {
      // separator: start the next sibling and expect its value
5029  _c4dbgp("seqflow[RNXT]: expect next val");
5030  addrem_flags(RVAL, RNXT);
5031  m_evt_handler->add_sibling();
5032  _line_progressed(1);
5033  }
5034  else if(first == ']')
5035  {
5036  _c4dbgp("seqflow[RNXT]: end!");
5037  m_evt_handler->end_seq();
5038  _line_progressed(1);
5039  goto seqflow_finish;
5040  }
5041  else if(first == ':')
5042  {
      // a ':' after a value: the value just set is reinterpreted by
      // the handler as the first key of a new flow map; continue in
      // the RSEQIMAP state to read that key's value
5043  _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_state->node_id);
5044  m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5045  _set_indentation(m_evt_handler->m_parent->indref);
5046  _line_progressed(1);
5047  addrem_flags(RSEQIMAP|RVAL, RNXT);
5048  goto seqflow_finish;
5049  }
5050  else
5051  {
5052  _c4err("parse error");
5053  }
5054  }
5055
      // loop tail: when the current line is used up, scan the next one;
      // running out of file inside the sequence is an error
5056  seqflow_again:
5057  _c4dbgt("seqflow: go again", 0);
5058  if(_finished_line())
5059  {
5060  if(C4_LIKELY(!_finished_file()))
5061  {
5062  _line_ended();
5063  _scan_line();
5064  _c4dbgnextline();
5065  }
5066  else
5067  {
5068  _c4err("missing terminating ]");
5069  }
5070  }
5071  goto seqflow_start;
5072
5073  seqflow_finish:
5074  _c4dbgp("seqflow: finish");
5075 }
5076 
5077 
5078 //-----------------------------------------------------------------------------
5079 
5080 template<class EventHandler>
5081 void ParseEngine<EventHandler>::_handle_map_flow()
5082 {
5083 mapflow_start:
5084  _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_state->node_id, m_state->level, m_state->indref);
5085 
5086  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5087  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
5088  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5089  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5090 
5091  _handle_flow_skip_whitespace();
5092  csubstr rem = m_state->line_contents.rem;
5093  if(!rem.len)
5094  goto mapflow_again;
5095 
5096  if(has_any(RKEY))
5097  {
5098  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5099  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5100  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5101  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5102  const char first = rem.str[0];
5103  _c4dbgpf("mapflow[RKEY]: '{}'", first);
5104  ScannedScalar sc;
5105  if(first == '\'')
5106  {
5107  _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5108  sc = _scan_scalar_squot();
5109  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5110  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5111  addrem_flags(RKCL, RKEY|QMRK);
5112  }
5113  else if(first == '"')
5114  {
5115  _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5116  sc = _scan_scalar_dquot();
5117  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5118  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5119  addrem_flags(RKCL, RKEY|QMRK);
5120  }
5121  // block scalars (ie | and >) cannot appear in flow containers
5122  else if(_scan_scalar_plain_map_flow(&sc))
5123  {
5124  _c4dbgp("mapflow[RKEY]: plain scalar");
5125  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref);
5126  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5127  addrem_flags(RKCL, RKEY|QMRK);
5128  }
5129  else if(first == '?')
5130  {
5131  _c4dbgp("mapflow[RKEY]: explicit key");
5132  _line_progressed(1);
5133  addrem_flags(QMRK, RKEY);
5134  _maybe_skip_whitespace_tokens();
5135  }
5136  else if(first == ':')
5137  {
5138  _c4dbgp("mapflow[RKEY]: setting empty key");
5139  m_evt_handler->set_key_scalar_plain({});
5140  addrem_flags(RVAL, RKEY|QMRK);
5141  _line_progressed(1);
5142  _maybe_skip_whitespace_tokens();
5143  }
5144  else if(first == '}') // this happens on a trailing comma like ", }"
5145  {
5146  _c4dbgp("mapflow[RKEY]: end!");
5147  m_evt_handler->end_map();
5148  _line_progressed(1);
5149  goto mapflow_finish;
5150  }
5151  else if(first == '&')
5152  {
5153  csubstr anchor = _scan_anchor();
5154  _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5155  m_evt_handler->set_key_anchor(anchor);
5156  }
5157  else if(first == '*')
5158  {
5159  csubstr ref = _scan_ref_map();
5160  _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5161  m_evt_handler->set_key_ref(ref);
5162  addrem_flags(RKCL, RKEY);
5163  }
5164  else if(first == '[')
5165  {
5166  // RYML's tree cannot store container keys, but that's
5167  // handled inside the tree sink. Other sink types may be
5168  // able to handle it.
5169  _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5170  addrem_flags(RKCL, RKEY);
5171  m_evt_handler->begin_seq_key_flow();
5172  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5173  _set_indentation(m_evt_handler->m_parent->indref);
5174  _line_progressed(1);
5175  goto mapflow_finish;
5176  }
5177  else if(first == '{')
5178  {
5179  // RYML's tree cannot store container keys, but that's
5180  // handled inside the tree sink. Other sink types may be
5181  // able to handle it.
5182  _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5183  addrem_flags(RKCL, RKEY);
5184  m_evt_handler->begin_map_key_flow();
5185  addrem_flags(RKEY, RVAL|RKCL);
5186  _set_indentation(m_evt_handler->m_parent->indref);
5187  _line_progressed(1);
5188  // keep going in this function
5189  }
5190  else if(first == '!')
5191  {
5192  csubstr tag = _scan_tag();
5193  _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5194  _check_tag(tag);
5195  m_evt_handler->set_key_tag(tag);
5196  }
5197  else
5198  {
5199  _c4err("parse error");
5200  }
5201  }
5202  else if(has_any(RKCL)) // read the key colon
5203  {
5204  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5205  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5206  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5207  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5208  const char first = rem.str[0];
5209  _c4dbgpf("mapflow[RKCL]: '{}'", first);
5210  if(first == ':')
5211  {
5212  _c4dbgp("mapflow[RKCL]: found the colon");
5213  addrem_flags(RVAL, RKCL);
5214  _line_progressed(1);
5215  }
5216  else if(first == '}')
5217  {
5218  _c4dbgp("mapflow[RKCL]: end with missing val!");
5219  addrem_flags(RVAL, RKCL);
5220  m_evt_handler->set_val_scalar_plain({});
5221  m_evt_handler->end_map();
5222  _line_progressed(1);
5223  goto mapflow_finish;
5224  }
5225  else if(first == ',')
5226  {
5227  _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5228  m_evt_handler->set_val_scalar_plain({});
5229  m_evt_handler->add_sibling();
5230  addrem_flags(RKEY, RKCL);
5231  _line_progressed(1);
5232  }
5233  else
5234  {
5235  _c4err("parse error");
5236  }
5237  }
5238  else if(has_any(RVAL))
5239  {
5240  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5241  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5242  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5243  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5244  const char first = rem.str[0];
5245  _c4dbgpf("mapflow[RVAL]: '{}'", first);
5246  ScannedScalar sc;
5247  if(first == '\'')
5248  {
5249  _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5250  sc = _scan_scalar_squot();
5251  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5252  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5253  addrem_flags(RNXT, RVAL);
5254  }
5255  else if(first == '"')
5256  {
5257  _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5258  sc = _scan_scalar_dquot();
5259  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5260  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5261  addrem_flags(RNXT, RVAL);
5262  }
5263  // block scalars (ie | and >) cannot appear in flow containers
5264  else if(_scan_scalar_plain_map_flow(&sc))
5265  {
5266  _c4dbgp("mapflow[RVAL]: plain scalar.");
5267  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
5268  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5269  addrem_flags(RNXT, RVAL);
5270  }
5271  else if(first == '[')
5272  {
5273  _c4dbgp("mapflow[RVAL]: start val seqflow");
5274  addrem_flags(RNXT, RVAL);
5275  m_evt_handler->begin_seq_val_flow();
5276  _set_indentation(m_evt_handler->m_parent->indref);
5277  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5278  _line_progressed(1);
5279  goto mapflow_finish;
5280  }
5281  else if(first == '{')
5282  {
5283  _c4dbgp("mapflow[RVAL]: start val mapflow");
5284  addrem_flags(RNXT, RVAL);
5285  m_evt_handler->begin_map_val_flow();
5286  _set_indentation(m_evt_handler->m_parent->indref);
5287  addrem_flags(RKEY, RNXT);
5288  _line_progressed(1);
5289  // keep going in this function
5290  }
5291  else if(first == '}')
5292  {
5293  _c4dbgp("mapflow[RVAL]: end!");
5294  m_evt_handler->set_val_scalar_plain({});
5295  m_evt_handler->end_map();
5296  _line_progressed(1);
5297  goto mapflow_finish;
5298  }
5299  else if(first == '*')
5300  {
5301  csubstr ref = _scan_ref_map();
5302  _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5303  m_evt_handler->set_val_ref(ref);
5304  addrem_flags(RNXT, RVAL);
5305  }
5306  else if(first == '&')
5307  {
5308  csubstr anchor = _scan_anchor();
5309  _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5310  m_evt_handler->set_val_anchor(anchor);
5311  }
5312  else if(first == '!')
5313  {
5314  csubstr tag = _scan_tag();
5315  _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5316  _check_tag(tag);
5317  m_evt_handler->set_val_tag(tag);
5318  }
5319  else
5320  {
5321  _c4err("parse error");
5322  }
5323  }
5324  else if(has_any(RNXT))
5325  {
5326  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5327  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5328  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5329  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5330  _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
5331  if(rem.begins_with(','))
5332  {
5333  _c4dbgp("mapflow[RNXT]: expect next keyval");
5334  m_evt_handler->add_sibling();
5335  addrem_flags(RKEY, RNXT);
5336  _line_progressed(1);
5337  }
5338  else if(rem.begins_with('}'))
5339  {
5340  _c4dbgp("mapflow[RNXT]: end!");
5341  m_evt_handler->end_map();
5342  _line_progressed(1);
5343  goto mapflow_finish;
5344  }
5345  else
5346  {
5347  _c4err("parse error");
5348  }
5349  }
5350  else if(has_any(QMRK))
5351  {
5352  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
5353  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5354  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5355  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5356  const char first = rem.str[0];
5357  _c4dbgpf("mapflow[QMRK]: '{}'", first);
5358  ScannedScalar sc;
5359  if(first == '\'')
5360  {
5361  _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5362  sc = _scan_scalar_squot();
5363  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5364  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5365  addrem_flags(RKCL, QMRK);
5366  }
5367  else if(first == '"')
5368  {
5369  _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
5370  sc = _scan_scalar_dquot();
5371  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5372  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5373  addrem_flags(RKCL, QMRK);
5374  }
5375  // block scalars (ie | and >) cannot appear in flow containers
5376  else if(_scan_scalar_plain_map_flow(&sc))
5377  {
5378  _c4dbgp("mapflow[QMRK]: plain scalar");
5379  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref);
5380  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5381  addrem_flags(RKCL, QMRK);
5382  }
5383  else if(first == ':')
5384  {
5385  _c4dbgp("mapflow[QMRK]: setting empty key");
5386  m_evt_handler->set_key_scalar_plain({});
5387  addrem_flags(RVAL, QMRK);
5388  _line_progressed(1);
5389  _maybe_skip_whitespace_tokens();
5390  }
5391  else if(first == '}') // this happens on a trailing comma like ", }"
5392  {
5393  _c4dbgp("mapflow[QMRK]: end!");
5394  m_evt_handler->set_key_scalar_plain({});
5395  m_evt_handler->set_val_scalar_plain({});
5396  m_evt_handler->end_map();
5397  _line_progressed(1);
5398  goto mapflow_finish;
5399  }
5400  else if(first == '&')
5401  {
5402  csubstr anchor = _scan_anchor();
5403  _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5404  m_evt_handler->set_key_anchor(anchor);
5405  }
5406  else if(first == '*')
5407  {
5408  csubstr ref = _scan_ref_map();
5409  _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5410  m_evt_handler->set_key_ref(ref);
5411  addrem_flags(RKCL, QMRK);
5412  }
5413  else if(first == '[')
5414  {
5415  // RYML's tree cannot store container keys, but that's
5416  // handled inside the tree sink. Other sink types may be
5417  // able to handle it.
5418  _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
5419  addrem_flags(RKCL, QMRK);
5420  m_evt_handler->begin_seq_key_flow();
5421  addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5422  _set_indentation(m_evt_handler->m_parent->indref);
5423  _line_progressed(1);
5424  goto mapflow_finish;
5425  }
5426  else if(first == '{')
5427  {
5428  // RYML's tree cannot store container keys, but that's
5429  // handled inside the tree sink. Other sink types may be
5430  // able to handle it.
5431  _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
5432  addrem_flags(RKCL, QMRK);
5433  m_evt_handler->begin_map_key_flow();
5434  _set_indentation(m_evt_handler->m_parent->indref);
5435  addrem_flags(RKEY, RKCL);
5436  _line_progressed(1);
5437  // keep going in this function
5438  }
5439  else if(first == '!')
5440  {
5441  csubstr tag = _scan_tag();
5442  _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5443  _check_tag(tag);
5444  m_evt_handler->set_key_tag(tag);
5445  }
5446  else
5447  {
5448  _c4err("parse error");
5449  }
5450  }
5451 
5452  mapflow_again:
5453  _c4dbgt("mapflow: go again", 0);
5454  if(_finished_line())
5455  {
5456  if(C4_LIKELY(!_finished_file()))
5457  {
5458  _line_ended();
5459  _scan_line();
5460  _c4dbgnextline();
5461  }
5462  else
5463  {
5464  _c4err("missing terminating }");
5465  }
5466  }
5467  goto mapflow_start;
5468 
5469  mapflow_finish:
5470  _c4dbgp("mapflow: finish");
5471 }
5472 
5473 
5474 //-----------------------------------------------------------------------------
5475 
5476 template<class EventHandler>
5477 void ParseEngine<EventHandler>::_handle_seq_block()
5478 {
5479 seqblck_start:
5480  _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_state->node_id, m_state->level, m_state->indref);
5481 
5482  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
5483  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5484  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
5485  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
5486 
5487  _maybe_skip_comment();
5488  csubstr rem = m_state->line_contents.rem;
5489  if(!rem.len)
5490  goto seqblck_again;
5491 
5492  if(has_any(RVAL))
5493  {
5494  _c4dbgpf("seqblck[RVAL]: col={}", m_state->pos.col);
5495  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5496  if(m_state->at_line_beginning())
5497  {
5498  _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_state->indref, m_state->line_contents.indentation);
5499  if(m_state->indentation_ge())
5500  {
5501  _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_state->line_contents.indentation);
5502  _line_progressed(m_state->line_contents.indentation);
5503  rem = m_state->line_contents.rem;
5504  if(!rem.len)
5505  goto seqblck_again;
5506  }
5507  else if(m_state->indentation_lt())
5508  {
5509  _c4dbgp("seqblck[RVAL]: smaller indentation!");
5510  _handle_indentation_pop_from_block_seq();
5511  goto seqblck_finish;
5512  }
5513  else if(m_state->line_contents.indentation == npos)
5514  {
5515  _c4dbgp("seqblck[RVAL]: empty line!");
5516  _line_progressed(m_state->line_contents.rem.len);
5517  goto seqblck_again;
5518  }
5519  }
5520  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5521  else
5522  {
5523  // accomodate annotation on the previous line. eg:
5524  // - &elm
5525  // foo # <-- on this line
5526  // - &elm
5527  // &foo foo: bar # <-- on this line
5528  if(rem.str[0] == ' ')
5529  {
5530  if(_handle_indentation_from_annotations())
5531  {
5532  _c4dbgp("seqblck[RVAL]: annotations!");
5533  rem = m_state->line_contents.rem;
5534  if(!rem.len)
5535  goto seqblck_again;
5536  }
5537  }
5538  }
5539  #endif
5540  _RYML_CB_ASSERT(callbacks(), rem.len);
5541  _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_state->node_id);
5542  const char first = rem.str[0];
5543  const size_t startline = m_state->pos.line;
5544  // warning: the gcc optimizer on x86 builds is brittle with
5545  // this function:
5546  const size_t startindent = m_state->line_contents.current_col();
5547  ScannedScalar sc;
5548  if(first == '\'')
5549  {
5550  _c4dbgp("seqblck[RVAL]: single-quoted scalar");
5551  sc = _scan_scalar_squot();
5552  if(!_maybe_scan_following_colon())
5553  {
5554  _c4dbgp("seqblck[RVAL]: set as val");
5555  _handle_annotations_before_blck_val_scalar();
5556  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
5557  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5558  addrem_flags(RNXT, RVAL);
5559  }
5560  else
5561  {
5562  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5563  addrem_flags(RNXT, RVAL);
5564  _handle_annotations_before_start_mapblck(startline);
5565  m_evt_handler->begin_map_val_block();
5566  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5567  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
5568  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5569  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5570  _maybe_skip_whitespace_tokens();
5571  goto seqblck_finish;
5572  }
5573  }
5574  else if(first == '"')
5575  {
5576  _c4dbgp("seqblck[RVAL]: double-quoted scalar");
5577  sc = _scan_scalar_dquot();
5578  if(!_maybe_scan_following_colon())
5579  {
5580  _c4dbgp("seqblck[RVAL]: set as val");
5581  _handle_annotations_before_blck_val_scalar();
5582  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
5583  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5584  addrem_flags(RNXT, RVAL);
5585  }
5586  else
5587  {
5588  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5589  addrem_flags(RNXT, RVAL);
5590  _handle_annotations_before_start_mapblck(startline);
5591  m_evt_handler->begin_map_val_block();
5592  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5593  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
5594  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5595  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5596  _maybe_skip_whitespace_tokens();
5597  goto seqblck_finish;
5598  }
5599  }
5600  // block scalars can only appear as keys when in QMRK scope
5601  // (ie, after ? tokens), so no need to scan following colon in
5602  // here.
5603  else if(first == '|')
5604  {
5605  _c4dbgp("seqblck[RVAL]: block-literal scalar");
5606  ScannedBlock sb;
5607  _scan_block(&sb, m_state->indref + 1);
5608  _handle_annotations_before_blck_val_scalar();
5609  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5610  m_evt_handler->set_val_scalar_literal(maybe_filtered);
5611  addrem_flags(RNXT, RVAL);
5612  }
5613  else if(first == '>')
5614  {
5615  _c4dbgp("seqblck[RVAL]: block-folded scalar");
5616  ScannedBlock sb;
5617  _scan_block(&sb, m_state->indref + 1);
5618  _handle_annotations_before_blck_val_scalar();
5619  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5620  m_evt_handler->set_val_scalar_folded(maybe_filtered);
5621  addrem_flags(RNXT, RVAL);
5622  }
5623  else if(_scan_scalar_plain_seq_blck(&sc))
5624  {
5625  _c4dbgp("seqblck[RVAL]: plain scalar.");
5626  if(!_maybe_scan_following_colon())
5627  {
5628  _c4dbgp("seqblck[RVAL]: set as val");
5629  _handle_annotations_before_blck_val_scalar();
5630  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref); // VAL!
5631  m_evt_handler->set_val_scalar_plain(maybe_filtered);
5632  addrem_flags(RNXT, RVAL);
5633  }
5634  else
5635  {
5636  if(startindent > m_state->indref)
5637  {
5638  _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
5639  addrem_flags(RNXT, RVAL);
5640  _handle_annotations_before_start_mapblck(startline);
5641  m_evt_handler->begin_map_val_block();
5642  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5643  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref); // KEY!
5644  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5645  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5646  _maybe_skip_whitespace_tokens();
5647  goto seqblck_finish;
5648  }
5649  else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
5650  {
5651  _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
5652  m_evt_handler->set_val_scalar_plain({});
5653  m_evt_handler->end_seq();
5654  m_evt_handler->add_sibling();
5655  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref); // KEY!
5656  m_evt_handler->set_key_scalar_plain(maybe_filtered);
5657  addrem_flags(RVAL, RNXT|RKEY);
5658  _maybe_skip_whitespace_tokens();
5659  goto seqblck_finish;
5660  }
5661  else
5662  {
5663  _c4err("parse error");
5664  }
5665  }
5666  }
5667  else if(first == '[')
5668  {
5669  _c4dbgp("seqblck[RVAL]: start child seqflow");
5670  addrem_flags(RNXT, RVAL);
5671  m_evt_handler->begin_seq_val_flow();
5672  addrem_flags(FLOW|RVAL, BLCK|RNXT);
5673  _line_progressed(1);
5674  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5675  goto seqblck_finish;
5676  }
5677  else if(first == '{')
5678  {
5679  _c4dbgp("seqblck[RVAL]: start child mapflow");
5680  addrem_flags(RNXT, RVAL);
5681  _handle_annotations_before_blck_val_scalar();
5682  m_evt_handler->begin_map_val_flow();
5683  addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
5684  _line_progressed(1);
5685  _set_indentation(m_evt_handler->m_parent->indref + 1u);
5686  goto seqblck_finish;
5687  }
5688  else if(first == '-')
5689  {
5690  if(startindent == m_state->indref)
5691  {
5692  _c4dbgp("seqblck[RVAL]: prev val was empty");
5693  _handle_annotations_before_blck_val_scalar();
5694  m_evt_handler->set_val_scalar_plain({});
5695  // keep in RVAL, but for the next sibling
5696  m_evt_handler->add_sibling();
5697  }
5698  else
5699  {
5700  _c4dbgp("seqblck[RVAL]: start child seqblck");
5701  _RYML_CB_ASSERT(this->callbacks(), startindent > m_state->indref);
5702  addrem_flags(RNXT, RVAL);
5703  _handle_annotations_before_blck_val_scalar();
5704  m_evt_handler->begin_seq_val_block();
5705  addrem_flags(RVAL, RNXT);
5706  _save_indentation();
5707  // keep going on inside this function
5708  }
5709  _line_progressed(1);
5710  _maybe_skip_whitespace_tokens();
5711  }
5712  else if(first == ':')
5713  {
5714  _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
5715  addrem_flags(RNXT, RVAL);
5716  _handle_annotations_before_start_mapblck(startline);
5717  m_evt_handler->begin_map_val_block();
5718  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5719  m_evt_handler->set_key_scalar_plain({});
5720  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5721  _line_progressed(1);
5722  _maybe_skip_whitespace_tokens();
5723  goto seqblck_finish;
5724  }
5725  else if(first == '&')
5726  {
5727  const csubstr anchor = _scan_anchor();
5728  _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5729  // we need to buffer the anchors, as there may be two
5730  // consecutive anchors in here
5731  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
5732  }
5733  else if(first == '*')
5734  {
5735  csubstr ref = _scan_ref_seq();
5736  _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5737  if(!_maybe_scan_following_colon())
5738  {
5739  _c4dbgp("seqblck[RVAL]: set ref as val!");
5740  _handle_annotations_before_blck_val_scalar();
5741  m_evt_handler->set_val_ref(ref);
5742  addrem_flags(RNXT, RVAL);
5743  }
5744  else
5745  {
5746  _c4dbgp("seqblck[RVAL]: ref is key of map");
5747  addrem_flags(RNXT, RVAL);
5748  _handle_annotations_before_start_mapblck(startline);
5749  m_evt_handler->begin_map_val_block();
5750  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5751  m_evt_handler->set_key_ref(ref);
5752  addrem_flags(RMAP|RVAL, RSEQ|RNXT);
5753  _set_indentation(startindent);
5754  _maybe_skip_whitespace_tokens();
5755  goto seqblck_finish;
5756  }
5757  }
5758  else if(first == '!')
5759  {
5760  csubstr tag = _scan_tag();
5761  _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5762  // we need to buffer the tags, as there may be two
5763  // consecutive tags in here
5764  _add_annotation(&m_pending_tags, tag, startindent, startline);
5765  }
5766  else if(first == '?')
5767  {
5768  _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
5769  addrem_flags(RNXT, RVAL);
5770  m_was_inside_qmrk = true;
5771  m_evt_handler->begin_map_val_block();
5772  addrem_flags(RMAP|QMRK, RSEQ|RNXT);
5773  _save_indentation();
5774  _line_progressed(1);
5775  _maybe_skip_whitespace_tokens();
5776  goto seqblck_finish;
5777  }
5778  else
5779  {
5780  _c4err("parse error");
5781  }
5782  }
5783  else // RNXT
5784  {
5785  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5786  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5787  //
5788  // handle indentation
5789  //
5790  _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_state->indref, m_state->line_contents.indentation);
5791  if(C4_UNLIKELY(!_at_line_begin()))
5792  _c4err("parse error");
5793  if(m_state->indentation_ge())
5794  {
5795  _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_state->indref);
5796  _line_progressed(m_state->indref);
5797  _maybe_skip_whitespace_tokens();
5798  rem = m_state->line_contents.rem;
5799  if(!rem.len)
5800  goto seqblck_again;
5801  }
5802  else if(m_state->indentation_lt())
5803  {
5804  _c4dbgp("seqblck[RNXT]: smaller indentation!");
5805  _handle_indentation_pop_from_block_seq();
5806  if(has_all(RSEQ|BLCK))
5807  {
5808  _c4dbgp("seqblck[RNXT]: still seqblck!");
5809  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
5810  _line_progressed(m_state->line_contents.indentation);
5811  rem = m_state->line_contents.rem;
5812  if(!rem.len)
5813  goto seqblck_again;
5814  }
5815  else
5816  {
5817  _c4dbgp("seqblck[RNXT]: no longer seqblck!");
5818  goto seqblck_finish;
5819  }
5820  }
5821  else if(m_state->line_contents.indentation == npos)
5822  {
5823  _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_state->line_contents.rem);
5824  _line_progressed(m_state->line_contents.rem.len);
5825  rem = m_state->line_contents.rem;
5826  if(!rem.len)
5827  goto seqblck_again;
5828  }
5829  //
5830  // now handle the tokens
5831  //
5832  const char first = rem.str[0];
5833  _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_state->node_id);
5834  if(first == '-')
5835  {
5836  if(m_state->indref > 0 || m_state->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5837  {
5838  _c4dbgp("seqblck[RNXT]: expect next val");
5839  addrem_flags(RVAL, RNXT);
5840  m_evt_handler->add_sibling();
5841  _line_progressed(1);
5842  _maybe_skip_whitespace_tokens();
5843  }
5844  else
5845  {
5846  _c4dbgp("seqblck[RNXT]: start doc");
5847  _start_doc_suddenly();
5848  _line_progressed(3);
5849  _maybe_skip_whitespace_tokens();
5850  goto seqblck_finish;
5851  }
5852  }
5853  else if(first == ':')
5854  {
5855  // This happens for example in `- [a: b]: c` (after
5856  // terminating the seq, ie, after `]`). All other cases
5857  // (ie colon after scalars) are caught elsewhere (ie, in
5858  // RVAL state).
5859  auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5860  if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
5861  {
5862  _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
5863  m_evt_handler->end_seq();
5864  goto seqblck_finish;
5865  }
5866  else
5867  {
5868  _c4err("parse error");
5869  }
5870  }
5871  else if(first == '.')
5872  {
5873  _c4dbgp("seqblck[RNXT]: maybe doc?");
5874  csubstr rs = rem.sub(1);
5875  if(rs == ".." || rs.begins_with(".. "))
5876  {
5877  _c4dbgp("seqblck[RNXT]: end+start doc");
5878  _end_doc_suddenly();
5879  _line_progressed(3);
5880  _maybe_skip_whitespace_tokens();
5881  goto seqblck_finish;
5882  }
5883  else
5884  {
5885  _c4err("parse error");
5886  }
5887  }
5888  else
5889  {
5890  // may be an indentless sequence nested in a map...
5891  //if(m_evt_handler->m_stack.size() >= 2)
5892  #ifdef RYML_DBG
5893  char flagbuf_[128];
5894  for(auto const& s : m_evt_handler->m_stack)
5895  {
5896  _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
5897  }
5898  #endif
5899  if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_state->indref == m_evt_handler->m_parent->indref)
5900  {
5901  _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_state->node_id);
5902  _RYML_CB_ASSERT(this->callbacks(), m_state != m_evt_handler->m_parent);
5903  _handle_indentation_pop(m_evt_handler->m_parent);
5904  _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
5905  m_evt_handler->add_sibling();
5906  addrem_flags(RKEY, RNXT);
5907  goto seqblck_finish;
5908  }
5909  else //if(first != '*')
5910  {
5911  _c4err("parse error");
5912  }
5913  }
5914  }
5915 
5916  seqblck_again:
5917  _c4dbgt("seqblck: go again", 0);
5918  if(_finished_line())
5919  {
5920  _line_ended();
5921  _scan_line();
5922  if(_finished_file())
5923  {
5924  _c4dbgp("seqblck: finish!");
5925  _end_seq_blck();
5926  goto seqblck_finish;
5927  }
5928  _c4dbgnextline();
5929  }
5930  goto seqblck_start;
5931 
5932  seqblck_finish:
5933  _c4dbgp("seqblck: finish");
5934 }
5935 
5936 
5937 //-----------------------------------------------------------------------------
5938 
5939 template<class EventHandler>
5940 void ParseEngine<EventHandler>::_handle_map_block()
5941 {
5942 mapblck_start:
5943  _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_state->node_id, m_state->level, m_state->indref);
5944 
5945  // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
5946  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
5947  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
5948  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
5949  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
5950 
5951  _maybe_skip_comment();
5952  csubstr rem = m_state->line_contents.rem;
5953  if(!rem.len)
5954  goto mapblck_again;
5955 
5956  if(has_any(RKEY))
5957  {
5958  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
5959  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
5960  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
5961  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
5962  //
5963  // handle indentation
5964  //
5965  if(m_state->at_line_beginning())
5966  {
5967  if(m_state->indentation_eq())
5968  {
5969  _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_state->indref);
5970  _line_progressed(m_state->indref);
5971  rem = m_state->line_contents.rem;
5972  if(!rem.len)
5973  goto mapblck_again;
5974  }
5975  else if(m_state->indentation_lt())
5976  {
5977  _c4dbgp("mapblck[RKEY]: smaller indentation!");
5978  _handle_indentation_pop_from_block_map();
5979  _line_progressed(m_state->line_contents.indentation);
5980  if(has_all(RMAP|BLCK))
5981  {
5982  _c4dbgp("mapblck[RKEY]: still mapblck!");
5983  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
5984  rem = m_state->line_contents.rem;
5985  if(!rem.len)
5986  goto mapblck_again;
5987  }
5988  else
5989  {
5990  _c4dbgp("mapblck[RKEY]: no longer mapblck!");
5991  goto mapblck_finish;
5992  }
5993  }
5994  else
5995  {
5996  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->indentation_gt());
5997  _c4err("invalid indentation");
5998  }
5999  }
6000  //
6001  // now handle the tokens
6002  //
6003  const char first = rem.str[0];
6004  const size_t startline = m_state->pos.line;
6005  const size_t startindent = m_state->line_contents.current_col();
6006  _c4dbgpf("mapblck[RKEY]: '{}'", first);
6007  ScannedScalar sc;
6008  if(first == '\'')
6009  {
6010  _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6011  sc = _scan_scalar_squot();
6012  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6013  _handle_annotations_before_blck_key_scalar();
6014  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6015  addrem_flags(RVAL, RKEY);
6016  if(!_maybe_scan_following_colon())
6017  _c4err("could not find ':' colon after key");
6018  _maybe_skip_whitespace_tokens();
6019  }
6020  else if(first == '"')
6021  {
6022  _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6023  sc = _scan_scalar_dquot();
6024  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6025  _handle_annotations_before_blck_key_scalar();
6026  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6027  addrem_flags(RVAL, RKEY);
6028  if(!_maybe_scan_following_colon())
6029  _c4err("could not find ':' colon after key");
6030  _maybe_skip_whitespace_tokens();
6031  }
6032  // block scalars (| and >) can not be used as keys unless they
6033  // appear in an explicit QMRK scope (ie, after the ? token),
6034  else if(C4_UNLIKELY(first == '|'))
6035  {
6036  _c4err("block literal keys must be enclosed in '?'");
6037  }
6038  else if(C4_UNLIKELY(first == '>'))
6039  {
6040  _c4err("block literal keys must be enclosed in '?'");
6041  }
6042  else if(_scan_scalar_plain_map_blck(&sc))
6043  {
6044  _c4dbgp("mapblck[RKEY]: plain scalar");
6045  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref);
6046  _handle_annotations_before_blck_key_scalar();
6047  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6048  addrem_flags(RVAL, RKEY);
6049  if(!_maybe_scan_following_colon())
6050  _c4err("could not find ':' colon after key");
6051  _maybe_skip_whitespace_tokens();
6052  }
6053  else if(first == '?')
6054  {
6055  _c4dbgp("mapblck[RKEY]: key token!");
6056  addrem_flags(QMRK, RKEY);
6057  _line_progressed(1);
6058  _maybe_skip_whitespace_tokens();
6059  m_was_inside_qmrk = true;
6060  goto mapblck_again;
6061  }
6062  else if(first == ':')
6063  {
6064  _c4dbgp("mapblck[RKEY]: setting empty key");
6065  _handle_annotations_before_blck_key_scalar();
6066  m_evt_handler->set_key_scalar_plain({});
6067  addrem_flags(RVAL, RKEY);
6068  _line_progressed(1);
6069  _maybe_skip_whitespace_tokens();
6070  }
6071  else if(first == '*')
6072  {
6073  csubstr ref = _scan_ref_map();
6074  _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6075  _handle_annotations_before_blck_key_scalar();
6076  m_evt_handler->set_key_ref(ref);
6077  addrem_flags(RVAL, RKEY);
6078  if(!_maybe_scan_following_colon())
6079  _c4err("could not find ':' colon after key");
6080  _maybe_skip_whitespace_tokens();
6081  }
6082  else if(first == '&')
6083  {
6084  csubstr anchor = _scan_anchor();
6085  _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6086  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6087  }
6088  else if(first == '!')
6089  {
6090  csubstr tag = _scan_tag();
6091  _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6092  _add_annotation(&m_pending_tags, tag, startindent, startline);
6093  }
6094  else if(first == '[')
6095  {
6096  // RYML's tree cannot store container keys, but that's
6097  // handled inside the tree handler. Other handlers may be
6098  // able to handle it.
6099  _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6100  addrem_flags(RKCL, RKEY);
6101  _handle_annotations_before_blck_key_scalar();
6102  m_evt_handler->begin_seq_key_flow();
6103  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
6104  _line_progressed(1);
6105  _set_indentation(startindent);
6106  goto mapblck_finish;
6107  }
6108  else if(first == '{')
6109  {
6110  // RYML's tree cannot store container keys, but that's
6111  // handled inside the tree handler. Other handlers may be
6112  // able to handle it.
6113  _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6114  addrem_flags(RKCL, RKEY);
6115  _handle_annotations_before_blck_key_scalar();
6116  m_evt_handler->begin_map_key_flow();
6117  addrem_flags(FLOW|RKEY, BLCK|RKCL);
6118  _line_progressed(1);
6119  _set_indentation(startindent);
6120  goto mapblck_finish;
6121  }
6122  else if(first == '-')
6123  {
6124  _c4dbgp("mapblck[RKEY]: maybe doc?");
6125  if(m_state->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6126  {
6127  _c4dbgp("mapblck[RKEY]: end+start doc");
6128  _start_doc_suddenly();
6129  _line_progressed(3);
6130  _maybe_skip_whitespace_tokens();
6131  goto mapblck_finish;
6132  }
6133  else
6134  {
6135  _c4err("parse error");
6136  }
6137  }
6138  else if(first == '.')
6139  {
6140  _c4dbgp("mapblck[RKEY]: maybe end doc?");
6141  if(m_state->line_contents.indentation == 0 && _is_doc_end_token(rem))
6142  {
6143  _c4dbgp("mapblck[RKEY]: end doc");
6144  _end_doc_suddenly();
6145  _line_progressed(3);
6146  _maybe_skip_whitespace_tokens();
6147  goto mapblck_finish;
6148  }
6149  else
6150  {
6151  _c4err("parse error");
6152  }
6153  }
6155  else if(first == '\t')
6156  {
6157  _c4dbgp("mapblck[RKEY]: skip tabs");
6158  _maybe_skipchars('\t');
6159  })
6160  else
6161  {
6162  _c4err("parse error");
6163  }
6164  }
6165  else if(has_any(RKCL)) // read the key colon
6166  {
6167  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6168  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6169  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6170  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6171  //
6172  // handle indentation
6173  //
6174  if(m_state->at_line_beginning())
6175  {
6176  if(m_state->indentation_eq())
6177  {
6178  _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_state->indref);
6179  _line_progressed(m_state->indref);
6180  rem = m_state->line_contents.rem;
6181  if(!rem.len)
6182  goto mapblck_again;
6183  }
6184  else if(C4_UNLIKELY(m_state->indentation_lt()))
6185  {
6186  _c4err("invalid indentation");
6187  }
6188  }
6189  const char first = rem.str[0];
6190  _c4dbgpf("mapblck[RKCL]: '{}'", first);
6191  if(first == ':')
6192  {
6193  _c4dbgp("mapblck[RKCL]: found the colon");
6194  addrem_flags(RVAL, RKCL);
6195  _line_progressed(1);
6196  _maybe_skip_whitespace_tokens();
6197  }
6198  else if(first == '?')
6199  {
6200  _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
6201  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6202  m_evt_handler->set_val_scalar_plain({});
6203  m_evt_handler->add_sibling();
6204  addrem_flags(QMRK, RKCL);
6205  _line_progressed(1);
6206  _maybe_skip_whitespace_tokens();
6207  }
6208  else if(first == '-')
6209  {
6210  if(m_state->indref == 0 || m_state->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6211  {
6212  _c4dbgp("mapblck[RKCL]: end+start doc");
6213  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6214  _start_doc_suddenly();
6215  _line_progressed(3);
6216  _maybe_skip_whitespace_tokens();
6217  goto mapblck_finish;
6218  }
6219  else
6220  {
6221  _c4err("parse error");
6222  }
6223  }
6224  else if(first == '.')
6225  {
6226  _c4dbgp("mapblck[RKCL]: maybe end doc?");
6227  csubstr rs = rem.sub(1);
6228  if(rs == ".." || rs.begins_with(".. "))
6229  {
6230  _c4dbgp("mapblck[RKCL]: end+start doc");
6231  _end_doc_suddenly();
6232  _line_progressed(3);
6233  goto mapblck_finish;
6234  }
6235  else
6236  {
6237  _c4err("parse error");
6238  }
6239  }
6240  else if(m_was_inside_qmrk)
6241  {
6242  _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_state->indentation_eq());
6243  _c4dbgp("mapblck[RKCL]: missing :");
6244  m_evt_handler->set_val_scalar_plain({});
6245  m_evt_handler->add_sibling();
6246  m_was_inside_qmrk = false;
6247  addrem_flags(RKEY, RKCL);
6248  }
6249  else
6250  {
6251  _c4err("parse error");
6252  }
6253  }
6254  else if(has_any(RVAL))
6255  {
6256  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6257  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6258  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6259  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6260  //
6261  // handle indentation
6262  //
6263  if(m_state->at_line_beginning())
6264  {
6265  _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_state->indref, m_state->line_contents.indentation);
6266  m_state->more_indented = false;
6267  if(m_state->indref == npos)
6268  {
6269  _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6270  _set_indentation(m_state->line_contents.indentation);
6271  _line_progressed(m_state->indref);
6272  rem = m_state->line_contents.rem;
6273  if(!rem.len)
6274  goto mapblck_again;
6275  }
6276  else if(m_state->indentation_eq())
6277  {
6278  _c4dbgp("mapblck[RVAL]: skip indentation!");
6279  _line_progressed(m_state->indref);
6280  rem = m_state->line_contents.rem;
6281  if(!rem.len)
6282  goto mapblck_again;
6283  // TODO: this is valid:
6284  //
6285  // ```yaml
6286  // a:
6287  // b:
6288  // ---
6289  // a:
6290  // b
6291  // ---
6292  // a:
6293  // b: c
6294  // ```
6295  //
6296  // ... but this is not:
6297  //
6298  // ```yaml
6299  // a:
6300  // v
6301  // ---
6302  // a: b: c
6303  // ```
6304  //
6305  // here, we probably need to set a boolean on the state
6306  // to disambiguate between these cases.
6307  }
6308  else if(m_state->indentation_gt())
6309  {
6310  _c4dbgp("mapblck[RVAL]: more indented!");
6311  m_state->more_indented = true;
6312  _line_progressed(m_state->line_contents.indentation);
6313  rem = m_state->line_contents.rem;
6314  if(!rem.len)
6315  goto mapblck_again;
6316  }
6317  else if(m_state->indentation_lt())
6318  {
6319  _c4dbgp("mapblck[RVAL]: smaller indentation!");
6320  _handle_indentation_pop_from_block_map();
6321  if(has_all(RMAP|BLCK))
6322  {
6323  _c4dbgp("mapblck[RVAL]: still mapblck!");
6324  _line_progressed(m_state->line_contents.indentation);
6325  goto mapblck_again;
6326  }
6327  else
6328  {
6329  _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6330  goto mapblck_finish;
6331  }
6332  }
6333  else if(m_state->line_contents.indentation == npos)
6334  {
6335  _c4dbgp("mapblck[RVAL]: empty line!");
6336  _line_progressed(m_state->line_contents.rem.len);
6337  goto mapblck_again;
6338  }
6339  }
6340  //
6341  // now handle the tokens
6342  //
6343  const char first = rem.str[0];
6344  const size_t startline = m_state->pos.line;
6345  const size_t startindent = m_state->line_contents.current_col();
6346  _c4dbgpf("mapblck[RVAL]: '{}'", first);
6347  ScannedScalar sc;
6348  if(first == '\'')
6349  {
6350  _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6351  sc = _scan_scalar_squot();
6352  if(!_maybe_scan_following_colon())
6353  {
6354  _c4dbgp("mapblck[RVAL]: set as val");
6355  _handle_annotations_before_blck_val_scalar();
6356  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6357  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6358  addrem_flags(RNXT, RVAL);
6359  }
6360  else
6361  {
6362  if(startindent != m_state->indref)
6363  {
6364  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6365  _handle_annotations_before_start_mapblck(startline);
6366  addrem_flags(RNXT, RVAL);
6367  m_evt_handler->begin_map_val_block();
6368  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6369  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6370  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6371  _maybe_skip_whitespace_tokens();
6372  _set_indentation(m_state->line_contents.indentation);
6373  // keep the child state on RVAL
6374  addrem_flags(RVAL, RNXT);
6375  }
6376  else
6377  {
6378  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6379  m_evt_handler->set_val_scalar_plain({});
6380  m_evt_handler->add_sibling();
6381  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6382  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6383  // keep going on RVAL
6384  _maybe_skip_whitespace_tokens();
6385  }
6386  }
6387  }
6388  else if(first == '"')
6389  {
6390  _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6391  sc = _scan_scalar_dquot();
6392  if(!_maybe_scan_following_colon())
6393  {
6394  _c4dbgp("mapblck[RVAL]: set as val");
6395  _handle_annotations_before_blck_val_scalar();
6396  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6397  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6398  addrem_flags(RNXT, RVAL);
6399  }
6400  else
6401  {
6402  if(startindent != m_state->indref)
6403  {
6404  _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6405  _handle_annotations_before_start_mapblck(startline);
6406  addrem_flags(RNXT, RVAL);
6407  m_evt_handler->begin_map_val_block();
6408  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6409  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6410  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6411  _maybe_skip_whitespace_tokens();
6412  _set_indentation(m_state->line_contents.indentation);
6413  // keep the child state on RVAL
6414  addrem_flags(RVAL, RNXT);
6415  }
6416  else
6417  {
6418  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6419  m_evt_handler->set_val_scalar_plain({});
6420  m_evt_handler->add_sibling();
6421  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6422  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6423  // keep going on RVAL
6424  _maybe_skip_whitespace_tokens();
6425  }
6426  }
6427  }
6428  // block scalars can only appear as keys when in QMRK scope
6429  // (ie, after ? tokens), so no need to scan following colon
6430  else if(first == '|')
6431  {
6432  _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6433  ScannedBlock sb;
6434  _scan_block(&sb, m_state->indref + 1);
6435  _handle_annotations_before_blck_val_scalar();
6436  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6437  m_evt_handler->set_val_scalar_literal(maybe_filtered);
6438  addrem_flags(RNXT, RVAL);
6439  }
6440  else if(first == '>')
6441  {
6442  _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6443  ScannedBlock sb;
6444  _scan_block(&sb, m_state->indref + 1);
6445  _handle_annotations_before_blck_val_scalar();
6446  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6447  m_evt_handler->set_val_scalar_folded(maybe_filtered);
6448  addrem_flags(RNXT, RVAL);
6449  }
6450  else if(_scan_scalar_plain_map_blck(&sc))
6451  {
6452  _c4dbgp("mapblck[RVAL]: plain scalar.");
6453  if(!_maybe_scan_following_colon())
6454  {
6455  _c4dbgp("mapblck[RVAL]: set as val");
6456  _handle_annotations_before_blck_val_scalar();
6457  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_state->indref); // VAL!
6458  m_evt_handler->set_val_scalar_plain(maybe_filtered);
6459  addrem_flags(RNXT, RVAL);
6460  }
6461  else
6462  {
6463  if(startindent != m_state->indref)
6464  {
6465  _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_state->indref);
6466  addrem_flags(RNXT, RVAL);
6467  _handle_annotations_before_start_mapblck(startline);
6468  m_evt_handler->begin_map_val_block();
6469  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6470  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref); // KEY!
6471  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6472  _maybe_skip_whitespace_tokens();
6473  _set_indentation(m_state->line_contents.indentation);
6474  // keep the child state on RVAL
6475  addrem_flags(RVAL, RNXT);
6476  }
6477  else
6478  {
6479  _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
6480  _handle_annotations_before_blck_val_scalar();
6481  m_evt_handler->set_val_scalar_plain({});
6482  m_evt_handler->add_sibling();
6483  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref); // KEY!
6484  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6485  // keep going on RVAL
6486  _maybe_skip_whitespace_tokens();
6487  }
6488  }
6489  }
6490  else if(first == '-')
6491  {
6492  if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
6493  {
6494  _c4dbgp("mapblck[RVAL]: start val seqblck");
6495  addrem_flags(RNXT, RVAL);
6496  _handle_annotations_before_blck_val_scalar();
6497  m_evt_handler->begin_seq_val_block();
6498  addrem_flags(RSEQ|RVAL, RMAP|RNXT);
6499  _set_indentation(startindent);
6500  _line_progressed(1);
6501  _maybe_skip_whitespace_tokens();
6502  goto mapblck_finish;
6503  }
6504  else if(m_state->indref == 0 || m_state->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6505  {
6506  _c4dbgp("mapblck[RVAL]: end+start doc");
6507  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6508  _start_doc_suddenly();
6509  _line_progressed(3);
6510  _maybe_skip_whitespace_tokens();
6511  goto mapblck_finish;
6512  }
6513  else
6514  {
6515  _c4err("parse error");
6516  }
6517  }
6518  else if(first == '[')
6519  {
6520  _c4dbgp("mapblck[RVAL]: start val seqflow");
6521  addrem_flags(RNXT, RVAL);
6522  _handle_annotations_before_blck_val_scalar();
6523  m_evt_handler->begin_seq_val_flow();
6524  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK);
6525  _set_indentation(m_state->indref + 1u);
6526  _line_progressed(1);
6527  goto mapblck_finish;
6528  }
6529  else if(first == '{')
6530  {
6531  _c4dbgp("mapblck[RVAL]: start val mapflow");
6532  addrem_flags(RNXT, RVAL);
6533  _handle_annotations_before_blck_val_scalar();
6534  m_evt_handler->begin_map_val_flow();
6535  addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
6536  m_state->scalar_col = m_state->line_contents.indentation;
6537  _set_indentation(m_state->indref + 1u);
6538  _line_progressed(1);
6539  goto mapblck_finish;
6540  }
6541  else if(first == '*')
6542  {
6543  csubstr ref = _scan_ref_map();
6544  _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6545  if(startindent == m_state->indref)
6546  {
6547  _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
6548  m_evt_handler->set_val_ref(ref);
6549  addrem_flags(RNXT, RVAL);
6550  }
6551  else
6552  {
6553  _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_state->indref);
6554  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_state->indref);
6555  if(_maybe_scan_following_colon())
6556  {
6557  _c4dbgp("mapblck[RVAL]: start child map, block");
6558  addrem_flags(RNXT, RVAL);
6559  _handle_annotations_before_blck_val_scalar();
6560  m_evt_handler->begin_map_val_block();
6561  m_evt_handler->set_key_ref(ref);
6562  _set_indentation(startindent);
6563  // keep going in RVAL
6564  addrem_flags(RVAL, RNXT);
6565  }
6566  else
6567  {
6568  _c4dbgp("mapblck[RVAL]: was val ref");
6569  _handle_annotations_before_blck_val_scalar();
6570  m_evt_handler->set_val_ref(ref);
6571  addrem_flags(RNXT, RVAL);
6572  }
6573  }
6574  _maybe_skip_whitespace_tokens();
6575  }
6576  else if(first == '&')
6577  {
6578  csubstr anchor = _scan_anchor();
6579  _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6580  if(startindent == m_state->indref)
6581  {
6582  _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
6583  m_evt_handler->set_val_scalar_plain({});
6584  m_evt_handler->add_sibling();
6585  addrem_flags(RKEY, RVAL);
6586  }
6587  // we need to buffer the anchors, as there may be two
6588  // consecutive anchors in here
6589  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6590  }
6591  else if(first == '!')
6592  {
6593  csubstr tag = _scan_tag();
6594  _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6595  if(startindent == m_state->indref)
6596  {
6597  _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
6598  _handle_annotations_before_blck_val_scalar();
6599  m_evt_handler->set_val_scalar_plain({});
6600  m_evt_handler->add_sibling();
6601  addrem_flags(RKEY, RVAL);
6602  }
6603  // we need to buffer the tags, as there may be two
6604  // consecutive tags in here
6605  _add_annotation(&m_pending_tags, tag, startindent, startline);
6606  }
6607  else if(first == '?')
6608  {
6609  if(startindent == m_state->indref)
6610  {
6611  _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
6612  _handle_annotations_before_blck_val_scalar();
6613  m_evt_handler->set_val_scalar_plain({});
6614  m_evt_handler->add_sibling();
6615  addrem_flags(QMRK, RVAL);
6616  }
6617  else if(startindent > m_state->indref)
6618  {
6619  _c4dbgp("mapblck[RVAL]: start val mapblck");
6620  addrem_flags(RNXT, RVAL);
6621  _handle_annotations_before_blck_val_scalar();
6622  m_evt_handler->begin_map_val_block();
6623  addrem_flags(QMRK|BLCK, RNXT);
6624  _set_indentation(startindent);
6625  }
6626  else
6627  {
6628  _c4err("parse error");
6629  }
6630  m_was_inside_qmrk = true;
6631  _line_progressed(1);
6632  _maybe_skip_whitespace_tokens();
6633  goto mapblck_again;
6634  }
6635  else if(first == ':')
6636  {
6637  if(startindent == m_state->indref)
6638  {
6639  _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
6640  m_evt_handler->set_val_scalar_plain({});
6641  m_evt_handler->add_sibling();
6642  m_evt_handler->set_key_scalar_plain({});
6643  _line_progressed(1);
6644  _maybe_skip_whitespace_tokens();
6645  goto mapblck_again;
6646  }
6647  else
6648  {
6649  _c4err("parse error");
6650  }
6651  }
6652  else if(first == '.')
6653  {
6654  _c4dbgp("mapblck[RVAL]: maybe doc?");
6655  csubstr rs = rem.sub(1);
6656  if(rs == ".." || rs.begins_with(".. "))
6657  {
6658  _c4dbgp("seqblck[RVAL]: end doc expl");
6659  _end_doc_suddenly();
6660  _line_progressed(3);
6661  _maybe_skip_whitespace_tokens();
6662  goto mapblck_finish;
6663  }
6664  else
6665  {
6666  _c4err("parse error");
6667  }
6668  }
6670  else if(first == '\t')
6671  {
6672  _c4dbgp("mapblck[RVAL]: skip tabs");
6673  _maybe_skipchars('\t');
6674  })
6675  else
6676  {
6677  _c4err("parse error");
6678  }
6679  }
6680  else if(has_any(RNXT))
6681  {
6682  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6683  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6684  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6685  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
6686  //
6687  // handle indentation
6688  //
6689  if(m_state->at_line_beginning())
6690  {
6691  _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_state->indref, m_state->line_contents.indentation);
6692  if(m_state->indentation_eq())
6693  {
6694  _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_state->indref);
6695  _line_progressed(m_state->indref);
6696  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6697  m_evt_handler->add_sibling();
6698  addrem_flags(RKEY, RNXT);
6699  goto mapblck_again;
6700  }
6701  else if(m_state->indentation_lt())
6702  {
6703  _c4dbgp("mapblck[RNXT]: smaller indentation!");
6704  _handle_indentation_pop_from_block_map();
6705  if(has_all(RMAP|BLCK))
6706  {
6707  _line_progressed(m_state->line_contents.indentation);
6708  if(!has_any(RKCL))
6709  {
6710  _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
6711  m_evt_handler->add_sibling();
6712  addrem_flags(RKEY, RNXT);
6713  }
6714  goto mapblck_again;
6715  }
6716  else
6717  {
6718  goto mapblck_finish;
6719  }
6720  }
6721  }
6722  //
6723  // handle tokens
6724  //
6725  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6726  const char first = rem.str[0];
6727  _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
6728  if(first == ':')
6729  {
6730  if(m_state->more_indented)
6731  {
6732  _c4dbgp("mapblck[RNXT]: start child block map");
6733  C4_NOT_IMPLEMENTED();
6734  //m_evt_handler->actually_as_block_map();
6735  _line_progressed(1);
6736  _set_indentation(m_state->scalar_col);
6737  m_state->more_indented = false;
6738  goto mapblck_again;
6739  }
6740  else
6741  {
6742  _c4err("parse error");
6743  }
6744  }
6745  else if(first == ' ')
6746  {
6747  _c4dbgp("mapblck[RNXT]: skip spaces");
6748  _maybe_skip_whitespace_tokens();
6749  }
6750  else
6751  {
6752  _c4err("parse error");
6753  }
6754  }
6755  else if(has_any(QMRK))
6756  {
6757  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
6758  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
6759  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
6760  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
6761  //
6762  // handle indentation
6763  //
6764  if(m_state->at_line_beginning())
6765  {
6766  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_state->line_contents.indentation != npos);
6767  if(m_state->indentation_eq())
6768  {
6769  _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_state->indref);
6770  _line_progressed(m_state->indref);
6771  rem = m_state->line_contents.rem;
6772  if(!rem.len)
6773  goto mapblck_again;
6774  }
6775  else if(m_state->indentation_lt())
6776  {
6777  _c4dbgp("mapblck[QMRK]: smaller indentation!");
6778  _handle_indentation_pop_from_block_map();
6779  _line_progressed(m_state->line_contents.indentation);
6780  if(has_all(RMAP|BLCK))
6781  {
6782  _c4dbgp("mapblck[QMRK]: still mapblck!");
6783  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6784  rem = m_state->line_contents.rem;
6785  if(!rem.len)
6786  goto mapblck_again;
6787  }
6788  else
6789  {
6790  _c4dbgp("mapblck[QMRK]: no longer mapblck!");
6791  goto mapblck_finish;
6792  }
6793  }
6794  // indentation can be larger in QMRK state
6795  else
6796  {
6797  _c4dbgp("mapblck[QMRK]: larger indentation !");
6798  _line_progressed(m_state->line_contents.indentation);
6799  rem = m_state->line_contents.rem;
6800  if(!rem.len)
6801  goto mapblck_again;
6802  }
6803  }
6804  //
6805  // now handle the tokens
6806  //
6807  const char first = rem.str[0];
6808  const size_t startline = m_state->pos.line;
6809  const size_t startindent = m_state->line_contents.current_col();
6810  _c4dbgpf("mapblck[QMRK]: '{}'", first);
6811  ScannedScalar sc;
6812  if(first == '\'')
6813  {
6814  _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
6815  sc = _scan_scalar_squot();
6816  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6817  if(!_maybe_scan_following_colon())
6818  {
6819  _c4dbgp("mapblck[QMRK]: set as key");
6820  _handle_annotations_before_blck_key_scalar();
6821  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6822  addrem_flags(RKCL, QMRK);
6823  }
6824  else
6825  {
6826  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6827  addrem_flags(RKCL, QMRK);
6828  _handle_annotations_before_start_mapblck_as_key();
6829  m_evt_handler->begin_map_key_block();
6830  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6831  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6832  _maybe_skip_whitespace_tokens();
6833  _set_indentation(startindent);
6834  // keep the child state on RVAL
6835  addrem_flags(RVAL, RKCL|QMRK);
6836  }
6837  }
6838  else if(first == '"')
6839  {
6840  _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
6841  sc = _scan_scalar_dquot();
6842  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6843  if(!_maybe_scan_following_colon())
6844  {
6845  _c4dbgp("mapblck[QMRK]: set as key");
6846  _handle_annotations_before_blck_key_scalar();
6847  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6848  addrem_flags(RKCL, QMRK);
6849  }
6850  else
6851  {
6852  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6853  addrem_flags(RKCL, QMRK);
6854  _handle_annotations_before_start_mapblck_as_key();
6855  m_evt_handler->begin_map_key_block();
6856  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6857  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6858  _maybe_skip_whitespace_tokens();
6859  _set_indentation(startindent);
6860  // keep the child state on RVAL
6861  addrem_flags(RVAL, RKCL|QMRK);
6862  }
6863  }
6864  else if(first == '|')
6865  {
6866  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6867  ScannedBlock sb;
6868  _scan_block(&sb, m_state->indref + 1);
6869  csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
6870  _handle_annotations_before_blck_key_scalar();
6871  m_evt_handler->set_key_scalar_literal(maybe_filtered);
6872  addrem_flags(RKCL, QMRK);
6873  }
6874  else if(first == '>')
6875  {
6876  _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
6877  ScannedBlock sb;
6878  _scan_block(&sb, m_state->indref + 1);
6879  csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
6880  _handle_annotations_before_blck_key_scalar();
6881  m_evt_handler->set_key_scalar_folded(maybe_filtered);
6882  addrem_flags(RKCL, QMRK);
6883  }
6884  else if(_scan_scalar_plain_map_blck(&sc))
6885  {
6886  _c4dbgp("mapblck[QMRK]: plain scalar");
6887  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_state->indref); // KEY!
6888  if(!_maybe_scan_following_colon())
6889  {
6890  _c4dbgp("mapblck[QMRK]: set as key");
6891  _handle_annotations_before_blck_key_scalar();
6892  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6893  addrem_flags(RKCL, QMRK);
6894  }
6895  else
6896  {
6897  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
6898  addrem_flags(RKCL, QMRK);
6899  _handle_annotations_before_start_mapblck_as_key();
6900  m_evt_handler->begin_map_key_block();
6901  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6902  m_evt_handler->set_key_scalar_plain(maybe_filtered);
6903  _maybe_skip_whitespace_tokens();
6904  _set_indentation(startindent);
6905  // keep the child state on RVAL
6906  addrem_flags(RVAL, RKCL|QMRK);
6907  }
6908  }
6909  else if(first == ':')
6910  {
6911  if(startindent == m_state->indref)
6912  {
6913  _c4dbgp("mapblck[QMRK]: empty key");
6914  addrem_flags(RVAL, QMRK);
6915  _handle_annotations_before_blck_key_scalar();
6916  m_evt_handler->set_key_scalar_plain({});
6917  _line_progressed(1);
6918  _maybe_skip_whitespace_tokens();
6919  }
6920  else
6921  {
6922  _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
6923  addrem_flags(RKCL, QMRK);
6924  _handle_annotations_before_start_mapblck_as_key();
6925  m_evt_handler->begin_map_key_block();
6926  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6927  m_evt_handler->set_key_scalar_plain({});
6928  _line_progressed(1);
6929  _maybe_skip_whitespace_tokens();
6930  _set_indentation(startindent);
6931  // keep the child state on RVAL
6932  addrem_flags(RVAL, RKCL|QMRK);
6933  }
6934  }
6935  else if(first == '*')
6936  {
6937  csubstr ref = _scan_ref_map();
6938  _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
6939  if(!_maybe_scan_following_colon())
6940  {
6941  _c4dbgp("mapblck[QMRK]: set ref as key");
6942  _handle_annotations_before_blck_key_scalar();
6943  m_evt_handler->set_key_ref(ref);
6944  addrem_flags(RKCL, QMRK);
6945  }
6946  else
6947  {
6948  _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
6949  addrem_flags(RKCL, QMRK);
6950  _handle_annotations_before_blck_key_scalar();
6951  m_evt_handler->begin_map_key_block();
6952  m_evt_handler->set_key_ref(ref);
6953  _set_indentation(startindent);
6954  // keep the child state on RVAL
6955  addrem_flags(RVAL, RKCL|QMRK);
6956  }
6957  _maybe_skip_whitespace_tokens();
6958  }
6959  else if(first == '&')
6960  {
6961  csubstr anchor = _scan_anchor();
6962  _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6963  _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6964  }
6965  else if(first == '!')
6966  {
6967  csubstr tag = _scan_tag();
6968  _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
6969  _add_annotation(&m_pending_tags, tag, startindent, startline);
6970  }
6971  else if(first == '-')
6972  {
6973  _c4dbgp("mapblck[QMRK]: maybe doc?");
6974  csubstr rs = rem.sub(1);
6975  if(rs == "--" || rs.begins_with("-- "))
6976  {
6977  _c4dbgp("mapblck[QMRK]: end+start doc");
6978  _start_doc_suddenly();
6979  _line_progressed(3);
6980  }
6981  else
6982  {
6983  _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
6984  addrem_flags(RKCL, RKEY|QMRK);
6985  m_evt_handler->begin_seq_key_block();
6986  addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
6987  _set_indentation(startindent);
6988  _line_progressed(1);
6989  }
6990  _maybe_skip_whitespace_tokens();
6991  goto mapblck_finish;
6992  }
6993  else if(first == '[')
6994  {
6995  _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
6996  addrem_flags(RKCL, RKEY|QMRK);
6997  m_evt_handler->begin_seq_key_flow();
6998  addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
6999  _set_indentation(m_evt_handler->m_parent->indref);
7000  _line_progressed(1);
7001  goto mapblck_finish;
7002  }
7003  else if(first == '{')
7004  {
7005  _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
7006  addrem_flags(RKCL, RKEY|QMRK);
7007  m_evt_handler->begin_map_key_flow();
7008  addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
7009  _set_indentation(m_evt_handler->m_parent->indref);
7010  _line_progressed(1);
7011  goto mapblck_finish;
7012  }
7013  else if(first == '?')
7014  {
7015  _c4dbgp("mapblck[QMRK]: another QMRK '?'");
7016  m_evt_handler->set_key_scalar_plain({});
7017  m_evt_handler->set_val_scalar_plain({});
7018  m_evt_handler->add_sibling();
7019  _line_progressed(1);
7020  }
7021  else if(first == '.')
7022  {
7023  _c4dbgp("mapblck[QMRK]: maybe end doc?");
7024  csubstr rs = rem.sub(1);
7025  if(rs == ".." || rs.begins_with(".. "))
7026  {
7027  _c4dbgp("mapblck[QMRK]: end+start doc");
7028  _end_doc_suddenly();
7029  _line_progressed(3);
7030  goto mapblck_finish;
7031  }
7032  else
7033  {
7034  _c4err("parse error");
7035  }
7036  }
7037  else
7038  {
7039  _c4err("parse error");
7040  }
7041  }
7042 
7043  mapblck_again:
7044  _c4dbgt("mapblck: again", 0);
7045  if(_finished_line())
7046  {
7047  _line_ended();
7048  _scan_line();
7049  if(_finished_file())
7050  {
7051  _c4dbgp("mapblck: file finished!");
7052  _end_map_blck();
7053  goto mapblck_finish;
7054  }
7055  _c4dbgnextline();
7056  }
7057  goto mapblck_start;
7058 
7059  mapblck_finish:
7060  _c4dbgp("mapblck: finish");
7061 }
7062 
7063 
7064 //-----------------------------------------------------------------------------
7065 
7066 template<class EventHandler>
7067 void ParseEngine<EventHandler>::_handle_unk_json()
7068 {
7069  _c4dbgpf("handle_unk_json indref={} target={}", m_state->indref, m_state->node_id);
7070 
7071  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7072  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7073 
7074  _maybe_skip_comment();
7075  csubstr rem = m_state->line_contents.rem;
7076  if(!rem.len)
7077  return;
7078 
7079  size_t pos = rem.first_not_of(" \t");
7080  if(pos)
7081  {
7082  pos = pos != npos ? pos : rem.len;
7083  _c4dbgpf("skipping indentation of {}", pos);
7084  _line_progressed(pos);
7085  rem = m_state->line_contents.rem;
7086  if(!rem.len)
7087  return;
7088  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7089  }
7090 
7091  if(rem.begins_with('['))
7092  {
7093  _c4dbgp("it's a seq");
7094  m_evt_handler->check_trailing_doc_token();
7095  _maybe_begin_doc();
7096  m_evt_handler->begin_seq_val_flow();
7097  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7098  _set_indentation(m_state->line_contents.current_col(rem));
7099  m_doc_empty = false;
7100  _line_progressed(1);
7101  }
7102  else if(rem.begins_with('{'))
7103  {
7104  _c4dbgp("it's a map");
7105  m_evt_handler->check_trailing_doc_token();
7106  _maybe_begin_doc();
7107  m_evt_handler->begin_map_val_flow();
7108  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7109  m_doc_empty = false;
7110  _set_indentation(m_state->line_contents.current_col(rem));
7111  _line_progressed(1);
7112  }
7113  else
7114  {
7115  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7116  _maybe_skip_whitespace_tokens();
7117  csubstr s = m_state->line_contents.rem;
7118  if(!s.len)
7119  return;
7120  const size_t startindent = m_state->line_contents.indentation; // save
7121  const char first = s.str[0];
7122  ScannedScalar sc;
7123  if(first == '"')
7124  {
7125  _c4dbgp("runk_json: scanning double-quoted scalar");
7126  m_evt_handler->check_trailing_doc_token();
7127  _maybe_begin_doc();
7128  add_flags(RDOC);
7129  m_doc_empty = false;
7130  sc = _scan_scalar_dquot();
7131  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7132  if(!_maybe_scan_following_colon())
7133  {
7134  _c4dbgp("runk_json: set as val");
7135  _handle_annotations_before_blck_val_scalar();
7136  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7137  }
7138  else
7139  {
7140  _c4err("parse error");
7141  }
7142  }
7143  else if(_scan_scalar_plain_unk(&sc))
7144  {
7145  _c4dbgp("runk_json: got a plain scalar");
7146  m_evt_handler->check_trailing_doc_token();
7147  _maybe_begin_doc();
7148  add_flags(RDOC);
7149  m_doc_empty = false;
7150  if(!_maybe_scan_following_colon())
7151  {
7152  _c4dbgp("runk_json: set as val");
7153  _handle_annotations_before_blck_val_scalar();
7154  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7155  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7156  }
7157  else
7158  {
7159  _c4err("parse error");
7160  }
7161  }
7162  else
7163  {
7164  _c4err("parse error");
7165  }
7166  }
7167 }
7168 
7169 
7170 //-----------------------------------------------------------------------------
7171 
7172 template<class EventHandler>
7173 void ParseEngine<EventHandler>::_handle_unk()
7174 {
7175  _c4dbgpf("handle_unk indref={} target={}", m_state->indref, m_state->node_id);
7176 
7177  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
7178  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
7179 
7180  _maybe_skip_comment();
7181  csubstr rem = m_state->line_contents.rem;
7182  if(!rem.len)
7183  return;
7184 
7185  size_t pos = rem.first_not_of(" \t");
7186  if(pos)
7187  {
7188  pos = pos != npos ? pos : rem.len;
7189  _c4dbgpf("skipping {} whitespace characters", pos);
7190  _line_progressed(pos);
7191  rem = m_state->line_contents.rem;
7192  if(!rem.len)
7193  return;
7194  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7195  }
7196 
7197  if(m_state->line_contents.indentation == 0u && _at_line_begin())
7198  {
7199  const char first = rem.str[0];
7200  _c4dbgp("rtop: zero indent + at line begin");
7201  if(first == '-')
7202  {
7203  _c4dbgp("rtop: suspecting doc");
7204  if(_is_doc_begin_token(rem))
7205  {
7206  _c4dbgp("rtop: begin doc");
7207  _maybe_end_doc();
7208  _begin2_doc_expl();
7209  _set_indentation(0);
7211  _line_progressed(3u);
7212  _maybe_skip_whitespace_tokens();
7213  return;
7214  }
7215  }
7216  else if(first == '.')
7217  {
7218  _c4dbgp("rtop: suspecting doc end");
7219  if(_is_doc_end_token(rem))
7220  {
7221  _c4dbgp("rtop: end doc");
7222  if(has_any(RDOC))
7223  {
7224  _end2_doc_expl();
7225  }
7226  else
7227  {
7228  _c4dbgp("rtop: ignore end doc");
7229  }
7230  addrem_flags(NDOC|RUNK, RDOC);
7231  _line_progressed(3u);
7232  _maybe_skip_whitespace_tokens();
7233  return;
7234  }
7235  }
7236  else if(first == '%')
7237  {
7238  _c4dbgpf("directive: {}", rem);
7239  if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
7240  _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
7241  _handle_directive(rem);
7242  return;
7243  }
7244  }
7245 
7246  /* no else-if! */
7247  char first = rem.str[0];
7248 
7249  if(first == '[')
7250  {
7251  m_evt_handler->check_trailing_doc_token();
7252  _maybe_begin_doc();
7253  m_doc_empty = false;
7254  const size_t startindent = m_state->line_contents.current_col(rem);
7255  if(C4_LIKELY( ! _annotations_require_key_container()))
7256  {
7257  _c4dbgp("it's a seq, flow");
7258  _handle_annotations_before_blck_val_scalar();
7259  m_evt_handler->begin_seq_val_flow();
7260  addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
7261  _set_indentation(startindent);
7262  }
7263  else
7264  {
7265  _c4dbgp("start new block map, set flow seq as key (!)");
7266  _handle_annotations_before_start_mapblck(m_state->pos.line);
7267  m_evt_handler->begin_map_val_block();
7268  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7269  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_state->pos.line);
7270  m_evt_handler->begin_seq_key_flow();
7271  addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
7272  _set_indentation(startindent);
7273  }
7274  _line_progressed(1);
7275  }
7276  else if(first == '{')
7277  {
7278  m_evt_handler->check_trailing_doc_token();
7279  _maybe_begin_doc();
7280  m_doc_empty = false;
7281  const size_t startindent = m_state->line_contents.current_col(rem);
7282  if(C4_LIKELY( ! _annotations_require_key_container()))
7283  {
7284  _c4dbgp("it's a map, flow");
7285  _handle_annotations_before_blck_val_scalar();
7286  m_evt_handler->begin_map_val_flow();
7287  addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7288  _set_indentation(startindent);
7289  }
7290  else
7291  {
7292  _c4dbgp("start new block map, set flow map as key (!)");
7293  _handle_annotations_before_start_mapblck(m_state->pos.line);
7294  m_evt_handler->begin_map_val_block();
7295  addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
7296  _handle_annotations_and_indentation_after_start_mapblck(startindent, m_state->pos.line);
7297  m_evt_handler->begin_map_key_flow();
7298  addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
7299  _set_indentation(startindent);
7300  }
7301  _line_progressed(1);
7302  }
7303  else if(first == '-' && _is_blck_token(rem))
7304  {
7305  _c4dbgp("it's a seq, block");
7306  m_evt_handler->check_trailing_doc_token();
7307  _maybe_begin_doc();
7308  _handle_annotations_before_blck_val_scalar();
7309  m_evt_handler->begin_seq_val_block();
7310  addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7311  m_doc_empty = false;
7312  _set_indentation(m_state->line_contents.current_col(rem));
7313  _line_progressed(1);
7314  _maybe_skip_whitespace_tokens();
7315  }
7316  else if(first == '?' && _is_blck_token(rem))
7317  {
7318  _c4dbgp("it's a map + this key is complex");
7319  m_evt_handler->check_trailing_doc_token();
7320  _maybe_begin_doc();
7321  _handle_annotations_before_blck_val_scalar();
7322  m_evt_handler->begin_map_val_block();
7323  addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7324  m_doc_empty = false;
7325  m_was_inside_qmrk = true;
7326  _save_indentation();
7327  _line_progressed(1);
7328  _maybe_skip_whitespace_tokens();
7329  }
7330  else if(first == ':' && _is_blck_token(rem))
7331  {
7332  if(m_doc_empty)
7333  {
7334  _c4dbgp("it's a map with an empty key");
7335  m_evt_handler->check_trailing_doc_token();
7336  _maybe_begin_doc();
7337  _handle_annotations_before_blck_val_scalar();
7338  m_evt_handler->begin_map_val_block();
7339  m_evt_handler->set_key_scalar_plain({});
7340  m_doc_empty = false;
7341  _save_indentation();
7342  }
7343  else
7344  {
7345  _c4dbgp("actually prev val is a key!");
7346  size_t prev_indentation = m_state->indref;
7347  m_evt_handler->actually_val_is_first_key_of_new_map_block();
7348  _set_indentation(prev_indentation);
7349  }
7350  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7351  _line_progressed(1);
7352  _maybe_skip_whitespace_tokens();
7353  }
7354  else if(first == '&')
7355  {
7356  csubstr anchor = _scan_anchor();
7357  _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
7358  m_evt_handler->check_trailing_doc_token();
7359  _maybe_begin_doc();
7360  const size_t indentation = m_state->line_contents.current_col(rem);
7361  const size_t line = m_state->pos.line;
7362  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7363  _set_indentation(m_state->line_contents.current_col(rem));
7364  m_doc_empty = false;
7365  }
7366  else if(first == '*')
7367  {
7368  csubstr ref = _scan_ref_map();
7369  _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
7370  m_evt_handler->check_trailing_doc_token();
7371  _maybe_begin_doc();
7372  m_doc_empty = false;
7373  if(!_maybe_scan_following_colon())
7374  {
7375  _c4dbgp("runk: set val ref");
7376  _handle_annotations_before_blck_val_scalar();
7377  m_evt_handler->set_val_ref(ref);
7378  }
7379  else
7380  {
7381  _c4dbgp("runk: start new block map, set ref as key");
7382  const size_t startindent = m_state->line_contents.indentation; // save
7383  const size_t startline = m_state->pos.line; // save
7384  _handle_annotations_before_start_mapblck(startline);
7385  m_evt_handler->begin_map_val_block();
7386  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7387  m_evt_handler->set_key_ref(ref);
7388  _maybe_skip_whitespace_tokens();
7389  _set_indentation(startindent);
7390  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7391  }
7392  }
7393  else if(first == '!')
7394  {
7395  csubstr tag = _scan_tag();
7396  _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7397  // we need to buffer the tags, as there may be two
7398  // consecutive tags in here
7399  const size_t indentation = m_state->line_contents.current_col(rem);
7400  const size_t line = m_state->pos.line;
7401  _add_annotation(&m_pending_tags, tag, indentation, line);
7402  }
7403  else
7404  {
7405  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7406  _maybe_skip_whitespace_tokens();
7407  csubstr s = m_state->line_contents.rem;
7408  if(!s.len)
7409  return;
7410  const size_t startindent = m_state->line_contents.indentation; // save
7411  const size_t startline = m_state->pos.line; // save
7412  first = s.str[0];
7413  ScannedScalar sc;
7414  if(first == '\'')
7415  {
7416  _c4dbgp("runk: scanning single-quoted scalar");
7417  m_evt_handler->check_trailing_doc_token();
7418  _maybe_begin_doc();
7419  add_flags(RDOC);
7420  m_doc_empty = false;
7421  sc = _scan_scalar_squot();
7422  if(!_maybe_scan_following_colon())
7423  {
7424  _c4dbgp("runk: set as val");
7425  _handle_annotations_before_blck_val_scalar();
7426  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7427  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7428  }
7429  else
7430  {
7431  _c4dbgp("runk: start new block map, set scalar as key");
7432  _handle_annotations_before_start_mapblck(startline);
7433  m_evt_handler->begin_map_val_block();
7434  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7435  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7436  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7437  _maybe_skip_whitespace_tokens();
7438  _set_indentation(startindent);
7439  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7440  }
7441  }
7442  else if(first == '"')
7443  {
7444  _c4dbgp("runk: scanning double-quoted scalar");
7445  m_evt_handler->check_trailing_doc_token();
7446  _maybe_begin_doc();
7447  add_flags(RDOC);
7448  m_doc_empty = false;
7449  sc = _scan_scalar_dquot();
7450  if(!_maybe_scan_following_colon())
7451  {
7452  _c4dbgp("runk: set as val");
7453  _handle_annotations_before_blck_val_scalar();
7454  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7455  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7456  }
7457  else
7458  {
7459  _c4dbgp("runk: start new block map, set double-quoted scalar as key");
7460  _handle_annotations_before_start_mapblck(startline);
7461  m_evt_handler->begin_map_val_block();
7462  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7463  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7464  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7465  _maybe_skip_whitespace_tokens();
7466  _set_indentation(startindent);
7467  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7468  }
7469  }
7470  else if(first == '|')
7471  {
7472  _c4dbgp("runk: scanning block-literal scalar");
7473  m_evt_handler->check_trailing_doc_token();
7474  _maybe_begin_doc();
7475  add_flags(RDOC);
7476  m_doc_empty = false;
7477  ScannedBlock sb;
7478  _scan_block(&sb, startindent);
7479  if(C4_LIKELY(!_maybe_scan_following_colon()))
7480  {
7481  _c4dbgp("runk: set as val");
7482  _handle_annotations_before_blck_val_scalar();
7483  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7484  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7485  }
7486  else
7487  {
7488  _c4err("block literal keys must be enclosed in '?'");
7489  }
7490  }
7491  else if(first == '>')
7492  {
7493  _c4dbgp("runk: scanning block-folded scalar");
7494  m_evt_handler->check_trailing_doc_token();
7495  _maybe_begin_doc();
7496  add_flags(RDOC);
7497  m_doc_empty = false;
7498  ScannedBlock sb;
7499  _scan_block(&sb, startindent);
7500  if(C4_LIKELY(!_maybe_scan_following_colon()))
7501  {
7502  _c4dbgp("runk: set as val");
7503  _handle_annotations_before_blck_val_scalar();
7504  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7505  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7506  }
7507  else
7508  {
7509  _c4err("block folded keys must be enclosed in '?'");
7510  }
7511  }
7512  else if(_scan_scalar_plain_unk(&sc))
7513  {
7514  _c4dbgp("runk: got a plain scalar");
7515  m_evt_handler->check_trailing_doc_token();
7516  _maybe_begin_doc();
7517  add_flags(RDOC);
7518  m_doc_empty = false;
7519  if(!_maybe_scan_following_colon())
7520  {
7521  _c4dbgp("runk: set as val");
7522  _handle_annotations_before_blck_val_scalar();
7523  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7524  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7525  }
7526  else
7527  {
7528  _c4dbgp("runk: start new block map, set scalar as key");
7529  _handle_annotations_before_start_mapblck(startline);
7530  m_evt_handler->begin_map_val_block();
7531  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7532  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7533  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7534  _maybe_skip_whitespace_tokens();
7535  _set_indentation(startindent);
7536  addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
7537  }
7538  }
7539  }
7540 }
7541 
7542 
7543 //-----------------------------------------------------------------------------
7544 
7545 template<class EventHandler>
7546 C4_COLD void ParseEngine<EventHandler>::_handle_usty()
7547 {
7548  _c4dbgpf("handle_usty target={}", m_state->indref, m_state->node_id);
7549 
7550  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
7551 
7552  #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7553  if(has_any(RNXT))
7554  {
7555  _c4dbgp("usty[RNXT]: finishing!");
7556  _end_stream();
7557  }
7558  #endif
7559 
7560  _maybe_skip_comment();
7561  csubstr rem = m_state->line_contents.rem;
7562  if(!rem.len)
7563  return;
7564 
7565  size_t pos = rem.first_not_of(" \t");
7566  if(pos)
7567  {
7568  pos = pos != npos ? pos : rem.len;
7569  _c4dbgpf("skipping indentation of {}", pos);
7570  _line_progressed(pos);
7571  rem = m_state->line_contents.rem;
7572  if(!rem.len)
7573  return;
7574  _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
7575  }
7576 
7577  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7578  size_t startindent = m_state->line_contents.indentation; // save
7579  char first = rem.str[0];
7580  if(has_any(RSEQ)) // destination is a sequence
7581  {
7582  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
7583  _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
7584  if(first == '[')
7585  {
7586  _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
7587  add_flags(RNXT);
7588  m_evt_handler->_push();
7589  addrem_flags(FLOW|RVAL, RNXT|USTY);
7590  _set_indentation(startindent);
7591  _line_progressed(1);
7592  _maybe_skip_whitespace_tokens();
7593  }
7594  else if(first == '-' && _is_blck_token(rem))
7595  {
7596  _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
7597  add_flags(RNXT);
7598  m_evt_handler->_push();
7599  addrem_flags(BLCK|RVAL, RNXT|USTY);
7600  _set_indentation(startindent);
7601  _line_progressed(1);
7602  _maybe_skip_whitespace_tokens();
7603  }
7604  else
7605  {
7606  _c4err("can only parse a seq into an existing seq");
7607  }
7608  }
7609  else if(has_any(RMAP)) // destination is a map
7610  {
7611  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7612  _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
7613  if(first == '{')
7614  {
7615  _c4dbgp("usty[RMAP]: it's a flow map. merging it");
7616  add_flags(RNXT);
7617  _handle_annotations_before_blck_val_scalar();
7618  m_evt_handler->_push();
7619  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7620  _set_indentation(startindent);
7621  _line_progressed(1);
7622  _maybe_skip_whitespace_tokens();
7623  }
7624  else if(first == '?' && _is_blck_token(rem))
7625  {
7626  _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
7627  add_flags(RNXT);
7628  _handle_annotations_before_blck_val_scalar();
7629  m_evt_handler->_push();
7630  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7631  m_was_inside_qmrk = true;
7632  _save_indentation();
7633  _line_progressed(1);
7634  _maybe_skip_whitespace_tokens();
7635  }
7636  else if(first == ':' && _is_blck_token(rem))
7637  {
7638  _c4dbgp("usty[RMAP]: it's a map with an empty key");
7639  add_flags(RNXT);
7640  _handle_annotations_before_blck_val_scalar();
7641  m_evt_handler->_push();
7642  m_evt_handler->set_key_scalar_plain({});
7643  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7644  _save_indentation();
7645  _line_progressed(1);
7646  _maybe_skip_whitespace_tokens();
7647  }
7648  else if(rem.begins_with('&'))
7649  {
7650  csubstr anchor = _scan_anchor();
7651  _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7652  const size_t indentation = m_state->line_contents.current_col(rem);
7653  const size_t line = m_state->pos.line;
7654  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7655  _set_indentation(m_state->line_contents.current_col(rem));
7656  }
7657  else if(first == '*')
7658  {
7659  csubstr ref = _scan_ref_map();
7660  _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7661  if(!_maybe_scan_following_colon())
7662  {
7663  _c4err("cannot read a VAL to a map");
7664  }
7665  else
7666  {
7667  _c4dbgp("usty[RMAP]: start new block map, set ref as key");
7668  const size_t startline = m_state->pos.line; // save
7669  add_flags(RNXT);
7670  _handle_annotations_before_start_mapblck(startline);
7671  m_evt_handler->_push();
7672  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7673  m_evt_handler->set_key_ref(ref);
7674  _maybe_skip_whitespace_tokens();
7675  _set_indentation(startindent);
7676  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7677  }
7678  }
7679  else if(first == '!')
7680  {
7681  csubstr tag = _scan_tag();
7682  _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7683  // we need to buffer the tags, as there may be two
7684  // consecutive tags in here
7685  const size_t indentation = m_state->line_contents.current_col(rem);
7686  const size_t line = m_state->pos.line;
7687  _add_annotation(&m_pending_tags, tag, indentation, line);
7688  }
7689  else if(first == '[' || (first == '-' && _is_blck_token(rem)))
7690  {
7691  _c4err("cannot parse a seq into an existing map");
7692  }
7693  else
7694  {
7695  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7696  startindent = m_state->line_contents.indentation; // save
7697  const size_t startline = m_state->pos.line; // save
7698  ScannedScalar sc;
7699  _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7700  if(first == '\'')
7701  {
7702  _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
7703  sc = _scan_scalar_squot();
7704  if(!_maybe_scan_following_colon())
7705  {
7706  _c4err("cannot read a VAL to a map");
7707  }
7708  else
7709  {
7710  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7711  add_flags(RNXT);
7712  _handle_annotations_before_start_mapblck(startline);
7713  m_evt_handler->_push();
7714  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7715  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7716  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7717  _set_indentation(startindent);
7718  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7719  _maybe_skip_whitespace_tokens();
7720  }
7721  }
7722  else if(first == '"')
7723  {
7724  _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
7725  sc = _scan_scalar_dquot();
7726  if(!_maybe_scan_following_colon())
7727  {
7728  _c4err("cannot read a VAL to a map");
7729  }
7730  else
7731  {
7732  _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
7733  add_flags(RNXT);
7734  _handle_annotations_before_start_mapblck(startline);
7735  m_evt_handler->_push();
7736  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7737  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7738  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7739  _set_indentation(startindent);
7740  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7741  _maybe_skip_whitespace_tokens();
7742  }
7743  }
7744  else if(first == '|')
7745  {
7746  _c4err("block literal keys must be enclosed in '?'");
7747  }
7748  else if(first == '>')
7749  {
7750  _c4err("block literal keys must be enclosed in '?'");
7751  }
7752  else if(_scan_scalar_plain_unk(&sc))
7753  {
7754  _c4dbgp("usty[RMAP]: got a plain scalar");
7755  if(!_maybe_scan_following_colon())
7756  {
7757  _c4err("cannot read a VAL to a map");
7758  }
7759  else
7760  {
7761  _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
7762  add_flags(RNXT);
7763  _handle_annotations_before_start_mapblck(startline);
7764  m_evt_handler->_push();
7765  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7766  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7767  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7768  _set_indentation(startindent);
7769  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7770  _maybe_skip_whitespace_tokens();
7771  }
7772  }
7773  else
7774  {
7775  _c4err("parse error");
7776  }
7777  }
7778  }
7779  else // destination is unknown
7780  {
7781  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
7782  _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
7783  if(first == '[')
7784  {
7785  _c4dbgp("usty[UNK]: it's a flow seq");
7786  add_flags(RNXT);
7787  _handle_annotations_before_blck_val_scalar();
7788  m_evt_handler->begin_seq_val_flow();
7789  addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
7790  _set_indentation(startindent);
7791  _line_progressed(1);
7792  _maybe_skip_whitespace_tokens();
7793  }
7794  else if(first == '-' && _is_blck_token(rem))
7795  {
7796  _c4dbgp("usty[UNK]: it's a block seq");
7797  add_flags(RNXT);
7798  _handle_annotations_before_blck_val_scalar();
7799  m_evt_handler->begin_seq_val_block();
7800  addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
7801  _set_indentation(startindent);
7802  _line_progressed(1);
7803  _maybe_skip_whitespace_tokens();
7804  }
7805  else if(first == '{')
7806  {
7807  _c4dbgp("usty[UNK]: it's a flow map");
7808  add_flags(RNXT);
7809  _handle_annotations_before_blck_val_scalar();
7810  m_evt_handler->begin_map_val_flow();
7811  addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
7812  _set_indentation(startindent);
7813  _line_progressed(1);
7814  _maybe_skip_whitespace_tokens();
7815  }
7816  else if(first == '?' && _is_blck_token(rem))
7817  {
7818  _c4dbgp("usty[UNK]: it's a map + this key is complex");
7819  add_flags(RNXT);
7820  _handle_annotations_before_blck_val_scalar();
7821  m_evt_handler->begin_map_val_block();
7822  addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
7823  m_was_inside_qmrk = true;
7824  _save_indentation();
7825  _line_progressed(1);
7826  _maybe_skip_whitespace_tokens();
7827  }
7828  else if(first == ':' && _is_blck_token(rem))
7829  {
7830  _c4dbgp("usty[UNK]: it's a map with an empty key");
7831  add_flags(RNXT);
7832  _handle_annotations_before_blck_val_scalar();
7833  m_evt_handler->begin_map_val_block();
7834  m_evt_handler->set_key_scalar_plain({});
7835  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7836  _save_indentation();
7837  _line_progressed(1);
7838  _maybe_skip_whitespace_tokens();
7839  }
7840  else if(first == '&')
7841  {
7842  csubstr anchor = _scan_anchor();
7843  _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7844  const size_t indentation = m_state->line_contents.current_col(rem);
7845  const size_t line = m_state->pos.line;
7846  _add_annotation(&m_pending_anchors, anchor, indentation, line);
7847  _set_indentation(m_state->line_contents.current_col(rem));
7848  }
7849  else if(first == '*')
7850  {
7851  csubstr ref = _scan_ref_map();
7852  _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
7853  if(!_maybe_scan_following_colon())
7854  {
7855  _c4dbgp("usty[UNK]: set val ref");
7856  _handle_annotations_before_blck_val_scalar();
7857  m_evt_handler->set_val_ref(ref);
7858  }
7859  else
7860  {
7861  _c4dbgp("usty[UNK]: start new block map, set ref as key");
7862  const size_t startline = m_state->pos.line; // save
7863  add_flags(RNXT);
7864  _handle_annotations_before_start_mapblck(startline);
7865  m_evt_handler->begin_map_val_block();
7866  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7867  m_evt_handler->set_key_ref(ref);
7868  _maybe_skip_whitespace_tokens();
7869  _set_indentation(startindent);
7870  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7871  }
7872  }
7873  else if(first == '!')
7874  {
7875  csubstr tag = _scan_tag();
7876  _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
7877  // we need to buffer the tags, as there may be two
7878  // consecutive tags in here
7879  const size_t indentation = m_state->line_contents.current_col(rem);
7880  const size_t line = m_state->pos.line;
7881  _add_annotation(&m_pending_tags, tag, indentation, line);
7882  }
7883  else
7884  {
7885  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
7886  startindent = m_state->line_contents.indentation; // save
7887  const size_t startline = m_state->pos.line; // save
7888  first = rem.str[0];
7889  ScannedScalar sc;
7890  _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
7891  if(first == '\'')
7892  {
7893  _c4dbgp("usty[UNK]: scanning single-quoted scalar");
7894  sc = _scan_scalar_squot();
7895  if(!_maybe_scan_following_colon())
7896  {
7897  _c4dbgp("usty[UNK]: set as val");
7898  _handle_annotations_before_blck_val_scalar();
7899  csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7900  m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7901  _end_stream();
7902  }
7903  else
7904  {
7905  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7906  add_flags(RNXT);
7907  _handle_annotations_before_start_mapblck(startline);
7908  m_evt_handler->begin_map_val_block();
7909  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7910  csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7911  m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7912  _set_indentation(startindent);
7913  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7914  _maybe_skip_whitespace_tokens();
7915  }
7916  }
7917  else if(first == '"')
7918  {
7919  _c4dbgp("usty[UNK]: scanning double-quoted scalar");
7920  sc = _scan_scalar_dquot();
7921  if(!_maybe_scan_following_colon())
7922  {
7923  _c4dbgp("usty[UNK]: set as val");
7924  _handle_annotations_before_blck_val_scalar();
7925  csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7926  m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7927  _end_stream();
7928  }
7929  else
7930  {
7931  _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
7932  add_flags(RNXT);
7933  _handle_annotations_before_start_mapblck(startline);
7934  m_evt_handler->begin_map_val_block();
7935  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7936  csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7937  m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7938  _set_indentation(startindent);
7939  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7940  _maybe_skip_whitespace_tokens();
7941  }
7942  }
7943  else if(first == '|')
7944  {
7945  _c4dbgp("usty[UNK]: scanning block-literal scalar");
7946  ScannedBlock sb;
7947  _scan_block(&sb, startindent);
7948  _c4dbgp("usty[UNK]: set as val");
7949  _handle_annotations_before_blck_val_scalar();
7950  csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7951  m_evt_handler->set_val_scalar_literal(maybe_filtered);
7952  _end_stream();
7953  }
7954  else if(first == '>')
7955  {
7956  _c4dbgp("usty[UNK]: scanning block-folded scalar");
7957  ScannedBlock sb;
7958  _scan_block(&sb, startindent);
7959  _c4dbgp("usty[UNK]: set as val");
7960  _handle_annotations_before_blck_val_scalar();
7961  csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7962  m_evt_handler->set_val_scalar_folded(maybe_filtered);
7963  _end_stream();
7964  }
7965  else if(_scan_scalar_plain_unk(&sc))
7966  {
7967  _c4dbgp("usty[UNK]: got a plain scalar");
7968  if(!_maybe_scan_following_colon())
7969  {
7970  _c4dbgp("usty[UNK]: set as val");
7971  _handle_annotations_before_blck_val_scalar();
7972  csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7973  m_evt_handler->set_val_scalar_plain(maybe_filtered);
7974  _end_stream();
7975  }
7976  else
7977  {
7978  _c4dbgp("usty[UNK]: start new block map, set scalar as key");
7979  add_flags(RNXT);
7980  _handle_annotations_before_start_mapblck(startline);
7981  m_evt_handler->begin_map_val_block();
7982  _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7983  csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7984  m_evt_handler->set_key_scalar_plain(maybe_filtered);
7985  _set_indentation(startindent);
7986  addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
7987  _maybe_skip_whitespace_tokens();
7988  }
7989  }
7990  else
7991  {
7992  _c4err("parse error");
7993  }
7994  }
7995  }
7996 }
7997 
7998 
7999 //-----------------------------------------------------------------------------
8000 
8001 template<class EventHandler>
8002 void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
8003 {
8004  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8005  m_file = filename;
8006  m_buf = src;
8007  _reset();
8008  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8009  m_evt_handler->begin_stream();
8010  while( ! _finished_file())
8011  {
8012  _scan_line();
8013  while( ! _finished_line())
8014  {
8015  _c4dbgnextline();
8016  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8017  if(has_any(RSEQ))
8018  {
8019  _handle_seq_json();
8020  }
8021  else if(has_any(RMAP))
8022  {
8023  _handle_map_json();
8024  }
8025  else if(has_any(RUNK))
8026  {
8027  _handle_unk_json();
8028  }
8029  else
8030  {
8031  _c4err("internal error");
8032  }
8033  }
8034  if(_finished_file())
8035  break; // it may have finished because of multiline blocks
8036  _line_ended();
8037  }
8038  _end_stream();
8039  m_evt_handler->finish_parse();
8040 }
8041 
8042 
8043 //-----------------------------------------------------------------------------
8044 
8045 template<class EventHandler>
8046 void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
8047 {
8048  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8049  m_file = filename;
8050  m_buf = src;
8051  _reset();
8052  m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
8053  m_evt_handler->begin_stream();
8054  while( ! _finished_file())
8055  {
8056  _scan_line();
8057  while( ! _finished_line())
8058  {
8059  _c4dbgnextline();
8060  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8061  if(has_any(FLOW))
8062  {
8063  if(has_none(RSEQIMAP))
8064  {
8065  if(has_any(RSEQ))
8066  {
8067  _handle_seq_flow();
8068  }
8069  else
8070  {
8071  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8072  _handle_map_flow();
8073  }
8074  }
8075  else
8076  {
8077  _handle_seq_imap();
8078  }
8079  }
8080  else if(has_any(BLCK))
8081  {
8082  if(has_any(RSEQ))
8083  {
8084  _handle_seq_block();
8085  }
8086  else
8087  {
8088  _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
8089  _handle_map_block();
8090  }
8091  }
8092  else if(has_any(RUNK))
8093  {
8094  _handle_unk();
8095  }
8096  else if(has_any(USTY))
8097  {
8098  _handle_usty();
8099  }
8100  else
8101  {
8102  _c4err("internal error");
8103  }
8104  }
8105  if(_finished_file())
8106  break; // it may have finished because of multiline blocks
8107  _line_ended();
8108  }
8109  _end_stream();
8110  m_evt_handler->finish_parse();
8111 }
8112 
8113 } // namespace yml
8114 } // namespace c4
8115 
8116 #undef _set_flags2
8117 #undef _add_flags2
8118 #undef _addrem_flags2
8119 #undef _rem_flags2
8120 
8121 #if defined(_MSC_VER)
8122 # pragma warning(pop)
8123 #elif defined(__clang__)
8124 # pragma clang diagnostic pop
8125 #elif defined(__GNUC__)
8126 # pragma GCC diagnostic pop
8127 #endif
8128 
8129 #endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
Holds a pointer to an existing tree, and a node id.
Definition: node.hpp:836
Tree const * tree() const noexcept
Definition: node.hpp:908
id_type id() const noexcept
Definition: node.hpp:909
bool readable() const noexcept
because a ConstNodeRef cannot be used to write to the tree, readable() has the same meaning as !...
Definition: node.hpp:894
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
NodeType type(id_type node) const
Definition: tree.hpp:374
id_type prev_sibling(id_type node) const
Definition: tree.hpp:493
bool has_key(id_type node) const
Definition: tree.hpp:405
id_type parent(id_type node) const
Definition: tree.hpp:491
id_type next_sibling(id_type node) const
Definition: tree.hpp:494
csubstr const & key(id_type node) const
Definition: tree.hpp:377
bool has_val(id_type node) const
Definition: tree.hpp:406
csubstr const & val(id_type node) const
Definition: tree.hpp:383
bool is_container(id_type node) const
Definition: tree.hpp:402
#define RYML_ERRMSG_SIZE
size for the error message buffer
Definition: common.hpp:23
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition: common.hpp:48
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
Definition: charconv.hpp:1548
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:893
size_t to_chars(substr buf, uint8_t v) noexcept
Definition: charconv.hpp:2328
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:252
@ npos
a null string position
Definition: common.hpp:266
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
Definition: parse.cpp:132
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define m_state
#define _c4dbgfbf(...)
#define _c4dbgchomp(...)
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4dbgfsq(fmt,...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _c4dbgfdq(...)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4dbgfws(...)
#define _c4dbgfps(fmt,...)
#define _c4dbgfbl(...)
#define _addrem_flags2(on, off)
#define _c4dbgfb(...)
Filters an input string into a different output string.
a source file position
Definition: common.hpp:296
size_t col
column
Definition: common.hpp:302
size_t line
line
Definition: common.hpp:300
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:298
csubstr name
file name
Definition: common.hpp:304
Options to give to the parser to control its behavior.