1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
11 #include "c4/yml/detail/parser_dbg.hpp"
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
// Helper macros that keep or drop code depending on whether
// RYML_WITH_TAB_TOKENS is defined:
//  - _RYML_WITH_TAB_TOKENS(...) expands to its arguments or to nothing
//  - _RYML_WITHOUT_TAB_TOKENS(...) is the complement
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) picks one of the two
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
// NOTE(review): the second group below is presumably the #else branch; the
// #else/#endif lines are not visible in this view.
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Debug helper macro: prints a separator and then the line number and byte
// offset of the current parser state (m_evt_handler->m_curr->pos).
// NOTE(review): some continuation lines of this macro are not visible here.
31 #define _c4dbgnextline() \
33 _c4dbgq("\n-----------"); \
34 _c4dbgt("handling line={}, offset={}B", \
35 m_evt_handler->m_curr->pos.line, \
36 m_evt_handler->m_curr->pos.offset); \
41 # pragma warning(push)
42 # pragma warning(disable: 4296)
43 # pragma warning(disable: 4702)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits"
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits"
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Tests a candidate block token. Preconditions (asserted): `s` is non-empty
// and its first character is one of '-', ':' or '?'.
// NOTE(review): the return expression is not visible in this view.
66 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
68 RYML_ASSERT(s.len > 0);
69 RYML_ASSERT(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Tests whether `s` starts with the three characters "---".
// Preconditions (asserted): `s` begins with '-' and does not end with a
// '\n' or '\r' (the caller must have stripped the line ending).
// NOTE(review): the return expression may continue on lines not visible here.
73 inline bool _is_doc_begin_token(csubstr s)
75 RYML_ASSERT(s.begins_with(
'-'));
76 RYML_ASSERT(!s.ends_with(
"\n"));
77 RYML_ASSERT(!s.ends_with(
"\r"));
// the first character is covered by the assertion above, so only
// positions 1 and 2 are compared
78 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Tests whether `s` starts with the three characters "...".
// Preconditions (asserted): `s` begins with '.' and does not end with a
// '\n' or '\r' (the caller must have stripped the line ending).
// NOTE(review): the return expression may continue on lines not visible here.
82 inline bool _is_doc_end_token(csubstr s)
84 RYML_ASSERT(s.begins_with(
'.'));
85 RYML_ASSERT(!s.ends_with(
"\n"));
86 RYML_ASSERT(!s.ends_with(
"\r"));
// the first character is covered by the assertion above, so only
// positions 1 and 2 are compared
87 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests whether `s` is a document marker token. The two visible return
// expressions compare characters 1 and 2 against '-' and against '.'.
// NOTE(review): the checks on the length and on the first character that
// select between the two returns are not visible in this view.
91 inline bool _is_doc_token(csubstr s) noexcept
119 return (s.str[1] ==
'-' && s.str[2] ==
'-')
123 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether `s` begins with one of the literals "false", "true" or
// "null", in that order.
// NOTE(review): the values returned for each case are not visible here;
// callers in this file use the result as a length, so presumably it is the
// length of the matched literal, or 0 when nothing matches — confirm.
130 inline size_t _is_special_json_scalar(csubstr s)
136 if(s.len >= 5 && s.begins_with(
"false"))
140 if(s.len >= 4 && s.begins_with(
"true"))
144 if(s.len >= 4 && s.begins_with(
"null"))
// Returns 1 when `nl` and `following` form a two-character line ending
// ("\n\r" or "\r\n"), and 0 otherwise. The boolean result is converted to
// size_t so that callers can add it directly to a position.
154 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
156 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Narrows `rem` to what follows its first line-ending character: locates the
// first '\r' or '\n', keeps the part to the right of it, and then tests
// whether the next character completes a two-character line ending.
// NOTE(review): the handling of "no newline found", of an empty remainder,
// and the final return are not visible in this view.
160 inline substr from_next_line(substr rem)
162 size_t nlpos = rem.first_of(
"\r\n");
165 const char nl = rem[nlpos];
166 rem = rem.right_of(nlpos);
169 if(_extend_from_combined_newline(nl, rem.front()))
// Scans `r` forward from position *i, which must be at a '\n' (asserted),
// advancing *i in place, and returns the counter `numnl_following`.
// The loop distinguishes '\n' from the characters ' ', '\t' and '\r'.
// NOTE(review): the branch bodies (counter increment, loop exit) are not
// visible in this view.
177 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
179 RYML_ASSERT(r[*i] ==
'\n');
180 size_t numnl_following = 0;
182 for( ; *i < r.len; ++(*i))
184 if(r.str[*i] ==
'\n')
187 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
192 return numnl_following;
// Indentation-aware overload: scans `r` forward from position *i, which must
// be at a '\n' (asserted), advancing *i in place, and returns the counter
// `numnl_following`.
// NOTE(review): the condition choosing between the first loop and the second
// one, and all branch bodies, are not visible in this view.
197 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
199 RYML_ASSERT(r[*i] ==
'\n');
200 size_t numnl_following = 0;
// first visible loop: same character classes as the overload without the
// indentation parameter
204 for( ; *i < r.len; ++(*i))
206 if(r.str[*i] ==
'\n')
209 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
// second visible loop: on a newline, `stop` marks the position
// `indentation` characters further on
217 for( ; *i < r.len; ++(*i))
219 if(r.str[*i] ==
'\n')
223 size_t stop = *i + indentation;
224 for( ; *i < r.len; ++(*i))
// a character other than ' ' or '\r' is asserted to occur before `stop`
226 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
228 RYML_ASSERT(*i < stop);
233 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
239 return numnl_following;
249 template<
class EventHandler>
// NOTE(review): presumably the ParseEngine constructor taking an event
// handler pointer — the function header is not visible in this view.
// Stores `evt_handler`, value-initializes the pending anchors and the
// newline-offset bookkeeping (size and capacity 0), clears the
// m_was_inside_qmrk flag, and then checks via RYML_CHECK that `evt_handler`
// is not null.
256 template<
class EventHandler>
261 , m_evt_handler(evt_handler)
262 , m_pending_anchors()
264 , m_was_inside_qmrk(false)
267 , m_newline_offsets()
268 , m_newline_offsets_size(0)
269 , m_newline_offsets_capacity(0)
270 , m_newline_offsets_buf()
272 RYML_CHECK(evt_handler);
// NOTE(review): presumably the move constructor — the function header and
// body are not visible in this view.
// Takes every member from `that` as-is, including the m_newline_offsets
// pointer value together with its size, capacity and buffer (no new
// allocation is made here). m_was_inside_qmrk is reset to false rather than
// taken from `that`.
275 template<
class EventHandler>
277 : m_options(that.m_options)
278 , m_file(that.m_file)
280 , m_evt_handler(that.m_evt_handler)
281 , m_pending_anchors(that.m_pending_anchors)
282 , m_pending_tags(that.m_pending_tags)
283 , m_was_inside_qmrk(
false)
286 , m_newline_offsets(that.m_newline_offsets)
287 , m_newline_offsets_size(that.m_newline_offsets_size)
288 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
289 , m_newline_offsets_buf(that.m_newline_offsets_buf)
// NOTE(review): presumably the copy constructor — the function header is not
// visible in this view.
// Copies options, file, handler and pending annotations from `that`, but
// starts with empty newline-offset storage. m_was_inside_qmrk is reset to
// false rather than copied.
294 template<
class EventHandler>
296 : m_options(that.m_options)
297 , m_file(that.m_file)
299 , m_evt_handler(that.m_evt_handler)
300 , m_pending_anchors(that.m_pending_anchors)
301 , m_pending_tags(that.m_pending_tags)
302 , m_was_inside_qmrk(false)
305 , m_newline_offsets()
306 , m_newline_offsets_size()
307 , m_newline_offsets_capacity()
308 , m_newline_offsets_buf()
// when `that` has location storage, get storage of the same capacity for
// this object and copy the used entries over
310 if(that.m_newline_offsets_capacity)
312 _resize_locations(that.m_newline_offsets_capacity);
313 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
314 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
315 m_newline_offsets_size = that.m_newline_offsets_size;
// NOTE(review): presumably the move assignment operator — the function
// header, any release of previously held storage, and the return statement
// are not visible in this view.
// Assigns every member from `that` as-is, including the m_newline_offsets
// pointer value with its size, capacity and buffer. Unlike the constructors
// taking `that`, m_was_inside_qmrk is taken from `that` here.
319 template<
class EventHandler>
323 m_options = (that.m_options);
324 m_file = (that.m_file);
325 m_buf = (that.m_buf);
326 m_evt_handler = that.m_evt_handler;
327 m_pending_anchors = that.m_pending_anchors;
328 m_pending_tags = that.m_pending_tags;
329 m_was_inside_qmrk = that.m_was_inside_qmrk;
330 m_doc_empty = that.m_doc_empty;
331 m_encoding = that.m_encoding;
332 m_newline_offsets = (that.m_newline_offsets);
333 m_newline_offsets_size = (that.m_newline_offsets_size);
334 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
335 m_newline_offsets_buf = (that.m_newline_offsets_buf);
// NOTE(review): presumably the copy assignment operator — the function
// header, any self-assignment guard, and the return statement are not
// visible in this view.
// Assigns the plain members from `that`, then copies the used newline
// offsets into this object's own storage.
340 template<
class EventHandler>
346 m_options = (that.m_options);
347 m_file = (that.m_file);
348 m_buf = (that.m_buf);
349 m_evt_handler = that.m_evt_handler;
350 m_pending_anchors = that.m_pending_anchors;
351 m_pending_tags = that.m_pending_tags;
352 m_was_inside_qmrk = that.m_was_inside_qmrk;
353 m_doc_empty = that.m_doc_empty;
354 m_encoding = that.m_encoding;
// resize only when the existing capacity is smaller than that of `that`
355 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
356 _resize_locations(that.m_newline_offsets_capacity);
// the destination must be able to hold everything copied below
357 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
358 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
359 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
360 m_newline_offsets_size = that.m_newline_offsets_size;
361 m_newline_offsets_buf = that.m_newline_offsets_buf;
// NOTE(review): presumably a member-clearing helper — the function header is
// not visible in this view.
// Sets the visible members back to an empty state: pending anchors, the
// m_was_inside_qmrk flag, and the newline-offset pointer, size, capacity and
// buffer. Nothing is freed in the visible lines.
366 template<
class EventHandler>
373 m_pending_anchors = {};
375 m_was_inside_qmrk =
false;
378 m_newline_offsets = {};
379 m_newline_offsets_size = {};
380 m_newline_offsets_capacity = {};
381 m_newline_offsets_buf = {};
// Releases the newline-offset array, if there is one, through the event
// handler's callbacks (_RYML_CB_FREE), then resets the pointer, size,
// capacity and associated buffer so the object holds no location storage.
384 template<
class EventHandler>
385 void ParseEngine<EventHandler>::_free()
387 if(m_newline_offsets)
389 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
390 m_newline_offsets =
nullptr;
391 m_newline_offsets_size = 0u;
392 m_newline_offsets_capacity = 0u;
393 m_newline_offsets_buf =
nullptr;
// Resets per-parse state: clears the pending anchors and the
// m_was_inside_qmrk flag, and calls _prepare_locations() when the options
// have locations enabled.
// NOTE(review): other resets between these lines are not visible here.
400 template<
class EventHandler>
401 void ParseEngine<EventHandler>::_reset()
403 m_pending_anchors = {};
406 m_was_inside_qmrk =
false;
408 if(m_options.locations())
410 _prepare_locations();
// Rebases strings held by the parser from `prev_arena` to `next_arena`.
// The local macro _ryml_relocate(s) acts only when (s) is a sub-string of
// prev_arena, and then points (s).str at the same offset inside next_arena.
// The two loops walk the entries of m_pending_tags and m_pending_anchors.
// NOTE(review): the loop bodies are not visible in this view.
417 template<
class EventHandler>
418 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
420 #define _ryml_relocate(s) \
421 if((s).is_sub(prev_arena)) \
423 (s).str = next_arena.str + ((s).str - prev_arena.str); \
427 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
429 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
431 #undef _ryml_relocate
// Trampoline with an untyped `data` pointer: casts `data` to ParseEngine*
// and forwards both arenas to that object's _relocate_arena().
434 template<
class EventHandler>
435 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
437 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Emits, through `dumpfn`, a description of the current parse position:
// a "line:col: " prefix (preceded by the file name where that branch
// applies), up to 80 characters of the stripped line contents with its size,
// a caret/tilde underline marking the remaining part of the line, and
// finally the flags of the top parser state.
// NOTE(review): the conditions guarding several of these sections are not
// visible in this view.
443 template<
class EventHandler>
444 template<
class DumpFn>
445 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
447 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
448 auto const& lc = st->line_contents;
449 csubstr contents = lc.stripped;
// width of the printed prefix: to_chars() is called on an empty substr to
// obtain the printed widths of line and col, and 3 more characters are added
453 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
456 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
457 offs += m_file.len + 1;
459 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// contents of 80 or more characters are cut to 80 and followed by "..."
460 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
461 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
462 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// columns of the remaining part of the line, relative to the full line
464 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
465 size_t lastcol = firstcol + lc.rem.len;
// pad with spaces up to the first remaining column, then underline
466 for(
size_t i = 0; i < offs + firstcol; ++i)
467 std::forward<DumpFn>(dumpfn)(
" ");
468 std::forward<DumpFn>(dumpfn)(
"^");
469 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
470 std::forward<DumpFn>(dumpfn)(
"~");
471 detail::_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
475 std::forward<DumpFn>(dumpfn)(
"\n");
482 detail::_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Reports a parse error: formats `fmt` with `args` into `errmsg` through a
// _SubstrWriter, asks the event handler to cancel the parse, and invokes the
// m_error callback with the message, its length, the current position and
// the callbacks' user data.
// NOTE(review): the declarations of `errmsg` and `len` are not visible here.
490 template<
class EventHandler>
491 template<
class ...Args>
492 void ParseEngine<EventHandler>::_err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
495 detail::_SubstrWriter writer(errmsg);
496 auto dumpfn = [&writer](csubstr s){ writer.append(s); };
497 detail::_dump(dumpfn, fmt, args...);
501 m_evt_handler->cancel_parse();
502 m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
// Debug print: formats `fmt` with `args` and writes the pieces to stdout
// with fwrite(). Pieces whose `str` is null are skipped.
508 template<
class EventHandler>
509 template<
class ...Args>
510 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
514 auto dumpfn = [](csubstr s){
if(s.str) fwrite(s.str, 1, s.len, stdout); };
515 detail::_dump(dumpfn, fmt, args...);
// Computes whether the current offset has reached or passed the end of the
// source buffer (pos.offset >= m_buf.len), with a debug message.
// NOTE(review): the condition on the debug message and the return statement
// are not visible in this view.
524 template<
class EventHandler>
525 bool ParseEngine<EventHandler>::_finished_file()
const
527 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
530 _c4dbgp(
"finished file!!!");
// Returns true when nothing remains to be consumed on the current line,
// i.e. line_contents.rem is empty.
535 template<
class EventHandler>
536 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
538 return m_evt_handler->m_curr->line_contents.rem.empty();
// Skips leading whitespace in the remainder of the current line by
// advancing the line position by `pos` characters.
// NOTE(review): the computation of `pos` and the guard around the skip are
// not visible in this view.
544 template<
class EventHandler>
545 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
547 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
553 _c4dbgpf(
"skip {} whitespace characters", pos);
554 _line_progressed(pos);
// If the remainder of the current line starts with `c`, skips the run of
// consecutive `c` characters: `pos` is the index of the first character that
// differs from `c`, and the line is advanced by that amount.
// NOTE(review): the handling of a remainder made only of `c` (first_not_of
// finding nothing) is not visible in this view.
558 template<
class EventHandler>
559 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
561 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
562 if(rem.len && rem.str[0] == c)
564 size_t pos = rem.first_not_of(c);
567 _c4dbgpf(
"skip {}x'{}'", pos, c);
568 _line_progressed(pos);
// Compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// Variant of the run-skipping helper that also compares the run length
// against `max_to_skip` before advancing the line.
// NOTE(review): the statement executed when pos > max_to_skip (presumably a
// clamp), and the closing #endif, are not visible in this view.
572 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
573 template<
class EventHandler>
574 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
576 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
577 if(rem.len && rem.str[0] == c)
579 size_t pos = rem.first_not_of(c);
582 if(pos > max_to_skip)
584 _c4dbgpf(
"skip {}x'{}'", pos, c);
585 _line_progressed(pos);
// Skips the leading run of characters belonging to the set `chars`.
// Precondition (asserted): the remainder of the line begins with one of
// those characters.
// NOTE(review): the template parameter list declaring N, and the condition
// under which `pos` falls back to the whole remaining length, are not
// visible in this view.
590 template<
class EventHandler>
592 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
594 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
595 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
597 pos = m_evt_handler->m_curr->line_contents.rem.len;
598 _c4dbgpf(
"skip {} characters", pos);
599 _line_progressed(pos);
// Consumes a comment running to the end of the current line.
// Preconditions (asserted): the remainder begins with '#' and is a
// sub-string of the full line.
// When the full line does not itself begin with '#', the character right
// before the comment must be a space or a tab; otherwise the error
// "comment not preceded by whitespace" is raised.
602 template<
class EventHandler>
603 void ParseEngine<EventHandler>::_skip_comment()
605 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
606 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
607 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
608 csubstr full = m_evt_handler->m_curr->line_contents.full;
610 if(!full.begins_with(
'#'))
612 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
// c is the character immediately preceding the '#'
613 const char c = full[(size_t)(rem.str - full.str - 1)];
614 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
615 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"comment not preceded by whitespace");
// NOTE(review): presumably the else branch, where the comment starts the line
619 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
621 _c4dbgpf(
"comment was '{}'", rem);
// the comment takes everything left on the line
622 _line_progressed(rem.len);
// Looks past leading spaces in the remainder of the line; when a '#'
// follows, advances the line by the number of spaces skipped so that the
// position sits on the '#'.
// NOTE(review): the statement that then consumes the comment itself is not
// visible in this view.
625 template<
class EventHandler>
626 void ParseEngine<EventHandler>::_maybe_skip_comment()
628 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
629 if(s.begins_with(
'#'))
631 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Checks whether a ':' comes next on the current line: skips leading spaces
// and tabs, then tests the first remaining character.
// NOTE(review): the condition under which `pos` falls back to the whole
// remaining length, whether the ':' itself is consumed, and the return
// statements are not visible in this view.
636 template<
class EventHandler>
637 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
639 if(m_evt_handler->m_curr->line_contents.rem.len)
641 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
643 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
645 pos = m_evt_handler->m_curr->line_contents.rem.len;
646 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
647 _line_progressed(pos);
649 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
651 _c4dbgp(
"found ':' colon next");
// Checks whether a ',' comes next on the current line: skips leading spaces
// and tabs, then tests the first remaining character.
// NOTE(review): the condition under which `pos` falls back to the whole
// remaining length, whether the ',' itself is consumed, and the return
// statements are not visible in this view.
659 template<
class EventHandler>
660 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
662 if(m_evt_handler->m_curr->line_contents.rem.len)
664 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
666 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
668 pos = m_evt_handler->m_curr->line_contents.rem.len;
669 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
670 _line_progressed(pos);
672 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
674 _c4dbgp(
"found ',' comma next");
// Scans an anchor at the start of the remaining line.
// Precondition (asserted): the remainder begins with '&'.
// The anchor is the text after the '&' up to the first space. The line is
// advanced past the '&' and the anchor, and any following spaces are then
// skipped.
// NOTE(review): the return statement is not visible in this view.
685 template<
class EventHandler>
686 csubstr ParseEngine<EventHandler>::_scan_anchor()
688 csubstr s = m_evt_handler->m_curr->line_contents.rem;
689 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
690 csubstr anchor = s.range(1, s.first_of(
' '));
// 1u accounts for the leading '&', which is not part of `anchor`
691 _line_progressed(1u + anchor.len);
692 _maybe_skipchars(
' ');
// Scans a reference while inside a sequence.
// Precondition (asserted): the remainder begins with '*'.
// The reference runs from the '*' (inclusive) up to the first of ',', ']',
// ' ' or ':', and the line is advanced by its length.
// NOTE(review): the return statement is not visible in this view.
696 template<
class EventHandler>
697 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
699 csubstr s = m_evt_handler->m_curr->line_contents.rem;
700 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
701 csubstr ref = s.first(s.first_of(
",] :"));
702 _line_progressed(ref.len);
// Scans a reference while inside a map.
// Precondition (asserted): the remainder begins with '*'.
// The reference runs from the '*' (inclusive) up to the first of ',', '}'
// or ' ', and the line is advanced by its length.
// NOTE(review): the return statement is not visible in this view.
706 template<
class EventHandler>
707 csubstr ParseEngine<EventHandler>::_scan_ref_map()
709 csubstr s = m_evt_handler->m_curr->line_contents.rem;
710 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
711 csubstr ref = s.first(s.first_of(
",} "));
712 _line_progressed(ref.len);
// Scans a tag starting at the first non-space character of the remainder.
// Precondition (asserted): after trimming leading spaces, it begins with '!'.
// Visible cases, by prefix:
//   "!!"  - tag ends at the first ' ' or ',' in one branch, and at the
//           first ' ' in the other
//   "!<"  - tag ends at the first '>', which is included
//   "!h!" - only when RYML_NO_COVERAGE__TO_BE_DELETED is defined; tag ends
//           at the first ' '
//   "!"   - same two alternatives as for "!!"
// The line is then advanced by the tag length and following whitespace is
// skipped.
// NOTE(review): the declaration of `t`, the conditions choosing between the
// " ," and ' ' terminators, and the return are not visible in this view.
716 template<
class EventHandler>
717 csubstr ParseEngine<EventHandler>::_scan_tag()
719 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
720 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
722 if(rem.begins_with(
"!!"))
724 _c4dbgp(
"begins with '!!'");
726 t = rem.left_of(rem.first_of(
" ,"));
728 t = rem.left_of(rem.first_of(
' '));
730 else if(rem.begins_with(
"!<"))
732 _c4dbgp(
"begins with '!<'");
733 t = rem.left_of(rem.first_of(
'>'),
true);
735 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
736 else if(rem.begins_with(
"!h!"))
738 _c4dbgp(
"begins with '!h!'");
739 t = rem.left_of(rem.first_of(
' '));
744 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
745 _c4dbgp(
"begins with '!'");
747 t = rem.left_of(rem.first_of(
" ,"));
749 t = rem.left_of(rem.first_of(
' '));
751 _line_progressed(t.len);
752 _maybe_skip_whitespace_tokens();
// Decides whether `s` (asserted non-empty) can begin a plain scalar in flow
// context. The visible lines show two kinds of outcome: a debug message that
// the text is not a scalar, and hard errors for invalid ":x" and "?x" token
// combinations.
// NOTE(review): the character tests driving these outcomes, and the return
// statements, are not visible in this view.
759 template<
class EventHandler>
760 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
762 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
778 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
792 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
799 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
821 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
827 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain scalar inside a flow sequence, writing its needs_filter flag
// (and, per the final debug print, its text) into `sc`.
// State preconditions (asserted): not RMAP, not BLCK; RSEQ or RSEQIMAP;
// FLOW; RVAL. The remainder must not begin with a space or a newline.
// The scan walks the current line character by character, treating '#' and
// ':' as suspicious, and moves on to following lines until a terminator is
// found or the file ends.
// NOTE(review): the character tests, loop exits, the assignment of
// sc->scalar and the return statements are not visible in this view.
846 template<
class EventHandler>
847 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
849 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
850 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
851 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
852 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
853 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
855 substr s = m_evt_handler->m_curr->line_contents.rem;
856 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
857 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
862 if(!_is_valid_start_scalar_plain_flow(s))
865 _c4dbgp(
"scanning seqflow scalar...");
// offset at which scanning started; later compared against the current
// offset to tell "at the beginning" apart from "scalar in progress"
867 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
868 bool needs_filter =
false;
871 _c4dbgpf(
"scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
872 for(
size_t i = 0; i < s.len; ++i)
874 const char c = s.str[i];
878 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
880 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
886 _c4dbgp(
"at the beginning. no scalar here.");
891 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
896 _c4dbgp(
"found suspicious '#'");
899 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
905 _c4dbgp(
"found suspicious ':'");
908 const char next = s.str[i+1];
909 _c4dbgpf(
"next char is '{}'", _c4prc(next));
912 _c4dbgp(
"map starting!");
913 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
915 _c4dbgp(
"scalar finished!");
921 _c4dbgp(
"at the beginning. no scalar here.");
927 _c4dbgp(
"it's a scalar indeed.");
// a ':' that is the last character of the line
931 else if(s.len == i+1)
933 _c4dbgp(
"':' at line end. map starting!");
941 _c4err(
"invalid character: '{}'", c);
// the whole line was scanned without a terminator: consume it and go on
// with the next line, unless the file is finished
946 _line_progressed(s.len);
947 if(!_finished_file())
949 _c4dbgp(
"next line!");
955 _c4dbgp(
"file finished!");
958 s = m_evt_handler->m_curr->line_contents.rem;
965 sc->needs_filter = needs_filter;
967 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain scalar inside a flow map (or an implicit map inside a flow
// sequence), writing the result into `sc`. Returns true when the scanned
// scalar is non-empty.
// State preconditions (asserted): not RSEQ unless RSEQIMAP; not BLCK; RMAP
// or RSEQIMAP; FLOW; one of RKEY, RVAL, QMRK. The remainder must not begin
// with a space.
// NOTE(review): the character tests, loop exits and the assignment of
// sc->scalar are not visible in this view.
972 template<
class EventHandler>
973 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
975 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
976 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
977 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
978 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
979 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
981 substr s = m_evt_handler->m_curr->line_contents.rem;
982 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
987 if(!_is_valid_start_scalar_plain_flow(s))
990 _c4dbgp(
"scanning scalar...");
992 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
993 bool needs_filter =
false;
996 for(
size_t i = 0; i < s.len; ++i)
998 const char c = s.str[i];
1003 _line_progressed(i);
1004 _c4dbgpf(
"found terminating character: '{}'", c);
// the current character terminates the scalar only at the end of the line
// or when followed by ' ', ',' or '}' (and by a tab, when tab tokens are
// enabled)
1007 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
1009 _line_progressed(i);
1010 _c4dbgpf(
"found terminating character: '{}'", c);
1016 _line_progressed(i);
1017 _c4err(
"invalid character: '{}'", c);
1020 _line_progressed(i);
1024 _c4err(
"invalid character: '{}'", c);
1029 _line_progressed(i);
1030 _c4dbgpf(
"found terminating character: '{}'", c);
// no terminator on this line: consume it and continue on the next line,
// unless the file is finished
1038 _c4dbgp(
"next line!");
1039 _line_progressed(s.len);
1040 if(!_finished_file())
1042 _c4dbgp(
"next line!");
1048 _c4dbgp(
"file finished!");
1051 s = m_evt_handler->m_curr->line_contents.rem;
// set once the scan moves on to another line
1052 needs_filter =
true;
1058 sc->needs_filter = needs_filter;
1060 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1062 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence, writing it to `sc`.
// State preconditions (asserted): not RMAP, not BLCK; RSEQ; FLOW. The
// remainder must not begin with a space.
// The special literals recognized by _is_special_json_scalar() are taken
// whole; otherwise the line is scanned up to a terminating character. In
// both visible result paths needs_filter is false.
// NOTE(review): the character tests, the declaration of `i` and the return
// statements are not visible in this view.
1065 template<
class EventHandler>
1066 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1068 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1069 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1070 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1071 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1073 substr s = m_evt_handler->m_curr->line_contents.rem;
1074 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1079 _c4dbgp(
"scanning scalar...");
1086 _c4dbgp(
"not a scalar.");
1091 const size_t len = _is_special_json_scalar(s);
// special literal: the scalar is exactly the first `len` characters
1094 sc->scalar = s.first(len);
1095 sc->needs_filter =
false;
1096 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1097 _line_progressed(len);
1104 for( ; i < s.len; ++i)
1106 const char c = s.str[i];
1113 _c4dbgpf(
"found terminating character: '{}'", c);
// applies at the very start, or right after a space
1116 if(!i || s.str[i-1] ==
' ')
1118 _c4dbgpf(
"found terminating character: '{}'", c);
// only a non-empty scan produces a scalar
1129 if(C4_LIKELY(i > 0))
1131 _line_progressed(i);
1132 sc->scalar = s.first(i);
1133 sc->needs_filter =
false;
1134 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted JSON scalar inside a flow map, writing it to `sc`.
// State preconditions (asserted): not RSEQ, not BLCK; RMAP; FLOW; RKEY or
// RVAL. The remainder must not begin with a space.
// The special literals recognized by _is_special_json_scalar() are taken
// whole; otherwise the line is scanned up to a terminating character. In
// both visible result paths needs_filter is false.
// NOTE(review): the character tests, the declaration of `i` and the return
// statements are not visible in this view.
1141 template<
class EventHandler>
1142 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1144 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1145 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1146 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1147 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1148 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1150 substr s = m_evt_handler->m_curr->line_contents.rem;
1151 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1156 _c4dbgp(
"scanning scalar...");
1159 const size_t len = _is_special_json_scalar(s);
// special literal: the scalar is exactly the first `len` characters
1162 sc->scalar = s.first(len);
1163 sc->needs_filter =
false;
1164 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1165 _line_progressed(len);
1172 for( ; i < s.len; ++i)
1174 const char c = s.str[i];
1181 _c4dbgpf(
"found terminating character: '{}'", c);
// applies at the very start, or right after a space
1184 if(!i || s.str[i-1] ==
' ')
1186 _c4dbgpf(
"found terminating character: '{}'", c);
// only a non-empty scan produces a scalar
1197 if(C4_LIKELY(i > 0))
1199 _line_progressed(i);
1200 sc->scalar = s.first(i);
1201 sc->needs_filter =
false;
1202 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Returns true when `s` is a document-begin token in a valid position:
// the current line has zero indentation, the parser is at the beginning of
// the line, and _is_doc_begin_token(s) holds.
// Precondition (asserted): s[0] is '-'.
1209 template<
class EventHandler>
1210 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1212 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1213 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// Returns true when `s` is a document-end token in a valid position:
// the current line has zero indentation, the parser is at the beginning of
// the line, and _is_doc_end_token(s) holds.
// Precondition (asserted): s[0] is '.'.
1216 template<
class EventHandler>
1217 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1219 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1220 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain scalar in block context, possibly spanning several lines,
// and writes it to `sc`. `indentation` is compared against the indentation
// of each following line.
// State preconditions (asserted): not FLOW, not RSEQIMAP; one of BLCK, RUNK,
// USTY. The remainder must not begin with a space.
// The scalar is the buffer range from the starting offset to the final
// offset, with trailing spaces, tabs and line endings trimmed.
// NOTE(review): the character tests, loop exits and return statements are
// not visible in this view.
1223 template<
class EventHandler>
1224 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1226 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1227 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1228 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK|
RUNK|
USTY));
1230 substr s = m_evt_handler->m_curr->line_contents.rem;
1231 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
// first: is the text a block token or a document marker rather than a scalar?
1239 if(_is_blck_token(s))
1243 else if(_is_doc_begin(s))
1245 _c4dbgp(
"token is doc start");
1251 if(_is_blck_token(s))
1264 _c4dbgp(
"token is doc end");
1270 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// the starting offset delimits the scalar; the starting line tells whether
// a ':' was met on the first line or on a later one
1272 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1273 const size_t start_line = m_evt_handler->m_curr->pos.line;
1275 bool needs_filter =
false;
1278 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
1279 for(
size_t i = 0; i < s.len; ++i)
1281 const char curr = s.str[i];
1286 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1290 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1291 _line_progressed(i);
// on the starting line the scalar ends here; the other visible outcome is
// a parse error
1293 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1295 _c4dbgp(
"start line. scalar ends here");
1300 _c4err(
"parse error");
// walk over a run of consecutive ':' characters
1306 while(j + 1 < s.len && s.str[j+1] ==
':')
1308 _c4dbgp(
"skip colon");
1311 i = j > i ? j-1 : i;
1312 _c4dbgp(
"nothing to see here");
1316 _c4dbgp(
"got suspicious '#'");
// a '#' starts a comment only at the start of the text or after a space/tab
1317 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1319 _c4dbgp(
"comment! scalar ends here");
1320 _line_progressed(i);
1325 _c4dbgp(
"nothing to see here");
// the line is exhausted: peek at the next line to decide whether the scalar
// continues
1330 _line_progressed(s.len);
1331 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1332 next_peeked = next_peeked.trimr(
"\n\r");
1333 const size_t next_indentation = next_peeked.first_not_of(
' ');
1334 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1335 if(next_indentation < indentation)
1337 _c4dbgp(
"smaller indentation! scalar ended");
// an unindented, non-empty next line may be a document marker
1340 else if(next_indentation == 0 && next_peeked.len > 0)
1342 const char first = next_peeked.str[0];
1346 next_peeked = next_peeked.trimr(
"\n\r");
1347 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1348 if(_is_doc_begin_token(next_peeked))
1350 _c4dbgp(
"doc begin! scalar ended");
1355 next_peeked = next_peeked.trimr(
"\n\r");
1356 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1357 if(_is_doc_end_token(next_peeked))
1359 _c4dbgp(
"doc end! scalar ended");
1366 _c4dbgp(
"next line!");
1367 if(!_finished_file())
1369 _c4dbgp(
"next line!");
1375 _c4dbgp(
"file finished!");
1378 s = m_evt_handler->m_curr->line_contents.rem;
// set once the scan moves on to another line
1379 needs_filter =
true;
1384 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1385 sc->needs_filter = needs_filter;
1387 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans a plain scalar that is a value in a block sequence.
// State preconditions (asserted): not RMAP, not FLOW, not RSEQIMAP; RSEQ;
// BLCK; RVAL.
// Delegates to _scan_scalar_plain_blck() with an indentation one larger than
// the current state's reference indentation (indref).
1392 template<
class EventHandler>
1393 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1395 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1396 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1397 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1398 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1399 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1400 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1401 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar that is a key or a value in a block map.
// State preconditions (asserted): not RSEQ, not FLOW; RMAP; BLCK; one of
// RKEY, RVAL, QMRK.
// Delegates to _scan_scalar_plain_blck() with an indentation one larger than
// the current state's reference indentation (indref).
1404 template<
class EventHandler>
1405 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1407 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1408 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1409 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1410 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1411 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1412 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is still unknown.
// State precondition (asserted): RUNK or USTY.
// Delegates to _scan_scalar_plain_blck() with the current state's reference
// indentation (indref) as-is, without the +1 that the seq/map variants add.
1415 template<
class EventHandler>
1416 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1418 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1419 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns a view of the line following the one containing offset `pos`,
// without changing the parser state. Passing npos for `pos` means "use the
// current offset".
// The returned line includes its line ending (one or two characters).
// NOTE(review): the declarations of `rem` and `nlpos`, the guards around the
// indexing of rem[nlpos+1], and the return statements are not visible in
// this view.
1425 template<
class EventHandler>
1426 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1430 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1431 if(pos >= m_buf.len)
// skip the rest of the line containing `pos`
1435 rem = from_next_line(m_buf.sub(pos));
// locate the end of the peeked line; +1 when the ending is two characters
1440 nlpos = rem.first_of(
"\r\n");
1442 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1443 rem = rem.left_of(nlpos,
true);
1445 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1449 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line at the current offset into the state's line_contents.
// When the offset is already at or past the end of the buffer, the line
// contents are reset to empty sub-strings positioned at the buffer's end.
1455 template<
class EventHandler>
1456 void ParseEngine<EventHandler>::_scan_line()
1458 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1459 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// NOTE(review): presumably the else branch — the `else` line is not visible
1461 m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
// Advances the current position within the line by `ahead` characters:
// offset and column both grow by `ahead`, and the remainder of the line
// drops its first `ahead` characters.
// The column may be at most one past the length of the stripped line
// (asserted).
1464 template<
class EventHandler>
1465 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1467 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1468 m_evt_handler->m_curr->pos.offset += ahead;
1469 m_evt_handler->m_curr->pos.col += ahead;
1470 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1471 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the position from the end of the current line to the start of the
// next one: the offset grows by the difference between the full and the
// stripped line lengths, the line number is incremented, and the column
// goes back to 1.
// Precondition (asserted): the column is exactly one past the stripped
// line, i.e. the whole line has been consumed.
1474 template<
class EventHandler>
1475 void ParseEngine<EventHandler>::_line_ended()
1477 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1478 m_evt_handler->m_curr->pos.line,
1479 m_evt_handler->m_curr->line_contents.full.len,
1480 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1481 m_evt_handler->m_curr->pos.col, 1);
1482 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1483 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1484 ++m_evt_handler->m_curr->pos.line;
1485 m_evt_handler->m_curr->pos.col = 1;
// Reverts the effect of _line_ended(): the offset goes back by the
// difference between the full and the stripped line lengths, the line
// number is decremented, the column is put back at one past the stripped
// line, and the remainder becomes an empty sub-string at the restored
// offset.
// Preconditions (asserted): column is 1, line number is above 0, and the
// offset is at least as large as the amount subtracted.
1488 template<
class EventHandler>
1489 void ParseEngine<EventHandler>::_line_ended_undo()
1491 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1492 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1493 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1494 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1495 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1496 m_evt_handler->m_curr->pos.offset -= delta;
1497 --m_evt_handler->m_curr->pos.line;
1498 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1501 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current
// parser state.
1506 template<
class EventHandler>
1507 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1509 m_evt_handler->m_curr->indref = indentation;
1510 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line being parsed as the reference
// indentation (indref) of the current parser state.
1513 template<
class EventHandler>
1514 void ParseEngine<EventHandler>::_save_indentation()
// sanity check: the unparsed remainder cannot start before the full line
1516 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1517 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1518 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Closes a block map. Before emitting end_map(), any entry left unfinished
// is completed with empty plain scalars.
// NOTE(review): the condition guarding the first branch is not visible in
// this listing; judging from the log message it detects a pending value.
1524 template<
class EventHandler>
1525 void ParseEngine<EventHandler>::_end_map_blck()
1527 _c4dbgp(
"mapblck: end");
// first branch: only the value is missing
1530 _c4dbgp(
"mapblck: set missing val");
1531 _handle_annotations_before_blck_val_scalar();
1532 m_evt_handler->set_val_scalar_plain_empty();
// second branch: the QMRK flag is set, so both an empty key and an empty
// value are emitted
1534 else if(has_any(
QMRK))
1536 _c4dbgp(
"mapblck: set missing keyval");
1537 _handle_annotations_before_blck_key_scalar();
1538 m_evt_handler->set_key_scalar_plain_empty();
1539 _handle_annotations_before_blck_val_scalar();
1540 m_evt_handler->set_val_scalar_plain_empty();
1542 m_evt_handler->end_map();
// Closes a block sequence. A missing value is completed with an empty plain
// scalar before end_seq() is emitted.
// NOTE(review): the condition guarding the "missing val" statements is not
// visible in this listing.
1545 template<
class EventHandler>
1546 void ParseEngine<EventHandler>::_end_seq_blck()
1550 _c4dbgp(
"seqblck: set missing val");
1551 _handle_annotations_before_blck_val_scalar();
1552 m_evt_handler->set_val_scalar_plain_empty();
1554 m_evt_handler->end_seq();
// Ends the map at the current state and pops that state from the handler.
// NOTE(review): statements between the first and second assertion are not
// visible in this listing.
1557 template<
class EventHandler>
1558 void ParseEngine<EventHandler>::_end2_map()
1560 _c4dbgp(
"map: end");
// the current state must be flagged as a map
1561 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
// at this point the FLOW flag must be clear and the USTY flag must be set
1568 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1569 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1570 m_evt_handler->_pop();
// Ends the sequence at the current state and pops that state from the handler.
// NOTE(review): statements between the first and second assertion are not
// visible in this listing.
1574 template<
class EventHandler>
1575 void ParseEngine<EventHandler>::_end2_seq()
1577 _c4dbgp(
"seq: end");
// the current state must be flagged as a sequence
1578 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
// at this point the FLOW flag must be clear and the USTY flag must be set
1585 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1586 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1587 m_evt_handler->_pop();
// Begins a document through the event handler and resets the reference
// indentation of the current state to 0.
1591 template<
class EventHandler>
1592 void ParseEngine<EventHandler>::_begin2_doc()
1596 m_evt_handler->begin_doc();
1597 m_evt_handler->m_curr->indref = 0;
// Same as _begin2_doc(), but notifies the handler through begin_doc_expl()
// (presumably a document opened by an explicit marker -- confirm against the
// handler documentation). Resets the reference indentation to 0.
1600 template<
class EventHandler>
1601 void ParseEngine<EventHandler>::_begin2_doc_expl()
1605 m_evt_handler->begin_doc_expl();
1606 m_evt_handler->m_curr->indref = 0;
// Ends the current document. An empty document receives an empty plain
// scalar as its value before end_doc() is emitted.
// NOTE(review): the test deciding that the document was empty is not visible
// in this listing.
1609 template<
class EventHandler>
1610 void ParseEngine<EventHandler>::_end2_doc()
1612 _c4dbgp(
"doc: end");
// the current state must be a document state
1613 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1616 _c4dbgp(
"doc was empty; add empty val");
1617 m_evt_handler->set_val_scalar_plain_empty();
1619 m_evt_handler->end_doc();
// Ends the current document through end_doc_expl(). A document without
// children receives an empty plain scalar as its value first.
// NOTE(review): the test deciding that the document has no children is not
// visible in this listing.
1622 template<
class EventHandler>
1623 void ParseEngine<EventHandler>::_end2_doc_expl()
1625 _c4dbgp(
"doc: end");
1628 _c4dbgp(
"doc: no children; add empty val");
1629 m_evt_handler->set_val_scalar_plain_empty();
1631 m_evt_handler->end_doc_expl();
// Begins a document when one is required.
// NOTE(review): only the debug message is visible in this listing; the
// condition and the call that actually starts the document are elided.
1634 template<
class EventHandler>
1635 void ParseEngine<EventHandler>::_maybe_begin_doc()
1639 _c4dbgp(
"doc must be started");
// Ends the current document when one is open.
// NOTE(review): only the debug message is visible in this listing; the
// condition and the call that actually ends the document are elided.
1643 template<
class EventHandler>
1644 void ParseEngine<EventHandler>::_maybe_end_doc()
1648 _c4dbgp(
"doc must be finished");
// Pops nested states until the current state is the document state.
// The document state is looked for at stack index 0 or, failing that, at
// stack index 1; anything else is reported as an internal error.
1653 template<
class EventHandler>
1654 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1656 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// case 1: the bottom of the stack is itself the document
1657 if(m_evt_handler->m_stack[0].flags &
RDOC)
1659 _c4dbgp(
"root is RDOC");
// pop only when we are nested below the document level
1660 if(m_evt_handler->m_curr->level != 0)
1661 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
// case 2: the document is the second entry of the stack
1663 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1665 _c4dbgp(
"root is STREAM");
1666 if(m_evt_handler->m_curr->level != 1)
1667 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
// remaining case (its `else` is not visible in this listing)
1671 _c4err(
"internal error");
// postcondition: the current state is a document state
1673 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Ends the current document from an arbitrary nesting depth: first unwinds
// to the document state via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this
// listing.
1676 template<
class EventHandler>
1677 void ParseEngine<EventHandler>::_end_doc_suddenly()
1679 _c4dbgp(
"end doc suddenly");
1680 _end_doc_suddenly__pop();
// Starts a new document from an arbitrary nesting depth: first unwinds to
// the current document state via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this
// listing.
1685 template<
class EventHandler>
1686 void ParseEngine<EventHandler>::_start_doc_suddenly()
1688 _c4dbgp(
"start doc suddenly");
1689 _end_doc_suddenly__pop();
// Finishes parsing of the whole stream: reports unterminated flow
// containers, unwinds all nested states, flushes pending anchors/tags into
// an otherwise empty document, and finally emits end_stream().
1694 template<
class EventHandler>
1695 void ParseEngine<EventHandler>::_end_stream()
1697 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
// errors for containers still open at end of input
// NOTE(review): the conditions guarding these two errors are not visible here
1699 _c4err(
"missing terminating ]");
1701 _c4err(
"missing terminating }");
// unwind to the bottom of the state stack
1702 if(m_evt_handler->m_stack.size() > 1)
1703 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// anchors or tags were read but never attached to a node: emit a document
// holding an empty scalar so that they have something to attach to
1710 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1714 m_evt_handler->begin_doc();
1715 _handle_annotations_before_blck_val_scalar();
1716 m_evt_handler->set_val_scalar_plain_empty();
1717 m_evt_handler->end_doc();
1721 m_evt_handler->end_stream();
// Pops parser states one by one until the current state is `popto`.
// @param popto the state to return to
// NOTE(review): the calls that actually close each sequence/map are not
// visible in this listing; only the loop header and the logging are.
1725 template<
class EventHandler>
1726 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1728 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1729 while(m_evt_handler->m_curr != popto)
1733 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1736 else if(has_any(
RMAP))
1738 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1746 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when the indentation of the current line
// decreased: finds the enclosing state whose reference indentation equals
// the indentation of the current line and pops down to it.
// Raises a parse error when no suitable state is found.
1749 template<
class EventHandler>
1750 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1753 using state_type =
typename EventHandler::state;
1754 state_type
const* popto =
nullptr;
1755 auto &stack = m_evt_handler->m_stack;
// the pointer arithmetic below requires contiguous stack storage and a
// current state which lives inside that storage
1756 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1757 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1758 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// dump of the whole stack (presumably debug builds only -- the enclosing
// guard is not visible in this listing)
1763 for(state_type
const& s : stack)
1764 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// walk from the parent of the current state down to (and including) the
// first stack entry
1767 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1769 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1770 if(s->indref == ind)
1772 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist and must be strictly below the current state
1777 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1779 _c4err(
"parse error: incorrect indentation?");
1781 _handle_indentation_pop(popto);
// Called from a block map when the indentation of the current line
// decreased: finds the enclosing state matching the indentation of the
// current line and pops down to it. Raises a parse error when no suitable
// state is found.
// NOTE(review): several branches of the search loop are elided in this
// listing; the comments below only describe the visible statements.
1784 template<
class EventHandler>
1785 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1788 using state_type =
typename EventHandler::state;
1789 auto &stack = m_evt_handler->m_stack;
// the pointer arithmetic below requires contiguous stack storage and a
// current state which lives inside that storage
1790 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1791 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1792 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1793 state_type
const* popto =
nullptr;
// dump of the whole stack (presumably debug builds only -- the enclosing
// guard is not visible in this listing)
1798 for(state_type
const& s : stack)
1799 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// walk from the parent of the current state downwards; unlike the block
// seq variant, the first stack entry is excluded (s > stack.begin())
1802 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1804 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1809 else if(s->indref == ind)
1811 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
// a candidate was already found and this state is a top-level state that
// is neither map nor seq
1812 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at what follows the leading spaces of the current line
1819 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1820 const size_t first = rem.first_not_of(
' ');
1821 _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first ==
npos);
1822 rem = rem.right_of(first,
true);
1823 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
// a '-' block token at the same indentation as the map means the map is
// the value of an entry of a sequence written without extra indentation
1824 if(rem.begins_with(
'-') && _is_blck_token(rem))
1826 _c4dbgp(
"parent was indentless seq");
// the target must exist and must be strictly below the current state
1832 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1834 _c4err(
"parse error: incorrect indentation?");
1836 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current position, possibly
// spanning several lines, and advances the parser past the closing quote.
// @return the scalar contents (a range of m_buf) together with a flag
//         telling whether the contents still need to be filtered
// Raises an error when the end of the input is reached before the closing
// quote.
// NOTE(review): several statements of the scanning loop are elided in this
// listing; the comments below only describe the visible statements.
1841 template<
class EventHandler>
1842 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1848 size_t b = m_evt_handler->m_curr->pos.offset;
1849 substr s = m_buf.sub(b);
// skip leading spaces, if any, and account for them in the line position
1850 if(s.begins_with(
' '))
1853 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1854 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1855 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1857 b = m_evt_handler->m_curr->pos.offset;
1858 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// step over the opening quote
1861 _line_progressed(1);
1864 bool needs_filter =
false;
1866 size_t numlines = 1;
// one iteration per line, until the closing quote or the end of the input
1868 while( ! _finished_file())
1870 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1871 bool line_is_blank =
true;
1872 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1873 for(
size_t i = 0; i < line.len; ++i)
1875 const char curr = line.str[i];
// lookahead character; '~' is used as a placeholder at the end of the line
1878 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1886 needs_filter =
true;
1890 else if(curr !=
' ')
1892 line_is_blank =
false;
// a line starting with a space while at line begin also requires filtering
1897 needs_filter = needs_filter
1900 || (_at_line_begin() && line.begins_with(
' '));
1904 _line_progressed(line.len);
// note: pos is unsigned, so `pos >= 0` always holds
1909 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1910 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
// consume up to and including the closing quote, then turn pos into an
// offset relative to the start of the scalar
1911 _line_progressed(pos + 1);
1912 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1922 _c4err(
"reached end of file while looking for closing quote");
1926 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
// range check first, so that the dereference of s.end() below is safe
1927 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1928 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1929 s = s.sub(0, pos-1);
1932 _c4prscalar(
"scanned squoted scalar", s,
true);
1934 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current position, possibly
// spanning several lines, and advances the parser past the closing quote.
// @return the scalar contents (a range of m_buf) together with a flag
//         telling whether the contents still need to be filtered
// Raises an error when the end of the input is reached before the closing
// quote.
// NOTE(review): several statements of the scanning loop are elided in this
// listing; the comments below only describe the visible statements.
1939 template<
class EventHandler>
1940 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1946 size_t b = m_evt_handler->m_curr->pos.offset;
1947 substr s = m_buf.sub(b);
// skip leading spaces, if any, and account for them in the line position
1948 if(s.begins_with(
' '))
1951 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1953 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1955 b = m_evt_handler->m_curr->pos.offset;
1956 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// step over the opening quote
1959 _line_progressed(1);
1962 bool needs_filter =
false;
1964 size_t numlines = 1;
// one iteration per line, until the closing quote or the end of the input
1966 while( ! _finished_file())
1968 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
// applied only when compiling with GCC 11; the reason is not stated here
1969 #if defined(__GNUC__) && __GNUC__ == 11
1970 C4_DONT_OPTIMIZE(line);
1972 bool line_is_blank =
true;
1973 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1974 for(
size_t i = 0; i < line.len; ++i)
1976 const char curr = line.str[i];
1978 line_is_blank =
false;
// lookahead character; '~' is used as a placeholder at the end of the line
1982 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1983 needs_filter =
true;
// an escaped quote or an escaped backslash
1984 if(next ==
'"' || next ==
'\\')
1987 else if(curr ==
'"')
// a line starting with a space while at line begin also requires filtering
1995 needs_filter = needs_filter
1998 || (_at_line_begin() && line.begins_with(
' '));
2002 _line_progressed(line.len);
// note: pos is unsigned, so `pos >= 0` always holds
2007 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2008 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'"');
// consume up to and including the closing quote, then turn pos into an
// offset relative to the start of the scalar
2009 _line_progressed(pos + 1);
2010 pos = m_evt_handler->m_curr->pos.offset - b - 1;
2020 _c4err(
"reached end of file looking for closing quote");
2024 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
// NOTE(review): this assertion dereferences s.end() before the range check
// on the following line; the single-quoted scanner checks the range first.
2025 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
2026 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2027 s = s.sub(0, pos-1);
2030 _c4prscalar(
"scanned dquoted scalar", s,
true);
2032 return ScannedScalar { s, needs_filter };
// Scans a block scalar (introduced by '|' or '>'): parses the header
// (chomping indicator and optional indentation digit), then consumes the
// following lines for as long as they belong to the block.
// @param sb     [out] receives the raw block (a range of m_buf) and the
//               indentation of the block
// @param indref reference indentation of the enclosing node; must not be npos
// NOTE(review): many statements (braces, else branches, breaks) are elided
// in this listing; the comments below only describe the visible statements.
2037 template<
class EventHandler>
2038 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2040 _c4dbgpf(
"blck: indref={}", indref);
2041 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// skip the spaces preceding the block header
2044 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2045 csubstr trimmed = s.triml(
' ');
2046 if(trimmed.str > s.str)
2048 _c4dbgp(
"skipping whitespace");
2049 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2050 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2053 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2055 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// defaults when the header carries no indicators: clip chomping, and an
// indentation to be detected from the contents
2058 BlockChomp_e chomp = CHOMP_CLIP;
2059 size_t indentation =
npos;
// header with indicators after the style character
2063 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
2064 csubstr t = s.sub(1);
2065 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2066 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
// chomping indicator: '-' or '+'
2067 size_t pos = t.first_of(
"-+");
2068 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2072 chomp = CHOMP_STRIP;
2073 else if(t[pos] ==
'+')
// explicit indentation indicator: a single non-zero decimal digit
2081 digits = t.left_of(t.first_not_of(
"0123456789"));
2082 if( ! digits.empty())
2084 if(C4_UNLIKELY(digits.len > 1))
2085 _c4err(
"parse error: invalid indentation");
2086 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2087 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2088 _c4err(
"parse error: could not read indentation as decimal");
2089 if(C4_UNLIKELY( ! indentation))
2090 _c4err(
"parse error: null indentation");
// NOTE(review): the message prints indentation+indref (the parameter) while
// the statement after it adds m_curr->indref; confirm both are always equal.
2091 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
// the digit is relative to the indentation of the current state
2092 indentation += m_evt_handler->m_curr->indref;
2096 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// finish the header line
2099 _line_progressed(s.len);
// the raw block starts empty at the current offset and grows line by line
2104 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2105 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2113 size_t num_lines = 0;
2114 size_t first = m_evt_handler->m_curr->pos.line;
// indentation inferred from blank lines seen before the first line with
// contents
2115 size_t provisional_indentation =
npos;
// read every line of the block
2117 while(( ! _finished_file()))
// peek at the next line without committing to it yet
2120 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// applied only when compiling with GCC 12 or 13; the reason is not stated here
2121 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2122 C4_DONT_OPTIMIZE(lc.rem);
2124 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
// case 1: the block indentation is already known
2126 if(indentation !=
npos)
2128 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line with less indentation than the block
2130 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2134 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2138 _c4err(
"indentation decreased without any scalar");
// at zero indentation only a document token terminates the block
2142 else if(indentation == 0)
2144 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2145 if(_is_doc_token(lc.rem))
2147 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation still has to be determined
2154 const size_t fns = lc.stripped.first_not_of(
' ');
2155 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
// a leading tab cannot be used to establish the indentation
2159 if(C4_UNLIKELY(lc.stripped.begins_with(
'\t')))
2160 _c4err(
"parse error");
2162 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2163 if(provisional_indentation ==
npos)
2165 if(lc.indentation < indref)
2167 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2168 if(raw_block.len == 0)
2170 _c4dbgp(
"blck: was empty, undo next line");
2175 else if(lc.indentation == m_evt_handler->m_curr->indref)
2179 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// the first line with contents fixes the indentation of the block
2183 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2184 indentation = lc.indentation;
2188 if(lc.indentation >= provisional_indentation)
2190 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2192 indentation = lc.indentation;
// blank line (empty or spaces only): update the provisional indentation
2203 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2204 if(provisional_indentation !=
npos)
2206 if(lc.stripped.len >= provisional_indentation)
2208 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2209 provisional_indentation = lc.stripped.len;
2211 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2212 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2214 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2215 provisional_indentation = lc.indentation;
// first blank line: start from the indentation of the line, falling back to
// has_any(RSEQ|RVAL) converted to size_t when that indentation is 0
2221 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2222 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2223 if(provisional_indentation ==
npos)
2225 provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(
RSEQ|
RVAL);
2226 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never smaller than the reference one
2228 if(provisional_indentation < indref)
2230 provisional_indentation = indref;
2231 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line belongs to the block: commit it and append it
2237 m_evt_handler->m_curr->line_contents = lc;
2238 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2239 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2240 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2244 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2245 C4_UNUSED(num_lines);
// no line with contents was found: fall back to the provisional value
2248 if(indentation ==
npos)
2250 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2251 indentation = provisional_indentation;
2257 _c4prscalar(
"scanned block", raw_block,
true);
// hand the results over to the caller
2259 sb->scalar = raw_block;
2260 sb->indentation = indentation;
// Debug logging helper for the whitespace filters: the first definition
// prefixes the message with the read and write positions of `proc`, the
// second one expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two
// definitions is not visible in this listing.
2272 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2274 #define _c4dbgfws(...)
// Handles a run of whitespace starting at the current read position of the
// processor, distinguishing whitespace followed by contents on the same
// line from whitespace trailing on the line.
// @param proc the filter processor; its current character must be ' ' or '\t'
// @return a bool consumed by _filter_ws_copy_trailing() and
//         _filter_ws_skip_trailing(); a false result makes them treat the
//         rest of the source as trailing whitespace
// NOTE(review): the return statements are not visible in this listing.
2277 template<
class EventHandler>
2278 template<
class FilterProcessor>
2279 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2281 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2282 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
// at the very start of the source only spaces are skipped; elsewhere both
// spaces and tabs are
2284 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2285 if(first_pos !=
npos)
2287 const char first_char = proc.src[first_pos];
2288 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// the whitespace is followed by a line break: drop it
2289 if(first_char ==
'\n' || first_char ==
'\r')
2291 _c4dbgfws(
"whitespace is trailing on line",
"");
2292 proc.skip(first_pos - proc.rpos);
2297 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// only whitespace remains until the end of the source
2301 _c4dbgfws(
"whitespace is trailing on line",
"");
// Filters a run of whitespace; when the rest of the source is only trailing
// whitespace, that rest is copied to the output.
// @param proc the filter processor, positioned at a whitespace character
2305 template<
class EventHandler>
2306 template<
class FilterProcessor>
2307 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2309 if(!_filter_ws_handle_to_first_non_space(proc))
2311 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2312 proc.copy(proc.src.len - proc.rpos);
// Filters a run of whitespace; when the rest of the source is only trailing
// whitespace, that rest is skipped, ie it is not written to the output.
// @param proc the filter processor, positioned at a whitespace character
2316 template<
class EventHandler>
2317 template<
class FilterProcessor>
2318 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2320 if(!_filter_ws_handle_to_first_non_space(proc))
2322 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2323 proc.skip(proc.src.len - proc.rpos);
// Debug logging helper for the plain scalar filter: the first definition
// prefixes the message with the read and write positions of `proc`, the
// second one expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two
// definitions is not visible in this listing.
2337 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2339 #define _c4dbgfps(fmt, ...)
// Filters a newline found inside a plain scalar. According to the log
// messages, a run of consecutive newlines is kept as newlines, a single
// newline is converted to a space, and a last newline followed only by
// whitespace is treated separately.
// @param proc        the filter processor; its current character must be '\n'
// @param indentation passed through to _count_following_newlines()
// NOTE(review): the conditions selecting between the cases are not visible
// in this listing.
2342 template<
class EventHandler>
2343 template<
class FilterProcessor>
2344 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2346 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2348 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii starts at the newline and is updated by the counting helper
2349 size_t ii = proc.rpos;
2350 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
// write as many newlines as were found after the current one
2353 proc.set(
'\n', numnl_following);
2354 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline which is not blank
2358 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2362 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2366 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar, processing it character by character: whitespace
// goes through _filter_ws_skip_trailing(), newlines go through
// _filter_nl_plain(), and carriage returns are ignored.
// @param proc        the filter processor holding source and destination
// @param indentation indentation of the scalar; must not be npos
// @return the result reported by the processor
// NOTE(review): the dispatch conditions and the handling of ordinary
// characters are not visible in this listing.
2373 template<
class EventHandler>
2374 template<
class FilterProcessor>
2375 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2377 _RYML_CB_ASSERT(this->callbacks(), indentation !=
npos);
2378 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2380 while(proc.has_more_chars())
2382 const char curr = proc.curr();
2383 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2388 _c4dbgfps(
"whitespace", curr);
2389 _filter_ws_skip_trailing(proc);
2392 _c4dbgfps(
"newline", curr);
2393 _filter_nl_plain(proc, indentation);
2396 _c4dbgfps(
"carriage return, ignore", curr);
2405 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2407 return proc.result();
// Entry point running the plain scalar filter from a source range (scalar)
// into a separate destination range (dst).
// NOTE(review): the signature line of this function is not visible in this
// listing.
2413 template<
class EventHandler>
2416 FilterProcessorSrcDst proc(scalar, dst);
2417 return _filter_plain(proc, indentation);
// Entry point running the plain scalar filter in place over dst, with cap
// forwarded to the processor (presumably the capacity of the buffer --
// confirm against the processor documentation).
// NOTE(review): the signature line of this function is not visible in this
// listing.
2420 template<
class EventHandler>
2423 FilterProcessorInplaceEndExtending proc(dst, cap);
2424 return _filter_plain(proc, indentation);
// Debug logging helper for the single-quoted scalar filter: the first
// definition prefixes the message with the read and write positions of
// `proc`, the second one expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two
// definitions is not visible in this listing.
2435 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2437 #define _c4dbgfsq(fmt, ...)
// Filters a newline found inside a single-quoted scalar. According to the
// log messages, a run of consecutive newlines is kept as newlines and a
// single newline is converted to a space.
// @param proc the filter processor; its current character must be '\n'
// NOTE(review): the conditions selecting between the cases are not visible
// in this listing.
2440 template<
class EventHandler>
2441 template<
class FilterProcessor>
2442 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2444 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2446 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii starts at the newline and is updated by the counting helper
2447 size_t ii = proc.rpos;
2448 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// write as many newlines as were found after the current one
2451 proc.set(
'\n', numnl_following);
2452 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline which is not blank
2456 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2460 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2465 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar, processing it character by character:
// whitespace goes through _filter_ws_copy_trailing(), newlines go through
// _filter_nl_squoted(), carriage returns are skipped, and a single quote is
// checked for being followed by another single quote.
// @param proc the filter processor holding source and destination
// @return the result reported by the processor
// NOTE(review): the dispatch conditions and the handling of ordinary
// characters are not visible in this listing.
2471 template<
class EventHandler>
2472 template<
class FilterProcessor>
2473 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2475 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2479 while(proc.has_more_chars())
2481 const char curr = proc.curr();
2482 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2487 _c4dbgfsq(
"whitespace", curr);
2488 _filter_ws_copy_trailing(proc);
2491 _c4dbgfsq(
"newline", curr);
2492 _filter_nl_squoted(proc);
2495 _c4dbgfsq(
"skip cr", curr);
2499 _c4dbgfsq(
"squote", curr);
// two consecutive quotes form the escape sequence of single-quoted scalars
2500 if(proc.next() ==
'\'')
2502 _c4dbgfsq(
"two consecutive squotes", curr);
// a quote which is not doubled is an error here
2508 _c4err(
"filter error");
2517 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2519 return proc.result();
// Entry point running the single-quoted scalar filter from a source range
// (scalar) into a separate destination range (dst).
// NOTE(review): the signature line of this function is not visible in this
// listing.
2524 template<
class EventHandler>
2527 FilterProcessorSrcDst proc(scalar, dst);
2528 return _filter_squoted(proc);
// Entry point running the single-quoted scalar filter in place over dst,
// with cap forwarded to the processor (presumably the capacity of the
// buffer -- confirm against the processor documentation).
// NOTE(review): the signature line of this function is not visible in this
// listing.
2531 template<
class EventHandler>
2534 FilterProcessorInplaceEndExtending proc(dst, cap);
2535 return _filter_squoted(proc);
// Debug logging helper for the double-quoted scalar filter: the first
// definition prefixes the message with the read and write positions of
// `proc`, the second one expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two
// definitions is not visible in this listing.
2546 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2548 #define _c4dbgfdq(...)
// Filters a newline found inside a double-quoted scalar. According to the
// log messages, a run of consecutive newlines is kept as newlines and a
// single newline is converted to a space. A backslash followed by a space
// or tab right after the skipped whitespace is given special treatment.
// @param proc the filter processor; its current character must be '\n'
// NOTE(review): the conditions selecting between the cases, and the
// statements of the backslash branch, are not visible in this listing.
2551 template<
class EventHandler>
2552 template<
class FilterProcessor>
2553 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2555 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2557 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii starts at the newline and is updated by the counting helper
2558 size_t ii = proc.rpos;
2559 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// write as many newlines as were found after the current one
2562 proc.set(
'\n', numnl_following);
2563 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline which is not blank
2567 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2571 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2576 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// check for a backslash at position ii
2578 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2580 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for the character after a backslash that ends the source
2581 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2582 if(next ==
' ' || next ==
'\t')
2584 _c4dbgfdq(
"extend skip to backslash",
"");
// Translates the escape sequence starting at the backslash under the read
// position of the processor. The character after the backslash selects the
// translation; an unknown character raises an error.
// @param proc the filter processor, positioned at a backslash
// NOTE(review): several statements (notably the handling of a backslash
// followed by a line break, and the declaration of readbuf) are elided in
// this listing; the comments below only describe the visible statements.
2592 template<
class EventHandler>
2593 template<
class FilterProcessor>
2594 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2596 char next = proc.next();
2597 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a \r\n line break after the backslash is handled as a plain \n
2600 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2604 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan the blanks which follow, starting two characters after the backslash
2610 size_t ii = proc.rpos + 2;
2611 for( ; ii < proc.src.len; ++ii)
2614 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2619 proc.skip(ii - proc.rpos);
// characters which translate to themselves
2621 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2624 proc.translate_esc(next);
2625 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2627 else if(next ==
'\r')
// single-character escapes with the same meaning as in C
2631 else if(next ==
'n')
2633 proc.translate_esc(
'\n');
2635 else if(next ==
'r')
2637 proc.translate_esc(
'\r');
2639 else if(next ==
't')
2641 proc.translate_esc(
'\t');
2643 else if(next ==
'\\')
2645 proc.translate_esc(
'\\');
// \xHH: two hex digits producing a single byte
2647 else if(next ==
'x')
// the digits are at rpos+2 and rpos+3, which must both be inside the source
2649 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2650 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2651 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2652 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2653 uint8_t byteval = {};
2654 if(C4_UNLIKELY(!
read_hex(codepoint, &byteval)))
2655 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
2656 proc.translate_esc_bulk((
const char*)&byteval, 1u, 3u);
2657 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: four hex digits giving a code point, written as UTF-8
2659 else if(next ==
'u')
2661 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2662 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2664 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2665 uint32_t codepoint_val = {};
2666 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2667 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2668 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the value printed by this error is the scalar position,
// not the code point
2669 if(C4_UNLIKELY(numbytes == 0))
2670 _c4err(
"failed to decode code point={}", proc.rpos);
2671 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2672 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: eight hex digits giving a code point, written as UTF-8
2674 else if(next ==
'U')
2676 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2677 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2679 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2680 uint32_t codepoint_val = {};
2681 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2682 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2683 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the value printed by this error is the scalar position,
// not the code point
2684 if(C4_UNLIKELY(numbytes == 0))
2685 _c4err(
"failed to decode code point={}", proc.rpos);
2686 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2687 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// remaining single-character escapes
2690 else if(next ==
'0')
2692 proc.translate_esc(
'\0');
2694 else if(next ==
'b')
2696 proc.translate_esc(
'\b');
2698 else if(next ==
'f')
2700 proc.translate_esc(
'\f');
2702 else if(next ==
'a')
2704 proc.translate_esc(
'\a');
2706 else if(next ==
'v')
2708 proc.translate_esc(
'\v');
// \e is the escape character (0x1b)
2710 else if(next ==
'e')
2712 proc.translate_esc(
'\x1b');
// \_ : bytes 0xc2 0xa0, the UTF-8 encoding of U+00A0 (no-break space)
2714 else if(next ==
'_')
2717 const char payload[] = {
2718 _RYML_CHCONST(-0x3e, 0xc2),
2719 _RYML_CHCONST(-0x60, 0xa0),
2721 proc.translate_esc_bulk(payload, 2, 1);
// \N : bytes 0xc2 0x85, the UTF-8 encoding of U+0085 (next line)
2723 else if(next ==
'N')
2726 const char payload[] = {
2727 _RYML_CHCONST(-0x3e, 0xc2),
2728 _RYML_CHCONST(-0x7b, 0x85),
2730 proc.translate_esc_bulk(payload, 2, 1);
// \L : bytes 0xe2 0x80 0xa8, the UTF-8 encoding of U+2028 (line separator);
// three bytes replace the two characters of the escape, so the extending
// variant of the translation is used
2732 else if(next ==
'L')
2735 const char payload[] = {
2736 _RYML_CHCONST(-0x1e, 0xe2),
2737 _RYML_CHCONST(-0x80, 0x80),
2738 _RYML_CHCONST(-0x58, 0xa8),
2740 proc.translate_esc_extending(payload, 3, 1);
// \P : bytes 0xe2 0x80 0xa9, the UTF-8 encoding of U+2029 (paragraph
// separator); also uses the extending variant
2742 else if(next ==
'P')
2745 const char payload[] = {
2746 _RYML_CHCONST(-0x1e, 0xe2),
2747 _RYML_CHCONST(-0x80, 0x80),
2748 _RYML_CHCONST(-0x57, 0xa9),
2750 proc.translate_esc_extending(payload, 3, 1);
2752 else if(next ==
'\0')
// anything else is not a valid escape sequence
2758 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2760 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar, processing it character by character:
// whitespace goes through _filter_ws_copy_trailing(), newlines go through
// _filter_nl_dquoted(), carriage returns are ignored, and backslashes go
// through _filter_dquoted_backslash().
// @param proc the filter processor holding source and destination
// @return the result reported by the processor
// NOTE(review): the dispatch conditions and the handling of ordinary
// characters are not visible in this listing.
2764 template<
class EventHandler>
2765 template<
class FilterProcessor>
2766 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2768 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2771 while(proc.has_more_chars())
2773 const char curr = proc.curr();
2774 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2780 _c4dbgfdq(
"whitespace", curr);
2781 _filter_ws_copy_trailing(proc);
2786 _c4dbgfdq(
"newline", curr);
2787 _filter_nl_dquoted(proc);
2792 _c4dbgfdq(
"carriage return, ignore", curr);
2798 _filter_dquoted_backslash(proc);
2808 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2809 return proc.result();
// Entry point running the double-quoted scalar filter from a source range
// (scalar) into a separate destination range (dst).
// NOTE(review): the signature line of this function is not visible in this
// listing.
2815 template<
class EventHandler>
2818 FilterProcessorSrcDst proc(scalar, dst);
2819 return _filter_dquoted(proc);
// Entry point running the double-quoted scalar filter in place over dst,
// with cap forwarded to the processor (presumably the capacity of the
// buffer -- confirm against the processor documentation). Unlike the plain
// and single-quoted in-place filters, this one uses the MidExtending
// processor type.
// NOTE(review): the signature line of this function is not visible in this
// listing.
2822 template<
class EventHandler>
2825 FilterProcessorInplaceMidExtending proc(dst, cap);
2826 return _filter_dquoted(proc);
// Apply the chomping mode of a block scalar to the trailing part of `proc`.
// Preconditions (asserted): `chomp` is one of CHOMP_CLIP/CHOMP_KEEP/CHOMP_STRIP
// and the remaining input holds nothing but spaces, '\n' and '\r'.
// `indentation` is the block indentation used when skipping leading spaces
// of the lines being processed.
2835 template<
class EventHandler>
2836 template<
class FilterProcessor>
2837 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
2839 _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2840 _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// debug tracing local to this function: either prefixed with the current
// read/write positions, or compiled out entirely
2844 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2846 #define _c4dbgchomp(...)
2854 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn `last` into an absolute read position: past the newline plus the indentation
2855 last = proc.rpos + last + size_t(1) + indentation;
2856 _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
// consume input up to `last`
2858 while((proc.rpos < last) && proc.has_more_chars())
2860 const char curr = proc.curr();
2861 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
2866 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2869 csubstr at_next_line = proc.rem();
2870 if(at_next_line.begins_with(
' '))
2872 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2874 size_t first_non_space = at_next_line.first_not_of(
' ');
2875 _c4dbgchomp(
"first_non_space={}", first_non_space);
2876 if(first_non_space ==
npos)
2878 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
// only spaces remain: treat the whole remainder as the run of spaces
2879 first_non_space = at_next_line.len;
2881 if(first_non_space <= indentation)
2883 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2884 proc.skip(first_non_space);
2888 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
// more spaces than the indentation: drop the indentation, keep the excess
2889 proc.skip(indentation);
2891 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2892 proc.copy(first_non_space - indentation);
2901 _c4err(
"parse error");
// CLIP handling
2913 bool had_one =
false;
2914 while(proc.has_more_chars())
2916 const char curr = proc.curr();
2917 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
2922 _c4dbgchomp(
"copy newline!", curr);
2930 _c4dbgchomp(
"skip!", curr);
2937 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
// KEEP handling
2944 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2945 while(proc.has_more_chars())
2947 const char curr = proc.curr();
2948 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
2952 _c4dbgchomp(
"copy newline!", curr);
2957 _c4dbgchomp(
"skip!", curr);
// STRIP handling
2966 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Debug tracing for the block-scalar filters. The first form prefixes each
// message with the processor's read and write positions; the second form
// expands to nothing.
2978 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2980 #define _c4dbgfb(...)
// Skip the indentation at the current read position of a block scalar.
// Counts the leading spaces of the remaining input and skips at most
// `indentation` of them.
2983 template<
class EventHandler>
2984 template<
class FilterProcessor>
2985 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2987 csubstr rem = proc.rem();
// index of the first non-space character in the remaining input
2990 size_t first = rem.first_not_of(
' ');
2993 _c4dbgfb(
"{} spaces follow before next nonws character", first);
2994 if(first < indentation)
2996 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3001 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3002 proc.skip(indentation);
// the code guarded by this macro handles input that is spaces to the end
3005 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3008 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3012 if(first < indentation)
3014 _c4dbgfb(
"skip everything", first);
3015 proc.skip(proc.src.len - proc.rpos);
3019 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3020 proc.skip(indentation);
// Measure the contents of a block scalar and deal with an all-whitespace one.
// `contents` is the source with trailing spaces, '\n' and '\r' trimmed; its
// length is the return value. With CHOMP_KEEP and a non-empty source the
// source characters are walked one by one.
3028 template<
class EventHandler>
3029 template<
class FilterProcessor>
3030 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3032 csubstr contents = proc.src.trimr(
" \n\r");
3033 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3036 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3037 if(chomp == CHOMP_KEEP && proc.src.len)
3039 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3040 while(proc.has_more_chars())
3042 const char curr = proc.curr();
3054 return contents.len;
// Extend the contents length up to where chomping should begin.
// Looks for the first '\n' at or after `contents_len` in the source: when
// found, the contents end at that newline; otherwise the contents span the
// whole source. `contents_len` must be non-zero (asserted).
// Returns the adjusted contents length.
3057 template<
class EventHandler>
3058 template<
class FilterProcessor>
3059 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3061 _c4dbgfb(
"contents_len={}", contents_len);
3063 _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3067 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3068 if(firstnewl !=
npos)
3070 contents_len = firstnewl;
3071 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3075 contents_len = proc.src.len;
3076 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3079 return contents_len;
// Debug tracing for the literal block filter. The first form prefixes each
// message with the processor's read and write positions; the second form
// expands to nothing.
3091 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3093 #define _c4dbgfbl(...)
// Filter a literal block scalar through the given filter processor.
// Steps visible below: measure the contents (_handle_all_whitespace), extend
// them to the chomp start (_extend_to_chomp), strip the indentation of each
// line while walking the contents, then apply the chomping mode
// (_filter_chomp). Returns proc.result().
3096 template<
class EventHandler>
3097 template<
class FilterProcessor>
3098 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3100 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3102 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early exit
3104 return proc.result();
3106 contents_len = _extend_to_chomp(proc, contents_len);
3108 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
// indentation of the first line
3110 _filter_block_indentation(proc, indentation);
// walk the contents only; the trailing part is left for _filter_chomp()
3113 while(proc.has_more_chars(contents_len))
3115 const char curr = proc.curr();
3116 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3121 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3123 _filter_block_indentation(proc, indentation);
3135 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3137 _filter_chomp(proc, chomp, indentation);
3139 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3141 return proc.result();
// Literal block filtering from a source scalar into a separate destination:
// builds a FilterProcessorSrcDst over (scalar, dst) and runs
// _filter_block_literal().
3146 template<
class EventHandler>
3149 FilterProcessorSrcDst proc(scalar, dst);
3150 return _filter_block_literal(proc, indentation, chomp);
// Literal block filtering done in place: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_literal().
3153 template<
class EventHandler>
3156 FilterProcessorInplaceEndExtending proc(scalar, cap);
3157 return _filter_block_literal(proc, indentation, chomp);
// Debug tracing for the folded block filter. The first form prefixes each
// message with the processor's read and write positions; the second form
// expands to nothing.
3167 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3169 #define _c4dbgfbf(...)
// Handle the leading lines of a folded block scalar.
// Strips the indentation, then loops over the first `len` characters:
// newlines trigger another indentation strip, leading whitespace hands over
// to _filter_block_folded_indented_block(), and the loop stops at the first
// character that is neither.
3173 template<
class EventHandler>
3174 template<
class FilterProcessor>
3175 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3177 _filter_block_indentation(proc, indentation);
3178 while(proc.has_more_chars(len))
3180 const char curr = proc.curr();
3181 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3185 _c4dbgfbf(
"newline.", curr);
3187 _filter_block_indentation(proc, indentation);
// width of the run of spaces/tabs at the read position
3195 size_t first = proc.rem().first_not_of(
" \t");
3196 _c4dbgfbf(
"space. first={}", first);
// fall back to the full remaining length
3198 first = proc.rem().len;
3199 _c4dbgfbf(
"... indentation increased to {}", first);
3200 _filter_block_folded_indented_block(proc, indentation, len, first);
3204 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Fold one newline of a run of consecutive newlines in a folded block scalar.
// `num_newl` is the 1-based position of this newline within the run and
// `wpos_at_first_newl` the write position remembered for the first one.
// Per the traces below: the first newline becomes a space, the second turns
// that space back into '\n', and later ones are copied.
// Returns the (possibly updated) write position of the first newline.
3210 template<
class EventHandler>
3211 template<
class FilterProcessor>
3212 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3217 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space standing in for the first newline is written
3218 wpos_at_first_newl = proc.wpos;
3223 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// sanity checks: the remembered position holds the space written just before
3224 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl !=
npos);
3225 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3226 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3228 proc.set_at(wpos_at_first_newl,
'\n');
3229 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3232 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3236 return wpos_at_first_newl;
// Process a run of newlines inside a folded block scalar.
// Must be entered with the read position on a '\n' (asserted). Each newline
// is folded by _filter_block_folded_newlines_compress() and followed by an
// indentation strip; a more-indented line hands over to
// _filter_block_folded_indented_block(). The loop stops at the first
// character that is neither space nor newline.
3239 template<
class EventHandler>
3240 template<
class FilterProcessor>
3241 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3243 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
// number of newlines seen so far in this run
3244 size_t num_newl = 0;
// write position of the first newline of the run; npos while there is none
3245 size_t wpos_at_first_newl =
npos;
3246 while(proc.has_more_chars(len))
3248 const char curr = proc.curr();
3249 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3254 _c4dbgfbf(
"newline. sofar={}", num_newl);
3290 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3291 _filter_block_indentation(proc, indentation);
// width of the run of spaces/tabs at the read position
3297 size_t first = proc.rem().first_not_of(
" \t");
3298 _c4dbgfbf(
"space. first={}", first);
3300 first = proc.rem().len;
3301 _c4dbgfbf(
"... indentation increased to {}", first);
3304 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
// restore the newline that had been folded into a space
3305 proc.set_at(wpos_at_first_newl,
'\n');
3309 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3312 _filter_block_folded_indented_block(proc, indentation, len, first);
// forget the remembered position after the indented block
3314 wpos_at_first_newl =
npos;
3321 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Process a more-indented block inside a folded block scalar.
// `curr_indentation` is the width of the whitespace run at the read position
// (asserted against the remaining input); it is copied first. The loop then
// walks the first `len` characters, stripping `indentation` after each
// newline and inspecting the first non-space character of the next line.
3328 template<
class EventHandler>
3329 template<
class FilterProcessor>
3330 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3332 _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
// keep the extra indentation of the first line verbatim
3333 if(curr_indentation)
3334 proc.copy(curr_indentation);
3335 while(proc.has_more_chars(len))
3337 const char curr = proc.curr();
3338 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3344 _filter_block_indentation(proc, indentation);
3345 csubstr rem = proc.rem();
// index of the first non-space character of the next line
3346 const size_t first = rem.first_not_of(
' ');
3347 _c4dbgfbf(
"newline. firstns={}", first);
3350 const char c = rem[first];
3351 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3352 if(c ==
'\n' || c ==
'\r')
3358 _c4dbgfbf(
"done with indented block", first);
3362 else if(first !=
npos)
3365 _c4dbgfbf(
"copy all {} spaces", first);
// Filter a folded block scalar through the given filter processor.
// Steps visible below: measure the contents (_handle_all_whitespace), extend
// them to the chomp start (_extend_to_chomp), handle the leading lines, fold
// each run of newlines while walking the contents, then apply the chomping
// mode (_filter_chomp). Returns proc.result().
3383 template<
class EventHandler>
3384 template<
class FilterProcessor>
3385 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3387 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3389 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early exit
3391 return proc.result();
3393 contents_len = _extend_to_chomp(proc, contents_len);
3395 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3397 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// walk the contents only; the trailing part is left for _filter_chomp()
3400 while(proc.has_more_chars(contents_len))
3402 const char curr = proc.curr();
3403 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3408 _c4dbgfbf(
"found newline", curr);
3409 _filter_block_folded_newlines(proc, indentation, contents_len);
3421 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3423 _filter_chomp(proc, chomp, indentation);
3425 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3427 return proc.result();
// Folded block filtering from a source scalar into a separate destination:
// builds a FilterProcessorSrcDst over (scalar, dst) and runs
// _filter_block_folded().
3432 template<
class EventHandler>
3435 FilterProcessorSrcDst proc(scalar, dst);
3436 return _filter_block_folded(proc, indentation, chomp);
// Folded block filtering done in place: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_folded().
3439 template<
class EventHandler>
3442 FilterProcessorInplaceEndExtending proc(scalar, cap);
3443 return _filter_block_folded(proc, indentation, chomp);
// Filter a plain scalar in place.
// Calls filter_scalar_plain_in_place() with s.len as its second argument and
// asserts that the result is valid.
3451 template<
class EventHandler>
3452 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3454 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3455 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3456 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3457 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a single-quoted scalar in place.
// Calls filter_scalar_squoted_in_place() with s.len as its second argument
// and asserts that the result is valid.
3463 template<
class EventHandler>
3464 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3466 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3467 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3468 _RYML_CB_ASSERT(this->callbacks(), r.valid());
3469 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a double-quoted scalar, falling back to the arena when the result
// does not fit in place.
// First tries filter_scalar_dquoted_in_place(); if that result is not valid,
// allocates required_len() bytes from the event handler's arena and filters
// from `s` into that buffer, checking that the second pass succeeded.
3476 template<
class EventHandler>
3477 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3479 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3480 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
// expected case: the filtered scalar fits in place
3481 if(C4_LIKELY(r.valid()))
3483 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: the filtered scalar needs more room than `s` provides
3488 const size_t len = r.required_len();
3489 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// NOTE(review): &s is handed to alloc_arena(), presumably so that `s` can be
// relocated if the arena moves -- confirm against the handler
3490 substr dst = m_evt_handler->alloc_arena(len, &s);
3491 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3492 _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3493 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3494 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3495 _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len);
3496 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3497 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filter a literal block scalar, falling back to the arena when the result
// does not fit in place.
// First tries filter_scalar_block_literal_in_place(); if that result is not
// valid, allocates required_len() bytes from the event handler's arena and
// filters from `s` into that buffer, checking that the second pass succeeded.
3504 template<
class EventHandler>
3505 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3507 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3508 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
// expected case: the filtered scalar fits in place
3509 if(C4_LIKELY(r.valid()))
3511 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: filter into a freshly allocated arena buffer
3516 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3517 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3518 FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3519 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3520 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filter a folded block scalar, falling back to the arena when the result
// does not fit in place.
// First tries filter_scalar_block_folded_in_place(); if that result is not
// valid, allocates required_len() bytes from the event handler's arena and
// filters from `s` into that buffer, checking that the second pass succeeded.
3527 template<
class EventHandler>
3528 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3530 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3531 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
// expected case: the filtered scalar fits in place
3532 if(C4_LIKELY(r.valid()))
3534 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: filter into a freshly allocated arena buffer
3539 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3540 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3541 FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3542 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3543 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filter a plain key scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_plain(); otherwise it is left as is and the event handler
// is told the key is unfiltered. A scalar that needs no filtering is only
// traced.
3551 template<
class EventHandler>
3552 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3556 if(m_options.scalar_filtering())
3558 return _filter_scalar_plain(sc.scalar, indentation);
3562 _c4dbgp(
"plain scalar left unfiltered");
3563 m_evt_handler->mark_key_scalar_unfiltered();
3568 _c4dbgp(
"plain scalar doesn't need filtering");
// Filter a plain value scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_plain(); otherwise it is left as is and the event handler
// is told the value is unfiltered. A scalar that needs no filtering is only
// traced.
3573 template<
class EventHandler>
3574 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3578 if(m_options.scalar_filtering())
3580 return _filter_scalar_plain(sc.scalar, indentation);
3584 _c4dbgp(
"plain scalar left unfiltered");
3585 m_evt_handler->mark_val_scalar_unfiltered();
3590 _c4dbgp(
"plain scalar doesn't need filtering");
// Filter a single-quoted key scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_squot(); otherwise it is left as is and the event handler
// is told the key is unfiltered. A scalar that needs no filtering is only
// traced.
3598 template<
class EventHandler>
3599 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3603 if(m_options.scalar_filtering())
3605 return _filter_scalar_squot(sc.scalar);
3609 _c4dbgp(
"squo key scalar left unfiltered");
3610 m_evt_handler->mark_key_scalar_unfiltered();
3615 _c4dbgp(
"squo key scalar doesn't need filtering");
// Filter a single-quoted value scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_squot(); otherwise it is left as is and the event handler
// is told the value is unfiltered. A scalar that needs no filtering is only
// traced.
3620 template<
class EventHandler>
3621 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3625 if(m_options.scalar_filtering())
3627 return _filter_scalar_squot(sc.scalar);
3631 _c4dbgp(
"squo val scalar left unfiltered");
3632 m_evt_handler->mark_val_scalar_unfiltered();
3637 _c4dbgp(
"squo val scalar doesn't need filtering");
// Filter a double-quoted key scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_dquot(); otherwise it is left as is and the event handler
// is told the key is unfiltered. A scalar that needs no filtering is only
// traced.
3645 template<
class EventHandler>
3646 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3650 if(m_options.scalar_filtering())
3652 return _filter_scalar_dquot(sc.scalar);
3656 _c4dbgp(
"dquo scalar left unfiltered");
3657 m_evt_handler->mark_key_scalar_unfiltered();
3662 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filter a double-quoted value scalar only when required.
// With scalar filtering enabled in the options the scalar is filtered via
// _filter_scalar_dquot(); otherwise it is left as is and the event handler
// is told the value is unfiltered. A scalar that needs no filtering is only
// traced.
3667 template<
class EventHandler>
3668 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3672 if(m_options.scalar_filtering())
3674 return _filter_scalar_dquot(sc.scalar);
3678 _c4dbgp(
"dquo scalar left unfiltered");
3679 m_evt_handler->mark_val_scalar_unfiltered();
3684 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filter a literal block key scalar when scalar filtering is enabled.
// With filtering enabled the scalar goes through _filter_scalar_literal()
// using the block's own indentation and chomp mode; otherwise it is left as
// is and the event handler is told the key is unfiltered.
3692 template<
class EventHandler>
3693 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3695 if(m_options.scalar_filtering())
3697 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3701 _c4dbgp(
"literal scalar left unfiltered");
3702 m_evt_handler->mark_key_scalar_unfiltered();
// Filter a literal block value scalar when scalar filtering is enabled.
// With filtering enabled the scalar goes through _filter_scalar_literal()
// using the block's own indentation and chomp mode; otherwise it is left as
// is and the event handler is told the value is unfiltered.
3707 template<
class EventHandler>
3708 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3710 if(m_options.scalar_filtering())
3712 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3716 _c4dbgp(
"literal scalar left unfiltered");
3717 m_evt_handler->mark_val_scalar_unfiltered();
// Filter a folded block key scalar when scalar filtering is enabled.
// With filtering enabled the scalar goes through _filter_scalar_folded()
// using the block's own indentation and chomp mode; otherwise it is left as
// is and the event handler is told the key is unfiltered.
3725 template<
class EventHandler>
3726 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3728 if(m_options.scalar_filtering())
3730 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3734 _c4dbgp(
"folded scalar left unfiltered");
3735 m_evt_handler->mark_key_scalar_unfiltered();
// Filter a folded block value scalar when scalar filtering is enabled.
// With filtering enabled the scalar goes through _filter_scalar_folded()
// using the block's own indentation and chomp mode; otherwise it is left as
// is and the event handler is told the value is unfiltered.
3740 template<
class EventHandler>
3741 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3743 if(m_options.scalar_filtering())
3745 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3749 _c4dbgp(
"folded scalar left unfiltered");
3750 m_evt_handler->mark_val_scalar_unfiltered();
// Debug trace for adding the flags `on` to the parser state `s`.
// Formats the added flags, the current flags and the resulting flags into
// local buffers and logs them together with the state level.
3762 template<
class EventHandler>
3763 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3765 char buf1_[64], buf2_[64], buf3_[64];
3766 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3767 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3768 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3769 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Debug trace for adding the flags `on` and removing the flags `off` on the
// parser state `s`: logs both sets, the current flags and the resulting
// flags ((flags|on) & ~off) together with the state level.
3773 template<
class EventHandler>
3776 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3777 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3778 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3779 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3780 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3781 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Debug trace for removing the flags `off` from the parser state `s`.
// Formats the removed flags, the current flags and the resulting flags into
// local buffers and logs them together with the state level.
3786 template<
class EventHandler>
3787 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3789 char buf1_[64], buf2_[64], buf3_[64];
3790 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3791 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3792 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3793 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Write a textual rendering of the parser flags into `buf`.
// The helper macro _prflag(fl) handles one flag: when all bits of `fl` are
// set in `flags`, the flag's name (its stringified token) is copied into the
// buffer provided it still fits.
// Returns the written prefix of `buf`.
3797 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3800 bool gotone =
false;
3802 #define _prflag(fl) \
3803 if((flags & fl) == (fl)) \
3807 if(pos + 1 < buf.len) \
3811 csubstr fltxt = #fl; \
3812 if(pos + fltxt.len <= buf.len) \
3813 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
// the write position must not have run past the buffer
3843 RYML_CHECK(pos <= buf.len);
3845 return buf.first(pos);
// Return the part of the parsed buffer that starts at loc.offset; the offset
// must lie inside the buffer (asserted).
3855 template<
class EventHandler>
3858 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3859 return m_buf.sub(loc.offset);
// Location of a node reference: forwards to location(tree, id) with the
// node's tree and id. The node must be readable (asserted).
3862 template<
class EventHandler>
3865 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3866 return location(*node.tree(), node.id());
// Location of a tree node: tries _location_from_node() starting at level 0;
// the location of the start of the buffer is the fallback.
3869 template<
class EventHandler>
3874 if(_location_from_node(tree, node, &loc, 0))
3876 return val_location(m_buf.str);
// Try to compute the source location of `node`, writing it to `*loc`.
// Sources tried in order: the node's key, the node's value, the node as a
// container, and -- only at level 0 and for nodes whose type is not
// NOTYPE -- the previous sibling, the next sibling and the parent, each
// queried recursively with level+1.
// Keys and values are expected to point into the parsed buffer (asserted).
3879 template<
class EventHandler>
3880 bool ParseEngine<EventHandler>::_location_from_node(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc,
id_type level)
const
// 1) location of the key
3882 if(tree.has_key(node))
3884 csubstr k = tree.key(node);
3885 if(C4_LIKELY(k.str !=
nullptr))
3887 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3888 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3889 *loc = val_location(k.str);
// 2) location of the value
3894 if(tree.has_val(node))
3896 csubstr v = tree.val(node);
3897 if(C4_LIKELY(v.str !=
nullptr))
3899 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3900 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3901 *loc = val_location(v.str);
// 3) location derived from the container
3906 if(tree.is_container(node))
3908 if(_location_from_cont(tree, node, loc))
// 4) neighbours, only for the node the query started from (level 0)
3912 if(tree.type(node) !=
NOTYPE && level == 0)
3916 const id_type prev = tree.prev_sibling(node);
3919 if(_location_from_node(tree, prev, loc, level+1))
3925 const id_type next = tree.next_sibling(node);
3928 if(_location_from_node(tree, next, loc, level+1))
3934 const id_type parent = tree.parent(node);
3937 if(_location_from_node(tree, parent, loc, level+1))
// Compute the source location of a container node, writing it to `*loc`.
// `node` must be a container (asserted). For non-stream containers the
// location starts from the string pointer stored in the node's value scalar,
// compared against the key of the first child when there is one; the start
// of the buffer is also used as a location below.
3946 template<
class EventHandler>
3947 bool ParseEngine<EventHandler>::_location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const
3949 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3950 if(!tree.is_stream(node))
3952 const char *node_start = tree._p(node)->m_val.scalar.str;
3953 if(tree.has_children(node))
3955 id_type child = tree.first_child(node);
3956 if(tree.has_key(child))
3959 csubstr k = tree.key(child);
// the first child's key lies before the stored start of the container
3960 if(k.str && node_start > k.str)
3964 *loc = val_location(node_start);
3969 *loc = val_location(m_buf.str);
// Translate a pointer into the parsed buffer into a Location (offset, line,
// column).
// A null pointer gives the location {m_file, 0, 0, 0}. Otherwise locations
// must be enabled in the parser options (checked), the newline-offset table
// must be up to date for the current buffer (asserted), and `val` must point
// inside the buffer (checked). The line is found by searching the
// newline-offset table for the first entry greater than the offset.
3975 template<
class EventHandler>
3978 if(C4_UNLIKELY(val ==
nullptr))
3979 return {m_file, 0, 0, 0};
3980 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
// the newline-offset table must describe the buffer being queried
3983 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3984 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3985 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3986 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3987 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
3988 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3990 csubstr src = m_buf;
3991 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
3992 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
3994 using lineptr_type =
size_t const* C4_RESTRICT;
3995 lineptr_type lineptr =
nullptr;
// byte offset of `val` from the start of the buffer
3996 size_t offset = (size_t)(val - src.begin());
// search by scanning the table front to back
4000 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
// search by stepping through the table with a shrinking count
4015 size_t count = m_newline_offsets_size;
4018 lineptr = m_newline_offsets;
4022 it = lineptr + step;
// the entry found must lie within the table and be past the offset
4034 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4035 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4036 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4039 loc.offset = offset;
// the line index is the position of the entry within the table
4040 loc.line = (size_t)(lineptr - m_newline_offsets);
// past the first line, the column is counted from the previous newline
4041 if(lineptr > m_newline_offsets)
4042 loc.col = (offset - *(lineptr-1) - 1u);
4048 template<
class EventHandler>
4049 void ParseEngine<EventHandler>::_prepare_locations()
4051 m_newline_offsets_buf = m_buf;
4052 size_t numnewlines = 1u + m_buf.count(
'\n');
4053 _resize_locations(numnewlines);
4054 m_newline_offsets_size = 0;
4055 for(
size_t i = 0; i < m_buf.len; i++)
4056 if(m_buf[i] ==
'\n')
4057 m_newline_offsets[m_newline_offsets_size++] = i;
4058 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4059 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4062 template<
class EventHandler>
4063 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4065 if(numnewlines > m_newline_offsets_capacity)
4067 if(m_newline_offsets)
4068 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4069 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4070 m_newline_offsets_capacity = numnewlines;
4074 template<
class EventHandler>
4075 bool ParseEngine<EventHandler>::_locations_dirty()
const
4077 return !m_newline_offsets_size;
// Skip leading whitespace and a trailing comment on the current line while
// parsing flow style.
// Works on the remainder of the current line: a leading space or tab is
// reported, and a remainder starting with '#' is a comment, which is
// consumed to the end of the line.
4085 template<
class EventHandler>
4086 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4089 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4091 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4093 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4097 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4099 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
// the comment runs to the end of the line: consume all of it
4100 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Store a pending annotation (tag or anchor) in `dst`.
// Fails with "too many annotations" when the fixed-size array is already
// full; otherwise the text, indentation and line are written to the slot at
// index dst->num_entries.
4109 template<
class EventHandler>
4110 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4112 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
// capacity check against the fixed-size annotations array
4113 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4114 _c4err(
"too many annotations");
4115 dst->annotations[dst->num_entries].str = str;
4116 dst->annotations[dst->num_entries].indentation = indentation;
4117 dst->annotations[dst->num_entries].line = line;
4121 template<
class EventHandler>
4122 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4124 dst->num_entries = 0;
// Only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// With exactly one pending anchor or tag, skips spaces on the current line
// up to the largest of: the current reference indentation and the
// indentation of the pending anchor/tag.
4127 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4128 template<
class EventHandler>
4129 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4131 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4133 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4134 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
// NOTE(review): this reads annotations[1] although the assert above requires
// fewer than 2 tag entries; every other access in this function uses
// index 0 -- confirm whether [0] was intended
4135 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4136 size_t to_skip = m_evt_handler->m_curr->indref;
// take the maximum of the reference indentation and the annotation indentations
4137 if(m_pending_anchors.num_entries)
4138 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4139 if(m_pending_tags.num_entries)
4140 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4141 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4142 _maybe_skipchars_up_to(
' ', to_skip);
4149 template<
class EventHandler>
4150 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4152 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4155 template<
class EventHandler>
4156 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4158 if(!tag.begins_with(
"!<"))
4160 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4161 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4165 if(C4_UNLIKELY(!tag.ends_with(
'>')))
4166 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"malformed tag", m_evt_handler->m_curr->pos);
4170 template<
class EventHandler>
4171 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4173 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4174 if(m_pending_tags.num_entries)
4176 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4177 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4179 _check_tag(m_pending_tags.annotations[0].str);
4180 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4181 _clear_annotations(&m_pending_tags);
4185 _c4err(
"too many tags");
4188 if(m_pending_anchors.num_entries)
4190 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4191 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4193 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4194 _clear_annotations(&m_pending_anchors);
4198 _c4err(
"too many anchors");
4203 template<
class EventHandler>
4204 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4206 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4207 if(m_pending_tags.num_entries)
4209 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4210 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4212 _check_tag(m_pending_tags.annotations[0].str);
4213 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4214 _clear_annotations(&m_pending_tags);
4218 _c4err(
"too many tags");
4221 if(m_pending_anchors.num_entries)
4223 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4224 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4226 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4227 _clear_annotations(&m_pending_anchors);
4231 _c4err(
"too many anchors");
4236 template<
class EventHandler>
4237 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4239 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4240 if(m_pending_tags.num_entries == 2)
4242 _c4dbgp(
"2 tags, setting entry 0");
4243 _check_tag(m_pending_tags.annotations[0].str);
4244 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4246 else if(m_pending_tags.num_entries == 1)
4248 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4249 if(m_pending_tags.annotations[0].line < current_line)
4251 _c4dbgp(
"...tag is for the map. setting it.");
4252 _check_tag(m_pending_tags.annotations[0].str);
4253 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4254 _clear_annotations(&m_pending_tags);
4258 if(m_pending_anchors.num_entries == 2)
4260 _c4dbgp(
"2 anchors, setting entry 0");
4261 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4263 else if(m_pending_anchors.num_entries == 1)
4265 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4266 if(m_pending_anchors.annotations[0].line < current_line)
4268 _c4dbgp(
"...anchor is for the map. setting it.");
4269 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4270 _clear_annotations(&m_pending_anchors);
/** Applies pending annotations as key annotations when a block map is
 * started as a key.
 *
 * Only the case of exactly two pending entries is handled here: entry 0
 * of the pending tags (after _check_tag()) and/or of the pending
 * anchors is set on the key. The pending lists are not cleared in the
 * visible code. */
4275 template<
class EventHandler>
4276 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4278 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4279 if(m_pending_tags.num_entries == 2)
4281 _check_tag(m_pending_tags.annotations[0].str);
4282 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4284 if(m_pending_anchors.num_entries == 2)
4286 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** Consumes the pending annotations as key annotations after a block
 * map was started, and sets the indentation for it.
 *
 * At most two pending tags and two pending anchors are expected
 * (asserted). When any annotation is pending, the indentation is first
 * re-selected through _select_indentation_from_annotations().
 *
 * @param key_indentation indentation of the key; may be replaced by the
 *        value selected from the annotations
 * @param key_line line of the key, forwarded to the indentation selection */
4291 template<
class EventHandler>
4292 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4293 _c4dbgp(
"annotations_after_start_mapblck");
4294 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4295 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4296 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4298 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
// NOTE(review): the case labels are not visible here; presumably one
// entry uses annotations[0] and two entries use annotations[1] -- confirm
4299 switch(m_pending_tags.num_entries)
4302 _check_tag(m_pending_tags.annotations[0].str);
4303 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4304 _clear_annotations(&m_pending_tags);
4307 _check_tag(m_pending_tags.annotations[1].str);
4308 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4309 _clear_annotations(&m_pending_tags);
// anchors: same selection as for tags, without validation
4312 switch(m_pending_anchors.num_entries)
4315 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4316 _clear_annotations(&m_pending_anchors);
4319 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4320 _clear_annotations(&m_pending_anchors);
4324 _set_indentation(key_indentation);
/** Picks an indentation based on the pending anchors and tags.
 *
 * Requires at least one pending tag or anchor (asserted). A reference
 * annotation `curr` starts at the first pending anchor, or the first
 * pending tag when there are no anchors, and is then compared against
 * every pending anchor and tag by line and by indentation.
 * NOTE(review): the statements executed by those comparisons are not
 * visible here -- confirm which annotation ends up selected.
 *
 * @param val_indentation indentation returned when the selected
 *        annotation is on a line before @p val_line
 * @param val_line line compared against the selected annotation's line
 * @return @p val_indentation if the selected annotation's line is less
 *         than @p val_line, otherwise the selected annotation's indentation */
4327 template<
class EventHandler>
4328 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4330 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4332 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
// scan the pending anchors
4333 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4335 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4336 if(ann.line > curr->line)
4338 else if(ann.indentation < curr->indentation)
// scan the pending tags
4341 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4343 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4344 if(ann.line > curr->line)
4346 else if(ann.indentation < curr->indentation)
4349 return curr->line < val_line ? val_indentation : curr->indentation;
/** Forwards a directive to the event handler and advances the line.
 *
 * The first '#' in @p rem is searched for. One path forwards the whole
 * of @p rem and advances by its length; the other forwards only the
 * text before the '#', with trailing spaces and tabs removed, and
 * advances up to the '#'.
 * NOTE(review): the condition choosing between the two paths is not
 * visible here; presumably the first is taken when no '#' is found.
 *
 * @param rem the directive text; must be a sub-range of the remainder
 *        of the current line (asserted) */
4352 template<
class EventHandler>
4353 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4355 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4356 const size_t pos = rem.find(
'#');
4357 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
// whole remainder is the directive
4360 m_evt_handler->add_directive(rem);
4361 _line_progressed(rem.len);
// only the part before the '#', right-trimmed
4365 csubstr to_comment = rem.first(pos);
4366 csubstr trimmed = to_comment.trimr(
" \t");
4367 m_evt_handler->add_directive(trimmed);
4368 _line_progressed(pos);
/** Detects a byte order mark at the start of the remainder of the
 * current line and skips over it.
 *
 * Each branch matches either the explicit BOM bytes of an encoding or a
 * pattern of NUL bytes next to an ASCII character, and then advances
 * the line by 4 (UTF32BE, UTF32LE), 2 (UTF16BE, UTF16LE) or 3 (UTF8)
 * bytes. The UTF32 checks come before the UTF16 checks; the UTF16LE
 * BOM bytes are a prefix of the UTF32LE BOM bytes.
 * NOTE(review): the statements producing the bool result and any
 * follow-up per branch are not visible here -- confirm.
 *
 * @return bool; presumably whether a BOM was found -- confirm */
4373 template<
class EventHandler>
4374 bool ParseEngine<EventHandler>::_handle_bom()
4376 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// `rest` is `rem` without its first character; used for the little-endian patterns
4379 const csubstr rest = rem.sub(1);
// true for characters greater than NUL and up to 0x7f
4381 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
4382 if(rem.begins_with({
"\x00\x00\xfe\xff", 4}) || (rem.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4384 _c4dbgp(
"byte order mark: UTF32BE");
4386 _line_progressed(4);
4389 else if(rem.begins_with(
"\xff\xfe\x00\x00") || (rest.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4391 _c4dbgp(
"byte order mark: UTF32LE");
4393 _line_progressed(4);
4396 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4398 _c4dbgp(
"byte order mark: UTF16BE");
4400 _line_progressed(2);
4403 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4405 _c4dbgp(
"byte order mark: UTF16LE");
4407 _line_progressed(2);
4410 else if(rem.begins_with(
"\xef\xbb\xbf"))
4412 _c4dbgp(
"byte order mark: UTF8");
4414 _line_progressed(3);
/** Validates a detected byte order mark against the current encoding.
 *
 * While no encoding has been set (m_encoding == NOBOM), the BOM is
 * accepted if it is UTF8 or if the current line remainder starts at the
 * very beginning of the buffer; otherwise an error is raised. Once an
 * encoding is set, a different one is an error.
 * NOTE(review): the statement executed on acceptance is not visible
 * here; presumably it stores @p enc -- confirm.
 *
 * @param enc the encoding announced by the detected BOM */
4422 template<
class EventHandler>
4423 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4425 if(m_encoding ==
NOBOM)
// beginning of file: the line remainder points at the start of the source buffer
4427 const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4428 if(enc ==
UTF8 || is_beginning_of_file)
4431 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4433 else if(enc != m_encoding)
4435 _c4err(
"byte order mark can only be set once");
/** Handles the contents of a JSON flow sequence.
 *
 * Preconditions (asserted): the state has RSEQ and FLOW, has no RKEY,
 * and has exactly one of RVAL or RNXT.
 *
 * - RVAL: reads a value, which is a double-quoted scalar, a child
 *   sequence, a child map, the end of the sequence, or a plain scalar;
 *   anything else is a parse error.
 * - RNXT: expects the next-value separator (add_sibling) or the end of
 *   the sequence; anything else is a parse error.
 *
 * Reaching the end of the file without closing the sequence is an
 * error ("missing terminating ]"). */
4442 template<
class EventHandler>
4443 void ParseEngine<EventHandler>::_handle_seq_json()
4446 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4448 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4449 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4450 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4451 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4452 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4454 _handle_flow_skip_whitespace();
4455 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RVAL: read a value ---
4461 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4462 const char first = rem.str[0];
// debug label names this handler (was mislabelled "mapjson")
4463 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4468 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4469 ScannedScalar sc = _scan_scalar_dquot();
4470 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4471 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4477 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4479 m_evt_handler->begin_seq_val_flow();
4481 _line_progressed(1);
4486 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4488 m_evt_handler->begin_map_val_flow();
4490 _line_progressed(1);
4491 goto seqjson_finish;
4495 _c4dbgp(
"seqjson[RVAL]: end!");
4497 m_evt_handler->end_seq();
4498 _line_progressed(1);
4500 goto seqjson_finish;
4506 if(_scan_scalar_seq_json(&sc))
4508 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4509 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4510 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4515 _c4err(
"parse error");
// --- RNXT: expect a separator or the end of the sequence ---
4522 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4523 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4524 const char first = rem.str[0];
// debug label names this handler (was mislabelled "mapjson")
4525 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4530 _c4dbgp(
"seqjson[RNXT]: expect next val");
4532 m_evt_handler->add_sibling();
4533 _line_progressed(1);
4538 _c4dbgp(
"seqjson[RNXT]: end!");
4539 m_evt_handler->end_seq();
4540 _line_progressed(1);
4541 goto seqjson_finish;
4544 _c4err(
"parse error");
// --- continue with more input, or fail if the file ended ---
4549 _c4dbgt(
"seqjson: go again", 0);
4550 if(_finished_line())
4552 if(C4_LIKELY(!_finished_file()))
4560 _c4err(
"missing terminating ]");
4566 _c4dbgp(
"seqjson: finish");
/** Handles the contents of a JSON flow map.
 *
 * Preconditions (asserted): the state has RMAP and FLOW, has no QMRK,
 * and has exactly one of RKEY, RKCL, RVAL or RNXT.
 *
 * - RKEY: reads a double-quoted key, or ends the map.
 * - RVAL: reads a double-quoted scalar, a child sequence, a child map
 *   or a plain scalar as the value.
 * - RKCL: expects the colon after the key.
 * - RNXT: expects ',' (next key-value pair) or '}' (end of map).
 *
 * Any other input is a parse error; reaching the end of the file
 * without closing the map is an error ("missing terminating }"). */
4572 template<
class EventHandler>
4573 void ParseEngine<EventHandler>::_handle_map_json()
4576 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4578 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4579 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4580 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4581 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
4582 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4584 _handle_flow_skip_whitespace();
4585 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RKEY: read a key (the debug label below names this state) ---
4591 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4592 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4594 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4595 const char first = rem.str[0];
4596 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
4601 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4602 ScannedScalar sc = _scan_scalar_dquot();
4603 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4604 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4610 _c4dbgp(
"mapjson[RKEY]: end!");
4611 m_evt_handler->end_map();
4612 _line_progressed(1);
4613 goto mapjson_finish;
4616 _c4err(
"parse error");
// --- RVAL: read a value ---
4619 else if(has_any(
RVAL))
4621 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4622 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4623 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4624 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4625 const char first = rem.str[0];
4626 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
4631 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4632 ScannedScalar sc = _scan_scalar_dquot();
4633 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4634 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4640 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4642 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
4643 _set_indentation(m_evt_handler->m_parent->indref);
4645 _line_progressed(1);
4646 goto mapjson_finish;
4650 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4652 m_evt_handler->begin_map_val_flow();
4653 _set_indentation(m_evt_handler->m_parent->indref);
4655 _line_progressed(1);
4662 if(_scan_scalar_map_json(&sc))
4664 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4665 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4666 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4671 _c4err(
"parse error");
// --- RKCL: expect the colon after the key ---
4677 else if(has_any(
RKCL))
4679 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4680 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4681 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4682 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4683 const char first = rem.str[0];
4684 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
4687 _c4dbgp(
"mapjson[RKCL]: found the colon");
4689 _line_progressed(1);
4693 _c4err(
"parse error");
// --- RNXT: expect ',' or '}' ---
4696 else if(has_any(
RNXT))
4698 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4699 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4700 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4701 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4702 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
4703 if(rem.begins_with(
','))
4705 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4706 m_evt_handler->add_sibling();
4708 _line_progressed(1);
4710 else if(rem.begins_with(
'}'))
4712 _c4dbgp(
"mapjson[RNXT]: end!");
4713 m_evt_handler->end_map();
4714 _line_progressed(1);
4715 goto mapjson_finish;
4719 _c4err(
"parse error");
// --- continue with more input, or fail if the file ended ---
4724 _c4dbgt(
"mapjson: go again", 0);
4725 if(_finished_line())
4727 if(C4_LIKELY(!_finished_file()))
4735 _c4err(
"missing terminating }");
4741 _c4dbgp(
"mapjson: finish");
/** Handles a map found inside a flow sequence (state RSEQIMAP).
 *
 * Preconditions (asserted): the state has RSEQIMAP, has no RKEY, has
 * exactly one of RVAL, RNXT, QMRK or RKCL, and the handler stack holds
 * at least 3 entries.
 *
 * - RVAL: reads the value (quoted or plain scalar, child flow container,
 *   anchor or ref); a scalar value ends the map. A ',' or ']' gives an
 *   empty value and ends the map.
 * - RNXT: a ',' or ']' ends the map.
 * - QMRK: reads the key (quoted or plain scalar, child flow container,
 *   anchor or ref). A ',' or ']' gives an empty key and an empty value,
 *   and ends the map.
 * - RKCL: expects ':'; a ',' or ']' gives an empty value and ends the map.
 *
 * Any other input is a parse error. */
4747 template<
class EventHandler>
4748 void ParseEngine<EventHandler>::_handle_seq_imap()
4751 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4753 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4754 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4755 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
4756 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4757 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4759 _handle_flow_skip_whitespace();
4760 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RVAL: read the value ---
4766 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4767 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4768 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4769 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4770 const char first = rem.str[0];
4771 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
4775 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4776 sc = _scan_scalar_squot();
4777 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4778 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// a scalar value completes the map
4779 m_evt_handler->end_map();
4780 goto seqimap_finish;
4782 else if(first ==
'"')
4784 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4785 sc = _scan_scalar_dquot();
4786 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4787 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4788 m_evt_handler->end_map();
4789 goto seqimap_finish;
4792 else if(_scan_scalar_plain_map_flow(&sc))
4794 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4795 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4796 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4797 m_evt_handler->end_map();
4798 goto seqimap_finish;
4800 else if(first ==
'[')
4802 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4804 m_evt_handler->begin_seq_val_flow();
4806 _set_indentation(m_evt_handler->m_parent->indref);
4807 _line_progressed(1);
4808 goto seqimap_finish;
4810 else if(first ==
'{')
4812 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4814 m_evt_handler->begin_map_val_flow();
4816 _set_indentation(m_evt_handler->m_parent->indref);
4817 _line_progressed(1);
4818 goto seqimap_finish;
4820 else if(first ==
',' || first ==
']')
4822 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4823 m_evt_handler->set_val_scalar_plain_empty();
4824 m_evt_handler->end_map();
4825 goto seqimap_finish;
4827 else if(first ==
'&')
4829 csubstr anchor = _scan_anchor();
4830 _c4dbgp(
"seqimap[RVAL]: anchor!");
4831 m_evt_handler->set_val_anchor(anchor);
4833 else if(first ==
'*')
4835 csubstr ref = _scan_ref_seq();
4836 _c4dbgp(
"seqimap[RVAL]: ref!");
4837 m_evt_handler->set_val_ref(ref);
4842 _c4err(
"parse error");
// --- RNXT: the map ends at ',' or ']' ---
4845 else if(has_any(
RNXT))
4847 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4848 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4849 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4850 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4851 const char first = rem.str[0];
4852 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4853 if(first ==
',' || first ==
']')
4857 _c4dbgp(
"seqimap: done");
4858 m_evt_handler->end_map();
4859 goto seqimap_finish;
4863 _c4err(
"parse error");
// --- QMRK: read the key ---
4866 else if(has_any(
QMRK))
4868 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4869 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4870 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4871 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4872 const char first = rem.str[0];
4873 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
4877 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4878 sc = _scan_scalar_squot();
4879 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4880 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4884 else if(first ==
'"')
4886 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4887 sc = _scan_scalar_dquot();
4888 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4889 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4894 else if(_scan_scalar_plain_map_flow(&sc))
4896 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4897 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4898 m_evt_handler->set_key_scalar_plain(maybe_filtered);
4902 else if(first ==
'[')
4904 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4906 m_evt_handler->begin_seq_key_flow();
4908 _set_indentation(m_evt_handler->m_parent->indref);
4909 _line_progressed(1);
4910 goto seqimap_finish;
4912 else if(first ==
'{')
4914 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4916 m_evt_handler->begin_map_key_flow();
4918 _set_indentation(m_evt_handler->m_parent->indref);
4919 _line_progressed(1);
4920 goto seqimap_finish;
4922 else if(first ==
',' || first ==
']')
4924 _c4dbgp(
"seqimap[QMRK]: finish without key.");
// neither key nor value was given: both are set empty
4925 m_evt_handler->set_key_scalar_plain_empty();
4926 m_evt_handler->set_val_scalar_plain_empty();
4927 m_evt_handler->end_map();
4928 goto seqimap_finish;
4930 else if(first ==
'&')
4932 csubstr anchor = _scan_anchor();
4933 _c4dbgp(
"seqimap[QMRK]: anchor!");
4934 m_evt_handler->set_key_anchor(anchor);
4936 else if(first ==
'*')
4938 csubstr ref = _scan_ref_seq();
4939 _c4dbgp(
"seqimap[QMRK]: ref!");
4940 m_evt_handler->set_key_ref(ref);
4945 _c4err(
"parse error");
// --- RKCL: expect ':' after the key ---
4948 else if(has_any(
RKCL))
4950 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4951 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4953 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4954 const char first = rem.str[0];
4955 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
4958 _c4dbgp(
"seqimap[RKCL]: found ':'");
4960 _line_progressed(1);
// NOTE(review): this branch is also taken for ']', although the debug
// message below only mentions ','
4963 else if(first ==
',' || first ==
']')
4965 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4966 m_evt_handler->set_val_scalar_plain_empty();
4967 m_evt_handler->end_map();
4968 goto seqimap_finish;
4972 _c4err(
"parse error");
// --- continue with more input, or fail if the file ended ---
4977 _c4dbgt(
"seqimap: go again", 0);
4978 if(_finished_line())
4980 if(C4_LIKELY(!_finished_file()))
4988 _c4err(
"parse error");
4994 _c4dbgp(
"seqimap: finish");
/** Handles the contents of a flow sequence.
 *
 * Preconditions (asserted): the state has RSEQ and FLOW, has no RKEY,
 * has exactly one of RVAL or RNXT, and the current indentation
 * reference is not npos.
 *
 * - RVAL: reads a value: a quoted or plain scalar, a child flow
 *   sequence or map, a ref, an anchor or a tag. A ']' ends the
 *   sequence. A ':' or '?' starts a map inside the sequence.
 * - RNXT: expects the next-value separator (add_sibling) or ']'. A ':'
 *   turns the previous value into the first key of a new flow map.
 *
 * Any other input is a parse error; reaching the end of the file
 * without closing the sequence is an error ("missing terminating ]"). */
5000 template<
class EventHandler>
5001 void ParseEngine<EventHandler>::_handle_seq_flow()
5004 _c4dbgpf(
"handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5006 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5007 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5008 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
5009 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5010 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
5011 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
5013 _handle_flow_skip_whitespace();
// nothing left on this line after skipping whitespace
// NOTE(review): the action taken is not visible here -- presumably jumps to the "go again" step
5015 if(!m_evt_handler->m_curr->line_contents.rem.len)
// --- RVAL: read a value ---
5020 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5021 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5025 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5026 sc = _scan_scalar_squot();
5027 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5028 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5031 else if(first ==
'"')
5033 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5034 sc = _scan_scalar_dquot();
5035 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5036 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5040 else if(_scan_scalar_plain_seq_flow(&sc))
5042 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5043 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5044 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5047 else if(first ==
'[')
5049 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5051 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
5052 _set_indentation(m_evt_handler->m_parent->indref);
5054 _line_progressed(1);
5056 else if(first ==
'{')
5058 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5060 m_evt_handler->begin_map_val_flow();
5061 _set_indentation(m_evt_handler->m_parent->indref);
5063 _line_progressed(1);
5064 goto seqflow_finish;
5066 else if(first ==
']')
5068 _c4dbgp(
"seqflow[RVAL]: end!");
5069 _line_progressed(1);
5070 m_evt_handler->end_seq();
5071 goto seqflow_finish;
5073 else if(first ==
'*')
5075 csubstr ref = _scan_ref_seq();
5076 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5077 m_evt_handler->set_val_ref(ref);
5080 else if(first ==
'&')
5082 csubstr anchor = _scan_anchor();
5083 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5084 m_evt_handler->set_val_anchor(anchor);
// an anchor directly followed by a comma annotates an empty scalar
5085 if(_maybe_scan_following_comma())
5087 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5088 m_evt_handler->set_val_scalar_plain_empty();
5089 m_evt_handler->add_sibling();
5092 else if(first ==
'!')
5094 csubstr tag = _scan_tag();
5095 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5097 m_evt_handler->set_val_tag(tag);
// a tag directly followed by a comma annotates an empty scalar
5098 if(_maybe_scan_following_comma())
5100 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5101 m_evt_handler->set_val_scalar_plain_empty();
5102 m_evt_handler->add_sibling();
5105 else if(first ==
':')
5107 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5109 m_evt_handler->begin_map_val_flow();
5110 _set_indentation(m_evt_handler->m_parent->indref);
5111 m_evt_handler->set_key_scalar_plain_empty();
5113 _line_progressed(1);
5114 goto seqflow_finish;
5116 else if(first ==
'?')
5118 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5120 m_was_inside_qmrk =
true;
5121 m_evt_handler->begin_map_val_flow();
5122 _set_indentation(m_evt_handler->m_parent->indref);
5124 _line_progressed(1);
5125 _maybe_skip_whitespace_tokens();
5126 goto seqflow_finish;
5130 _c4err(
"parse error");
// --- RNXT: expect a separator, ']' or ':' ---
5135 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5136 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5137 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5140 _c4dbgp(
"seqflow[RNXT]: expect next val");
5142 m_evt_handler->add_sibling();
5143 _line_progressed(1);
5145 else if(first ==
']')
5147 _c4dbgp(
"seqflow[RNXT]: end!");
5148 m_evt_handler->end_seq();
5149 _line_progressed(1);
5150 goto seqflow_finish;
5152 else if(first ==
':')
5154 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
// the value just read becomes the first key of a new flow map
5155 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5156 _set_indentation(m_evt_handler->m_parent->indref);
5157 _line_progressed(1);
5159 goto seqflow_finish;
5163 _c4err(
"parse error");
// --- continue with more input, or fail if the file ended ---
5168 _c4dbgt(
"seqflow: go again", 0);
5169 if(_finished_line())
5171 if(C4_LIKELY(!_finished_file()))
5179 _c4err(
"missing terminating ]");
5185 _c4dbgp(
"seqflow: finish");
/** Handles the contents of a flow map.
 *
 * Preconditions (asserted): the state has RMAP and FLOW, and has
 * exactly one of RKEY, RKCL, RVAL, RNXT or QMRK.
 *
 * - RKEY: reads a key: quoted or plain scalar, '?' (explicit key), ':'
 *   (empty key), ',' (empty key and value), '}' (end of map), anchor,
 *   ref, tag, or a child flow container used as key.
 * - RKCL: expects ':'; a '}' or ',' gives an empty value.
 * - RVAL: reads a value: quoted or plain scalar, child flow container,
 *   '}' or ',' (empty value), ref, anchor or tag.
 * - RNXT: expects ',' (next key-value pair) or '}' (end of map).
 * - QMRK: reads the key that follows an explicit '?', with the same
 *   alternatives as RKEY except for '?' itself.
 *
 * Any other input is a parse error; reaching the end of the file
 * without closing the map is an error ("missing terminating }"). */
5191 template<
class EventHandler>
5192 void ParseEngine<EventHandler>::_handle_map_flow()
5195 _c4dbgpf(
"handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5197 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5198 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
5200 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5202 _handle_flow_skip_whitespace();
5203 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RKEY: read a key (the debug label below names this state) ---
5209 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5210 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5211 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5212 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5213 const char first = rem.str[0];
5214 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
5218 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5219 sc = _scan_scalar_squot();
5220 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5221 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5224 else if(first ==
'"')
5226 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5227 sc = _scan_scalar_dquot();
5228 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5229 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5233 else if(_scan_scalar_plain_map_flow(&sc))
5235 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5236 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5237 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5240 else if(first ==
'?')
5242 _c4dbgp(
"mapflow[RKEY]: explicit key");
5243 _line_progressed(1);
5245 _maybe_skip_whitespace_tokens();
5247 else if(first ==
':')
5249 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5250 m_evt_handler->set_key_scalar_plain_empty();
5252 _line_progressed(1);
5253 _maybe_skip_whitespace_tokens();
5255 else if(first ==
',')
5257 _c4dbgp(
"mapflow[RKEY]: empty key+val!");
5258 m_evt_handler->set_key_scalar_plain_empty();
5259 m_evt_handler->set_val_scalar_plain_empty();
5263 else if(first ==
'}')
5265 _c4dbgp(
"mapflow[RKEY]: end!");
5266 m_evt_handler->end_map();
5267 _line_progressed(1);
5268 goto mapflow_finish;
5270 else if(first ==
'&')
5272 csubstr anchor = _scan_anchor();
5273 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5274 m_evt_handler->set_key_anchor(anchor);
5276 else if(first ==
'*')
5278 csubstr ref = _scan_ref_map();
5279 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5280 m_evt_handler->set_key_ref(ref);
5283 else if(first ==
'[')
5288 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5290 m_evt_handler->begin_seq_key_flow();
// the child container takes the indentation reference of its parent
5292 _set_indentation(m_evt_handler->m_parent->indref);
5293 _line_progressed(1);
5294 goto mapflow_finish;
5296 else if(first ==
'{')
5301 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5303 m_evt_handler->begin_map_key_flow();
5305 _set_indentation(m_evt_handler->m_parent->indref);
5306 _line_progressed(1);
5309 else if(first ==
'!')
5311 csubstr tag = _scan_tag();
5312 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5314 m_evt_handler->set_key_tag(tag);
5318 _c4err(
"parse error");
// --- RKCL: expect ':' after the key ---
5321 else if(has_any(
RKCL))
5323 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5324 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5325 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5326 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5327 const char first = rem.str[0];
5328 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5331 _c4dbgp(
"mapflow[RKCL]: found the colon");
5333 _line_progressed(1);
5335 else if(first ==
'}')
5337 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5339 m_evt_handler->set_val_scalar_plain_empty();
5340 m_evt_handler->end_map();
5341 _line_progressed(1);
5342 goto mapflow_finish;
5344 else if(first ==
',')
5346 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5347 m_evt_handler->set_val_scalar_plain_empty();
5348 m_evt_handler->add_sibling();
5350 _line_progressed(1);
5354 _c4err(
"parse error");
// --- RVAL: read a value ---
5357 else if(has_any(
RVAL))
5359 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5360 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5361 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5362 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5363 const char first = rem.str[0];
5364 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
5368 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5369 sc = _scan_scalar_squot();
5370 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5371 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5374 else if(first ==
'"')
5376 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5377 sc = _scan_scalar_dquot();
5378 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5379 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5383 else if(_scan_scalar_plain_map_flow(&sc))
5385 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5386 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5387 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5390 else if(first ==
'[')
5392 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5394 m_evt_handler->begin_seq_val_flow();
5395 _set_indentation(m_evt_handler->m_parent->indref);
5397 _line_progressed(1);
5398 goto mapflow_finish;
5400 else if(first ==
'{')
5402 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5404 m_evt_handler->begin_map_val_flow();
5405 _set_indentation(m_evt_handler->m_parent->indref);
5407 _line_progressed(1);
5410 else if(first ==
'}')
5412 _c4dbgp(
"mapflow[RVAL]: end!");
5413 m_evt_handler->set_val_scalar_plain_empty();
5414 m_evt_handler->end_map();
5415 _line_progressed(1);
5416 goto mapflow_finish;
5418 else if(first ==
',')
5420 _c4dbgp(
"mapflow[RVAL]: empty val!");
5421 m_evt_handler->set_val_scalar_plain_empty();
5425 else if(first ==
'*')
5427 csubstr ref = _scan_ref_map();
// in RVAL the ref is set on the value (message previously said "key ref")
5428 _c4dbgpf(
"mapflow[RVAL]: val ref! [{}]~~~{}~~~", ref.len, ref);
5429 m_evt_handler->set_val_ref(ref);
5432 else if(first ==
'&')
5434 csubstr anchor = _scan_anchor();
// in RVAL the anchor is set on the value (message previously said "key anchor")
5435 _c4dbgpf(
"mapflow[RVAL]: val anchor! [{}]~~~{}~~~", anchor.len, anchor);
5436 m_evt_handler->set_val_anchor(anchor);
5438 else if(first ==
'!')
5440 csubstr tag = _scan_tag();
5441 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5443 m_evt_handler->set_val_tag(tag);
5447 _c4err(
"parse error");
// --- RNXT: expect ',' or '}' ---
5450 else if(has_any(
RNXT))
5452 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5453 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5454 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5455 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5456 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
5457 if(rem.begins_with(
','))
5459 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5460 m_evt_handler->add_sibling();
5462 _line_progressed(1);
5464 else if(rem.begins_with(
'}'))
5466 _c4dbgp(
"mapflow[RNXT]: end!");
5467 m_evt_handler->end_map();
5468 _line_progressed(1);
5469 goto mapflow_finish;
5473 _c4err(
"parse error");
// --- QMRK: read the key following an explicit '?' ---
5476 else if(has_any(
QMRK))
5478 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5479 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5480 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5481 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5482 const char first = rem.str[0];
5483 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
5487 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5488 sc = _scan_scalar_squot();
5489 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5490 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5493 else if(first ==
'"')
5495 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5496 sc = _scan_scalar_dquot();
5497 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5498 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5502 else if(_scan_scalar_plain_map_flow(&sc))
5504 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5505 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5506 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5509 else if(first ==
':')
5511 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5512 m_evt_handler->set_key_scalar_plain_empty();
5514 _line_progressed(1);
5515 _maybe_skip_whitespace_tokens();
5517 else if(first ==
'}')
5519 _c4dbgp(
"mapflow[QMRK]: end!");
5520 m_evt_handler->set_key_scalar_plain_empty();
5521 m_evt_handler->set_val_scalar_plain_empty();
5522 m_evt_handler->end_map();
5523 _line_progressed(1);
5524 goto mapflow_finish;
5526 else if(first ==
',')
5528 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
5529 m_evt_handler->set_key_scalar_plain_empty();
5530 m_evt_handler->set_val_scalar_plain_empty();
5533 else if(first ==
'&')
5535 csubstr anchor = _scan_anchor();
5536 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5537 m_evt_handler->set_key_anchor(anchor);
5539 else if(first ==
'*')
5541 csubstr ref = _scan_ref_map();
5542 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5543 m_evt_handler->set_key_ref(ref);
5546 else if(first ==
'[')
5551 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5553 m_evt_handler->begin_seq_key_flow();
5555 _set_indentation(m_evt_handler->m_parent->indref);
5556 _line_progressed(1);
5557 goto mapflow_finish;
5559 else if(first ==
'{')
5564 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5566 m_evt_handler->begin_map_key_flow();
5567 _set_indentation(m_evt_handler->m_parent->indref);
5569 _line_progressed(1);
5572 else if(first ==
'!')
5574 csubstr tag = _scan_tag();
5575 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5577 m_evt_handler->set_key_tag(tag);
5581 _c4err(
"parse error");
// --- continue with more input, or fail if the file ended ---
5586 _c4dbgt(
"mapflow: go again", 0);
5587 if(_finished_line())
5589 if(C4_LIKELY(!_finished_file()))
5597 _c4err(
"missing terminating }");
5603 _c4dbgp(
"mapflow: finish");
// Processes input while the parser state is a block sequence (RSEQ and BLCK
// are asserted). Exactly one of RVAL (a value for the current entry is
// expected) or RNXT (the next entry is expected) is asserted to be active.
// The visible code dispatches on the first character of the remaining line
// and reports scalars, containers, anchors, tags and refs through
// m_evt_handler. Paths that are done with the current step jump to the
// seqblck_finish label.
5609 template<
class EventHandler>
5610 void ParseEngine<EventHandler>::_handle_seq_block()
5613 _c4dbgpf(
"handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// State invariants: block sequence, with exactly one of RVAL / RNXT set.
5615 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5616 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5617 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5618 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// Give _maybe_skip_comment() a chance first, then read the remaining
// (presumably not yet consumed) part of the current line.
5620 _maybe_skip_comment();
5621 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL section: the value of the current entry is expected ----
5627 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5628 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// At the beginning of a line the indentation is dealt with before the content.
5629 if(m_evt_handler->m_curr->at_line_beginning())
5631 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the indentation of the line and refresh rem.
5632 if(m_evt_handler->m_curr->indentation_ge())
5634 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5635 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5636 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this block sequence and stop here.
5640 else if(m_evt_handler->m_curr->indentation_lt())
5642 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5643 _handle_indentation_pop_from_block_seq();
5644 goto seqblck_finish;
// An indentation of npos is handled as an empty line: consume all of it.
5646 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5648 _c4dbgp(
"seqblck[RVAL]: empty line!");
5649 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// The macro name suggests the guarded code is scheduled for removal.
5653 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// A leading space may be indentation belonging to pending annotations; when
// the helper reports that it handled it, rem is refreshed.
5661 if(rem.str[0] ==
' ')
5663 if(_handle_indentation_from_annotations())
5665 _c4dbgp(
"seqblck[RVAL]: annotations!");
5666 rem = m_evt_handler->m_curr->line_contents.rem;
// There must be content left to dispatch on.
5673 _RYML_CB_ASSERT(callbacks(), rem.len);
5674 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
// first selects the branch below. startline and startindent are captured
// before any scanning; they are handed to the annotation helpers and
// compared against indref further down.
5675 const char first = rem.str[0];
5676 const size_t startline = m_evt_handler->m_curr->pos.line;
5679 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
// Single-quoted scalar. Without a following colon it is the value of this
// entry; with one, a block map is started and the scalar becomes its key.
5683 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5684 sc = _scan_scalar_squot();
5685 if(!_maybe_scan_following_colon())
5687 _c4dbgp(
"seqblck[RVAL]: set as val");
5688 _handle_annotations_before_blck_val_scalar();
5689 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5690 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5695 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5697 _handle_annotations_before_start_mapblck(startline);
5698 m_evt_handler->begin_map_val_block();
5699 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5700 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5701 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5703 _maybe_skip_whitespace_tokens();
5704 goto seqblck_finish;
// Double-quoted scalar: same value-or-key decision as the single-quoted case.
5707 else if(first ==
'"')
5709 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5710 sc = _scan_scalar_dquot();
5711 if(!_maybe_scan_following_colon())
5713 _c4dbgp(
"seqblck[RVAL]: set as val");
5714 _handle_annotations_before_blck_val_scalar();
5715 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5716 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5721 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5723 _handle_annotations_before_start_mapblck(startline);
5724 m_evt_handler->begin_map_val_block();
5725 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5726 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5727 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5729 _maybe_skip_whitespace_tokens();
5730 goto seqblck_finish;
// Block-literal scalar: scanned by _scan_block (which is given indref + 1)
// and set as the value of this entry.
5736 else if(first ==
'|')
5738 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5740 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5741 _handle_annotations_before_blck_val_scalar();
5742 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5743 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar: same handling, with the folded filter and setter.
5746 else if(first ==
'>')
5748 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5750 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5751 _handle_annotations_before_blck_val_scalar();
5752 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5753 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar (taken when the scan helper returns true). Outcomes visible
// below: no colon follows -> value of this entry; a colon follows and the
// scalar started right of indref -> key of a new block map; a colon follows
// and the parent sits at the same column -> this entry gets an empty value,
// the sequence is ended and the scalar becomes the next key of the parent;
// anything else is a parse error.
5756 else if(_scan_scalar_plain_seq_blck(&sc))
5758 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
5759 if(!_maybe_scan_following_colon())
5761 _c4dbgp(
"seqblck[RVAL]: set as val");
5762 _handle_annotations_before_blck_val_scalar();
5763 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5764 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5769 if(startindent > m_evt_handler->m_curr->indref)
5771 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5773 _handle_annotations_before_start_mapblck(startline);
5774 m_evt_handler->begin_map_val_block();
5775 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5776 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5777 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5779 _maybe_skip_whitespace_tokens();
5780 goto seqblck_finish;
// NOTE(review): this test uses has_any(RMAP|BLCK, parent), while the similar
// end-indentless-seq path in the RNXT section uses has_all(RMAP|BLCK, parent).
// Confirm that the difference is intended.
5782 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
BLCK, m_evt_handler->m_parent))
5784 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5785 m_evt_handler->set_val_scalar_plain_empty();
5786 m_evt_handler->end_seq();
5787 m_evt_handler->add_sibling();
5788 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5789 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5791 _maybe_skip_whitespace_tokens();
5792 goto seqblck_finish;
5796 _c4err(
"parse error");
// Opening square bracket: begin a flow sequence as the value, consume the
// bracket, and set the indentation to one more than the indref of the parent.
// NOTE(review): unlike the flow-map branch that follows, no call to
// _handle_annotations_before_blck_val_scalar() is visible here; confirm that
// this is intended.
5800 else if(first ==
'[')
5802 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5804 m_evt_handler->begin_seq_val_flow();
5806 _line_progressed(1);
5807 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5808 goto seqblck_finish;
// Opening curly brace: same as above, but beginning a flow map.
5810 else if(first ==
'{')
5812 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5814 _handle_annotations_before_blck_val_scalar();
5815 m_evt_handler->begin_map_val_flow();
5817 _line_progressed(1);
5818 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5819 goto seqblck_finish;
// Dash. At the same column as indref the previous entry had no value: it is
// given an empty one and a sibling is added. Otherwise the dash is asserted
// to be more indented and a nested block sequence is begun, saving the
// indentation. The dash and the whitespace after it are then consumed.
5821 else if(first ==
'-')
5823 if(startindent == m_evt_handler->m_curr->indref)
5825 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5826 _handle_annotations_before_blck_val_scalar();
5827 m_evt_handler->set_val_scalar_plain_empty();
5829 m_evt_handler->add_sibling();
5833 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5834 _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5836 _handle_annotations_before_blck_val_scalar();
5837 m_evt_handler->begin_seq_val_block();
5839 _save_indentation();
5842 _line_progressed(1);
5843 _maybe_skip_whitespace_tokens();
// Colon with nothing before it: begin a block map whose first key is empty.
5845 else if(first ==
':')
5847 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5849 _handle_annotations_before_start_mapblck(startline);
5850 m_evt_handler->begin_map_val_block();
5851 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5852 m_evt_handler->set_key_scalar_plain_empty();
5854 _line_progressed(1);
5855 _maybe_skip_whitespace_tokens();
5856 goto seqblck_finish;
// Anchor: not applied right away; it is recorded in m_pending_anchors
// together with the column and line where it started.
5858 else if(first ==
'&')
5860 const csubstr anchor = _scan_anchor();
5861 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5864 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// Reference. Without a following colon it is the value of this entry; with
// one, a block map is begun with the reference as key and the indentation
// is set to startindent.
5866 else if(first ==
'*')
5868 csubstr ref = _scan_ref_seq();
5869 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5870 if(!_maybe_scan_following_colon())
5872 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5873 _handle_annotations_before_blck_val_scalar();
5874 m_evt_handler->set_val_ref(ref);
5879 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5881 _handle_annotations_before_start_mapblck(startline);
5882 m_evt_handler->begin_map_val_block();
5883 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5884 m_evt_handler->set_key_ref(ref);
5886 _set_indentation(startindent);
5887 _maybe_skip_whitespace_tokens();
5888 goto seqblck_finish;
// Tag: recorded in m_pending_tags, in the same way as anchors above.
5891 else if(first ==
'!')
5893 csubstr tag = _scan_tag();
5894 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5897 _add_annotation(&m_pending_tags, tag, startindent, startline);
// Question mark (explicit key): remember that via m_was_inside_qmrk, begin a
// block map, save the indentation and consume the token.
5899 else if(first ==
'?')
5901 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5903 m_was_inside_qmrk =
true;
5904 m_evt_handler->begin_map_val_block();
5906 _save_indentation();
5907 _line_progressed(1);
5908 _maybe_skip_whitespace_tokens();
5909 goto seqblck_finish;
// No RVAL branch matched.
5913 _c4err(
"parse error");
// ---- RNXT section: the next entry of the sequence is expected ----
5918 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5919 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5923 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5924 if(C4_LIKELY(_at_line_begin()))
5926 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): consume indref characters plus any whitespace tokens.
5927 if(m_evt_handler->m_curr->indentation_ge())
5929 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5930 _line_progressed(m_evt_handler->m_curr->indref);
5931 _maybe_skip_whitespace_tokens();
5932 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop. Two outcomes are visible after the pop: the state
// is still a block sequence (its indentation is consumed and rem is
// refreshed), or it no longer is (jump to the finish label).
5936 else if(m_evt_handler->m_curr->indentation_lt())
5938 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5939 _handle_indentation_pop_from_block_seq();
5942 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5944 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5945 rem = m_evt_handler->m_curr->line_contents.rem;
5951 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5952 goto seqblck_finish;
// Blank line: consume it whole.
// NOTE(review): the debug format labels its argument as len but is given
// the substring rem rather than rem.len; this affects debug output only.
5955 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5957 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5958 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5959 rem = m_evt_handler->m_curr->line_contents.rem;
// Not at the beginning of the line: what remains must start with a space or
// a tab, otherwise it is a parse error.
5966 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
5967 if(!rem.begins_with_any(
" \t"))
5969 _c4err(
"parse error");
5974 rem = m_evt_handler->m_curr->line_contents.rem;
5977 _c4dbgp(
"seqblck[RNXT]: again");
// Dispatch on the first remaining character.
5985 const char first = rem.str[0];
5986 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Unless indref and the line indentation are both zero and rem is a
// document-begin token, this starts the next entry: add a sibling and
// consume one character plus whitespace. Otherwise a new document is
// started and the three marker characters are consumed.
5989 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5991 _c4dbgp(
"seqblck[RNXT]: expect next val");
5993 m_evt_handler->add_sibling();
5994 _line_progressed(1);
5995 _maybe_skip_whitespace_tokens();
5999 _c4dbgp(
"seqblck[RNXT]: start doc");
6000 _start_doc_suddenly();
6001 _line_progressed(3);
6002 _maybe_skip_whitespace_tokens();
6003 goto seqblck_finish;
// Colon: accepted only when the parent state has the RMAP flag, in which
// case this sequence was the explicit key of that map and is ended here.
6006 else if(first ==
':')
6012 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6013 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
6015 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6016 m_evt_handler->end_seq();
6017 goto seqblck_finish;
6021 _c4err(
"parse error");
// Dot: only three dots, alone or followed by a space, are accepted; they end
// the document. Anything else is a parse error.
6024 else if(first ==
'.')
6026 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6027 csubstr rs = rem.sub(1);
6028 if(rs ==
".." || rs.begins_with(
".. "))
6030 _c4dbgp(
"seqblck[RNXT]: end+start doc");
6031 _end_doc_suddenly();
6032 _line_progressed(3);
6033 _maybe_skip_whitespace_tokens();
6034 goto seqblck_finish;
6038 _c4err(
"parse error");
// Debug dump: prints level, indref, node id and flags of every stacked state.
6047 for(
auto const& s : m_evt_handler->m_stack)
6049 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// Fallback: when the parent is a block map (RMAP and BLCK both set) with the
// same indref as this sequence, the sequence is treated as indentless and
// ended by popping to the parent, where a sibling is added. Otherwise it is
// a parse error.
6052 if(m_evt_handler->m_parent && has_all(
RMAP|
BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6054 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6055 _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6056 _handle_indentation_pop(m_evt_handler->m_parent);
6057 _RYML_CB_ASSERT(this->callbacks(), has_all(
RMAP|
BLCK));
6058 m_evt_handler->add_sibling();
6060 goto seqblck_finish;
6064 _c4err(
"parse error");
// No branch jumped to the finish label: go around again. When the line has
// been fully consumed and the file has too, finish.
6070 _c4dbgt(
"seqblck: go again", 0);
6071 if(_finished_line())
6075 if(_finished_file())
6077 _c4dbgp(
"seqblck: finish!");
6079 goto seqblck_finish;
6086 _c4dbgp(
"seqblck: finish");
6092 template<
class EventHandler>
6093 void ParseEngine<EventHandler>::_handle_map_block()
6096 _c4dbgpf(
"handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6099 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
6100 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
6102 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
6104 _maybe_skip_comment();
6105 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6111 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6112 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6113 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6114 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6118 if(m_evt_handler->m_curr->at_line_beginning())
6120 if(m_evt_handler->m_curr->indentation_eq())
6122 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6123 _line_progressed(m_evt_handler->m_curr->indref);
6124 rem = m_evt_handler->m_curr->line_contents.rem;
6128 else if(m_evt_handler->m_curr->indentation_lt())
6130 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6131 _handle_indentation_pop_from_block_map();
6132 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6135 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6136 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6137 rem = m_evt_handler->m_curr->line_contents.rem;
6143 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6144 goto mapblck_finish;
6149 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6150 _c4err(
"invalid indentation");
6156 const char first = rem.str[0];
6157 const size_t startline = m_evt_handler->m_curr->pos.line;
6158 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6159 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6163 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6164 sc = _scan_scalar_squot();
6165 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6166 _handle_annotations_before_blck_key_scalar();
6167 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6169 if(!_maybe_scan_following_colon())
6170 _c4err(
"could not find ':' colon after key");
6171 _maybe_skip_whitespace_tokens();
6173 else if(first ==
'"')
6175 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6176 sc = _scan_scalar_dquot();
6177 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6178 _handle_annotations_before_blck_key_scalar();
6179 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6181 if(!_maybe_scan_following_colon())
6182 _c4err(
"could not find ':' colon after key");
6183 _maybe_skip_whitespace_tokens();
6187 else if(C4_UNLIKELY(first ==
'|'))
6189 _c4err(
"block literal keys must be enclosed in '?'");
6191 else if(C4_UNLIKELY(first ==
'>'))
6193 _c4err(
"block literal keys must be enclosed in '?'");
6195 else if(_scan_scalar_plain_map_blck(&sc))
6197 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6198 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6199 _handle_annotations_before_blck_key_scalar();
6200 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6202 if(!_maybe_scan_following_colon())
6203 _c4err(
"could not find ':' colon after key");
6204 _maybe_skip_whitespace_tokens();
6206 else if(first ==
'?')
6208 _c4dbgp(
"mapblck[RKEY]: key token!");
6210 _line_progressed(1);
6211 _maybe_skip_whitespace_tokens();
6212 m_was_inside_qmrk =
true;
6215 else if(first ==
':')
6217 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6218 _handle_annotations_before_blck_key_scalar();
6219 m_evt_handler->set_key_scalar_plain_empty();
6221 _line_progressed(1);
6222 _maybe_skip_whitespace_tokens();
6224 else if(first ==
'*')
6226 csubstr ref = _scan_ref_map();
6227 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6228 _handle_annotations_before_blck_key_scalar();
6229 m_evt_handler->set_key_ref(ref);
6231 if(!_maybe_scan_following_colon())
6232 _c4err(
"could not find ':' colon after key");
6233 _maybe_skip_whitespace_tokens();
6235 else if(first ==
'&')
6237 csubstr anchor = _scan_anchor();
6238 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6239 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6241 else if(first ==
'!')
6243 csubstr tag = _scan_tag();
6244 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6245 _add_annotation(&m_pending_tags, tag, startindent, startline);
6247 else if(first ==
'[')
6252 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6254 _handle_annotations_before_blck_key_scalar();
6255 m_evt_handler->begin_seq_key_flow();
6257 _line_progressed(1);
6258 _set_indentation(startindent);
6259 goto mapblck_finish;
6261 else if(first ==
'{')
6266 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6268 _handle_annotations_before_blck_key_scalar();
6269 m_evt_handler->begin_map_key_flow();
6271 _line_progressed(1);
6272 _set_indentation(startindent);
6273 goto mapblck_finish;
6275 else if(first ==
'-')
6277 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6278 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6280 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6281 _start_doc_suddenly();
6282 _line_progressed(3);
6283 _maybe_skip_whitespace_tokens();
6284 goto mapblck_finish;
6288 _c4err(
"parse error");
6291 else if(first ==
'.')
6293 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6294 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6296 _c4dbgp(
"mapblck[RKEY]: end doc");
6297 _end_doc_suddenly();
6298 _line_progressed(3);
6299 _maybe_skip_whitespace_tokens();
6300 goto mapblck_finish;
6304 _c4err(
"parse error");
6308 else if(first ==
'\t')
6310 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6311 _maybe_skipchars(
'\t');
6315 _c4err(
"parse error");
6318 else if(has_any(
RKCL))
6320 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6321 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6322 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6323 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6327 if(m_evt_handler->m_curr->at_line_beginning())
6329 if(m_evt_handler->m_curr->indentation_eq())
6331 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6332 _line_progressed(m_evt_handler->m_curr->indref);
6333 rem = m_evt_handler->m_curr->line_contents.rem;
6337 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6339 _c4err(
"invalid indentation");
6342 const char first = rem.str[0];
6343 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6346 _c4dbgp(
"mapblck[RKCL]: found the colon");
6348 _line_progressed(1);
6349 _maybe_skip_whitespace_tokens();
6351 else if(first ==
'?')
6353 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6354 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6355 m_evt_handler->set_val_scalar_plain_empty();
6356 m_evt_handler->add_sibling();
6358 _line_progressed(1);
6359 _maybe_skip_whitespace_tokens();
6361 else if(first ==
'-')
6363 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6365 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6366 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6367 _start_doc_suddenly();
6368 _line_progressed(3);
6369 _maybe_skip_whitespace_tokens();
6370 goto mapblck_finish;
6374 _c4err(
"parse error");
6377 else if(first ==
'.')
6379 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6380 csubstr rs = rem.sub(1);
6381 if(rs ==
".." || rs.begins_with(
".. "))
6383 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6384 _end_doc_suddenly();
6385 _line_progressed(3);
6386 goto mapblck_finish;
6390 _c4err(
"parse error");
6393 else if(m_was_inside_qmrk)
6395 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6396 _c4dbgp(
"mapblck[RKCL]: missing :");
6397 m_evt_handler->set_val_scalar_plain_empty();
6398 m_evt_handler->add_sibling();
6399 m_was_inside_qmrk =
false;
6404 _c4err(
"parse error");
6407 else if(has_any(
RVAL))
6409 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6410 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6411 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6412 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6416 if(m_evt_handler->m_curr->at_line_beginning())
6418 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6419 m_evt_handler->m_curr->more_indented =
false;
6420 if(m_evt_handler->m_curr->indref ==
npos)
6422 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6423 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6424 _line_progressed(m_evt_handler->m_curr->indref);
6425 rem = m_evt_handler->m_curr->line_contents.rem;
6429 else if(m_evt_handler->m_curr->indentation_eq())
6431 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6432 _line_progressed(m_evt_handler->m_curr->indref);
6433 rem = m_evt_handler->m_curr->line_contents.rem;
6461 else if(m_evt_handler->m_curr->indentation_gt())
6463 _c4dbgp(
"mapblck[RVAL]: more indented!");
6464 m_evt_handler->m_curr->more_indented =
true;
6465 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6466 rem = m_evt_handler->m_curr->line_contents.rem;
6470 else if(m_evt_handler->m_curr->indentation_lt())
6472 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6473 _handle_indentation_pop_from_block_map();
6476 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6477 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6480 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6481 m_evt_handler->add_sibling();
6488 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6489 goto mapblck_finish;
6492 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6494 _c4dbgp(
"mapblck[RVAL]: empty line!");
6495 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6502 const char first = rem.str[0];
6503 const size_t startline = m_evt_handler->m_curr->pos.line;
6504 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6505 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6509 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6510 sc = _scan_scalar_squot();
6511 if(!_maybe_scan_following_colon())
6513 _c4dbgp(
"mapblck[RVAL]: set as val");
6514 _handle_annotations_before_blck_val_scalar();
6515 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6516 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6521 if(startindent != m_evt_handler->m_curr->indref)
6523 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6524 _handle_annotations_before_start_mapblck(startline);
6526 m_evt_handler->begin_map_val_block();
6527 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6528 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6529 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6530 _maybe_skip_whitespace_tokens();
6531 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6537 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6538 m_evt_handler->set_val_scalar_plain_empty();
6539 m_evt_handler->add_sibling();
6540 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6541 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6543 _maybe_skip_whitespace_tokens();
6547 else if(first ==
'"')
6549 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6550 sc = _scan_scalar_dquot();
6551 if(!_maybe_scan_following_colon())
6553 _c4dbgp(
"mapblck[RVAL]: set as val");
6554 _handle_annotations_before_blck_val_scalar();
6555 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6556 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6561 if(startindent != m_evt_handler->m_curr->indref)
6563 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6564 _handle_annotations_before_start_mapblck(startline);
6566 m_evt_handler->begin_map_val_block();
6567 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6568 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6569 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6570 _maybe_skip_whitespace_tokens();
6571 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6577 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6578 m_evt_handler->set_val_scalar_plain_empty();
6579 m_evt_handler->add_sibling();
6580 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6581 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6583 _maybe_skip_whitespace_tokens();
6589 else if(first ==
'|')
6591 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6593 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6594 _handle_annotations_before_blck_val_scalar();
6595 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6596 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6599 else if(first ==
'>')
6601 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6603 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6604 _handle_annotations_before_blck_val_scalar();
6605 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6606 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6609 else if(_scan_scalar_plain_map_blck(&sc))
6611 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6612 if(!_maybe_scan_following_colon())
6614 _c4dbgp(
"mapblck[RVAL]: set as val");
6615 _handle_annotations_before_blck_val_scalar();
6616 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6617 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6622 if(startindent != m_evt_handler->m_curr->indref)
6624 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6626 _handle_annotations_before_start_mapblck(startline);
6627 m_evt_handler->begin_map_val_block();
6628 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6629 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6630 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6631 _maybe_skip_whitespace_tokens();
6632 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6638 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6639 _handle_annotations_before_blck_val_scalar();
6640 m_evt_handler->set_val_scalar_plain_empty();
6641 m_evt_handler->add_sibling();
6642 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6643 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6645 _maybe_skip_whitespace_tokens();
6649 else if(first ==
'-')
6653 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6655 _handle_annotations_before_blck_val_scalar();
6656 m_evt_handler->begin_seq_val_block();
6658 _set_indentation(startindent);
6659 _line_progressed(1);
6660 _maybe_skip_whitespace_tokens();
6661 goto mapblck_finish;
6663 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6665 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6666 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6667 _start_doc_suddenly();
6668 _line_progressed(3);
6669 _maybe_skip_whitespace_tokens();
6670 goto mapblck_finish;
6674 _c4err(
"parse error");
6677 else if(first ==
'[')
6679 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6681 _handle_annotations_before_blck_val_scalar();
6682 m_evt_handler->begin_seq_val_flow();
6684 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6685 _line_progressed(1);
6686 goto mapblck_finish;
6688 else if(first ==
'{')
6690 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6692 _handle_annotations_before_blck_val_scalar();
6693 m_evt_handler->begin_map_val_flow();
6695 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6696 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6697 _line_progressed(1);
6698 goto mapblck_finish;
6700 else if(first ==
'*')
6702 csubstr ref = _scan_ref_map();
6703 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6704 if(startindent == m_evt_handler->m_curr->indref)
6706 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6707 m_evt_handler->set_val_ref(ref);
6712 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6713 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6714 if(_maybe_scan_following_colon())
6716 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6718 _handle_annotations_before_blck_val_scalar();
6719 m_evt_handler->begin_map_val_block();
6720 m_evt_handler->set_key_ref(ref);
6721 _set_indentation(startindent);
6727 _c4dbgp(
"mapblck[RVAL]: was val ref");
6728 _handle_annotations_before_blck_val_scalar();
6729 m_evt_handler->set_val_ref(ref);
6733 _maybe_skip_whitespace_tokens();
6735 else if(first ==
'&')
6737 csubstr anchor = _scan_anchor();
6738 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6739 if(startindent == m_evt_handler->m_curr->indref)
6741 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6742 m_evt_handler->set_val_scalar_plain_empty();
6743 m_evt_handler->add_sibling();
6748 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6750 else if(first ==
'!')
6752 csubstr tag = _scan_tag();
6753 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6754 if(startindent == m_evt_handler->m_curr->indref)
6756 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6757 _handle_annotations_before_blck_val_scalar();
6758 m_evt_handler->set_val_scalar_plain_empty();
6759 m_evt_handler->add_sibling();
6764 _add_annotation(&m_pending_tags, tag, startindent, startline);
6766 else if(first ==
'?')
6768 if(startindent == m_evt_handler->m_curr->indref)
6770 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6771 _handle_annotations_before_blck_val_scalar();
6772 m_evt_handler->set_val_scalar_plain_empty();
6773 m_evt_handler->add_sibling();
6776 else if(startindent > m_evt_handler->m_curr->indref)
6778 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6780 _handle_annotations_before_blck_val_scalar();
6781 m_evt_handler->begin_map_val_block();
6783 _set_indentation(startindent);
6787 _c4err(
"parse error");
6789 m_was_inside_qmrk =
true;
6790 _line_progressed(1);
6791 _maybe_skip_whitespace_tokens();
6794 else if(first ==
':')
6796 if(startindent == m_evt_handler->m_curr->indref)
6798 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6799 m_evt_handler->set_val_scalar_plain_empty();
6800 m_evt_handler->add_sibling();
6801 m_evt_handler->set_key_scalar_plain_empty();
6802 _line_progressed(1);
6803 _maybe_skip_whitespace_tokens();
6808 _c4err(
"parse error");
6811 else if(first ==
'.')
6813 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6814 csubstr rs = rem.sub(1);
6815 if(rs ==
".." || rs.begins_with(
".. "))
6817 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6818 _end_doc_suddenly();
6819 _line_progressed(3);
6820 _maybe_skip_whitespace_tokens();
6821 goto mapblck_finish;
6825 _c4err(
"parse error");
6829 else if(first ==
'\t')
6831 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6832 _maybe_skipchars(
'\t');
6836 _c4err(
"parse error");
6839 else if(has_any(
RNXT))
6841 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6842 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6843 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6844 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6848 if(m_evt_handler->m_curr->at_line_beginning())
6850 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6851 if(m_evt_handler->m_curr->indentation_eq())
6853 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6854 _line_progressed(m_evt_handler->m_curr->indref);
6855 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6856 m_evt_handler->add_sibling();
6860 else if(m_evt_handler->m_curr->indentation_lt())
6862 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6863 _handle_indentation_pop_from_block_map();
6866 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6869 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6870 m_evt_handler->add_sibling();
6877 goto mapblck_finish;
6883 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
6884 if(!rem.begins_with_any(
" \t"))
6886 _c4err(
"parse error");
6891 rem = m_evt_handler->m_curr->line_contents.rem;
6894 _c4dbgp(
"seqblck[RNXT]: again");
6902 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6903 const char first = rem.str[0];
6904 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6907 if(m_evt_handler->m_curr->more_indented)
6909 _c4dbgp(
"mapblck[RNXT]: start child block map");
6910 C4_NOT_IMPLEMENTED();
6912 _line_progressed(1);
6913 _set_indentation(m_evt_handler->m_curr->scalar_col);
6914 m_evt_handler->m_curr->more_indented =
false;
6919 _c4err(
"parse error");
6922 else if(first ==
' ')
6924 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6925 _maybe_skip_whitespace_tokens();
6929 _c4err(
"parse error");
6932 else if(has_any(
QMRK))
6934 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6935 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6936 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6937 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6941 if(m_evt_handler->m_curr->at_line_beginning())
6943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6944 if(m_evt_handler->m_curr->indentation_eq())
6946 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6947 _line_progressed(m_evt_handler->m_curr->indref);
6948 rem = m_evt_handler->m_curr->line_contents.rem;
6952 else if(m_evt_handler->m_curr->indentation_lt())
6954 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
6955 _handle_indentation_pop_from_block_map();
6956 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6959 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
6960 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
6961 rem = m_evt_handler->m_curr->line_contents.rem;
6967 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
6968 goto mapblck_finish;
6974 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
6975 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6976 rem = m_evt_handler->m_curr->line_contents.rem;
6984 const char first = rem.str[0];
6985 const size_t startline = m_evt_handler->m_curr->pos.line;
6986 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6987 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
6991 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
6992 sc = _scan_scalar_squot();
6993 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6994 if(!_maybe_scan_following_colon())
6996 _c4dbgp(
"mapblck[QMRK]: set as key");
6997 _handle_annotations_before_blck_key_scalar();
6998 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7003 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7005 _handle_annotations_before_start_mapblck_as_key();
7006 m_evt_handler->begin_map_key_block();
7007 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7008 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7009 _maybe_skip_whitespace_tokens();
7010 _set_indentation(startindent);
7015 else if(first ==
'"')
7017 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7018 sc = _scan_scalar_dquot();
7019 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7020 if(!_maybe_scan_following_colon())
7022 _c4dbgp(
"mapblck[QMRK]: set as key");
7023 _handle_annotations_before_blck_key_scalar();
7024 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7029 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7031 _handle_annotations_before_start_mapblck_as_key();
7032 m_evt_handler->begin_map_key_block();
7033 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7034 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7035 _maybe_skip_whitespace_tokens();
7036 _set_indentation(startindent);
7041 else if(first ==
'|')
7043 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7045 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7046 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7047 _handle_annotations_before_blck_key_scalar();
7048 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7051 else if(first ==
'>')
7053 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7055 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7056 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7057 _handle_annotations_before_blck_key_scalar();
7058 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7061 else if(_scan_scalar_plain_map_blck(&sc))
7063 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7064 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7065 if(!_maybe_scan_following_colon())
7067 _c4dbgp(
"mapblck[QMRK]: set as key");
7068 _handle_annotations_before_blck_key_scalar();
7069 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7074 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7076 _handle_annotations_before_start_mapblck_as_key();
7077 m_evt_handler->begin_map_key_block();
7078 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7079 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7080 _maybe_skip_whitespace_tokens();
7081 _set_indentation(startindent);
7086 else if(first ==
':')
7088 if(startindent == m_evt_handler->m_curr->indref)
7090 _c4dbgp(
"mapblck[QMRK]: empty key");
7092 _handle_annotations_before_blck_key_scalar();
7093 m_evt_handler->set_key_scalar_plain_empty();
7094 _line_progressed(1);
7095 _maybe_skip_whitespace_tokens();
7099 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7101 _handle_annotations_before_start_mapblck_as_key();
7102 m_evt_handler->begin_map_key_block();
7103 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7104 m_evt_handler->set_key_scalar_plain_empty();
7105 _line_progressed(1);
7106 _maybe_skip_whitespace_tokens();
7107 _set_indentation(startindent);
7112 else if(first ==
'*')
7114 csubstr ref = _scan_ref_map();
7115 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7116 if(!_maybe_scan_following_colon())
7118 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7119 _handle_annotations_before_blck_key_scalar();
7120 m_evt_handler->set_key_ref(ref);
7125 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7127 _handle_annotations_before_blck_key_scalar();
7128 m_evt_handler->begin_map_key_block();
7129 m_evt_handler->set_key_ref(ref);
7130 _set_indentation(startindent);
7134 _maybe_skip_whitespace_tokens();
7136 else if(first ==
'&')
7138 csubstr anchor = _scan_anchor();
7139 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7140 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7142 else if(first ==
'!')
7144 csubstr tag = _scan_tag();
7145 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7146 _add_annotation(&m_pending_tags, tag, startindent, startline);
7148 else if(first ==
'-')
7150 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
7151 csubstr rs = rem.sub(1);
7152 if(rs ==
"--" || rs.begins_with(
"-- "))
7154 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7155 _start_doc_suddenly();
7156 _line_progressed(3);
7160 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7162 m_evt_handler->begin_seq_key_block();
7164 _set_indentation(startindent);
7165 _line_progressed(1);
7167 _maybe_skip_whitespace_tokens();
7168 goto mapblck_finish;
7170 else if(first ==
'[')
7172 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7174 m_evt_handler->begin_seq_key_flow();
7176 _set_indentation(m_evt_handler->m_parent->indref);
7177 _line_progressed(1);
7178 goto mapblck_finish;
7180 else if(first ==
'{')
7182 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7184 m_evt_handler->begin_map_key_flow();
7186 _set_indentation(m_evt_handler->m_parent->indref);
7187 _line_progressed(1);
7188 goto mapblck_finish;
7190 else if(first ==
'?')
7192 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7193 m_evt_handler->set_key_scalar_plain_empty();
7194 m_evt_handler->set_val_scalar_plain_empty();
7195 m_evt_handler->add_sibling();
7196 _line_progressed(1);
7198 else if(first ==
'.')
7200 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7201 csubstr rs = rem.sub(1);
7202 if(rs ==
".." || rs.begins_with(
".. "))
7204 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7205 _end_doc_suddenly();
7206 _line_progressed(3);
7207 goto mapblck_finish;
7211 _c4err(
"parse error");
7216 _c4err(
"parse error");
7221 _c4dbgt(
"mapblck: again", 0);
7222 if(_finished_line())
7226 if(_finished_file())
7228 _c4dbgp(
"mapblck: file finished!");
7230 goto mapblck_finish;
7237 _c4dbgp(
"mapblck: finish");
// Top-level dispatcher used while no container is open yet: the asserts
// below require RTOP to be set and RNXT/RSEQ/RMAP to be clear. Judging by
// the name and the runk_json debug tags this is the JSON-flavoured
// counterpart of _handle_unk(); the visible branches cover only an opening
// bracket, an opening brace, a byte order mark, a double-quoted scalar and
// a plain scalar. Takes no arguments and returns nothing; every effect goes
// through m_evt_handler and the parser members.
7243 template<
class EventHandler>
7244 void ParseEngine<EventHandler>::_handle_unk_json()
7246 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions: not inside a seq or map, not expecting a next element,
// and still at the top level.
7248 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7249 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7251 _maybe_skip_comment();
7252 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Count the leading spaces and tabs of the remaining line.
7256 size_t pos = rem.first_not_of(
" \t");
// first_not_of yields npos when the remainder is only spaces/tabs; in that
// case the whole remainder is consumed.
7259 pos = pos !=
npos ? pos : rem.len;
7260 _c4dbgpf(
"skipping indentation of {}", pos);
// Advance past the whitespace and refresh the local view of the line.
7261 _line_progressed(pos);
7262 rem = m_evt_handler->m_curr->line_contents.rem;
7265 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Opening bracket: start a flow sequence as the value, use the column
// reported by current_col(rem) as indentation, flag the document as
// non-empty and step over the bracket.
7268 if(rem.begins_with(
'['))
7270 _c4dbgp(
"it's a seq");
7271 m_evt_handler->check_trailing_doc_token();
7273 m_evt_handler->begin_seq_val_flow();
7275 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7276 m_doc_empty =
false;
7277 _line_progressed(1);
// Opening brace: same handling as above, but for a flow map.
7279 else if(rem.begins_with(
'{'))
7281 _c4dbgp(
"it's a map");
7282 m_evt_handler->check_trailing_doc_token();
7284 m_evt_handler->begin_map_val_flow();
7286 m_doc_empty =
false;
7287 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7288 _line_progressed(1);
// Byte order mark: _handle_bom() returning true means nothing else is done
// in this branch besides the debug trace.
7290 else if(_handle_bom())
7292 _c4dbgp(
"byte order mark");
// Remaining case: a scalar. No scalar may already be stored (SSCL clear).
7296 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7297 _maybe_skip_whitespace_tokens();
7298 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7301 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7302 const char first = s.str[0];
// Double-quoted scalar path.
// NOTE(review): presumably guarded by a test of first against the double
// quote character - confirm against the full file.
7306 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7307 m_evt_handler->check_trailing_doc_token();
7310 m_doc_empty =
false;
7311 sc = _scan_scalar_dquot();
7312 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// Without a following colon the scalar becomes the value.
7313 if(!_maybe_scan_following_colon())
7315 _c4dbgp(
"runk_json: set as val");
7316 _handle_annotations_before_blck_val_scalar();
7317 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// NOTE(review): this error is presumably the branch taken when a colon was
// found, i.e. a scalar used as a key is rejected here - confirm.
7321 _c4err(
"parse error");
// Plain scalar path; same colon rule as for the double-quoted scalar.
7324 else if(_scan_scalar_plain_unk(&sc))
7326 _c4dbgp(
"runk_json: got a plain scalar");
7327 m_evt_handler->check_trailing_doc_token();
7330 m_doc_empty =
false;
7331 if(!_maybe_scan_following_colon())
7333 _c4dbgp(
"runk_json: set as val");
7334 _handle_annotations_before_blck_val_scalar();
// The plain scalar is filtered using the line indentation captured above.
7335 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7336 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7340 _c4err(
"parse error");
// Nothing matched: report a parse error.
7345 _c4err(
"parse error");
// Top-level dispatcher used while the kind of the next node is still
// unknown: the asserts below require RTOP to be set and RNXT/RSEQ/RMAP to
// be clear. After skipping comments and leading whitespace it inspects the
// start of the remaining line and, depending on what it finds, handles
// document markers and directives, opens a flow or block container,
// records a pending anchor or tag, or scans a scalar that becomes either a
// value or the first key of a new block map. Takes no arguments and returns
// nothing; every effect goes through m_evt_handler and the parser members.
7353 template<
class EventHandler>
7354 void ParseEngine<EventHandler>::_handle_unk()
7356 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions: not inside a seq or map, not expecting a next element,
// and still at the top level.
7358 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7359 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7361 _maybe_skip_comment();
7362 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Count the leading spaces and tabs of the remaining line.
7366 size_t pos = rem.first_not_of(
" \t");
// first_not_of yields npos when the remainder is only spaces/tabs; in that
// case the whole remainder is consumed.
7369 pos = pos !=
npos ? pos : rem.len;
7370 _c4dbgpf(
"skipping {} whitespace characters", pos);
// Advance past the whitespace and refresh the local view of the line.
7371 _line_progressed(pos);
7372 rem = m_evt_handler->m_curr->line_contents.rem;
7375 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Document markers and directives are only looked for on an unindented
// line whose start has not been consumed yet.
7378 if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7380 _c4dbgp(
"rtop: zero indent + at line begin");
// NOTE(review): presumably guarded by a byte-order-mark check; after it the
// remainder is reloaded - confirm against the full file.
7383 _c4dbgp(
"byte order mark!");
7384 rem = m_evt_handler->m_curr->line_contents.rem;
7388 const char first = rem.str[0];
// Possible document-start marker: when _is_doc_begin_token accepts the
// line, indentation is reset to zero and the three marker characters plus
// any following whitespace are consumed.
7391 _c4dbgp(
"rtop: suspecting doc");
7392 if(_is_doc_begin_token(rem))
7394 _c4dbgp(
"rtop: begin doc");
7397 _set_indentation(0);
7399 _line_progressed(3u);
7400 _maybe_skip_whitespace_tokens();
// Possible document-end marker.
7404 else if(first ==
'.')
7406 _c4dbgp(
"rtop: suspecting doc end");
7407 if(_is_doc_end_token(rem))
// NOTE(review): the two traces below suggest the marker either ends the
// document or is ignored; the deciding condition is not shown here. In
// both cases the three marker characters are consumed afterwards.
7409 _c4dbgp(
"rtop: end doc");
7416 _c4dbgp(
"rtop: ignore end doc");
7419 _line_progressed(3u);
7420 _maybe_skip_whitespace_tokens();
// Directive line.
7424 else if(first ==
'%')
7426 _c4dbgpf(
"directive: {}", rem);
// A directive is rejected when the current document already has content
// and the NDOC flag is not set.
7427 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7428 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"need document footer before directives");
7429 _handle_directive(rem);
// General dispatch on the first character of the remaining line.
7435 char first = rem.str[0];
// Flow sequence (per the debug traces below).
// NOTE(review): presumably guarded by a test of first against the opening
// bracket - confirm.
7439 m_evt_handler->check_trailing_doc_token();
7441 m_doc_empty =
false;
7442 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
// Common case: pending annotations do not force a key container, so the
// flow sequence is simply the value.
7443 if(C4_LIKELY( ! _annotations_require_key_container()))
7445 _c4dbgp(
"it's a seq, flow");
7446 _handle_annotations_before_blck_val_scalar();
7447 m_evt_handler->begin_seq_val_flow();
7449 _set_indentation(startindent);
// Otherwise a block map is opened first and the flow sequence becomes its
// key.
7453 _c4dbgp(
"start new block map, set flow seq as key (!)");
7454 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7455 m_evt_handler->begin_map_val_block();
7457 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7458 m_evt_handler->begin_seq_key_flow();
7460 _set_indentation(startindent);
// Step over the opening bracket.
7462 _line_progressed(1);
// Flow map: mirrors the flow sequence handling above.
7464 else if(first ==
'{')
7466 m_evt_handler->check_trailing_doc_token();
7468 m_doc_empty =
false;
7469 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7470 if(C4_LIKELY( ! _annotations_require_key_container()))
7472 _c4dbgp(
"it's a map, flow");
7473 _handle_annotations_before_blck_val_scalar();
7474 m_evt_handler->begin_map_val_flow();
7476 _set_indentation(startindent);
// Flow map used as the key of a newly opened block map.
7480 _c4dbgp(
"start new block map, set flow map as key (!)");
7481 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7482 m_evt_handler->begin_map_val_block();
7484 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7485 m_evt_handler->begin_map_key_flow();
7487 _set_indentation(startindent);
// Step over the opening brace.
7489 _line_progressed(1);
// Dash accepted by _is_blck_token: open a block sequence at the column of
// the dash and consume it together with following whitespace.
7491 else if(first ==
'-' && _is_blck_token(rem))
7493 _c4dbgp(
"it's a seq, block");
7494 m_evt_handler->check_trailing_doc_token();
7496 _handle_annotations_before_blck_val_scalar();
7497 m_evt_handler->begin_seq_val_block();
7499 m_doc_empty =
false;
7500 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7501 _line_progressed(1);
7502 _maybe_skip_whitespace_tokens();
// Question mark accepted by _is_blck_token: open a block map whose first
// key is an explicit (complex) key, and remember that via
// m_was_inside_qmrk.
7504 else if(first ==
'?' && _is_blck_token(rem))
7506 _c4dbgp(
"it's a map + this key is complex");
7507 m_evt_handler->check_trailing_doc_token();
7509 _handle_annotations_before_blck_val_scalar();
7510 m_evt_handler->begin_map_val_block();
7512 m_doc_empty =
false;
7513 m_was_inside_qmrk =
true;
7514 _save_indentation();
7515 _line_progressed(1);
7516 _maybe_skip_whitespace_tokens();
// Colon accepted by _is_blck_token.
7518 else if(first ==
':' && _is_blck_token(rem))
// First alternative: open a block map whose first key is empty.
// NOTE(review): the condition choosing between this and the alternative
// below is not shown here - confirm against the full file.
7522 _c4dbgp(
"it's a map with an empty key");
7523 m_evt_handler->check_trailing_doc_token();
7525 _handle_annotations_before_blck_val_scalar();
7526 m_evt_handler->begin_map_val_block();
7527 m_evt_handler->set_key_scalar_plain_empty();
7528 m_doc_empty =
false;
7529 _save_indentation();
// Second alternative: the value emitted earlier is reinterpreted as the
// first key of a new block map; indref is read before the handler call and
// re-applied afterwards.
7533 _c4dbgp(
"actually prev val is a key!");
7534 size_t prev_indentation = m_evt_handler->m_curr->indref;
7535 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7536 _set_indentation(prev_indentation);
// Consume the colon and any following whitespace.
7539 _line_progressed(1);
7540 _maybe_skip_whitespace_tokens();
// Anchor: scan it and queue it in m_pending_anchors together with the
// column and line at which it was seen.
7542 else if(first ==
'&')
7544 csubstr anchor = _scan_anchor();
7545 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7546 m_evt_handler->check_trailing_doc_token();
7548 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7549 const size_t line = m_evt_handler->m_curr->pos.line;
7550 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7551 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7552 m_doc_empty =
false;
// Alias/reference.
7554 else if(first ==
'*')
7556 csubstr ref = _scan_ref_map();
7557 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7558 m_evt_handler->check_trailing_doc_token();
7560 m_doc_empty =
false;
// No colon after the reference: it is the value.
7561 if(!_maybe_scan_following_colon())
7563 _c4dbgp(
"runk: set val ref");
7564 _handle_annotations_before_blck_val_scalar();
7565 m_evt_handler->set_val_ref(ref);
// Otherwise (per the trace) the reference is the first key of a new block
// map.
7569 _c4dbgp(
"runk: start new block map, set ref as key");
7570 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7571 const size_t startline = m_evt_handler->m_curr->pos.line;
7572 _handle_annotations_before_start_mapblck(startline);
7573 m_evt_handler->begin_map_val_block();
7574 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7575 m_evt_handler->set_key_ref(ref);
7576 _maybe_skip_whitespace_tokens();
7577 _set_indentation(startindent);
// Tag: scan it and queue it in m_pending_tags with its column and line.
7581 else if(first ==
'!')
7583 csubstr tag = _scan_tag();
7584 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7587 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7588 const size_t line = m_evt_handler->m_curr->pos.line;
7589 _add_annotation(&m_pending_tags, tag, indentation, line);
// Remaining case: a scalar. No scalar may already be stored (SSCL clear).
7593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7594 _maybe_skip_whitespace_tokens();
7595 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// Line indentation and line number where the scalar starts; both are used
// when the scalar turns out to be a key of a new block map.
7598 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7599 const size_t startline = m_evt_handler->m_curr->pos.line;
// Single-quoted scalar path.
// NOTE(review): presumably guarded by a test of the first character against
// the single quote - confirm.
7604 _c4dbgp(
"runk: scanning single-quoted scalar");
7605 m_evt_handler->check_trailing_doc_token();
7608 m_doc_empty =
false;
7609 sc = _scan_scalar_squot();
// No colon after the scalar: filter it as a value and emit it.
7610 if(!_maybe_scan_following_colon())
7612 _c4dbgp(
"runk: set as val");
7613 _handle_annotations_before_blck_val_scalar();
7614 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7615 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Otherwise (per the trace) open a block map and emit the scalar as its
// first key, using the key flavour of the filter.
7619 _c4dbgp(
"runk: start new block map, set scalar as key");
7620 _handle_annotations_before_start_mapblck(startline);
7621 m_evt_handler->begin_map_val_block();
7622 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7623 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7624 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7625 _maybe_skip_whitespace_tokens();
7626 _set_indentation(startindent);
// Double-quoted scalar: same value-or-key decision as above.
7630 else if(first ==
'"')
7632 _c4dbgp(
"runk: scanning double-quoted scalar");
7633 m_evt_handler->check_trailing_doc_token();
7636 m_doc_empty =
false;
7637 sc = _scan_scalar_dquot();
7638 if(!_maybe_scan_following_colon())
7640 _c4dbgp(
"runk: set as val");
7641 _handle_annotations_before_blck_val_scalar();
7642 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7643 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7647 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7648 _handle_annotations_before_start_mapblck(startline);
7649 m_evt_handler->begin_map_val_block();
7650 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7651 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7652 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7653 _maybe_skip_whitespace_tokens();
7654 _set_indentation(startindent);
// Block literal scalar: scanned with the line indentation as reference.
7658 else if(first ==
'|')
7660 _c4dbgp(
"runk: scanning block-literal scalar");
7661 m_evt_handler->check_trailing_doc_token();
7664 m_doc_empty =
false;
7666 _scan_block(&sb, startindent);
// Expected case: no colon follows, so the block is the value.
7667 if(C4_LIKELY(!_maybe_scan_following_colon()))
7669 _c4dbgp(
"runk: set as val");
7670 _handle_annotations_before_blck_val_scalar();
7671 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7672 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// NOTE(review): presumably the colon-found branch; a block literal is not
// accepted as an implicit key - confirm.
7676 _c4err(
"block literal keys must be enclosed in '?'");
// Block folded scalar: same handling as the literal form.
7679 else if(first ==
'>')
7681 _c4dbgp(
"runk: scanning block-folded scalar");
7682 m_evt_handler->check_trailing_doc_token();
7685 m_doc_empty =
false;
7687 _scan_block(&sb, startindent);
7688 if(C4_LIKELY(!_maybe_scan_following_colon()))
7690 _c4dbgp(
"runk: set as val");
7691 _handle_annotations_before_blck_val_scalar();
7692 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7693 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7697 _c4err(
"block folded keys must be enclosed in '?'");
// Plain scalar: value when no colon follows, otherwise first key of a new
// block map; both filters receive the line indentation.
7700 else if(_scan_scalar_plain_unk(&sc))
7702 _c4dbgp(
"runk: got a plain scalar");
7703 m_evt_handler->check_trailing_doc_token();
7706 m_doc_empty =
false;
7707 if(!_maybe_scan_following_colon())
7709 _c4dbgp(
"runk: set as val");
7710 _handle_annotations_before_blck_val_scalar();
7711 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7712 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7716 _c4dbgp(
"runk: start new block map, set scalar as key");
7717 _handle_annotations_before_start_mapblck(startline);
7718 m_evt_handler->begin_map_val_block();
7719 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7720 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7721 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7722 _maybe_skip_whitespace_tokens();
7723 _set_indentation(startindent);
7733 template<
class EventHandler>
7734 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
7736 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7738 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK|
FLOW));
7740 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7743 _c4dbgp(
"usty[RNXT]: finishing!");
7748 _maybe_skip_comment();
7749 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7753 size_t pos = rem.first_not_of(
" \t");
7756 pos = pos !=
npos ? pos : rem.len;
7757 _c4dbgpf(
"skipping indentation of {}", pos);
7758 _line_progressed(pos);
7759 rem = m_evt_handler->m_curr->line_contents.rem;
7762 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7765 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7766 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7767 char first = rem.str[0];
7770 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7771 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
7774 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7776 m_evt_handler->_push();
7778 _set_indentation(startindent);
7779 _line_progressed(1);
7780 _maybe_skip_whitespace_tokens();
7782 else if(first ==
'-' && _is_blck_token(rem))
7784 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7786 m_evt_handler->_push();
7788 _set_indentation(startindent);
7789 _line_progressed(1);
7790 _maybe_skip_whitespace_tokens();
7794 _c4err(
"can only parse a seq into an existing seq");
7797 else if(has_any(
RMAP))
7799 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7800 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
7803 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7805 _handle_annotations_before_blck_val_scalar();
7806 m_evt_handler->_push();
7808 _set_indentation(startindent);
7809 _line_progressed(1);
7810 _maybe_skip_whitespace_tokens();
7812 else if(first ==
'?' && _is_blck_token(rem))
7814 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7816 _handle_annotations_before_blck_val_scalar();
7817 m_evt_handler->_push();
7819 m_was_inside_qmrk =
true;
7820 _save_indentation();
7821 _line_progressed(1);
7822 _maybe_skip_whitespace_tokens();
7824 else if(first ==
':' && _is_blck_token(rem))
7826 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7828 _handle_annotations_before_blck_val_scalar();
7829 m_evt_handler->_push();
7830 m_evt_handler->set_key_scalar_plain_empty();
7832 _save_indentation();
7833 _line_progressed(1);
7834 _maybe_skip_whitespace_tokens();
7836 else if(rem.begins_with(
'&'))
7838 csubstr anchor = _scan_anchor();
7839 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7840 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7841 const size_t line = m_evt_handler->m_curr->pos.line;
7842 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7843 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7845 else if(first ==
'*')
7847 csubstr ref = _scan_ref_map();
7848 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7849 if(!_maybe_scan_following_colon())
7851 _c4err(
"cannot read a VAL to a map");
7855 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7856 const size_t startline = m_evt_handler->m_curr->pos.line;
7858 _handle_annotations_before_start_mapblck(startline);
7859 m_evt_handler->_push();
7860 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7861 m_evt_handler->set_key_ref(ref);
7862 _maybe_skip_whitespace_tokens();
7863 _set_indentation(startindent);
7867 else if(first ==
'!')
7869 csubstr tag = _scan_tag();
7870 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7873 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7874 const size_t line = m_evt_handler->m_curr->pos.line;
7875 _add_annotation(&m_pending_tags, tag, indentation, line);
7877 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7879 _c4err(
"cannot parse a seq into an existing map");
7883 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7884 startindent = m_evt_handler->m_curr->line_contents.indentation;
7885 const size_t startline = m_evt_handler->m_curr->pos.line;
7887 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7890 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7891 sc = _scan_scalar_squot();
7892 if(!_maybe_scan_following_colon())
7894 _c4err(
"cannot read a VAL to a map");
7898 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7900 _handle_annotations_before_start_mapblck(startline);
7901 m_evt_handler->_push();
7902 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7903 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7904 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7905 _set_indentation(startindent);
7907 _maybe_skip_whitespace_tokens();
7910 else if(first ==
'"')
7912 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7913 sc = _scan_scalar_dquot();
7914 if(!_maybe_scan_following_colon())
7916 _c4err(
"cannot read a VAL to a map");
7920 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7922 _handle_annotations_before_start_mapblck(startline);
7923 m_evt_handler->_push();
7924 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7925 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7926 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7927 _set_indentation(startindent);
7929 _maybe_skip_whitespace_tokens();
7932 else if(first ==
'|')
7934 _c4err(
"block literal keys must be enclosed in '?'");
7936 else if(first ==
'>')
7938 _c4err(
"block literal keys must be enclosed in '?'");
7940 else if(_scan_scalar_plain_unk(&sc))
7942 _c4dbgp(
"usty[RMAP]: got a plain scalar");
7943 if(!_maybe_scan_following_colon())
7945 _c4err(
"cannot read a VAL to a map");
7949 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7951 _handle_annotations_before_start_mapblck(startline);
7952 m_evt_handler->_push();
7953 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7954 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7955 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7956 _set_indentation(startindent);
7958 _maybe_skip_whitespace_tokens();
7963 _c4err(
"parse error");
7969 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7970 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
7973 _c4dbgp(
"usty[UNK]: it's a flow seq");
7975 _handle_annotations_before_blck_val_scalar();
7976 m_evt_handler->begin_seq_val_flow();
7978 _set_indentation(startindent);
7979 _line_progressed(1);
7980 _maybe_skip_whitespace_tokens();
7982 else if(first ==
'-' && _is_blck_token(rem))
7984 _c4dbgp(
"usty[UNK]: it's a block seq");
7986 _handle_annotations_before_blck_val_scalar();
7987 m_evt_handler->begin_seq_val_block();
7989 _set_indentation(startindent);
7990 _line_progressed(1);
7991 _maybe_skip_whitespace_tokens();
7993 else if(first ==
'{')
7995 _c4dbgp(
"usty[UNK]: it's a flow map");
7997 _handle_annotations_before_blck_val_scalar();
7998 m_evt_handler->begin_map_val_flow();
8000 _set_indentation(startindent);
8001 _line_progressed(1);
8002 _maybe_skip_whitespace_tokens();
8004 else if(first ==
'?' && _is_blck_token(rem))
8006 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8008 _handle_annotations_before_blck_val_scalar();
8009 m_evt_handler->begin_map_val_block();
8011 m_was_inside_qmrk =
true;
8012 _save_indentation();
8013 _line_progressed(1);
8014 _maybe_skip_whitespace_tokens();
8016 else if(first ==
':' && _is_blck_token(rem))
8018 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8020 _handle_annotations_before_blck_val_scalar();
8021 m_evt_handler->begin_map_val_block();
8022 m_evt_handler->set_key_scalar_plain_empty();
8024 _save_indentation();
8025 _line_progressed(1);
8026 _maybe_skip_whitespace_tokens();
8028 else if(first ==
'&')
8030 csubstr anchor = _scan_anchor();
8031 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8032 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8033 const size_t line = m_evt_handler->m_curr->pos.line;
8034 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8035 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8037 else if(first ==
'*')
8039 csubstr ref = _scan_ref_map();
8040 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8041 if(!_maybe_scan_following_colon())
8043 _c4dbgp(
"usty[UNK]: set val ref");
8044 _handle_annotations_before_blck_val_scalar();
8045 m_evt_handler->set_val_ref(ref);
8049 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8050 const size_t startline = m_evt_handler->m_curr->pos.line;
8052 _handle_annotations_before_start_mapblck(startline);
8053 m_evt_handler->begin_map_val_block();
8054 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8055 m_evt_handler->set_key_ref(ref);
8056 _maybe_skip_whitespace_tokens();
8057 _set_indentation(startindent);
8061 else if(first ==
'!')
8063 csubstr tag = _scan_tag();
8064 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8067 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8068 const size_t line = m_evt_handler->m_curr->pos.line;
8069 _add_annotation(&m_pending_tags, tag, indentation, line);
8073 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
8074 startindent = m_evt_handler->m_curr->line_contents.indentation;
8075 const size_t startline = m_evt_handler->m_curr->pos.line;
8078 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8081 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8082 sc = _scan_scalar_squot();
8083 if(!_maybe_scan_following_colon())
8085 _c4dbgp(
"usty[UNK]: set as val");
8086 _handle_annotations_before_blck_val_scalar();
8087 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8088 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8093 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8095 _handle_annotations_before_start_mapblck(startline);
8096 m_evt_handler->begin_map_val_block();
8097 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8098 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8099 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8100 _set_indentation(startindent);
8102 _maybe_skip_whitespace_tokens();
8105 else if(first ==
'"')
8107 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8108 sc = _scan_scalar_dquot();
8109 if(!_maybe_scan_following_colon())
8111 _c4dbgp(
"usty[UNK]: set as val");
8112 _handle_annotations_before_blck_val_scalar();
8113 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8114 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8119 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8121 _handle_annotations_before_start_mapblck(startline);
8122 m_evt_handler->begin_map_val_block();
8123 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8124 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8125 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126 _set_indentation(startindent);
8128 _maybe_skip_whitespace_tokens();
8131 else if(first ==
'|')
8133 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8135 _scan_block(&sb, startindent);
8136 _c4dbgp(
"usty[UNK]: set as val");
8137 _handle_annotations_before_blck_val_scalar();
8138 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8139 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8142 else if(first ==
'>')
8144 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8146 _scan_block(&sb, startindent);
8147 _c4dbgp(
"usty[UNK]: set as val");
8148 _handle_annotations_before_blck_val_scalar();
8149 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8150 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8153 else if(_scan_scalar_plain_unk(&sc))
8155 _c4dbgp(
"usty[UNK]: got a plain scalar");
8156 if(!_maybe_scan_following_colon())
8158 _c4dbgp(
"usty[UNK]: set as val");
8159 _handle_annotations_before_blck_val_scalar();
8160 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8161 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8166 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8168 _handle_annotations_before_start_mapblck(startline);
8169 m_evt_handler->begin_map_val_block();
8170 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8171 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8172 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8173 _set_indentation(startindent);
8175 _maybe_skip_whitespace_tokens();
8180 _c4err(
"parse error");
// Top-level parse driver, a member template parameterized on EventHandler.
// NOTE(review): this listing omits the signature line, the braces and several
// statements. Presumably this is parse_json_in_place_ev(csubstr filename,
// substr src), since only the RMAP/RUNK states are dispatched here -- confirm
// against the original header.
8189 template<
class EventHandler>
// Precondition: the handler's state stack must hold at least one entry.
8192 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Tell the handler that parsing begins; it receives the file name, the
// address of _s_relocate_arena and this parser object.
8196 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8197 m_evt_handler->begin_stream();
// Outer loop: keeps going until _finished_file() reports true.
8198 while( ! _finished_file())
// Inner loop: keeps going until _finished_line() reports true.
8201 while( ! _finished_line())
// The unparsed remainder of the current line must be non-empty here.
8204 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// Dispatch on the parser state flags. The leading branch of this chain and the
// bodies of all branches are not visible in this listing.
8209 else if(has_any(
RMAP))
8213 else if(has_any(
RUNK))
// NOTE(review): presumably the fallback taken when no state flag matched.
8219 _c4err(
"internal error");
// End-of-input check; the action taken on it is not visible in this listing.
8222 if(_finished_file())
// Tell the handler that parsing is over.
8227 m_evt_handler->finish_parse();
// Top-level parse driver, a member template parameterized on EventHandler.
// NOTE(review): this listing omits the signature line, the braces and several
// statements. Presumably this is parse_in_place_ev(csubstr filename,
// substr src), since the BLCK and USTY states are dispatched here -- confirm
// against the original header.
8233 template<
class EventHandler>
// Precondition: the handler's state stack must hold at least one entry.
8236 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Tell the handler that parsing begins; it receives the file name, the
// address of _s_relocate_arena and this parser object.
8240 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8241 m_evt_handler->begin_stream();
// Outer loop: keeps going until _finished_file() reports true.
8242 while( ! _finished_file())
// Inner loop: keeps going until _finished_line() reports true.
8245 while( ! _finished_line())
// The unparsed remainder of the current line must be non-empty here.
8248 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// The enclosing branch is not visible; at this point the RMAP flags must all be set.
8259 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// Branch taken when any BLCK flag is set. NOTE(review): the nested conditions
// that choose between the seq and map handlers are missing from this listing.
8268 else if(has_any(
BLCK))
8272 _handle_seq_block();
// The map handler is only invoked with all RMAP flags set.
8276 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8277 _handle_map_block();
// Remaining state branches; their bodies are not visible in this listing.
8280 else if(has_any(
RUNK))
8284 else if(has_any(
USTY))
// NOTE(review): presumably the fallback taken when no state flag matched.
8290 _c4err(
"internal error");
// End-of-input check; the action taken on it is not visible in this listing.
8293 if(_finished_file())
// Tell the handler that parsing is over.
8298 m_evt_handler->finish_parse();
8307 #undef _c4dbgnextline
8309 #if defined(_MSC_VER)
8310 # pragma warning(pop)
8311 #elif defined(__clang__)
8312 # pragma clang diagnostic pop
8313 #elif defined(__GNUC__)
8314 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
@ NOTYPE
no node type or style is set
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
size_t to_chars(substr buf, uint8_t v) noexcept
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark