301 , m_prev_val_end(
npos)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
307 RYML_CHECK_BASIC_(evt_handler);
// Constructor that initialises this engine from another one (`that`).
// The signature line is not visible in this excerpt.
// The listed members are taken from `that`, except m_prev_val_end, which
// starts at npos. The newline-offsets pointer, size and capacity are copied
// as plain values (no new buffer is allocated here), so both objects refer to
// the same buffer unless an elided statement resets `that`.
// NOTE(review): presumably the move constructor -- confirm against the class
// declaration.
310template<
class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
320 , m_prev_val_end(
npos)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
// Constructor that initialises this engine from another one (`that`) and
// gives it its own copy of the newline offsets.
// The signature line is not visible in this excerpt.
// NOTE(review): presumably the copy constructor -- confirm against the class
// declaration.
329template<
class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
339 , m_prev_val_end(
npos)
// The newline-offsets members start value-initialised; the body below fills them.
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
// Only when the source has capacity: resize to the same capacity, check the
// capacities match, then copy `that.m_newline_offsets_size` entries.
345 if(that.m_newline_offsets_capacity)
347 _resize_locations(that.m_newline_offsets_capacity);
348 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
// Assignment from another engine (`that`) that copies every listed member by
// value, including the newline-offsets pointer, size and capacity.
// The signature line is not visible in this excerpt.
// No buffer is allocated or copied in the visible lines, so afterwards both
// objects refer to the same newline-offsets buffer unless an elided statement
// changes that.
// NOTE(review): presumably the move assignment -- confirm. Whether the
// previously held buffer is released first is not visible here.
354template<
class EventHandler>
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// Assignment from another engine (`that`) that copies the scalar members and
// then copies the contents of the newline-offsets buffer into this object's
// own buffer, growing it first when it is too small.
// The signature line is not visible in this excerpt.
// NOTE(review): presumably the copy assignment -- confirm.
375template<
class EventHandler>
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
// Grow only when the source capacity exceeds ours; an existing larger buffer is reused.
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): in the visible lines this memcpy is not guarded by a capacity
// check, unlike the constructor that deep-copies the offsets. If both buffers
// are null and the size is 0, null pointers are passed to memcpy -- confirm
// whether an enclosing guard exists on an elided line.
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
// Clears members to their empty values: pending anchors, the directive flags,
// m_prev_val_end (back to npos) and the newline-offsets pointer, size and
// capacity.
// The newline-offsets buffer is only forgotten here, not released; releasing
// is done by _free(). Other members may be cleared on elided lines.
401template<
class EventHandler>
402void ParseEngine<EventHandler>::_clr()
406 m_pending_anchors = {};
408 m_has_directives_yaml =
false;
409 m_has_directives =
false;
412 m_prev_val_end =
npos;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
// Releases the newline-offsets buffer, if there is one, through the event
// handler's callbacks, then resets the pointer to null and the size and
// capacity to zero.
419template<
class EventHandler>
420void ParseEngine<EventHandler>::_free()
422 if(m_newline_offsets)
424 RYML_CB_FREE_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
425 m_newline_offsets =
nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
// Resets parsing state: pending anchors, the directive flags and
// m_prev_val_end (back to npos). Other members may be reset on elided lines.
// When the options enable locations, _prepare_locations() is called.
434template<
class EventHandler>
435void ParseEngine<EventHandler>::_reset()
437 m_pending_anchors = {};
439 m_has_directives_yaml =
false;
440 m_has_directives =
false;
443 m_prev_val_end =
npos;
447 if(m_options.locations())
449 _prepare_locations();
// Re-points strings that refer into `prev_arena` so they refer to the same
// offset inside `next_arena`.
// The local macro _ryml_relocate(s) does the work for one string: when s.str
// lies within [pb, pe] (both ends inclusive) it becomes
// next_arena.str + (s.str - pb). The macro is undefined again at the end.
// The visible loops walk the pending tags, the pending anchors, a collection
// named `tds` and the event handler's tag cache entries; the loop bodies are
// elided in this excerpt.
// @param prev_arena  the arena the strings currently point into
// @param next_arena  the arena they should point into afterwards
// @param other       an extra string; how it is used is on elided lines
456template<
class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(
csubstr prev_arena,
substr next_arena,
substr *other)
459 _c4dbgp(
"relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
465 (s).str = next_arena.str + ((s).str - pb); \
474 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
479 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
486 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
504 #undef _ryml_relocate
// Allocates `len` characters from the event handler's arena and relocates
// existing strings if the allocation moved the arena.
// The signature line is not visible in this excerpt.
// The arena is sampled before and after the allocation; when its base pointer
// changed, _relocate_arena(prev, curr, other) is called.
508template<
class EventHandler>
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
// Writes a description of the current parse position through `dumpfn`.
// The signature line is not visible in this excerpt.
// NOTE(review): presumably this is _fmt_msg, which is called elsewhere in the
// file with a dumper argument -- confirm.
// Output, in order: the file name, "line:col: ", at most the first 80
// characters of the line contents in escaped form with the total size, a
// marker line made of spaces, one '^' and up to 79 '~' followed by the
// column range, and finally the flags of the top parser state.
524template<
class EventHandler>
525template<
class DumpFn>
528 ParserState const *
const C4_RESTRICT st = m_evt_handler->m_curr;
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
538 dbg_dump_(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
// Track how many characters were printed before the contents, so the marker
// line can be aligned underneath.
539 offs += m_file.len + 1;
541 dbg_dump_(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// Show at most 80 characters of the contents.
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
544 dbg_dump_(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n",
escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
// Zero-based columns of the unparsed remainder within the full line.
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
550 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(
" ");
552 std::forward<DumpFn>(dumpfn)(
"^");
553 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)(
"~");
// Columns are reported one-based.
555 dbg_dump_(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
559 std::forward<DumpFn>(dumpfn)(
"\n");
564 dbg_dump_(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Debug helper that prints one line per parser state `s`: its level,
// reference indentation, node id and flags (formatted into `buf`).
// The signature line and the enclosing loop are not visible in this excerpt.
568template<
class EventHandler>
574 dbg_printf_(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
// Debug helper that calls _print_state_stack() with a buffer named `buf`.
// The signature line and the declaration of `buf` are not visible in this
// excerpt.
578template<
class EventHandler>
582 _print_state_stack(buf);
// Reports a parse error with both a C++ source location and a YAML location.
// Declared C4_NORETURN. The visible part tells the event handler to cancel the
// parse; the statement that raises the error is elided in this excerpt.
// @param cpploc  location in the C++ source
// @param ymlloc  location in the YAML input
// @param fmt     format string for the message
// @param args    arguments for the format string
589template<
class EventHandler>
590template<
class ...Args>
591C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
Location const& ymlloc,
const char* fmt, Args
const& ...args)
const
593 m_evt_handler->cancel_parse();
// Reports a parse error at the current parse position.
// Declared C4_NORETURN. Tells the event handler to cancel the parse, then
// calls err_parse() with the handler's callbacks, an ErrorDataParse built from
// `cpploc` and the current position, and the formatted message.
// @param cpploc  location in the C++ source
// @param fmt     format string for the message
// @param args    arguments for the format string
597template<
class EventHandler>
598template<
class ...Args>
599C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
const char *fmt, Args
const& ...args)
const
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks,
ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
// Debug helper: prints the formatted message with dbg_printf_(), then the
// position description via _fmt_msg(dbg_dumper_).
// The signature line is not visible in this excerpt.
608template<
class EventHandler>
609template<
class ...Args>
614 dbg_printf_(fmt, args...);
616 _fmt_msg(dbg_dumper_);
// Computes whether the current offset has reached or passed the end of the
// buffer, and logs a debug message (the condition around the log and the
// return statement are elided in this excerpt).
623template<
class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file()
const
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
630 _c4dbgp(
"finished file!!!");
// Returns true when nothing is left to parse on the current line, i.e. the
// remainder of the line contents is empty.
636template<
class EventHandler>
637C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
639 return m_evt_handler->m_curr->line_contents.rem.empty();
// Advances past leading whitespace on the current line, if there is any.
// The check looks at the first remaining character: a space always counts,
// and the tab comparison is wrapped in RYML_WITH_TAB_TOKENS_.
// The statement computing `pos` is elided; the visible assignment sets it to
// the full remaining length (presumably the fallback when the line holds
// nothing but whitespace -- confirm).
645template<
class EventHandler>
646void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
648 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' RYML_WITH_TAB_TOKENS_(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
652 pos = m_evt_handler->m_curr->line_contents.rem.len;
653 _c4dbgpf(
"skip {} whitespace characters", pos);
654 _line_progressed(pos);
// Advances past a run of the character `c` at the start of the remaining
// line, if the line starts with it.
// `pos` is the index of the first character that is not `c`; the visible
// reassignment sets it to the full remaining length (its condition is elided,
// presumably the case where no other character was found -- confirm).
// @param c  the character to skip
658template<
class EventHandler>
659void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
661 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
663 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
665 pos = m_evt_handler->m_curr->line_contents.rem.len;
666 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
667 _line_progressed(pos);
// Advances past a leading run of characters drawn from `chars`.
// Unlike _maybe_skipchars(), the remaining line is asserted to begin with one
// of those characters.
// The second template parameter line (declaring N) is elided in this excerpt.
// `pos` is the index of the first character not in `chars`; the visible
// reassignment sets it to the full remaining length (its condition is elided).
// @param chars  the set of characters to skip, as a character array
671template<
class EventHandler>
673void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
675 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
676 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
678 pos = m_evt_handler->m_curr->line_contents.rem.len;
679 _c4dbgpf(
"skip {} characters", pos);
680 _line_progressed(pos);
// Consumes a comment that starts at the current position and runs to the end
// of the line.
// Asserted on entry: the remainder begins with '#', the remainder is a
// sub-range of the full line, the column is at least 1, and the zero-based
// column equals the remainder's offset within the full line.
// A comment that does not start the line must be preceded by a space or a
// tab; otherwise an error is raised.
683template<
class EventHandler>
684void ParseEngine<EventHandler>::_skip_comment()
686 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
// pos.col is one-based; `col` is the zero-based index into lc.full.
687 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
688 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
689 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
690 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
691 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
// Only check the preceding character when the '#' is not at the line start.
693 if(lc.rem.str != lc.full.str)
695 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
696 const char prev = lc.full.str[col - 1u];
697 if C4_UNLIKELY(prev !=
' ' && prev !=
'\t')
698 _c4err(
"comment not preceded by whitespace");
700 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
// Consume everything that is left on the line.
701 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs for a '#'; when one is found, advances
// by the number of blanks in front of it.
// The handling of the "no non-blank character" case and whatever follows the
// advance are elided in this excerpt.
704template<class EventHandler>
705void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
707 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
710 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
712 _line_progressed(pos);
// Looks past leading spaces and tabs for a '#'; when one is found, advances
// by the number of blanks in front of it.
// A separate visible statement consumes the whole remainder of the line; the
// branch it belongs to is elided in this excerpt (presumably the case where
// the line holds only blanks -- confirm).
718template<
class EventHandler>
719void ParseEngine<EventHandler>::_maybe_skip_comment()
721 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
724 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
726 _line_progressed(pos);
732 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs for a ':' on the current line.
// When a colon is found and another character follows it, that character is
// read into `next`; what is done with it, and the return values, are elided
// in this excerpt. Two advances are visible: one by `pos` and one over the
// whole remainder of the line.
736template<
class EventHandler>
737bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
739 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
742 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
// Step past the colon; only peek when a character follows it on this line.
746 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
748 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
754 _line_progressed(pos);
760 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans an anchor starting at the current position.
// The remainder is asserted to begin with '&'. The line then advances by one
// character plus the anchor's length, and any following spaces are skipped.
// The statement extracting `anchor` and the return are elided in this excerpt.
768template<
class EventHandler>
769csubstr ParseEngine<EventHandler>::_scan_anchor()
771 csubstr s = m_evt_handler->m_curr->line_contents.rem;
772 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
774 _line_progressed(1u + anchor.len);
775 _maybe_skipchars(
' ');
// Scans a reference ('*...') starting at the current position, for use inside
// a sequence.
// The remainder is asserted to begin with '*'. The token is cut at the first
// space, ',', ']' or tab, and the line advances by the token's length.
// The return statement is elided in this excerpt.
779template<
class EventHandler>
780csubstr ParseEngine<EventHandler>::_scan_ref_seq()
782 csubstr s = m_evt_handler->m_curr->line_contents.rem;
783 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
784 _set_first(s, s.first_of(
" ,]\t"));
785 _line_progressed(s.len);
// Scans a reference ('*...') starting at the current position, for use inside
// a map.
// The remainder is asserted to begin with '*'. The token is cut at the first
// space, ',', '}' or tab, and the line advances by the token's length.
// The return statement is elided in this excerpt.
789template<
class EventHandler>
790csubstr ParseEngine<EventHandler>::_scan_ref_map()
792 csubstr s = m_evt_handler->m_curr->line_contents.rem;
793 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
794 _set_first(s, s.first_of(
" ,}\t"));
795 _line_progressed(s.len);
// Scans a tag starting at the current position.
// The remainder is asserted to begin with '!'. Two forms are handled:
//  - not starting with "!<": the tag is cut at the first space, ',', ']',
//    '}' or tab. A tag that contains '[' or '{' takes an unlikely branch whose
//    body is elided. After advancing, an options check decides whether tag
//    resolution applies (the resolution code itself is elided).
//  - starting with "!<": the tag runs up to and including the closing '>'. A
//    missing '>' takes an unlikely branch whose body is elided.
// Trailing whitespace is skipped afterwards. The return is elided.
799template<
class EventHandler>
800csubstr ParseEngine<EventHandler>::_scan_tag()
802 csubstr t = m_evt_handler->m_curr->line_contents.rem;
803 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
804 if(!t.begins_with(
"!<"))
806 _c4dbgp(
"begins with '!'");
807 _set_first(t, t.first_of(
" ,]}\t"));
808 if C4_UNLIKELY(t.first_of(
"[{") !=
npos)
810 _line_progressed(t.len);
811 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
816 _c4dbgp(
"begins with '!<'");
817 size_t pos = t.find(
'>');
818 if C4_UNLIKELY(pos ==
npos)
// Keep everything up to and including the '>'.
820 _set_first_strict(t, pos+1);
821 _line_progressed(t.len);
824 _maybe_skip_whitespace_tokens();
// A second tag scanner with the same shape as _scan_tag().
// The signature line is not visible in this excerpt.
// The visible difference: a tag that does not start with "!<" is cut at the
// first space, ',' or tab only, so ']' and '}' do not end it.
// As in _scan_tag(), the bodies of the unlikely branches, the tag resolution
// code and the return are elided.
828template<
class EventHandler>
831 csubstr t = m_evt_handler->m_curr->line_contents.rem;
832 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
833 if(!t.begins_with(
"!<"))
835 _c4dbgp(
"begins with '!'");
836 _set_first(t, t.first_of(
" ,\t"));
837 if C4_UNLIKELY(t.first_of(
"[{") !=
npos)
839 _line_progressed(t.len);
841 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
846 _c4dbgp(
"begins with '!<'");
847 size_t pos = t.find(
'>');
848 if C4_UNLIKELY(pos ==
npos)
// Keep everything up to and including the '>'.
850 _set_first_strict(t, pos+1);
851 _line_progressed(t.len);
855 _maybe_skip_whitespace_tokens();
// Helper for _is_valid_start_scalar_plain_flow(), used when the candidate
// starts with ':' or '-'.
// Asserted on entry: `s` is non-empty, begins with ':' or '-', and contains
// neither '\n' nor '\r'.
// The decision logic is elided in this excerpt; the visible outcomes are a
// "not a scalar" debug message and an "invalid token" error.
// NOTE(review): the debug message reads s.str[1] while only s.len > 0 is
// asserted in the visible lines -- confirm a length check guards it.
// @param s  the text at the start of the candidate scalar
862template<
class EventHandler>
863bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(
csubstr s)
865 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
866 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
867 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
868 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
880 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
890 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
// Helper for _is_valid_start_scalar_plain_flow(), used when the candidate
// starts with '?'.
// Asserted on entry: `s` is non-empty, its first character is '?', and it
// contains neither '\n' nor '\r'.
// The decision logic is elided in this excerpt; the visible outcomes are a
// "not a scalar" debug message and an "invalid token" error.
// NOTE(review): both messages read s.str[1] while only s.len > 0 is asserted
// in the visible lines -- confirm a length check guards them.
// @param s  the text at the start of the candidate scalar
905template<
class EventHandler>
906bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(
csubstr s)
908 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
909 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
910 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
911 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
918 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
924 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Decides whether `s` can start a plain scalar in flow context.
// `s` is asserted to be non-empty. The dispatch on the first character is
// elided in this excerpt; the visible outcomes are:
//  - a "not a scalar" debug message for a non-scalar token,
//  - delegation to _is_valid_start_scalar_plain_flow_check_block_token(),
//  - delegation to _is_valid_start_scalar_plain_flow_check_qmrk().
// @param s  the text at the start of the candidate scalar
938template<
class EventHandler>
939bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(
csubstr s)
941 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
957 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
962 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
963 return _is_valid_start_scalar_plain_flow_check_block_token(s);
965 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
966 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
// Called by the flow plain-scalar scanners when they meet a newline at offset
// `offs` of `s`; inspects the following line.
// Visible steps (return statements and several conditions are elided):
//  - measure the next line's indentation as the index of its first non-space
//    character, and trim the line at its first newline character;
//  - indentation at or above the current reference indentation is the likely
//    case and the indentation is stripped from the line;
//  - smaller indentation on a line that still has non-space content is logged
//    as an error case, after consuming the rest of the current line;
//  - the first non-blank character of the next line is examined: ',', ']' and
//    '#' are logged as terminating characters, and a ':' also terminates when
//    it is the last character of the line or starts a block token.
// @param s     the buffer being scanned, starting at the scalar
// @param offs  offset of the newline within `s`
974template<
class EventHandler>
975bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(
csubstr s,
size_t offs)
977 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, prs_(s.first(offs),
true));
980 _c4dbgp(
"newl[PLAIN]: buffer continues");
982 size_t next_line_indentation = next_line.
first_not_of(
' ');
983 if(next_line_indentation !=
npos)
985 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
// Restrict next_line to a single line.
986 next_line = next_line.first(next_line.first_of(
"\n\r"));
987 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", prs_(next_line));
988 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
989 if C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref)
991 _c4dbgp(
"newl[PLAIN]: larger indentation");
992 next_line = next_line.sub(next_line_indentation);
994 else if C4_UNLIKELY(next_line.len && next_line.triml(
' ').len)
996 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
997 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1000 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
1001 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1004 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1007 size_t fno = next_line.first_not_of(
" \t");
1011 switch(next_line.str[fno])
1013 case ',':
case ']':
case '#':
1014 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1017 _c4dbgp(
"newl[PLAIN]: found :");
1018 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1020 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1029 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans a plain scalar that is a value inside a flow sequence, writing the
// result into `sc`.
// Asserted state on entry: not in a map and not in block mode; in a sequence
// (RSEQ or RSEQIMAP), in flow mode and expecting a value. The remaining line
// must not begin with a space or a newline.
// The scan walks the buffer from the current offset, so the scalar may span
// several lines. Many branch conditions and the return statements are elided
// in this excerpt. What the visible lines show:
//  - an empty remainder or an invalid start ends the scan early;
//  - a terminating character ends the scalar (asserted not to be at offset 0);
//  - a newline is handed to _scan_scalar_plain_handle_newline(), and the
//    scalar is marked as needing filtering in the visible newline-related
//    branches;
//  - a ':' followed by a space, ',', '\n' or ']' (or a tab, when
//    RYML_WITH_TAB_TOKENS_ expands it) is logged as "map starting!"; a '\r' is
//    skipped before that comparison;
//  - a '#' is examined together with the character before it;
//  - error paths report "missing termination" and "invalid character";
//  - a document-token character at offset 0 and at line beginning is checked
//    for being repeated three times.
// @param sc  receives the scalar and its needs_filter flag
1035template<
class EventHandler>
1036bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1038 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1039 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1040 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1041 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1042 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1044 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1045 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1047 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// Scan over the whole remaining buffer, not just the current line.
1050 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1051 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1053 _c4dbgp(
"scanning seqflow scalar...");
1055 bool needs_filter =
false;
// `offs` indexes into the buffer; `col` advances with it in this loop.
1058 for( ; offs < s.len; ++offs, ++col)
1060 const char c = s.str[offs];
1065 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1066 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1069 _c4dbgpf(
"found '\\n' at col={}", col);
1070 if(!_scan_scalar_plain_handle_newline(s, offs))
1073 needs_filter =
true;
1077 needs_filter =
true;
1080 _c4dbgp(
"found suspicious ':'");
1081 if(s.len > offs + 1)
1083 char next = s.str[offs + 1];
1084 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1090 next = after.str[0];
1091 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1095 if(next ==
' ' RYML_WITH_TAB_TOKENS_(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1097 _c4dbgp(
"map starting!");
1102 _c4dbgp(
"':' nothing to see here");
1107 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1108 _line_progressed(col);
1109 _c4err(
"missing termination: '{}'", c);
1114 _c4dbgp(
"found suspicious '#'");
1115 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1116 char prev = s.str[offs - 1];
1119 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1127 _line_progressed(col);
1128 _c4err(
"invalid character: '{}'", c);
1131 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1132 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1134 _c4dbgp(
"at line beginning");
1135 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1148 _line_progressed(col);
// Cut the scanned text at the offset where the scan stopped.
1149 _set_first(s, offs);
1151 sc->needs_filter = needs_filter;
1153 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain scalar that is a key or value inside a flow map, writing the
// result into `sc`.
// Asserted state on entry: either not in a sequence or in RSEQIMAP; not in
// block mode; in RMAP or RSEQIMAP; in flow mode; and expecting a key, a value
// or a '?' key. The remaining line must not begin with a space or a newline.
// The scan walks the buffer from the current offset, so the scalar may span
// several lines. Many branch conditions are elided in this excerpt. What the
// visible lines show:
//  - an empty remainder or an invalid start ends the scan early;
//  - a terminating character ends the scalar (asserted not to be at offset 0);
//  - a newline is handed to _scan_scalar_plain_handle_newline(), and the
//    scalar is marked as needing filtering in the visible newline-related
//    branches;
//  - a ':' followed by a space, ',', '}', '\n' or '\r' (or a tab, when
//    RYML_WITH_TAB_TOKENS_ expands it) terminates the scalar;
//  - an error path reports "invalid character".
// @param sc  receives the scalar and its needs_filter flag
// @return    true when the scanned scalar is non-empty (final statement)
1158template<
class EventHandler>
1159bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1161 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1162 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1163 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1164 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1165 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1167 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1168 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1170 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// Scan over the whole remaining buffer, not just the current line.
1173 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1174 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1176 _c4dbgp(
"scanning mapflow scalar...");
1178 bool needs_filter =
false;
// `offs` indexes into the buffer; `col` advances with it in this loop.
1181 for( ; offs < s.len; ++offs, ++col)
1183 const char c = s.str[offs];
1188 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1189 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1192 _c4dbgpf(
"found '\\n' at col={}", col);
1193 if(!_scan_scalar_plain_handle_newline(s, offs))
1196 needs_filter =
true;
1200 needs_filter =
true;
// NOTE(review): this format string has no placeholder for its `c` argument.
1203 _c4dbgpf(
"found ':'", c);
// NOTE(review): the bounds check for offs+1 is not visible in this excerpt --
// confirm one exists on the elided lines.
1207 const char next = s.str[offs+1];
// NOTE(review): the label says "next" but the value printed is `c`; this looks
// like it should print `next` -- confirm.
1208 _c4dbgpf(
"next='{}'", c);
1209 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' RYML_WITH_TAB_TOKENS_(|| next ==
'\t'))
1211 _c4dbgpf(
"found terminating character: '{}'", c);
1218 _line_progressed(col);
1219 _c4err(
"invalid character: '{}'", c);
1222 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1231 _line_progressed(col);
1234 sc->needs_filter = needs_filter;
1236 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1238 return sc->scalar.len > 0u;
// Scans an unquoted scalar inside a flow sequence in JSON-style input, writing
// the result into `sc`.
// Asserted state on entry: not in a map, not in block mode, in a sequence and
// in flow mode; the remaining line is non-empty and does not begin with a
// space. Only the current line is scanned.
// Two paths are visible (their conditions and the returns are elided):
//  - special scalar: _begins_with_special_json_scalar() gives a length; the
//    scalar is accepted when the next character is ',', ']', a space, '\n',
//    '\t' or '\r'. Running out of line counts as ','.
//  - general case: characters are walked until a terminating character, and
//    the text before it becomes the scalar.
// In both paths needs_filter is set to false.
// @param sc  receives the scalar and its needs_filter flag
1241template<
class EventHandler>
1242bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1244 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1245 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1246 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1247 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1249 substr s = m_evt_handler->m_curr->line_contents.rem;
1250 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1251 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1253 _c4dbgp(
"seq_json: scanning scalar...");
1260 _c4dbgp(
"seq_json: not a scalar.");
1265 const size_t len = _begins_with_special_json_scalar(s);
// End of line after the special scalar is treated like a ',' terminator.
1268 char c = s.len > len ? s.str[len] :
',';
1269 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1271 sc->scalar = s.first(len);
1272 sc->needs_filter =
false;
1273 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1274 _line_progressed(len);
1286 for( ; i < s.len; ++i)
1288 const char c = s.str[i];
1295 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
1304 _line_progressed(i);
1305 sc->scalar = s.first(i);
1306 sc->needs_filter =
false;
1307 _c4dbgpf(
"seq_json: scalar was {}", prs_(sc->scalar,
true));
// Scans an unquoted scalar inside a flow map in JSON-style input, writing the
// result into `sc`.
// Asserted state on entry: not in a sequence, not in block mode, in a map, in
// flow mode and expecting a key or a value; the remaining line is non-empty
// and does not begin with a space. Only the current line is scanned.
// Two paths are visible (their conditions and the returns are elided):
//  - special scalar: _begins_with_special_json_scalar() gives a length; the
//    scalar is accepted when the next character is ',', '}', a space, '\n',
//    '\t' or '\r'. Running out of line counts as ','.
//  - general case: characters are walked until a terminating character, and
//    the text before it becomes the scalar.
// In both paths needs_filter is set to false.
// @param sc  receives the scalar and its needs_filter flag
1312template<
class EventHandler>
1313bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1315 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1316 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1317 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1318 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1319 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1321 substr s = m_evt_handler->m_curr->line_contents.rem;
1322 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1323 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1325 _c4dbgp(
"scanning scalar...");
1328 const size_t len = _begins_with_special_json_scalar(s);
// End of line after the special scalar is treated like a ',' terminator.
1331 char c = s.len > len ? s.str[len] :
',';
1332 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1333 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1335 sc->scalar = s.first(len);
1336 sc->needs_filter =
false;
1337 _c4dbgpf(
"special json scalar: '{}'", prs_(sc->scalar));
1338 _line_progressed(len);
1350 for( ; i < s.len; ++i)
1352 const char c = s.str[i];
1359 _c4dbgpf(
"found terminating character: '{}'", c);
1370 _line_progressed(i);
1371 sc->scalar = s.first(i);
1372 sc->needs_filter =
false;
1373 _c4dbgpf(
"scalar was {}", prs_(sc->scalar));
// Returns true when `s` is a document-begin token at the very start of an
// unindented line: the line's indentation is 0, the parser is at the line
// beginning, and _is_doc_begin_token(s) holds.
// `s` is asserted to start with '-'.
// @param s  the text at the current position
1380template<
class EventHandler>
1381bool ParseEngine<EventHandler>::_is_doc_begin(
csubstr s)
1383 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1384 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
// Returns true when `s` is a document-end token at the very start of an
// unindented line: the line's indentation is 0, the parser is at the line
// beginning, and _is_doc_end_token(s) holds.
// `s` is asserted to start with '.'.
// @param s  the text at the current position
1387template<
class EventHandler>
1388bool ParseEngine<EventHandler>::_is_doc_end(
csubstr s)
1390 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1391 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scans a plain scalar in block context, possibly across several lines,
// writing the result into `sc`.
// Asserted state on entry: not in flow mode, not in RSEQIMAP, and in one of
// RBLCK, RUNK or USTY; the remaining line is non-empty and does not begin with
// a space.
// Many conditions and the return statements are elided in this excerpt. What
// the visible lines show:
//  - up front, block tokens and document start/end tokens are recognised
//    before any scalar is scanned;
//  - within a line, a suspicious ':' can end the scalar. On the starting line
//    the scalar ends there; the visible error is
//    "multiline scalars cannot be used as keys". Runs of consecutive ':' are
//    skipped;
//  - a '#' at the line start, or preceded by a space or tab, starts a comment
//    and ends the scalar;
//  - at the end of a line the next line is peeked: smaller indentation ends
//    the scalar, and an unindented line is checked for document begin/end
//    tokens. Otherwise the scan continues on the next line and the scalar is
//    marked as needing filtering.
// The scalar is the buffer range from the starting offset to the current
// offset, with trailing spaces, newlines and tabs trimmed.
// @param sc           receives the scalar and its needs_filter flag
// @param indentation  indentation that continuation lines are compared against
1394template<
class EventHandler>
1395bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1397 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1398 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1399 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
1401 substr s = m_evt_handler->m_curr->line_contents.rem;
1402 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1403 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1408 if(_is_blck_token(s))
1412 else if(_is_doc_begin(s))
1414 _c4dbgp(
"token is doc start");
1420 if(_is_blck_token(s))
1435 _c4dbgp(
"token is doc end");
1441 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Remember where the scalar started, to slice it out of the buffer at the end
// and to tell the first line apart from continuation lines.
1443 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1444 const size_t start_line = m_evt_handler->m_curr->pos.line;
1446 bool needs_filter =
false;
1449 _c4dbgpf(
"plain scalar line: {}", prs_(s));
1450 for(
size_t i = 0; i < s.len; ++i)
1452 const char curr = s.str[i];
1457 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1461 _c4dbgpf(
"followed by '{}'", i+1 == s.len ?
csubstr(
"\\n") : _c4prc(s.str[i+1]));
1462 _line_progressed(i);
1464 if C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line)
1466 _c4dbgp(
"start line. scalar ends here");
1471 _c4err(
"multiline scalars cannot be used as keys");
1477 while(j + 1 < s.len && s.str[j+1] ==
':')
1479 _c4dbgp(
"skip colon");
// Resume after the skipped colons; the loop's ++i steps onto index j.
1482 i = j > i ? j-1 : i;
1483 _c4dbgp(
"nothing to see here");
1487 _c4dbgp(
"got suspicious '#'");
1488 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1490 _c4dbgp(
"comment! scalar ends here");
1491 _line_progressed(i);
1496 _c4dbgp(
"nothing to see here");
// The current line is exhausted: consume it and look at the next one.
1501 _line_progressed(s.len);
1502 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1503 next_peeked = next_peeked.trimr(
"\n\r");
1504 const size_t next_indentation = next_peeked.first_not_of(
' ');
1505 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1506 if(next_indentation < indentation)
1508 _c4dbgp(
"smaller indentation! scalar ended");
1511 else if(next_indentation == 0 && next_peeked.len > 0)
1513 const char first = next_peeked.str[0];
1517 _c4dbgpf(
"doc begin? peeked={}", prs_(next_peeked,
size_t(3)));
1518 if(_is_doc_begin_token(next_peeked))
1520 _c4dbgp(
"doc begin! scalar ended");
1525 _c4dbgpf(
"doc end? peeked={}", prs_(next_peeked,
size_t(3)));
1526 if(_is_doc_end_token(next_peeked))
1528 _c4dbgp(
"doc end! scalar ended");
1535 _c4dbgp(
"next line!");
1536 if(!_finished_file())
1538 _c4dbgp(
"next line!");
1544 _c4dbgp(
"file finished!");
// Continue on the following line; a scalar that spans lines needs filtering.
1547 s = m_evt_handler->m_curr->line_contents.rem;
1548 needs_filter =
true;
1553 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1554 sc->needs_filter = needs_filter;
1556 _c4dbgpf(
"scalar was {}", prs_(sc->scalar));
// Scans a plain scalar that is a value inside a block sequence.
// Asserted state on entry: not in a map, not in flow mode, not in RSEQIMAP;
// in a sequence, in block mode and expecting a value.
// Delegates to _scan_scalar_plain_blck() with an indentation one greater than
// the current reference indentation.
// @param sc  receives the scalar and its needs_filter flag
// @return    whatever _scan_scalar_plain_blck() returns
1561template<
class EventHandler>
1562C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1564 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1565 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1566 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1567 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1568 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1569 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1570 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar that is a key or value inside a block map.
// Asserted state on entry: not in a sequence, not in flow mode; in a map, in
// block mode and expecting a key, a value or a '?' key.
// Delegates to _scan_scalar_plain_blck() with an indentation one greater than
// the current reference indentation.
// @param sc  receives the scalar and its needs_filter flag
// @return    whatever _scan_scalar_plain_blck() returns
1573template<
class EventHandler>
1574C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1576 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1577 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1578 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1579 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1580 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1581 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar when the container type is not yet known.
// Asserted state on entry: RUNK or USTY.
// Delegates to _scan_scalar_plain_blck() with the current reference
// indentation itself (no +1, unlike the seq/map block variants).
// @param sc  receives the scalar and its needs_filter flag
// @return    whatever _scan_scalar_plain_blck() returns
1584template<
class EventHandler>
1585C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1587 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1588 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns a view of the line that follows the one containing offset `pos`,
// without changing the parse position (the function is const).
// When `pos` is npos, the current offset is used. An offset at or beyond the
// end of the buffer takes an early path whose body is elided.
// Otherwise the text after the next line break is taken, cut after its own
// first line break (extended for a combined two-character newline). The
// returns and some conditions are elided in this excerpt.
// @param pos  buffer offset to start from, or npos for the current offset
1594template<
class EventHandler>
1595substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1599 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1600 if(pos >= _buf().len)
1604 rem = _from_next_line(_buf().sub(pos));
1609 nlpos = rem.first_of(
"\r\n");
1611 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1612 rem = rem.left_of(nlpos,
true);
1614 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1618 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
/** Refresh the current state's line_contents from the buffer.
 *
 * When the current offset is still inside the buffer (the case marked as
 * likely), line_contents is reset with the line found at that offset.
 * The other visible call resets it from `_buf().last(0)` at offset 0.
 *
 * NOTE(review): presumably the second call is the else-branch taken once
 * the buffer is exhausted; the branch keyword is not visible in this
 * listing -- confirm.
 */
1624template<
class EventHandler>
1625void ParseEngine<EventHandler>::_scan_line()
1627 if C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len)
1628 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1630 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1633template<
class EventHandler>
1634void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1636 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1637 m_evt_handler->m_curr->pos.line,
1638 m_evt_handler->m_curr->line_contents.full.len,
1639 ahead, m_evt_handler->m_curr->pos.col,
1640 m_evt_handler->m_curr->pos.col+ahead,
1641 m_evt_handler->m_curr->pos.offset,
1642 m_evt_handler->m_curr->pos.offset+ahead);
1643 m_evt_handler->m_curr->pos.offset += ahead;
1644 m_evt_handler->m_curr->pos.col += ahead;
1645 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1646 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1649template<
class EventHandler>
1650void ParseEngine<EventHandler>::_line_ended()
1652 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1653 m_evt_handler->m_curr->pos.line,
1654 m_evt_handler->m_curr->line_contents.full.len,
1655 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1656 m_evt_handler->m_curr->pos.col, 1);
1657 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1658 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1659 ++m_evt_handler->m_curr->pos.line;
1660 m_evt_handler->m_curr->pos.col = 1;
/** Revert the position bookkeeping performed when a line ended.
 *
 * Requires (asserted) that the column is 1, that the line counter is
 * greater than 0, and that the offset is at least
 * `full.len - num_cols` of the current line_contents.
 *
 * That same difference (`delta`) is subtracted from the offset, the line
 * counter is decremented, the column is set to num_cols + 1, and
 * line_contents.rem is replaced by `_buf().sub(offset, 0)` taken at the
 * restored offset.
 */
1663template<
class EventHandler>
1664void ParseEngine<EventHandler>::_line_ended_undo()
1666 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1667 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1668 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1669 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1670 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1671 m_evt_handler->m_curr->pos.offset -= delta;
1672 --m_evt_handler->m_curr->pos.line;
1673 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// the remainder of the line must be reverted as well
1676 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1681template<
class EventHandler>
1682void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
noexcept
1684 m_evt_handler->m_curr->indref = indentation;
1685 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1688template<
class EventHandler>
1689void ParseEngine<EventHandler>::_save_indentation()
1691 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1692 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1693 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
/** Record the current line number in m_prev_val_end.
 *
 * The debug message indicates this is used while in a flow sequence
 * ("SEQFLOW").
 */
1696template<
class EventHandler>
1697void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1699 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1700 m_prev_val_end = m_evt_handler->m_curr->pos.line;
/** Handle a flow container that turned out to be followed by a colon.
 *
 * Tells the event handler that the value just produced is actually the
 * first key of a new block map, then stores `orig_indent` as the
 * indentation reference and skips any whitespace tokens.
 *
 * @param orig_indent indentation to store with _set_indentation()
 */
1706template<
class EventHandler>
1707void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1709 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1710 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1712 _set_indentation(orig_indent);
1713 _maybe_skip_whitespace_tokens();
/** Decide how parsing continues after a flow container has been closed.
 *
 * @param orig_indent indentation handed to _flow_container_was_a_key()
 * @param multiline whether the container spanned several lines; where
 *        the container is used as a key this is a parse error
 *
 * Visible branches:
 *  - the container ends as a "vanilla block map key": a multiline
 *    container is an error, a missing ':' after it is an error, then
 *    whitespace tokens are skipped.
 *    NOTE(review): the condition selecting this branch is not visible in
 *    this listing -- confirm.
 *  - RFLOW is no longer set: when any of RUNK|RSEQ|RKCL is set and a
 *    colon follows, the container is treated as a key (a multiline one
 *    is an error).
 *  - RSEQ is set: RFLOW is asserted and the value end is marked with
 *    _mark_seqflow_val_end().
 */
1716template<
class EventHandler>
1717void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
1723 _c4dbgp(
"flow container: end as vanilla block map key!");
1724 if C4_UNLIKELY(multiline)
1725 _c4err(
"multiline key is invalid");
1726 if C4_UNLIKELY(!_maybe_scan_following_colon())
1727 _c4err(
"could not find ':' colon after key");
1728 _maybe_skip_whitespace_tokens();
1731 else if(has_none(
RFLOW))
1733 _c4dbgp(
"end_flow_container: now not in flow!");
1734 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1736 if C4_UNLIKELY(multiline)
1737 _c4err(
"multiline key is invalid");
1738 _flow_container_was_a_key(orig_indent);
1742 _c4dbgp(
"end_flow_container: end map as key!");
1745 else if(has_any(
RSEQ))
1747 _c4dbgp(
"end_flow_container: now in a flow seq");
1748 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1749 _mark_seqflow_val_end();
1753template<
class EventHandler>
1754void ParseEngine<EventHandler>::_end_map_flow()
1756 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1757 size_t orig_indent = m_evt_handler->m_curr->indref;
1758 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1759 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().m_bits);
1760 _end_flow_container(orig_indent, multiline);
1763template<
class EventHandler>
1764void ParseEngine<EventHandler>::_end_seq_flow()
1766 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1767 size_t orig_indent = m_evt_handler->m_curr->indref;
1768 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1769 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().m_bits);
1770 _end_flow_container(orig_indent, multiline);
/** Close the current block map, filling in whatever is still missing.
 *
 * Visible branches before end_map_block() is called:
 *  - "set missing val": pending annotations are handled and an empty
 *    plain scalar is set as the value.
 *    NOTE(review): the condition guarding this branch is not visible in
 *    this listing -- confirm.
 *  - QMRK is set: both an empty key and an empty value are set, each
 *    preceded by its annotation handling.
 */
1773template<
class EventHandler>
1774void ParseEngine<EventHandler>::_end_map_blck()
1776 _c4dbgp(
"mapblck: end");
1779 _c4dbgp(
"mapblck: set missing val");
1780 _handle_annotations_before_blck_val_scalar();
1781 m_evt_handler->set_val_scalar_plain_empty();
1783 else if(has_any(
QMRK))
1785 _c4dbgp(
"mapblck: set missing keyval");
1786 _handle_annotations_before_blck_key_scalar();
1787 m_evt_handler->set_key_scalar_plain_empty();
1788 _handle_annotations_before_blck_val_scalar();
1789 m_evt_handler->set_val_scalar_plain_empty();
1791 m_evt_handler->end_map_block();
/** Close the current block sequence.
 *
 * In the "set missing val" case, pending annotations are handled and an
 * empty plain scalar is set as the value; the handler is then told to
 * end the block sequence.
 *
 * NOTE(review): the condition guarding the missing-value case is not
 * visible in this listing -- confirm.
 */
1794template<
class EventHandler>
1795void ParseEngine<EventHandler>::_end_seq_blck()
1799 _c4dbgp(
"seqblck: set missing val");
1800 _handle_annotations_before_blck_val_scalar();
1801 m_evt_handler->set_val_scalar_plain_empty();
1803 m_evt_handler->end_seq_block();
/** End a map. RMAP must be set (asserted).
 *
 * The visible tail asserts that RFLOW is unset and USTY is set, and then
 * pops the handler state with _pop().
 *
 * NOTE(review): several lines between the first assertion and that tail
 * are not visible in this listing; presumably they dispatch on the kind
 * of map, so the tail may only apply to one branch -- confirm.
 */
1806template<
class EventHandler>
1807void ParseEngine<EventHandler>::_end2_map()
1809 _c4dbgp(
"map: end");
1810 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1817 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1818 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1819 m_evt_handler->_pop();
/** End a sequence. RSEQ must be set (asserted).
 *
 * The visible tail asserts that RFLOW is unset and USTY is set, and then
 * pops the handler state with _pop().
 *
 * NOTE(review): several lines between the first assertion and that tail
 * are not visible in this listing; presumably they dispatch on the kind
 * of sequence, so the tail may only apply to one branch -- confirm.
 */
1823template<
class EventHandler>
1824void ParseEngine<EventHandler>::_end2_seq()
1826 _c4dbgp(
"seq: end");
1827 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1834 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1835 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1836 m_evt_handler->_pop();
/** Begin a document.
 *
 * Clears both directive flags (m_has_directives_yaml, m_has_directives),
 * notifies the handler with begin_doc(), and resets the indentation
 * reference of the current state to 0.
 *
 * NOTE(review): two lines before the begin_doc() call are not visible in
 * this listing -- confirm what else is reset here.
 */
1840template<
class EventHandler>
1841void ParseEngine<EventHandler>::_begin2_doc()
1843 _c4dbgp(
"begin_doc");
1844 m_has_directives_yaml =
false;
1845 m_has_directives =
false;
1848 m_evt_handler->begin_doc();
1849 m_evt_handler->m_curr->indref = 0;
/** Begin an explicit document.
 *
 * Clears both directive flags (m_has_directives_yaml, m_has_directives),
 * notifies the handler with begin_doc_expl(), and resets the indentation
 * reference of the current state to 0.
 *
 * NOTE(review): two lines before the begin_doc_expl() call are not
 * visible in this listing -- confirm what else is reset here.
 */
1852template<
class EventHandler>
1853void ParseEngine<EventHandler>::_begin2_doc_expl()
1855 _c4dbgp(
"begin_doc_expl");
1856 m_has_directives_yaml =
false;
1857 m_has_directives =
false;
1860 m_evt_handler->begin_doc_expl();
1861 m_evt_handler->m_curr->indref = 0;
/** End the current document. RDOC must be set (asserted).
 *
 * When the document is empty, or when tags or anchors are still pending,
 * the annotations are handled and an empty plain scalar is set as the
 * document's value. The handler is then notified with end_doc().
 */
1864template<
class EventHandler>
1865void ParseEngine<EventHandler>::_end2_doc()
1867 _c4dbgp(
"doc: end");
1868 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1869 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1871 _c4dbgp(
"doc was empty; add empty val");
1872 _handle_annotations_before_blck_val_scalar();
1873 m_evt_handler->set_val_scalar_plain_empty();
1875 m_evt_handler->end_doc();
/** End the current document explicitly.
 *
 * When the document is empty, or when tags or anchors are still pending,
 * the annotations are handled and an empty plain scalar is set as the
 * document's value. The handler is then notified with end_doc_expl().
 * Unlike _end2_doc(), no RDOC assertion is visible here.
 */
1879template<
class EventHandler>
1880void ParseEngine<EventHandler>::_end2_doc_expl()
1882 _c4dbgp(
"doc: end");
1883 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1885 _c4dbgp(
"doc: no children; add empty val");
1886 _handle_annotations_before_blck_val_scalar();
1887 m_evt_handler->set_val_scalar_plain_empty();
1889 m_evt_handler->end_doc_expl();
/** Begin a document if one needs to be started.
 *
 * NOTE(review): only the debug message is visible in this listing; the
 * condition and the call that starts the document are not -- confirm
 * against the full source.
 */
1893template<
class EventHandler>
1894void ParseEngine<EventHandler>::_maybe_begin_doc()
1898 _c4dbgp(
"doc must be started");
/** End the current document if one needs to be finished.
 *
 * Otherwise, when the document is empty but tags or anchors are pending,
 * a whole document is emitted just to carry them: begin_doc(),
 * annotation handling, an empty plain scalar value, end_doc().
 *
 * NOTE(review): the condition and body of the "doc must be finished"
 * branch are not visible in this listing -- confirm.
 */
1902template<
class EventHandler>
1903void ParseEngine<EventHandler>::_maybe_end_doc()
1907 _c4dbgp(
"doc must be finished");
1910 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1912 _c4dbgp(
"no doc to finish, but pending annotations");
1913 m_evt_handler->begin_doc();
1914 _handle_annotations_before_blck_val_scalar();
1915 m_evt_handler->set_val_scalar_plain_empty();
1916 m_evt_handler->end_doc();
/** Pop the state stack back to its root entry.
 *
 * Requires (asserted) a non-empty stack whose entry 0 has the RDOC flag.
 * When the current level is not 0, _handle_indentation_pop() is called
 * with the address of stack entry 0. Afterwards the current state is
 * asserted to have RDOC set.
 */
1920template<
class EventHandler>
1921void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1923 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1924 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1925 _c4dbgp(
"root is RDOC");
1926 if(m_evt_handler->m_curr->level != 0)
1927 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1928 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
/** Check for a document token appearing where it is not expected.
 *
 * Three facts are gathered: whether the stack holds a single entry
 * (is_root), whether the NDOC flag is set on the current state (isndoc),
 * and whether the handler reports any of MAP|SEQ|VAL (suspicious).
 * The check triggers when the state is the root or the handler reports
 * DOC, the state is suspicious, and NDOC is not set.
 *
 * NOTE(review): the statement executed when the check triggers is not
 * visible in this listing -- presumably a parse error; confirm.
 */
1933template<
class EventHandler>
1934void ParseEngine<EventHandler>::_check_trailing_doc_token()
1936 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1937 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
1938 const bool suspicious = m_evt_handler->template has_any_<MAP|SEQ|VAL>();
1939 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1940 if((is_root || m_evt_handler->template has_any_<DOC>()) && suspicious && !isndoc)
/** End the current document abruptly.
 *
 * Starts by popping the state stack back to its root entry with
 * _end_doc_suddenly__pop().
 *
 * NOTE(review): the statements following the pop are not visible in this
 * listing -- confirm against the full source.
 */
1944template<
class EventHandler>
1945void ParseEngine<EventHandler>::_end_doc_suddenly()
1947 _c4dbgp(
"end doc suddenly");
1948 _end_doc_suddenly__pop();
/** Validate what remains on the line (const method).
 *
 * The remainder of the current line must not begin with '.', a space or
 * a tab (asserted). The check triggers when the remainder is non-empty
 * and does not begin with '#'.
 *
 * NOTE(review): the statement executed when the check triggers is not
 * visible in this listing -- presumably a parse error; confirm.
 */
1953template<
class EventHandler>
1954void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1956 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1957 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
1958 if C4_UNLIKELY(rem.len && !rem.begins_with(
'#'))
/** Start a new document abruptly.
 *
 * Starts by popping the state stack back to its root entry with
 * _end_doc_suddenly__pop().
 *
 * NOTE(review): the statements following the pop are not visible in this
 * listing -- confirm against the full source.
 */
1964template<
class EventHandler>
1965void ParseEngine<EventHandler>::_start_doc_suddenly()
1967 _c4dbgp(
"start doc suddenly");
1968 _end_doc_suddenly__pop();
/** Finish parsing the stream.
 *
 * Visible steps, in order:
 *  - unterminated flow containers are parse errors ("missing terminating
 *    ]" / "missing terminating }"); the '}' case requires RMAP|RFLOW.
 *    NOTE(review): the condition of the ']' case is not visible in this
 *    listing -- presumably RSEQ|RFLOW; confirm.
 *  - when the stack holds more than one state, it is popped back to its
 *    first entry.
 *  - when anchors or tags are still pending, a document carrying an
 *    empty plain scalar is emitted for them.
 *    NOTE(review): lines around this step are not visible; it may be
 *    nested inside another condition -- confirm.
 *  - the handler is notified with end_stream().
 *  - directives still flagged at this point are an error, since they
 *    cannot be used without a document.
 */
1973template<
class EventHandler>
1974void ParseEngine<EventHandler>::_end_stream()
1976 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1978 _c4err(
"missing terminating ]");
1979 else if C4_UNLIKELY(has_all(
RMAP|
RFLOW))
1980 _c4err(
"missing terminating }");
1981 if(m_evt_handler->m_stack.size() > 1)
1982 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1989 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1993 m_evt_handler->begin_doc();
1994 _handle_annotations_before_blck_val_scalar();
1995 m_evt_handler->set_val_scalar_plain_empty();
1996 m_evt_handler->end_doc();
2000 m_evt_handler->end_stream();
2001 if C4_UNLIKELY(m_has_directives)
2002 _c4err(
"directives cannot be used without a document");
/** Pop states until the current state is `popto`.
 *
 * @param popto the state at which popping stops; the loop runs while the
 *        handler's current state differs from it.
 *
 * The debug messages distinguish popping a seq from popping a map (the
 * latter when RMAP is set).
 *
 * NOTE(review): the calls that actually end each container are not
 * visible in this listing -- confirm against the full source.
 */
2006template<
class EventHandler>
2007void ParseEngine<EventHandler>::_handle_indentation_pop(
ParserState const* popto)
2009 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2010 while(m_evt_handler->m_curr != popto)
2014 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2017 else if(has_any(
RMAP))
2019 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2027 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
/** Pop out of a block sequence after the indentation decreased.
 *
 * The stack must be contiguous and the current state must lie inside it
 * (both asserted). Starting from the state just below the current one
 * and walking down to the first stack entry (inclusive), the states are
 * searched for one whose indentation reference equals the current line's
 * indentation.
 *
 * If no target is found, or the target is not strictly below the current
 * state (by address or by level), a parse error is raised; otherwise the
 * stack is popped to that target.
 *
 * NOTE(review): the lines assigning `popto` inside the loop are not
 * visible in this listing -- confirm.
 */
2030template<
class EventHandler>
2031void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2034 using state_type =
typename EventHandler::state;
2035 state_type
const* popto =
nullptr;
2036 auto &stack = m_evt_handler->m_stack;
2037 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2038 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2039 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2041 _print_state_stack();
2043 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2045 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2046 if(s->indref == ind)
2048 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
2053 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2055 _c4err(
"parse error: incorrect indentation?");
2057 _handle_indentation_pop(popto);
/** Pop out of a block map after the indentation decreased.
 *
 * The stack must be contiguous and the current state must lie inside it
 * (both asserted). Starting from the state just below the current one,
 * the states are searched for a pop target; unlike the block-seq
 * variant, this loop stops before the first stack entry
 * (`s > stack.begin()`).
 *
 * For a state with the same indentation as the current line, the visible
 * code singles out a top-level state (RTOP) that is neither a map nor a
 * seq, and then inspects the rest of the line: when it begins with '-'
 * and is a block token, the parent is taken to be an indentless
 * sequence.
 *
 * If no target is found, or the target is not strictly below the current
 * state (by address or by level), a parse error is raised; otherwise the
 * stack is popped to that target.
 *
 * NOTE(review): the first branch of the loop body, the assignments to
 * `popto`, and the declarations of `first` and `flagbuf_` are not visible
 * in this listing -- confirm against the full source.
 */
2060template<
class EventHandler>
2061void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2064 using state_type =
typename EventHandler::state;
2065 auto &stack = m_evt_handler->m_stack;
2066 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2067 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2068 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2069 state_type
const* popto =
nullptr;
2072 _print_state_stack(flagbuf_);
2074 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2076 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2081 else if(s->indref == ind)
2083 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
2084 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
2091 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2093 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2094 rem = rem.right_of(first,
true);
2095 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2096 if(rem.begins_with(
'-') && _is_blck_token(rem))
2098 _c4dbgp(
"parent was indentless seq");
2104 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2106 _c4err(
"parse error: incorrect indentation?");
2108 _handle_indentation_pop(popto);
/** Validate the line on which a quoted scalar continues.
 *
 * Visible checks:
 *  - multiline quoted keys are rejected.
 *    NOTE(review): the condition guarding this error is not visible in
 *    this listing -- confirm.
 *  - the minimum indentation is the current indentation reference, plus
 *    one when the state is a block container (RMAP or RSEQ, together
 *    with RBLCK). A line indented less than that is rejected with "bad
 *    indentation" unless nothing follows its indentation.
 */
2113template<
class EventHandler>
2114void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2118 _c4err(
"multiline quoted keys are invalid");
2122 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2123 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2124 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2126 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks,
2127 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2128 m_evt_handler->m_curr->pos);
2129 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
2130 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, prs_(m_evt_handler->m_curr->line_contents.rem,
true));
2131 if C4_UNLIKELY(!!trimmed.len)
2133 _c4err(
"bad indentation");
/** Scan a single-quoted scalar starting at the current position.
 *
 * The buffer at the current offset must begin with a single quote
 * (asserted). The scalar view `s` starts right after that quote, and the
 * position is advanced past it.
 *
 * Lines are then scanned until the end of the file. A document token at
 * the beginning of a line is an error. Within a line, characters are
 * examined together with their successor ('~' stands in for the
 * successor of the last character). When the closing quote is found the
 * line is advanced past it and `pos` is set to its offset relative to
 * `s`. A line without a closing quote is consumed entirely, marks the
 * scalar as needing filtering, and the following line is validated with
 * _check_valid_newline_in_quoted_scalar().
 *
 * Reaching the end of the file without a closing quote is an error.
 *
 * @return the scanned scalar, cut at `pos`, together with a flag telling
 *         whether it needs filtering
 *
 * NOTE(review): the declaration of `pos` and the per-character branch
 * conditions are not visible in this listing -- confirm.
 */
2141template<
class EventHandler>
2142ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2147 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
2150 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2151 _line_progressed(1);
2152 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2154 bool needs_filter =
false;
2156 while( ! _finished_file())
2158 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2159 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, prs_(line));
2160 if C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line))
2161 _c4err(
"token can not appear at line begin");
2162 for(
size_t i = 0; i < line.len; ++i)
2164 const char curr = line.str[i];
2167 const char next = i+1 < line.len ? line.str[i+1] :
'~';
2170 _line_progressed(i + 1);
2171 pos = i + (size_t)(line.str - s.str);
2176 needs_filter =
true;
2182 needs_filter =
true;
2183 _line_progressed(line.len);
2186 _check_valid_newline_in_quoted_scalar();
2189 _c4err(
"reached end of file while looking for closing quote");
2193 _c4dbgpf(
"found closing quote at: {}", pos);
2194 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): `pos` is assigned from size_t expressions above; if it is unsigned, this check is always true -- confirm
2195 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2196 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2197 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
2198 _set_first_strict(s, pos);
2200 _c4prscalar(
"scanned squoted scalar", s,
true);
2202 return ScannedScalar { s, needs_filter };
/** Scan a double-quoted scalar starting at the current position.
 *
 * The buffer at the current offset must begin with a double quote
 * (asserted). The scalar view `s` starts right after that quote, and the
 * position is advanced past it.
 *
 * Lines are then scanned until the end of the file. A document token at
 * the beginning of a line is an error. Within a line, the visible
 * escape-handling branch marks the scalar as needing filtering and looks
 * at the following character ('~' stands in past the end of the line),
 * treating a following '"' or '\\' specially. An unescaped '"' closes
 * the scalar: the line is advanced past it and `pos` is set to its
 * offset relative to `s`. A line without a closing quote is consumed
 * entirely, marks the scalar as needing filtering, and the following
 * line is validated with _check_valid_newline_in_quoted_scalar().
 *
 * Reaching the end of the file without a closing quote is an error.
 *
 * @return the scanned scalar, cut at `pos`, together with a flag telling
 *         whether it needs filtering
 *
 * NOTE(review): the declaration of `pos`, the condition opening the
 * escape branch and the body taken for an escaped character are not
 * visible in this listing -- confirm.
 */
2207template<
class EventHandler>
2208ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2213 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
2216 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2217 _line_progressed(1);
2218 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2220 bool needs_filter =
false;
2222 while( ! _finished_file())
// compiler-specific guard applied only when building with GCC 13
2224 #if defined(__GNUC__) && (__GNUC__ == 13)
2225 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2227 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2228 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2229 if C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem))
2230 _c4err(
"token can not appear at line begin");
2231 for(
size_t i = 0; i < rem.len; ++i)
2233 const char curr = rem.str[i];
2237 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2238 needs_filter =
true;
2239 if(next ==
'"' || next ==
'\\')
2242 else if(curr ==
'"')
2244 _line_progressed(i + 1);
2245 pos = i + (size_t)(rem.str - s.str);
2251 needs_filter =
true;
2252 _line_progressed(rem.len);
2255 _check_valid_newline_in_quoted_scalar();
2258 _c4err(
"reached end of file while looking for closing quote");
2262 _c4dbgpf(
"found closing quote at: {}", pos);
2263 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): `pos` is assigned from size_t expressions above; if it is unsigned, this check is always true -- confirm
2264 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2265 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2266 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
2267 _set_first_strict(s, pos);
2269 _c4prscalar(
"scanned dquoted scalar", s,
true);
2271 return ScannedScalar{s, needs_filter};
/** Scan a block scalar (introduced by '|' or '>').
 *
 * @param sb receives the raw block (sb->scalar) and the indentation that
 *        was determined for it (sb->indentation)
 * @param indref reference indentation; must not be npos (asserted)
 *
 * Phase 1 -- header. The remainder of the current line must begin with
 * '|' or '>' (asserted). Chomping defaults to CHOMP_CLIP; a '-' in the
 * header selects CHOMP_STRIP and a '+' selects the keep mode. An explicit
 * indentation may be given as a single decimal digit: more than one
 * digit, an unreadable number or a zero are parse errors. Without
 * digits, anything left on the line other than a properly separated
 * '#' comment is a parse error. The header is then consumed.
 *
 * Phase 2 -- lines. Starting from an empty view at the current offset,
 * lines are read until the end of the file. While the indentation is
 * unknown it is derived from the first non-empty line (a leading tab is
 * special-cased), using a provisional indentation gathered from
 * preceding empty or whitespace-only lines. Once known, a non-empty line
 * with less indentation terminates the block. Every accepted line
 * extends the raw block by its full length.
 *
 * Phase 3 -- result. If no indentation could be fixed, the provisional
 * one is used. The raw (unfiltered) block and the indentation are
 * written to `sb`.
 *
 * NOTE(review): many lines of this function (declarations of `t`, `rest`
 * and `lc`, several conditions, and all loop exits) are not visible in
 * this listing; the description above covers only what is shown --
 * confirm against the full source.
 */
2276template<
class EventHandler>
2277void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2279 _c4dbgpf(
"blck: indref={}", indref);
2280 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
2283 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2284 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2286 _c4dbgpf(
"blck: specs={}", prs_(s));
2289 BlockChomp_e chomp = CHOMP_CLIP;
2290 size_t indentation =
npos;
2293 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
2295 _c4dbgpf(
"blck: spec is multichar: {}", prs_(t));
2296 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2297 size_t pos = t.first_of(
"-+");
2298 _c4dbgpf(
"blck: spec chomp char: pos={}", pos);
2301 _c4dbgpf(
"blck: spec chomp char: {}", _c4prc(t[pos]));
2304 _c4dbgp(
"blck: chomp=STRIP");
2305 chomp = CHOMP_STRIP;
2307 else if(t[pos] ==
'+')
2309 _c4dbgp(
"blck: chomp=KEEP");
2316 _c4dbgpf(
"blck: spec is now: {}", prs_(t));
2319 pos = t.first_not_of(
"0123456789");
2323 _c4dbgpf(
"blck: parse indentation digits: {}", prs_(rest));
2324 if C4_UNLIKELY(rest.len > 1)
2325 _c4err(
"parse error: invalid indentation");
2326 if C4_UNLIKELY( ! c4::
atou(rest, &indentation))
2327 _c4err(
"parse error: could not read indentation as decimal");
2328 if C4_UNLIKELY( ! indentation)
2329 _c4err(
"parse error: null indentation");
// NOTE(review): the message below reports indentation+indref (the parameter), while the statement after it adds m_curr->indref -- confirm these always agree
2330 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2331 indentation += m_evt_handler->m_curr->indref;
2335 rest = t.triml(
" \t");
2336 _c4dbgpf(
"blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", prs_(t), prs_(rest), rest.begins_with(
'#'), t.begins_with(
'#'));
2337 if C4_UNLIKELY(rest.len && (rest.str[0] !=
'#' || t.str[0] ==
'#'))
2338 _c4err(
"parse error: invalid token");
2342 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// the header has been parsed: consume it
2345 _line_progressed(s.len);
// the raw block starts out empty at the current offset and grows line by line
2350 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2351 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2359 size_t num_lines = 0;
2360 size_t first = m_evt_handler->m_curr->pos.line;
2361 size_t provisional_indentation =
npos;
2363 while(( ! _finished_file()))
2367 #
if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2368 C4_DONT_OPTIMIZE(lc.rem);
2370 _c4dbgpf(
"blck: peeking at {}", prs_(lc.rem.trimr(
"\r\n"),
true));
2372 if(indentation !=
npos)
2374 _c4dbgpf(
"blck: indentation={}", indentation);
2376 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2380 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2384 _c4err(
"indentation decreased without any scalar");
2388 else if(indentation == 0)
2390 _c4dbgpf(
"blck: noindent. lc.rem={}", prs_(lc.rem));
2391 if(_is_doc_token(lc.rem))
2393 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
2400 const size_t fns = lc.rem.first_not_of(
' ');
2401 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2404 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2405 if C4_UNLIKELY(lc.full.begins_with(
'\t'))
2407 if(provisional_indentation ==
npos)
2409 if(lc.indentation < indref)
2411 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2412 if(raw_block.len == 0)
2414 _c4dbgp(
"blck: was empty, undo next line");
2419 else if(lc.indentation == m_evt_handler->m_curr->indref)
2423 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2427 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2428 indentation = lc.indentation;
2432 if(lc.indentation >= provisional_indentation)
2434 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2436 indentation = lc.indentation;
2440 if(lc.indentation >= indref)
2441 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2442 _c4dbgp(
"blck: finished");
2449 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2450 if(provisional_indentation !=
npos)
2452 if(lc.rem.len >= provisional_indentation)
2454 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2455 provisional_indentation = lc.rem.len;
2460 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2461 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2462 if(provisional_indentation ==
npos)
2464 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2465 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2467 if(provisional_indentation < indref)
2469 provisional_indentation = indref;
2470 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line is accepted: make it current and append it to the raw block
2476 m_evt_handler->m_curr->line_contents = lc;
2477 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2478 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2479 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2483 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2484 C4_UNUSED(num_lines);
// no line fixed the indentation: fall back to the provisional one
2487 if(indentation ==
npos)
2489 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2490 indentation = provisional_indentation;
2496 _c4prscalar(
"scanned block", raw_block,
true);
2498 sb->scalar = raw_block;
2499 sb->indentation = indentation;
// Debug helper for the whitespace filters: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions.
2511#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant. NOTE(review): presumably selected by a preprocessor
// conditional that is not visible in this listing -- confirm.
2513#define _c4dbgfws(...)
/** Whitespace handling shared by the scalar filters.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_ws_handle_to_first_non_space(proc), which
 * the two helpers that follow call -- confirm.
 *
 * The processor's current character must be a space or a tab (asserted).
 * The first character after the run of whitespace is located: spaces and
 * tabs are both skipped when the read position is past the start, but
 * only spaces when it is at the start. If that character is a newline or
 * carriage return, the whitespace is trailing on its line and is
 * skipped. The remaining debug messages cover legitimate whitespace and
 * whitespace with nothing after it.
 *
 * NOTE(review): the statements for the latter two cases and the return
 * value are not visible here -- confirm.
 */
2516template<
class EventHandler>
2517template<
class FilterProcessor>
2520 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2521 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
2523 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2524 if(first_pos !=
npos)
2526 const char first_char = proc.src[first_pos];
2527 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2528 if(first_char ==
'\n' || first_char ==
'\r')
2530 _c4dbgfws(
"whitespace is trailing on line",
"");
2531 proc.skip(first_pos - proc.rpos);
2536 _c4dbgfws(
"legit whitespace. sofar={}", prs_(proc.sofar()));
2540 _c4dbgfws(
"whitespace is trailing on line",
"");
/** Handle whitespace, copying it when nothing but whitespace remains.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_ws_copy_trailing(proc) -- confirm.
 *
 * When _filter_ws_handle_to_first_non_space() returns false, every
 * remaining source character is copied to the output.
 */
2544template<
class EventHandler>
2545template<
class FilterProcessor>
2548 if(!_filter_ws_handle_to_first_non_space(proc))
2550 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2551 proc.copy(proc.src.len - proc.rpos);
/** Handle whitespace, dropping it when nothing but whitespace remains.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_ws_skip_trailing(proc) -- confirm.
 *
 * When _filter_ws_handle_to_first_non_space() returns false, every
 * remaining source character is skipped.
 */
2555template<
class EventHandler>
2556template<
class FilterProcessor>
2559 if(!_filter_ws_handle_to_first_non_space(proc))
2561 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2562 proc.skip(proc.src.len - proc.rpos);
// Debug helper for the plain-scalar filter: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions.
2576#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant. NOTE(review): presumably selected by a preprocessor
// conditional that is not visible in this listing -- confirm.
2578#define _c4dbgfps(fmt, ...)
/** Newline handling for the plain-scalar filter.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_nl_plain(proc, indentation), as called by
 * the plain filter -- confirm.
 *
 * The processor's current character must be '\n' (asserted). The
 * newlines following it are counted with _count_following_newlines(),
 * which also takes the indentation. In the multiple-newline case the
 * output receives `numnl_following` newline characters. The other
 * messages cover a single newline, converted to a space, and a final
 * newline followed only by whitespace.
 *
 * NOTE(review): the branch conditions and the statements of the latter
 * two cases are not visible here -- confirm.
 */
2581template<
class EventHandler>
2582template<
class FilterProcessor>
2585 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2587 _c4dbgfps(
"found newline. sofar={}", prs_(proc.sofar()));
2588 size_t ii = proc.rpos;
2589 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2592 proc.set(
'\n', numnl_following);
2593 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2597 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2601 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2605 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
/** Filter a plain scalar with the given filter processor.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_plain(proc, indentation), as called by the
 * two wrappers that follow -- confirm.
 *
 * The indentation must not be npos (asserted). Characters are processed
 * while the processor has more of them: whitespace goes to
 * _filter_ws_skip_trailing(), a newline to _filter_nl_plain(), and a
 * carriage return is ignored.
 *
 * @return the processor's result
 *
 * NOTE(review): the dispatch statement and the handling of ordinary
 * characters are not visible here -- confirm.
 */
2612template<
class EventHandler>
2613template<
class FilterProcessor>
2616 RYML_ASSERT_PARSE_CB_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2617 _c4dbgfps(
"before={}", prs_(proc.src));
2619 while(proc.has_more_chars())
2621 const char curr = proc.curr();
2622 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), prs_(proc.sofar()));
2627 _c4dbgfps(
"whitespace", curr);
2628 _filter_ws_skip_trailing(proc);
2631 _c4dbgfps(
"newline", curr);
2632 _filter_nl_plain(proc, indentation);
2635 _c4dbgfps(
"carriage return, ignore", curr);
2644 _c4dbgfps(
"after={}", prs_(proc.sofar()));
2646 return proc.result();
/** Entry point that filters a plain scalar by delegating to
 * _filter_plain() with a processor `proc` and the indentation.
 *
 * NOTE(review): the signature and the construction of `proc` are not
 * visible in this listing, so the name of this entry point and the kind
 * of processor it builds cannot be stated -- confirm.
 */
2652template<
class EventHandler>
2656 return _filter_plain(proc, indentation);
/** Second entry point that filters a plain scalar by delegating to
 * _filter_plain() with a processor `proc` and the indentation.
 *
 * NOTE(review): the signature and the construction of `proc` are not
 * visible in this listing; presumably this variant differs from the
 * preceding one only in the processor it builds -- confirm.
 */
2659template<
class EventHandler>
2663 return _filter_plain(proc, indentation);
// Debug helper for the single-quoted filter: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions.
2674#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant. NOTE(review): presumably selected by a preprocessor
// conditional that is not visible in this listing -- confirm.
2676#define _c4dbgfsq(fmt, ...)
/** Newline handling for the single-quoted filter.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_nl_squoted(proc), as called by the
 * single-quoted filter -- confirm.
 *
 * The processor's current character must be '\n' (asserted). The
 * newlines following it are counted with _count_following_newlines()
 * (no indentation argument here). In the multiple-newline case the
 * output receives `numnl_following` newline characters; the other
 * messages cover a single newline being converted to a space.
 *
 * NOTE(review): the branch conditions and the statements of the
 * single-newline cases are not visible here -- confirm.
 */
2679template<
class EventHandler>
2680template<
class FilterProcessor>
2683 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2685 _c4dbgfsq(
"found newline. sofar={}", prs_(proc.sofar()));
2686 size_t ii = proc.rpos;
2687 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2690 proc.set(
'\n', numnl_following);
2691 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2695 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2699 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2704 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
/** Filter a single-quoted scalar with the given filter processor.
 *
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is _filter_squoted(proc), as called by the two
 * wrappers that follow -- confirm.
 *
 * Characters are processed while the processor has more of them:
 * whitespace goes to _filter_ws_copy_trailing(), a newline to
 * _filter_nl_squoted(), a carriage return is skipped, and a single quote
 * followed by another single quote is recognized as two consecutive
 * quotes.
 *
 * @return the processor's result
 *
 * NOTE(review): the dispatch statement, the action taken for doubled
 * quotes and the handling of ordinary characters are not visible here --
 * confirm.
 */
2710template<
class EventHandler>
2711template<
class FilterProcessor>
2714 _c4dbgfsq(
"before={}", prs_(proc.src));
2718 while(proc.has_more_chars())
2720 const char curr = proc.curr();
2721 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), prs_(proc.sofar()));
2726 _c4dbgfsq(
"whitespace", curr);
2727 _filter_ws_copy_trailing(proc);
2730 _c4dbgfsq(
"newline", curr);
2731 _filter_nl_squoted(proc);
2734 _c4dbgfsq(
"skip cr", curr);
2738 _c4dbgfsq(
"squote", curr);
2739 if(proc.next() ==
'\'')
2741 _c4dbgfsq(
"two consecutive squotes", curr);
2756 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, prs_(proc.sofar()));
2758 return proc.result();
2763template<
class EventHandler>
2767 return _filter_squoted(proc);
2770template<
class EventHandler>
2774 return _filter_squoted(proc);
2785#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2787#define _c4dbgfdq(...)
2790template<
class EventHandler>
2791template<
class FilterProcessor>
2794 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2796 _c4dbgfdq(
"found newline. sofar={}", prs_(proc.sofar()));
2797 size_t ii = proc.rpos;
2798 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2801 proc.set(
'\n', numnl_following);
2802 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2806 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2810 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2815 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2817 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2819 _c4dbgfdq(
"backslash at [{}]", ii);
2820 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2821 if(next ==
' ' || next ==
'\t')
2823 _c4dbgfdq(
"extend skip to backslash",
"");
/** Decode a hexadecimal code point escape (x, u or U) of a double-quoted
 * scalar and emit its encoded bytes through the filter processor.
 *
 * proc : filter processor; proc.rpos is expected at the backslash, so
 *        the hex digits start at proc.rpos + 2.
 * sz   : number of hex digits required by the escape (the visible
 *        callers pass 2, 4 or 8).
 *
 * Errors (via _c4err) when fewer than sz digits remain, when the
 * digits do not parse as hex, or when the code point cannot be
 * encoded. On success sz + 1 source characters after the backslash
 * are replaced by at most 4 bytes.
 *
 * NOTE(review): the signature line and the declaration of readbuf are
 * not part of this view -- confirm against the full file.
 */
2831template<
class EventHandler>
2832template<
class FilterProcessor>
// the escape letter plus the sz digits that follow it
2835 const size_t szp1 = sz + 1u;
2836 if C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len)
2837 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2839 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2840 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2841 uint32_t codepoint_val = {};
2842 if C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val))
2843 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
2844 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// report the offending code point itself (the message used to print
// the read position under the code point label) and keep the position
2845 if C4_UNLIKELY(numbytes == 0)
2846 _c4err(
"failed to decode code point={}. scalar pos={}", codepoint_val, proc.rpos);
2847 RYML_ASSERT_PARSE_CB_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2848 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2849 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
/** Translate one backslash escape of a double-quoted scalar.
 *
 * The character after the backslash, read with proc.next(), selects
 * the action:
 *  - double quote, slash, space, tab: written as themselves with
 *    proc.translate_esc()
 *  - n, r, t, backslash, 0, b, f, a, v, e: written as the matching
 *    control character (e becomes 0x1b)
 *  - x, u, U: handed to _filter_dquoted_backslash_decode() with 2, 4
 *    or 8 hex digits respectively
 *  - underscore and N: two-byte payloads 0xC2 0xA0 and 0xC2 0x85 (the
 *    UTF-8 encodings of U+00A0 and U+0085) through
 *    proc.translate_esc_bulk()
 *  - L and P: three-byte payloads 0xE2 0x80 0xA8 and 0xE2 0x80 0xA9
 *    (the UTF-8 encodings of U+2028 and U+2029) through
 *    proc.translate_esc_extending()
 *  - any character reaching the last branch is reported with _c4err()
 *
 * Each payload byte is spelled through RYML_CHCONST_ with both its
 * negative and its unsigned value.
 *
 * NOTE(review): several lines of this function are not part of this
 * view. The leading branch appears to deal with a line break following
 * the backslash: it scans forward from proc.rpos + 2 over spaces and
 * tabs and then skips up to that point with proc.skip(). The bodies of
 * the carriage-return and NUL branches are also not visible. Confirm
 * against the full file.
 */
2852template<class EventHandler>
2853template<class FilterProcessor>
2854void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2856 char next = proc.next();
2857 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
2860 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2864 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2870 size_t ii = proc.rpos + 2;
2871 for( ; ii < proc.src.len; ++ii)
2874 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2879 proc.skip(ii - proc.rpos);
2881 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2884 proc.translate_esc(next);
2885 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2887 else if(next ==
'\r')
2891 else if(next ==
'n')
2893 proc.translate_esc(
'\n');
2895 else if(next ==
'r')
2897 proc.translate_esc(
'\r');
2899 else if(next ==
't')
2901 proc.translate_esc(
'\t');
2903 else if(next ==
'\\')
2905 proc.translate_esc(
'\\');
2907 else if(next ==
'x')
2909 _filter_dquoted_backslash_decode(proc, 2u);
2911 else if(next ==
'u')
2913 _filter_dquoted_backslash_decode(proc, 4u);
2915 else if(next ==
'U')
2917 _filter_dquoted_backslash_decode(proc, 8u);
2920 else if(next ==
'0')
2922 proc.translate_esc(
'\0');
2924 else if(next ==
'b')
2926 proc.translate_esc(
'\b');
2928 else if(next ==
'f')
2930 proc.translate_esc(
'\f');
2932 else if(next ==
'a')
2934 proc.translate_esc(
'\a');
2936 else if(next ==
'v')
2938 proc.translate_esc(
'\v');
2940 else if(next ==
'e')
2942 proc.translate_esc(
'\x1b');
2944 else if(next ==
'_')
2947 const char payload[] = {
2948 RYML_CHCONST_(-0x3e, 0xc2),
2949 RYML_CHCONST_(-0x60, 0xa0),
2951 proc.translate_esc_bulk(payload, 2, 1);
2953 else if(next ==
'N')
2956 const char payload[] = {
2957 RYML_CHCONST_(-0x3e, 0xc2),
2958 RYML_CHCONST_(-0x7b, 0x85),
2960 proc.translate_esc_bulk(payload, 2, 1);
2962 else if(next ==
'L')
2965 const char payload[] = {
2966 RYML_CHCONST_(-0x1e, 0xe2),
2967 RYML_CHCONST_(-0x80, 0x80),
2968 RYML_CHCONST_(-0x58, 0xa8),
2970 proc.translate_esc_extending(payload, 3, 1);
2972 else if(next ==
'P')
2975 const char payload[] = {
2976 RYML_CHCONST_(-0x1e, 0xe2),
2977 RYML_CHCONST_(-0x80, 0x80),
2978 RYML_CHCONST_(-0x57, 0xa9),
2980 proc.translate_esc_extending(payload, 3, 1);
2982 else if(next ==
'\0')
2988 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2990 _c4dbgfdq(
"backslash...sofar={}", prs_(proc.sofar()));
2994template<
class EventHandler>
2995template<
class FilterProcessor>
2998 _c4dbgfdq(
"before={}", prs_(proc.src));
3001 while(proc.has_more_chars())
3003 const char curr = proc.curr();
3004 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3010 _c4dbgfdq(
"whitespace", curr);
3011 _filter_ws_copy_trailing(proc);
3016 _c4dbgfdq(
"newline", curr);
3017 _filter_nl_dquoted(proc);
3022 _c4dbgfdq(
"carriage return, ignore", curr);
3028 _filter_dquoted_backslash(proc);
3038 _c4dbgfdq(
"after={}", prs_(proc.sofar()));
3039 return proc.result();
3045template<
class EventHandler>
3049 return _filter_dquoted(proc);
3052template<
class EventHandler>
3056 return _filter_dquoted(proc);
3065template<
class EventHandler>
3066template<
class FilterProcessor>
3069 RYML_ASSERT_PARSE_CB_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3070 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
3074 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3076 #define _c4dbgchomp(...)
3081 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3084 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
3085 last = proc.rpos + last + size_t(1) + indentation;
3086 RYML_ASSERT_PARSE_CB_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3088 while((proc.rpos < last) && proc.has_more_chars())
3090 const char curr = proc.curr();
3091 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3096 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3099 csubstr at_next_line = proc.rem();
3100 if(at_next_line.begins_with(
' '))
3102 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3104 size_t first_non_space = at_next_line.first_not_of(
' ');
3105 _c4dbgchomp(
"first_non_space={}", first_non_space);
3106 if(first_non_space ==
npos)
3108 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3109 first_non_space = at_next_line.len;
3111 if(first_non_space <= indentation)
3113 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3114 proc.skip(first_non_space);
3118 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3119 proc.skip(indentation);
3121 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3122 proc.copy(first_non_space - indentation);
3140 bool had_one =
false;
3141 while(proc.has_more_chars())
3143 const char curr = proc.curr();
3144 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3149 _c4dbgchomp(
"copy newline!", curr);
3157 _c4dbgchomp(
"skip!", curr);
3164 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3171 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3172 while(proc.has_more_chars())
3174 const char curr = proc.curr();
3175 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3179 _c4dbgchomp(
"copy newline!", curr);
3184 _c4dbgchomp(
"skip!", curr);
3193 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
3205#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3207#define _c4dbgfb(...)
3210template<
class EventHandler>
3211template<
class FilterProcessor>
3217 size_t first = rem.first_not_of(
' ');
3220 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3221 if(first < indentation)
3223 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3228 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3229 proc.skip(indentation);
3232 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3235 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3239 if(first < indentation)
3241 _c4dbgfb(
"skip everything", first);
3242 proc.skip(proc.src.len - proc.rpos);
3246 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3247 proc.skip(indentation);
3255template<
class EventHandler>
3256template<
class FilterProcessor>
3260 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3263 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3264 if(chomp == CHOMP_KEEP && proc.src.len)
3266 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3267 while(proc.has_more_chars())
3269 const char curr = proc.curr();
3281 return contents.len;
3284template<
class EventHandler>
3285template<
class FilterProcessor>
3288 _c4dbgfb(
"contents_len={}", contents_len);
3290 RYML_ASSERT_PARSE_CB_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3294 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3295 if(firstnewl !=
npos)
3297 contents_len = firstnewl;
3298 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3302 contents_len = proc.src.len;
3303 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3306 return contents_len;
3318#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3320#define _c4dbgfbl(...)
3323template<
class EventHandler>
3324template<
class FilterProcessor>
3327 _c4dbgfbl(
"indentation={} before={}", indentation, prs_(proc.src));
3329 size_t contents_len = _handle_all_whitespace(proc, chomp);
3331 return proc.result();
3333 contents_len = _extend_to_chomp(proc, contents_len);
3335 _c4dbgfbl(
"to filter={}", prs_(proc.src.first(contents_len)));
3337 _filter_block_indentation(proc, indentation);
3340 while(proc.has_more_chars(contents_len))
3342 const char curr = proc.curr();
3343 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3348 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3350 _filter_block_indentation(proc, indentation);
3362 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, prs_(proc.sofar()));
3364 _filter_chomp(proc, chomp, indentation);
3366 _c4dbgfbl(
"final={}", prs_(proc.sofar()));
3368 return proc.result();
3373template<
class EventHandler>
3377 return _filter_block_literal(proc, indentation, chomp);
3380template<
class EventHandler>
3384 return _filter_block_literal(proc, indentation, chomp);
3394#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3396#define _c4dbgfbf(...)
3400template<
class EventHandler>
3401template<
class FilterProcessor>
3404 _filter_block_indentation(proc, indentation);
3405 while(proc.has_more_chars(len))
3407 const char curr = proc.curr();
3408 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3412 _c4dbgfbf(
"newline.", curr);
3414 _filter_block_indentation(proc, indentation);
3422 size_t first = proc.rem().first_not_of(
" \t");
3423 _c4dbgfbf(
"space. first={}", first);
3425 first = proc.rem().len;
3426 _c4dbgfbf(
"... indentation increased to {}", first);
3427 _filter_block_folded_indented_block(proc, indentation, len, first);
3431 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
3437template<
class EventHandler>
3438template<
class FilterProcessor>
3444 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3445 wpos_at_first_newl = proc.wpos;
3450 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3451 RYML_ASSERT_PARSE_CB_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3452 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3453 RYML_ASSERT_PARSE_CB_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3455 proc.set_at(wpos_at_first_newl,
'\n');
3456 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3459 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3463 return wpos_at_first_newl;
3466template<
class EventHandler>
3467template<
class FilterProcessor>
3470 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
3471 size_t num_newl = 0;
3472 size_t wpos_at_first_newl =
npos;
3473 while(proc.has_more_chars(len))
3475 const char curr = proc.curr();
3476 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3481 _c4dbgfbf(
"newline. sofar={}", num_newl);
3517 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3518 _filter_block_indentation(proc, indentation);
3524 size_t first = proc.rem().first_not_of(
" \t");
3525 _c4dbgfbf(
"space. first={}", first);
3527 first = proc.rem().len;
3528 _c4dbgfbf(
"... indentation increased to {}", first);
3531 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3532 proc.set_at(wpos_at_first_newl,
'\n');
3536 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3539 _filter_block_folded_indented_block(proc, indentation, len, first);
3541 wpos_at_first_newl =
npos;
3548 _c4dbgfbf(
"not space, not newline. stop.", 0);
3555template<
class EventHandler>
3556template<
class FilterProcessor>
3559 RYML_ASSERT_PARSE_CB_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
3560 if(curr_indentation)
3561 proc.copy(curr_indentation);
3562 while(proc.has_more_chars(len))
3564 const char curr = proc.curr();
3565 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3571 _filter_block_indentation(proc, indentation);
3574 _c4dbgfbf(
"newline. firstns={}", first);
3577 const char c = rem[first];
3578 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3579 if(c !=
'\n' && c !=
'\r')
3581 _c4dbgfbf(
"done with indented block", first);
3585 else if(first !=
npos)
3588 _c4dbgfbf(
"copy all {} spaces", first);
/** Filter a folded block scalar through the given filter processor.
 *
 * Steps visible in this view, in order:
 *  1. _handle_all_whitespace() deals with a scalar made only of
 *     whitespace and yields the length of the real contents; an early
 *     return of proc.result() follows (its condition is not visible).
 *  2. _extend_to_chomp() widens the contents length.
 *  3. _filter_block_folded_newlines_leading() processes the start of
 *     the scalar.
 *  4. The main loop walks the contents and hands each newline to
 *     _filter_block_folded_newlines().
 *  5. _filter_chomp() applies the chomp mode to the remaining tail.
 *
 * Returns proc.result().
 *
 * NOTE(review): the signature line and parts of the loop body are not
 * part of this view; the name is taken from the wrappers that call
 * _filter_block_folded(proc, indentation, chomp).
 */
3606template<
class EventHandler>
3607template<
class FilterProcessor>
3610 _c4dbgfbf(
"indentation={} before={}", indentation, prs_(proc.src));
3612 size_t contents_len = _handle_all_whitespace(proc, chomp);
3614 return proc.result();
3616 contents_len = _extend_to_chomp(proc, contents_len);
3618 _c4dbgfbf(
"to filter={}", prs_(proc.src.first(contents_len)));
3620 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3623 while(proc.has_more_chars(contents_len))
3625 const char curr = proc.curr();
3626 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3631 _c4dbgfbf(
"found newline", curr);
3632 _filter_block_folded_newlines(proc, indentation, contents_len);
3644 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, prs_(proc.sofar()));
3646 _filter_chomp(proc, chomp, indentation);
// one placeholder, one argument: trace the filtered text, as the
// literal-block filter does (the length used to be printed instead)
3648 _c4dbgfbf(
"final={}", prs_(proc.sofar()));
3650 return proc.result();
3655template<
class EventHandler>
3659 return _filter_block_folded(proc, indentation, chomp);
3662template<
class EventHandler>
3666 return _filter_block_folded(proc, indentation, chomp);
3674template<
class EventHandler>
3677 _c4dbgpf(
"filtering plain scalar: s={}", prs_(s));
3678 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3679 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3680 _c4dbgpf(
"filtering plain scalar: success! s={}", prs_(r.get()));
3686template<
class EventHandler>
3689 _c4dbgpf(
"filtering squo scalar: s={}", prs_(s));
3690 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3691 RYML_ASSERT_PARSE_CB_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3692 _c4dbgpf(
"filtering squo scalar: success! s={}", prs_(r.get()));
3699template<
class EventHandler>
3702 _c4dbgpf(
"filtering dquo scalar: s={}", prs_(s));
3704 if C4_LIKELY(r.valid())
3706 _c4dbgpf(
"filtering dquo scalar: success! s={}", prs_(r.get()));
3711 const size_t len = r.required_len();
3712 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3713 substr dst = _alloc_arena(len, &s);
3714 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3717 RYML_ASSERT_PARSE_CB_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3718 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3719 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3720 RYML_ASSERT_PARSE_CB_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
3721 RYML_CHECK_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3722 _c4dbgpf(
"filtering dquo scalar: success! s={}", prs_(rsd.get()));
3732template<
class EventHandler>
3735 if(s.is_sub(_buf()))
3737 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3738 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3740 memmove(s.str - 1, s.str, s.len);
3742 s.str[s.len] =
'\n';
3748 substr dst = _alloc_arena(s.len + 1, &s);
3750 memcpy(dst.str, s.str, s.len);
3756template<
class EventHandler>
3759 _c4dbgpf(
"filtering block literal scalar: s={}", prs_(s));
3760 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3762 if C4_LIKELY(r.valid())
3768 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3769 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3772 result = _move_scalar_left_and_add_newline(s);
3774 _c4dbgpf(
"filtering block literal scalar: success! s={}", prs_(result));
3780template<
class EventHandler>
3783 _c4dbgpf(
"filtering block folded scalar: s={}", prs_(s));
3784 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3786 if C4_LIKELY(r.valid())
3792 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3793 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3796 result = _move_scalar_left_and_add_newline(s);
3798 _c4dbgpf(
"filtering block folded scalar: success! s={}", prs_(result));
3805template<
class EventHandler>
3810 if(m_options.scalar_filtering())
3812 return _filter_scalar_plain(sc.scalar, indentation);
3816 _c4dbgp(
"plain scalar left unfiltered");
3817 m_evt_handler->mark_key_scalar_unfiltered();
3822 _c4dbgp(
"plain scalar doesn't need filtering");
3827template<
class EventHandler>
3832 if(m_options.scalar_filtering())
3834 return _filter_scalar_plain(sc.scalar, indentation);
3838 _c4dbgp(
"plain scalar left unfiltered");
3839 m_evt_handler->mark_val_scalar_unfiltered();
3844 _c4dbgp(
"plain scalar doesn't need filtering");
3852template<
class EventHandler>
3857 if(m_options.scalar_filtering())
3859 return _filter_scalar_squot(sc.scalar);
3863 _c4dbgp(
"squo key scalar left unfiltered");
3864 m_evt_handler->mark_key_scalar_unfiltered();
3869 _c4dbgp(
"squo key scalar doesn't need filtering");
3874template<
class EventHandler>
3879 if(m_options.scalar_filtering())
3881 return _filter_scalar_squot(sc.scalar);
3885 _c4dbgp(
"squo val scalar left unfiltered");
3886 m_evt_handler->mark_val_scalar_unfiltered();
3891 _c4dbgp(
"squo val scalar doesn't need filtering");
3899template<
class EventHandler>
3904 if(m_options.scalar_filtering())
3906 return _filter_scalar_dquot(sc.scalar);
3910 _c4dbgp(
"dquo scalar left unfiltered");
3911 m_evt_handler->mark_key_scalar_unfiltered();
3916 _c4dbgp(
"dquo scalar doesn't need filtering");
3921template<
class EventHandler>
3926 if(m_options.scalar_filtering())
3928 return _filter_scalar_dquot(sc.scalar);
3932 _c4dbgp(
"dquo scalar left unfiltered");
3933 m_evt_handler->mark_val_scalar_unfiltered();
3938 _c4dbgp(
"dquo scalar doesn't need filtering");
3946template<
class EventHandler>
3949 if(m_options.scalar_filtering())
3951 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3955 _c4dbgp(
"literal scalar left unfiltered");
3956 m_evt_handler->mark_key_scalar_unfiltered();
3961template<
class EventHandler>
3964 if(m_options.scalar_filtering())
3966 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3970 _c4dbgp(
"literal scalar left unfiltered");
3971 m_evt_handler->mark_val_scalar_unfiltered();
3979template<
class EventHandler>
3982 if(m_options.scalar_filtering())
3984 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3988 _c4dbgp(
"folded scalar left unfiltered");
3989 m_evt_handler->mark_key_scalar_unfiltered();
3994template<
class EventHandler>
3997 if(m_options.scalar_filtering())
3999 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4003 _c4dbgp(
"folded scalar left unfiltered");
4004 m_evt_handler->mark_val_scalar_unfiltered();
/** Add parser flags to a parser state.
 *
 * on : flag bits to add.
 *
 * The lines visible here only build the debug trace: the flags being
 * added, the state flags before, and the state flags after
 * (s->flags|on), each rendered by detail::_parser_flags_to_str() into
 * a 64-byte stack buffer.
 *
 * NOTE(review): the declaration of s and the statement that updates
 * s->flags are not part of this view -- confirm against the full file.
 */
4016template<
class EventHandler>
4017void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
4020 char buf1_[64], buf2_[64], buf3_[64];
4021 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4022 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4023 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4024 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4028template<
class EventHandler>
4032 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4033 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4034 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4035 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4036 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4037 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
/** Remove parser flags from a parser state.
 *
 * off : flag bits to clear.
 *
 * The lines visible here only build the debug trace: the flags being
 * removed, the state flags before, and the state flags after
 * (s->flags&(~off)), each rendered by detail::_parser_flags_to_str()
 * into a 64-byte stack buffer.
 *
 * NOTE(review): the declaration of s and the statement that updates
 * s->flags are not part of this view -- confirm against the full file.
 */
4043template<
class EventHandler>
4044void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
4047 char buf1_[64], buf2_[64], buf3_[64];
4048 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4049 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4050 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4051 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4058 bool gotone =
false;
4060 #define _prflag(fl) \
4061 if((flags & fl) == (fl)) \
4065 if(pos + 1 < buf.len) \
4069 csubstr fltxt = #fl; \
4070 if(pos + fltxt.len <= buf.len) \
4071 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4101 RYML_CHECK_BASIC_(pos <= buf.len);
4103 return buf.first(pos);
4113template<
class EventHandler>
4116 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4117 return _buf().sub(loc.offset);
4120template<
class EventHandler>
4123 if C4_UNLIKELY(val ==
nullptr)
4124 return {m_evt_handler->m_curr->pos.
name, 0, 0, 0};
4125 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4128 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4129 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4130 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4131 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4134 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4135 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4137 using lineptr_type =
size_t const* C4_RESTRICT;
4138 lineptr_type lineptr =
nullptr;
4139 size_t offset = (size_t)(val - src.begin());
4143 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4158 size_t count = m_newline_offsets_size;
4159 lineptr = m_newline_offsets;
4162 size_t step = count >> 1;
4163 lineptr_type it = lineptr + step;
4175 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr);
4176 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4177 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4178 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr && (*lineptr > offset));
4180 loc.
name = m_evt_handler->m_curr->pos.name;
4181 loc.offset = offset;
4182 loc.line = (size_t)(lineptr - m_newline_offsets);
4183 if(lineptr > m_newline_offsets)
4184 loc.col = (offset - *(lineptr-1) - 1u);
/** Build the table of newline offsets used for location lookups.
 *
 * After this call m_newline_offsets holds, in increasing order, the
 * offset of every newline character in src, followed by one final
 * entry equal to src.len. The table therefore has one entry more than
 * the number of newline characters.
 *
 * NOTE(review): the declaration of src is not part of this view.
 */
4190template<
class EventHandler>
4191void ParseEngine<EventHandler>::_prepare_locations()
// one slot per newline plus one for the end of the buffer
4194 size_t numnewlines = 1u + src.count(
'\n');
4195 _resize_locations(numnewlines);
4196 m_newline_offsets_size = 0;
4197 for(
size_t i = 0; i < src.len; i++)
4198 if(src.str[i] ==
'\n')
4199 m_newline_offsets[m_newline_offsets_size++] = i;
// closing entry: the end of the buffer terminates the last line
4200 m_newline_offsets[m_newline_offsets_size++] = src.len;
4201 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
/** Make sure the newline offsets buffer can hold numnewlines entries.
 *
 * numnewlines : requested capacity; values below 16 are raised to 16.
 *
 * When the request exceeds the current capacity, any existing buffer
 * is released and a new one is allocated through the event handler
 * callbacks. No copy of the old entries is made in the visible lines,
 * and m_newline_offsets_size is not touched here.
 */
4204template<
class EventHandler>
4205void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
// enforce a minimum capacity of 16 entries
4207 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4208 if(numnewlines > m_newline_offsets_capacity)
4210 if(m_newline_offsets)
4211 RYML_CB_FREE_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// the previous pointer value is passed on as the allocation hint
4212 m_newline_offsets = RYML_CB_ALLOC_HINT_(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4213 m_newline_offsets_capacity = numnewlines;
/** Tell whether the newline offsets table still has to be built.
 *
 * Returns true when m_newline_offsets_size is zero.
 */
4217template<
class EventHandler>
4218bool ParseEngine<EventHandler>::_locations_dirty()
const
4220 return !m_newline_offsets_size;
/** Skip leading whitespace and a trailing comment on the current line
 * while in flow context.
 *
 * Nothing is done when the remainder of the line is empty. Otherwise:
 *  - a remainder starting with a space or a tab is traced as
 *    whitespace (NOTE(review): the statement that consumes it is not
 *    part of this view)
 *  - a remainder starting with the hash character is treated as a
 *    comment and the rest of the line is consumed with
 *    _line_progressed()
 */
4228template<
class EventHandler>
4229void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4232 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4234 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4236 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4240 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4242 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4243 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Validate indentation at the beginning of a line in flow context.
 *
 * Precondition (asserted): the current state is at a line beginning.
 *
 * When the current state reports indentation_lt(), the part of the
 * line after the indentation is examined; if anything other than
 * spaces and tabs is left there, the indentation is consumed with
 * _line_progressed() and a bad indentation error is raised through
 * _c4err().
 */
4249template<
class EventHandler>
4250void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4252 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4253 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4254 if C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())
4256 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
4257 _c4dbgpf(
"flow: after indentation={}", prs_(trimmed));
// a line holding only whitespace after the indentation is tolerated
4258 if(trimmed.len && trimmed.triml(
" \t").len)
4260 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4261 _c4err(
"bad indentation");
/** Consume the leading spaces and tabs of the current line remainder
 * in block context.
 *
 * The current offset is recorded in mark before anything is consumed.
 * If the remainder contains a character other than space or tab, the
 * line is advanced up to that character; otherwise the whole remainder
 * is consumed.
 *
 * NOTE(review): the return statement is not part of this view;
 * presumably mark is returned -- confirm against the full file.
 */
4266template<
class EventHandler>
4267size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4269 const size_t mark = m_evt_handler->m_curr->pos.offset;
4270 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4271 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4272 if(firstpos !=
npos)
4274 _c4dbgp(
"block: non empty line");
4275 _line_progressed(firstpos);
4280 _c4dbgp(
"block: rest of line is whitespace");
4281 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Reject tab characters in the leading whitespace of a block line.
 *
 * start_mark : offset where the leading span starts.
 * end_mark   : offset where the leading span ends; asserted to be
 *              greater than or equal to start_mark.
 *
 * Nothing is checked when both marks are equal. Otherwise the span
 * named leading is searched for a tab; an error is raised through
 * _c4err() when the span holds only spaces and tabs, or when the tab
 * sits before the first character that is neither space nor tab.
 *
 * NOTE(review): the declaration of leading and the guard on the tab
 * search result are not part of this view -- confirm against the full
 * file.
 */
4286template<
class EventHandler>
4287void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4289 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4290 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4291 if(end_mark != start_mark)
4294 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, prs_(leading,
true));
4295 size_t pos = leading.find(
'\t');
4298 size_t fno = leading.first_not_of(
" \t");
4299 if(fno ==
npos || pos < fno)
4300 _c4err(
"invalid tab character to the left");
/** Record the line of the colon being handled and reject a second
 * colon on the same line.
 *
 * If m_prev_colon already holds the current line number, an error is
 * raised through _c4err(). Otherwise m_prev_colon is updated to the
 * current line. The value npos in m_prev_colon means that no colon has
 * been recorded.
 */
4310template<
class EventHandler>
4311void ParseEngine<EventHandler>::_handle_colon()
4313 size_t curr = m_evt_handler->m_curr->pos.line;
4314 if C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon)
4316 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4317 _c4err(
"two colons on same line");
4319 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
4320 m_prev_colon = curr;
/** Store an annotation string in the next free slot of dst.
 *
 * dst : annotation set to append to; asserted to have a free slot.
 * str : annotation text.
 *
 * The indentation, line and orig fields of the slot are
 * value-initialized.
 *
 * NOTE(review): the increment of dst->num_entries is not part of this
 * view -- confirm against the full file.
 */
4323template<
class EventHandler>
4324void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str)
4326 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, prs_(str));
4327 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4328 dst->annotations[dst->num_entries].str = str;
4329 dst->annotations[dst->num_entries].indentation = {};
4330 dst->annotations[dst->num_entries].line = {};
4331 dst->annotations[dst->num_entries].orig = {};
/** Store an annotation with its indentation and line in the next free
 * slot of dst.
 *
 * dst         : annotation set to append to; asserted to have a free
 *               slot.
 * str         : annotation text.
 * indentation : indentation recorded for the annotation.
 * line        : line recorded for the annotation.
 *
 * The orig field of the slot is value-initialized.
 *
 * NOTE(review): the handling of the unlikely case where the first
 * stored annotation is on the same line, and the increment of
 * dst->num_entries, are not part of this view -- confirm against the
 * full file.
 */
4335template<
class EventHandler>
4336void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line)
4338 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, maybe_null_str_(str), indentation, line);
4339 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4340 if C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line)
4344 dst->annotations[dst->num_entries].str = str;
4345 dst->annotations[dst->num_entries].indentation = indentation;
4346 dst->annotations[dst->num_entries].line = line;
4347 dst->annotations[dst->num_entries].orig = {};
/** Store an annotation with its indentation, line and original text in
 * the next free slot of dst.
 *
 * dst         : annotation set to append to; asserted to have a free
 *               slot.
 * str         : annotation text to store.
 * indentation : indentation recorded for the annotation.
 * line        : line recorded for the annotation.
 * orig        : original text, stored in the orig field of the slot.
 *
 * NOTE(review): the handling of the unlikely case where the first
 * stored annotation is on the same line, and the increment of
 * dst->num_entries, are not part of this view -- confirm against the
 * full file.
 */
4351template<
class EventHandler>
4352void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line,
csubstr orig)
4354 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, maybe_null_str_(str), indentation, line);
4355 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4356 if C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line)
4360 dst->annotations[dst->num_entries].str = str;
4361 dst->annotations[dst->num_entries].indentation = indentation;
4362 dst->annotations[dst->num_entries].line = line;
4363 dst->annotations[dst->num_entries].orig = orig;
/** Tell whether the pending annotations call for a key container.
 *
 * Returns true when more than one tag or more than one anchor is
 * pending.
 */
4367template<
class EventHandler>
4368bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4370 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
/**
 * Apply the pending tag and anchor to an empty key/value pair.
 *
 * When there is at least one pending tag or anchor, a single pending tag is set
 * as the key tag and a single pending anchor is set as the key anchor (each
 * list is cleared after use), and then both the key and the value are set to
 * empty plain scalars. More than one pending anchor is reported as the error
 * "too many anchors".
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the statement taken when nothing is pending, the
 * handling of more than one tag, and the returned values are not visible here.
 */
4373template<
class EventHandler>
4374bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
// bitwise-or of the two counts is zero only when both lists are empty
4376 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4378 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4379 if(m_pending_tags.num_entries)
4381 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4382 if C4_LIKELY(m_pending_tags.num_entries == 1)
4384 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4385 _clear_annotations(&m_pending_tags);
4392 if(m_pending_anchors.num_entries)
// report the anchor count here (the tag count was printed before, which
// disagreed with the "#anchors" label)
4394 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_anchors.num_entries);
4395 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4397 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4398 _clear_annotations(&m_pending_anchors);
4402 _c4err(
"too many anchors");
// the annotations belong to a pair whose key and value are both empty
4405 m_evt_handler->set_key_scalar_plain_empty();
4406 m_evt_handler->set_val_scalar_plain_empty();
/**
 * Apply the pending tag and anchor to the key scalar that is about to be set.
 *
 * A single pending tag becomes the key tag and a single pending anchor becomes
 * the key anchor; each list is cleared after being applied. More than one
 * pending anchor is reported as the error "too many anchors".
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the handling of more than one pending tag is not
 * visible here.
 */
4410template<
class EventHandler>
4411void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4413 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4414 if(m_pending_tags.num_entries)
4416 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
// the common case is exactly one tag for the key
4417 if C4_LIKELY(m_pending_tags.num_entries == 1)
4419 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4420 _clear_annotations(&m_pending_tags);
4427 if(m_pending_anchors.num_entries)
4429 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
// the common case is exactly one anchor for the key
4430 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4432 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4433 _clear_annotations(&m_pending_anchors);
4437 _c4err(
"too many anchors");
/**
 * Apply the pending tag and anchor to the value scalar that is about to be set.
 *
 * A single pending tag becomes the val tag and a single pending anchor becomes
 * the val anchor; each list is cleared after being applied. More than one
 * pending anchor is reported as the error "too many anchors".
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the handling of more than one pending tag is not
 * visible here.
 */
4442template<
class EventHandler>
4443void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4445 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4446 if(m_pending_tags.num_entries)
4448 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
// the common case is exactly one tag for the value
4449 if C4_LIKELY(m_pending_tags.num_entries == 1)
4451 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4452 _clear_annotations(&m_pending_tags);
4459 if(m_pending_anchors.num_entries)
4461 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
// the common case is exactly one anchor for the value
4462 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4464 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4465 _clear_annotations(&m_pending_anchors);
4469 _c4err(
"too many anchors");
/**
 * Decide which pending annotations belong to a block map that is about to be
 * started, and set them as val tag / val anchor.
 *
 * For tags and anchors alike:
 *  - with two pending entries, entry 0 is set on the map;
 *  - with one pending entry, it is set on the map (and the list cleared) only
 *    when it was found on a line before @p current_line.
 *
 * @param current_line line compared against the line stored with each single
 *                     pending annotation
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so anything done after setting entry 0 in the two-entry
 * case is not visible here.
 */
4474template<
class EventHandler>
4475void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4477 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4478 if(m_pending_tags.num_entries == 2)
4480 _c4dbgp(
"2 tags, setting entry 0");
4481 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4483 else if(m_pending_tags.num_entries == 1)
4485 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
// a tag from an earlier line is taken for the map
4486 if(m_pending_tags.annotations[0].line < current_line)
4488 _c4dbgp(
"...tag is for the map. setting it.");
4489 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4490 _clear_annotations(&m_pending_tags);
4494 if(m_pending_anchors.num_entries == 2)
4496 _c4dbgp(
"2 anchors, setting entry 0");
4497 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4499 else if(m_pending_anchors.num_entries == 1)
4501 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
// an anchor from an earlier line is taken for the map
4502 if(m_pending_anchors.annotations[0].line < current_line)
4504 _c4dbgp(
"...anchor is for the map. setting it.");
4505 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4506 _clear_annotations(&m_pending_anchors);
/**
 * Decide which pending annotations belong to a block map that is started in
 * key position, and set them as key tag / key anchor.
 *
 * The code switches on the number of pending tags, then on the number of
 * pending anchors. Two branches are visible for each:
 *  - a single entry is set on the map (and the list cleared) only when its
 *    line differs from the current position's line;
 *  - with two entries, entry 0 is set on the map.
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the `case` labels and anything following the
 * two-entry assignments are not visible here.
 */
4511template<
class EventHandler>
4512void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4514 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4515 switch(m_pending_tags.num_entries)
// the format string needs a third placeholder so the current line gets printed
4518 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline={}", prs_(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
// a tag found on a different line than the current one is taken for the map
4519 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4521 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4522 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4523 _clear_annotations(&m_pending_tags);
4527 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", prs_(m_pending_tags.annotations[0].str), prs_(m_pending_tags.annotations[1].str));
4528 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4531 switch(m_pending_anchors.num_entries)
// the format string needs a third placeholder so the current line gets printed
4534 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline={}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
// an anchor found on a different line than the current one is taken for the map
4535 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4537 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4538 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4539 _clear_annotations(&m_pending_anchors);
4543 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4544 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/**
 * After a block map was started, give the remaining pending annotations to the
 * first key and set the indentation of the map.
 *
 * When any tag or anchor is pending, the indentation is first re-selected via
 * `_select_indentation_from_annotations()`. Then, for tags and anchors alike:
 * a single pending entry is set on the key; with two pending entries it is
 * entry 1 that is set on the key. The list is cleared in both cases. Finally
 * the (possibly adjusted) indentation is applied with `_set_indentation()`.
 *
 * @param key_indentation indentation of the key; may be replaced by the value
 *                        selected from the annotations
 * @param key_line        line of the key, forwarded to the selection helper
 *
 * At most two pending tags and two pending anchors are allowed (asserted).
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the `case` labels of the two switches are not visible.
 */
4549template<
class EventHandler>
4550void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4552 _c4dbgp(
"annotations_after_start_mapblck");
4553 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4554 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4555 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// the annotations may change which indentation the map should use
4557 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4558 switch(m_pending_tags.num_entries)
4561 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", prs_(m_pending_tags.annotations[0].str));
4562 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4563 _clear_annotations(&m_pending_tags);
4566 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", prs_(m_pending_tags.annotations[0].str), prs_(m_pending_tags.annotations[1].str));
// with two tags, the second one is for the key
4567 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4568 _clear_annotations(&m_pending_tags);
4571 switch(m_pending_anchors.num_entries)
// one argument, one placeholder (the format previously had a dangling " -> {}")
4574 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchor: {}", m_pending_anchors.annotations[0].str);
4575 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4576 _clear_annotations(&m_pending_anchors);
4579 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
// with two anchors, the second one is for the key
4580 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4581 _clear_annotations(&m_pending_anchors);
4585 _set_indentation(key_indentation);
/**
 * Choose an indentation based on the pending annotations.
 *
 * At least one tag or anchor must be pending (asserted). The candidate starts
 * as the first pending anchor when there is one, otherwise as the first
 * pending tag. All pending anchors and then all pending tags are visited, and
 * each entry's line and indentation are compared against the candidate's.
 *
 * @param val_indentation indentation returned when the candidate annotation
 *                        lies on a line before @p val_line
 * @param val_line        line compared against the candidate's line
 * @return @p val_indentation if the candidate's line is before @p val_line,
 *         otherwise the candidate's own indentation.
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the statements executed when one of the comparisons
 * in the loops holds are not visible here.
 */
4588template<
class EventHandler>
4589size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
// bitwise-or of the two counts is nonzero when either list has entries
4591 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
// initial candidate: first anchor if any, else first tag
4593 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4594 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4596 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4597 if(ann.line > curr->line)
4599 else if(ann.indentation < curr->indentation)
4602 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4604 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4605 if(ann.line > curr->line)
4607 else if(ann.indentation < curr->indentation)
4610 return curr->line < val_line ? val_indentation : curr->indentation;
/**
 * Set @p alias as the key reference of the current node.
 *
 * The reference is set only when no anchor and no tag is pending; otherwise
 * the error "aliases cannot have anchors or tags" is raised.
 *
 * @param alias the alias text handed to the event handler's `set_key_ref()`
 */
4613template<
class EventHandler>
4614void ParseEngine<EventHandler>::_handle_keyref(
csubstr alias)
// bitwise-or of the two counts is zero only when both lists are empty
4616 if C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries))
4617 m_evt_handler->set_key_ref(alias);
4619 _c4err(
"aliases cannot have anchors or tags");
/**
 * Set `alias` as the value reference of the current node.
 *
 * The reference is set only when no anchor and no tag is pending; otherwise
 * the error "aliases cannot have anchors or tags" is raised.
 *
 * NOTE(review): the signature line of this function is not part of this
 * listing (the fused line numbers jump 4622 -> 4625); it is presumably the
 * val-side counterpart of `_handle_keyref(csubstr alias)` -- confirm against
 * the original file.
 */
4622template<class EventHandler>
// bitwise-or of the two counts is zero only when both lists are empty
4625 if C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries))
4626 m_evt_handler->set_val_ref(alias);
4628 _c4err(
"aliases cannot have anchors or tags");
/**
 * Resolve `tag` against the tag directives of the current document, using the
 * handler's tag cache and arena.
 *
 * Visible flow:
 *  - a cache hit returns the cached resolved tag right away;
 *  - otherwise the tag is resolved with `TagDirectives::resolve()` into the
 *    remaining arena space; when more room is needed the arena is grown with
 *    `_alloc_arena()` and the resolution is repeated;
 *  - when the event handler declares `requires_strings_on_buffers`, a resolved
 *    tag that lies neither in the arena nor in the source buffer is copied to
 *    the arena;
 *  - the result is added to the tag cache for the current document.
 *
 * NOTE(review): the signature line and several other lines of this function
 * are not part of this listing (the fused line numbers jump, e.g.
 * 4631 -> 4634), so the declarations of `tag`, `ret` and `bufsz`, the exact
 * branch conditions, and the final return statement are not visible here. The
 * function name is presumed from the debug messages -- confirm against the
 * original file.
 */
4631template<class EventHandler>
4634 _c4dbgpf(
"resolving tag: {} curr_doc={}", prs_(tag), m_evt_handler->m_curr_doc);
4639 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, prs_(ret.resolved));
4640 return ret.resolved;
4642 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", prs_(tag), m_evt_handler->m_curr_doc);
// first attempt: resolve into whatever space remains in the arena
4644 substr buf = m_evt_handler->arena_rem();
4645 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4646 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4647 m_evt_handler->m_curr->pos,
4648 m_evt_handler->m_stack.m_callbacks);
4649 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
// a null result string goes together with a required size larger than the buffer
4650 _c4assert((bufsz > buf.len) == (!ttag.str));
4651 _c4assert(!!bufsz == (ttag.len == bufsz));
// NOTE(review): the labels and arguments below look swapped -- `arena_rem().len`
// is printed as "arena.len" and `arena().len` as "arena.slack", while the
// "slack={}" message further down prints `arena_rem().len`. Confirm and fix.
4655 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
// `&tag` is passed along with the allocation request (presumably so that it
// stays valid if the arena is relocated -- confirm in `_alloc_arena()`)
4657 buf = _alloc_arena(bufsz, &tag);
// second attempt, now with the newly allocated buffer
4660 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4661 m_evt_handler->m_curr->pos,
4662 m_evt_handler->m_stack.m_callbacks);
4665 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4669 _c4dbgp(
"tag required arena. update size");
4672 (void)_alloc_arena(bufsz);
4674 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4675 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4677 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
// the resolved tag points outside both the arena and the source buffer
4679 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4681 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4682 buf = _alloc_arena(ttag.len, &tag);
4684 memcpy(buf.str, ttag.str, ttag.len);
4686 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4689 C4_SUPPRESS_WARNING_MSVC_POP
4690 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", prs_(tag), ttag.len, maybe_null_str_(ttag));
// remember the result for this document
4693 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
/**
 * Check a `%YAML` directive and extract its version.
 *
 * The directive must begin with "%YAML" (asserted). After skipping spaces and
 * tabs from offset 5, the code looks for a run of digits followed by '.', and
 * then a second run of digits which may extend to the end of the directive.
 * When that shape is found, `*directive` is trimmed with
 * `_set_first_strict()` to end after the minor number and `*version` is set
 * to the "major.minor" range.
 *
 * @param directive in/out: the directive text; trimmed on success
 * @param version   out: the version substring, set on success
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the return statements are not visible here.
 */
4697template<
class EventHandler>
4698bool ParseEngine<EventHandler>::_validate_directive_yaml(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT
version)
const
4700 _c4assert(directive->begins_with(
"%YAML"));
// 5 is the length of "%YAML": skip the blanks after the directive name
4701 size_t version_start = directive->first_not_of(
" \t", 5);
4702 if(version_start !=
npos)
4704 csubstr digits =
"0123456789";
// end of the major number: first non-digit, which must be the '.'
4705 size_t major_end = directive->
first_not_of(digits, version_start);
4706 if(major_end !=
npos && directive->str[major_end] ==
'.')
// end of the minor number: first non-digit after the '.', or end of directive
4708 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4709 if(minor_end ==
npos)
4710 minor_end = directive->len;
4711 _set_first_strict(*directive, minor_end);
4712 *
version = directive->range(version_start, minor_end);
4713 _c4dbgpf(
"%YAML: version={} full={}", *
version, prs_(*directive,
true));
/**
 * Check a `%TAG` directive and extract its handle and prefix.
 *
 * The directive must begin with "%TAG" (asserted). After skipping whitespace
 * from offset 4, the handle must start with '!' and run up to the next
 * whitespace; the prefix is the following run of non-whitespace, which may
 * extend to the end of the directive. When that shape is found, `*directive`
 * is trimmed with `_set_first_strict()` to end after the prefix, and
 * `*handle` / `*prefix` are set to the corresponding ranges.
 *
 * @param directive in/out: the directive text; trimmed on success
 * @param handle    out: the tag handle, set on success
 * @param prefix    out: the tag prefix, set on success
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the definition of `whitespace` and the return
 * statements are not visible here.
 */
4720template<
class EventHandler>
4721bool ParseEngine<EventHandler>::_validate_directive_tag(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT handle,
csubstr *C4_RESTRICT prefix)
const
4723 _c4assert(directive->begins_with(
"%TAG"));
// 4 is the length of "%TAG": skip the blanks after the directive name
4725 size_t handle_start = directive->
first_not_of(whitespace, 4);
4726 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
// the handle runs until the next whitespace, which must exist
4728 size_t handle_end = directive->first_of(whitespace, handle_start);
4729 if(handle_end !=
npos)
4731 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4732 if(prefix_start !=
npos)
// the prefix runs until the next whitespace, or to the end of the directive
4734 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4735 if(prefix_end ==
npos)
4736 prefix_end = directive->len;
4737 _set_first_strict(*directive, prefix_end);
4738 *handle = directive->range(handle_start, handle_end);
4739 *prefix = directive->range(prefix_start, prefix_end);
4740 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, prs_(*directive,
true));
/**
 * Process a directive line (`%TAG ...` or `%YAML ...`).
 *
 * The current line remainder must begin with '%' and @p directive must start
 * at the same address (both asserted). Visible flow:
 *  - `%TAG`: validated with `_validate_directive_tag()`, then passed to the
 *    handler's `add_directive_tag(handle, prefix)`;
 *  - `%YAML`: validated with `_validate_directive_yaml()`; a second `%YAML`
 *    directive is an error; otherwise `m_has_directives_yaml` is set and the
 *    version is passed to `add_directive_yaml()`;
 *  - `m_has_directives` is set, the line is advanced, and whatever remains on
 *    the line must be empty or begin with '#'.
 * Failures set `err` to a message and jump to the `directive_error` label.
 *
 * @param directive the directive text, starting at the '%'
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the head of the `isdirective` helper, the
 * declarations of `handle`, `prefix`, `version`, `rem` and `pos`, and the
 * statement run when `err` is set are not visible here.
 */
4749template<
class EventHandler>
4750void ParseEngine<EventHandler>::_handle_directive(
csubstr directive)
4752 _c4dbgpf(
"handle_directive: rem={}", prs_(directive,
true));
4753 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4754 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4755 const char *err =
nullptr;
// helper: the name matches only when it is followed by nothing, a space or a tab
4759 if(str.begins_with(dir))
4762 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4766 if(isdirective(directive,
"%TAG"))
4770 if C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix))
4772 err =
"invalid %TAG directive";
4773 goto directive_error;
4775 m_evt_handler->add_directive_tag(handle, prefix);
4777 else if(isdirective(directive,
"%YAML"))
4780 if C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version))
4782 err =
"invalid %YAML directive";
4783 goto directive_error;
// only one %YAML directive is accepted
4785 if C4_UNLIKELY(m_has_directives_yaml)
4787 err =
"multiple %YAML directives";
4788 goto directive_error;
4790 m_has_directives_yaml =
true;
4791 m_evt_handler->add_directive_yaml(
version);
4793 m_has_directives =
true;
4794 rem = m_evt_handler->m_curr->line_contents.rem;
// when no position was found, consume the whole remainder
4796 pos = pos !=
npos ? pos : rem.len;
4797 _line_progressed(pos);
4799 _c4dbgpf(
"handle_directive: rest={}", prs_(rem));
// after the directive, only a comment may follow on the line
4800 if C4_UNLIKELY(rem.len && !rem.begins_with(
'#'))
4802 err =
"invalid tokens after directive";
4803 goto directive_error;
4806 if C4_UNLIKELY(err !=
nullptr)
/**
 * Look at the start of the current line remainder for a byte order mark and
 * advance the line past it.
 *
 * Checked in this order, each branch advancing the line by the given count:
 *  - UTF32BE: bytes 00 00 FE FF, or three zero bytes followed by an ASCII
 *    character (4 bytes);
 *  - UTF32LE: bytes FF FE 00 00, or an ASCII character followed by three zero
 *    bytes (4 bytes);
 *  - UTF16BE: bytes FE FF, or a zero byte followed by an ASCII character
 *    (2 bytes);
 *  - UTF16LE: bytes FF FE, or an ASCII character followed by a zero byte
 *    (2 bytes);
 *  - UTF8: bytes EF BB BF (3 bytes).
 *
 * NOTE(review): the signature line and several other lines of this function
 * are not part of this listing (the fused line numbers jump, e.g.
 * 4810 -> 4813), so the function's name and return type, and whatever each
 * branch does besides logging and advancing the line, are not visible here.
 */
4810template<class EventHandler>
4813 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// the remainder without its first byte, used by the little-endian checks
4816 const csubstr rest = rem.sub(1);
// ASCII here means 0x01..0x7f: the zero byte is excluded
4818 #define _rymlisascii(c) ((c) >
'\0' && (c) <=
'\x7f')
4819 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4821 _c4dbgp(
"byte order mark: UTF32BE");
4823 _line_progressed(4);
4827 else if(rem.begins_with(
csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4829 _c4dbgp(
"byte order mark: UTF32LE");
4831 _line_progressed(4);
4835 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4837 _c4dbgp(
"byte order mark: UTF16BE");
4839 _line_progressed(2);
4843 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4845 _c4dbgp(
"byte order mark: UTF16LE");
4847 _line_progressed(2);
4851 else if(rem.begins_with(
"\xef\xbb\xbf"))
4853 _c4dbgp(
"byte order mark: UTF8");
4855 _line_progressed(3);
/**
 * Validate a detected byte order mark against the parser's encoding state.
 *
 * While `m_encoding` is still `NOBOM`, the mark is checked for being either
 * UTF8 or located at the very start of the source buffer; the error message
 * "non-UTF8 byte order mark can appear only at the beginning of the file"
 * belongs to this case. When an encoding was already recorded, a different
 * @p enc raises "byte order mark can only be set once".
 *
 * @param enc the encoding implied by the byte order mark that was found
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump, e.g. 4869 -> 4872), so the statement that accepts the
 * mark and the `else` that presumably guards the first error are not visible
 * here.
 */
4864template<
class EventHandler>
4865void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4867 if(m_encoding ==
NOBOM)
// acceptable: UTF8 anywhere, or any mark at the first byte of the buffer
4869 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4872 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4874 else if(enc != m_encoding)
4876 _c4err(
"byte order mark can only be set once");
/**
 * Parse the contents of a JSON flow sequence, one token at a time.
 *
 * Preconditions (asserted): the state has RSEQ and RFLOW, has no RKEY, and has
 * exactly one of RVAL / RNXT.
 *
 * Visible behavior per state:
 *  - RVAL: a double-quoted scalar is scanned, filtered and set as the value;
 *    a child flow sequence or flow map is begun; the sequence is ended; or a
 *    plain scalar is scanned with `_scan_scalar_seq_json()` and set as the
 *    value.
 *  - RNXT: the next value is announced with `add_sibling()`, or the sequence
 *    is ended.
 * When the line is exhausted and the file is finished too, the error
 * "missing terminating ]" is raised.
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so the conditions selecting each branch, the state
 * transitions and the loop/label structure are not visible here.
 */
4883template<
class EventHandler>
4884void ParseEngine<EventHandler>::_handle_seq_json()
4887 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4889 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4890 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4891 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4892 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of RVAL / RNXT must be set
4893 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4895 _handle_flow_skip_whitespace();
4896 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state: a value is expected
4902 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4903 const char first = rem.str[0];
// label corrected: this is the seq-json handler (the message said "mapjson")
4904 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4909 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4910 ScannedScalar sc = _scan_scalar_dquot();
4911 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4912 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4918 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4920 m_evt_handler->begin_seq_val_flow();
4922 _line_progressed(1);
4927 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4929 m_evt_handler->begin_map_val_flow();
4931 _line_progressed(1);
4932 goto seqjson_finish;
4936 _c4dbgp(
"seqjson[RVAL]: end!");
4939 _line_progressed(1);
4941 goto seqjson_finish;
4947 if(_scan_scalar_seq_json(&sc))
4949 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4950 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4951 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RNXT state: a separator or the end of the sequence is expected
4963 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4964 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4965 const char first = rem.str[0];
// label corrected: this is the seq-json handler (the message said "mapjson")
4966 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4971 _c4dbgp(
"seqjson[RNXT]: expect next val");
4973 m_evt_handler->add_sibling();
4974 _line_progressed(1);
4979 _c4dbgp(
"seqjson[RNXT]: end!");
4981 _line_progressed(1);
4982 goto seqjson_finish;
4990 _c4dbgt(
"seqjson: go again", 0);
4991 if(_finished_line())
4993 if C4_LIKELY(!_finished_file())
// the input ended while the sequence was still open
5001 _c4err(
"missing terminating ]");
5007 _c4dbgp(
"seqjson: finish");
/**
 * Parse the contents of a JSON flow map, one token at a time.
 *
 * Preconditions (asserted): the state has RMAP and RFLOW, has no QMRK, and has
 * exactly one of RKEY / RKCL / RVAL / RNXT.
 *
 * Visible behavior per state:
 *  - RKEY: a double-quoted scalar is scanned, filtered and set as the key, or
 *    the map is ended.
 *  - RVAL: a double-quoted scalar is set as the value; a child flow sequence
 *    or flow map is begun (taking the parent's indentation); or a plain scalar
 *    is scanned with `_scan_scalar_map_json()` and set as the value.
 *  - RKCL: the colon separating key and value is consumed.
 *  - RNXT: ',' announces the next key/value with `add_sibling()`; '}' ends the
 *    map.
 * When the line is exhausted and the file is finished too, the error
 * "missing terminating }" is raised.
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so several branch conditions, the state transitions and
 * the loop/label structure are not visible here.
 */
5013template<
class EventHandler>
5014void ParseEngine<EventHandler>::_handle_map_json()
5017 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5019 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5020 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5021 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5022 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of the four state flags must be set
5023 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5025 _handle_flow_skip_whitespace();
5026 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY state (the other state flags are asserted off): a key is expected
5032 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5033 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5034 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5035 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5036 const char first = rem.str[0];
5037 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5042 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5043 ScannedScalar sc = _scan_scalar_dquot();
5044 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5045 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5051 _c4dbgp(
"mapjson[RKEY]: end!");
5053 _line_progressed(1);
5054 goto mapjson_finish;
// ---- RVAL state: a value is expected
5060 else if(has_any(
RVAL))
5062 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5063 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5064 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5065 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5066 const char first = rem.str[0];
5067 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5072 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5073 ScannedScalar sc = _scan_scalar_dquot();
5074 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5075 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5081 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5083 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation of its parent
5084 _set_indentation(m_evt_handler->m_parent->indref);
5086 _line_progressed(1);
5087 goto mapjson_finish;
5091 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5093 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation of its parent
5094 _set_indentation(m_evt_handler->m_parent->indref);
5096 _line_progressed(1);
5103 if(_scan_scalar_map_json(&sc))
5105 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5106 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5107 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RKCL state: the colon after the key is expected
5118 else if(has_any(
RKCL))
5120 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5121 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5122 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5123 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5124 const char first = rem.str[0];
5125 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5128 _c4dbgp(
"mapjson[RKCL]: found the colon");
5130 _line_progressed(1);
// ---- RNXT state: a separator or the end of the map is expected
5137 else if(has_any(
RNXT))
5139 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5140 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5141 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5142 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5143 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5144 if(rem.begins_with(
','))
5146 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5147 m_evt_handler->add_sibling();
5149 _line_progressed(1);
5151 else if(rem.begins_with(
'}'))
5153 _c4dbgp(
"mapjson[RNXT]: end!");
5155 _line_progressed(1);
5156 goto mapjson_finish;
5165 _c4dbgt(
"mapjson: go again", 0);
5166 if(_finished_line())
5168 if C4_LIKELY(!_finished_file())
// the input ended while the map was still open
5176 _c4err(
"missing terminating }");
5182 _c4dbgp(
"mapjson: finish");
/**
 * Parse a map that appears as an entry of a flow sequence (RSEQIMAP state),
 * one token at a time.
 *
 * Preconditions (asserted): the state has RSEQIMAP, has no RKEY, has exactly
 * one of RVAL / RNXT / QMRK / RKCL, and the handler stack holds at least three
 * entries.
 *
 * Visible behavior per state:
 *  - RVAL: a single-quoted, double-quoted or plain scalar is set as the value
 *    after the pending annotations are applied; '[' or '{' begins a child flow
 *    container; ',' or ']' sets an empty value; '*' is handled as a value
 *    reference; '&' and '!' store a pending anchor / tag.
 *  - RNXT: ',' or ']' finishes the map.
 *  - QMRK: a single-quoted, double-quoted or plain scalar is set as the key;
 *    '[' or '{' begins a child flow container in key position; ',' or ']'
 *    sets an empty key and an empty value; '&' sets the key anchor; '*' is
 *    handled as a key reference.
 *  - RKCL: ':' is consumed; ',' or ']' sets an empty value and finishes.
 *
 * NOTE(review): this listing drops some lines of the original file (the fused
 * line numbers jump), so several branch conditions, the state transitions,
 * the declarations of `sc` and `tag`, and the loop/label structure are not
 * visible here.
 */
5188template<
class EventHandler>
5189void ParseEngine<EventHandler>::_handle_seq_imap()
5192 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5194 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5195 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5196 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
// exactly one of the four state flags must be set
5197 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5198 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5200 _handle_flow_skip_whitespace();
5201 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state: a value is expected
5207 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5208 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5209 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5210 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5211 const char first = rem.str[0];
5212 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
5216 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5217 sc = _scan_scalar_squot();
5218 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
// pending tag/anchor are applied to the value before the scalar is set
5219 _handle_annotations_before_blck_val_scalar();
5220 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5222 goto seqimap_finish;
5224 else if(first ==
'"')
5226 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5227 sc = _scan_scalar_dquot();
5228 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5229 _handle_annotations_before_blck_val_scalar();
5230 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5232 goto seqimap_finish;
5235 else if(_scan_scalar_plain_map_flow(&sc))
5237 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5238 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5239 _handle_annotations_before_blck_val_scalar();
5240 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5242 goto seqimap_finish;
5244 else if(first ==
'[')
5246 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5248 _handle_annotations_before_blck_val_scalar();
5249 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation of its parent
5251 _set_indentation(m_evt_handler->m_parent->indref);
5252 _line_progressed(1);
5253 goto seqimap_finish;
5255 else if(first ==
'{')
5257 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5259 _handle_annotations_before_blck_val_scalar();
5260 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation of its parent
5262 _set_indentation(m_evt_handler->m_parent->indref);
5263 _line_progressed(1);
5264 goto seqimap_finish;
5266 else if(first ==
',' || first ==
']')
5268 _c4dbgp(
"seqimap[RVAL]: finish without val.");
// the value is missing: set it to an empty plain scalar
5269 _handle_annotations_before_blck_val_scalar();
5270 m_evt_handler->set_val_scalar_plain_empty();
5272 goto seqimap_finish;
5274 else if(first ==
'*')
5276 csubstr ref = _scan_ref_seq();
5277 _c4dbgpf(
"seqimap[RVAL]: ref! {}", prs_(ref));
5278 _handle_valref(ref);
5281 else if(first ==
'&')
5283 csubstr anchor = _scan_anchor();
5284 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", prs_(anchor));
// kept pending until the value it belongs to is set
5285 _add_annotation(&m_pending_anchors, anchor);
5287 else if(first ==
'!')
5290 _c4dbgpf(
"seqimap[RVAL]: tag! {}", prs_(tag));
// kept pending until the value it belongs to is set
5291 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: only the end of the entry is expected
5298 else if(has_any(
RNXT))
5300 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5301 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5302 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5303 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5304 const char first = rem.str[0];
5305 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5306 if(first ==
',' || first ==
']')
5310 _c4dbgp(
"seqimap: done");
5312 goto seqimap_finish;
// ---- QMRK state: a key is expected
5319 else if(has_any(
QMRK))
5321 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5322 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5323 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5324 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5325 const char first = rem.str[0];
5326 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
5330 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5331 sc = _scan_scalar_squot();
5332 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5333 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5337 else if(first ==
'"')
5339 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5340 sc = _scan_scalar_dquot();
5341 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5342 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5347 else if(_scan_scalar_plain_map_flow(&sc))
5349 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5350 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5351 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5355 else if(first ==
'[')
5357 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5359 m_evt_handler->begin_seq_key_flow();
// the child container takes the indentation of its parent
5361 _set_indentation(m_evt_handler->m_parent->indref);
5362 _line_progressed(1);
5363 goto seqimap_finish;
5365 else if(first ==
'{')
5367 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5369 m_evt_handler->begin_map_key_flow();
// the child container takes the indentation of its parent
5371 _set_indentation(m_evt_handler->m_parent->indref);
5372 _line_progressed(1);
5373 goto seqimap_finish;
5375 else if(first ==
',' || first ==
']')
5377 _c4dbgp(
"seqimap[QMRK]: finish without key.");
// both key and value are missing: set both to empty plain scalars
5378 m_evt_handler->set_key_scalar_plain_empty();
5379 m_evt_handler->set_val_scalar_plain_empty();
5381 goto seqimap_finish;
5383 else if(first ==
'&')
5385 csubstr anchor = _scan_anchor();
5386 _c4dbgp(
"seqimap[QMRK]: anchor!");
// here the anchor is set on the key directly, not stored as pending
5387 m_evt_handler->set_key_anchor(anchor);
5389 else if(first ==
'*')
5391 csubstr ref = _scan_ref_seq();
5392 _c4dbgp(
"seqimap[QMRK]: ref!");
5393 _handle_keyref(ref);
// ---- RKCL state: the colon after the key is expected
5401 else if(has_any(
RKCL))
5403 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5404 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5405 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5406 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5407 const char first = rem.str[0];
5408 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
5411 _c4dbgp(
"seqimap[RKCL]: found ':'");
5413 _line_progressed(1);
5416 else if(first ==
',' || first ==
']')
// this branch is also taken for ']', although the message only mentions ','
5418 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5419 m_evt_handler->set_val_scalar_plain_empty();
5421 goto seqimap_finish;
5430 _c4dbgt(
"seqimap: go again", 0);
5431 if(_finished_line())
5433 if C4_LIKELY(!_finished_file())
5447 _c4dbgp(
"seqimap: finish");
// Handles parsing while inside a flow sequence.
//
// Preconditions (asserted below): RSEQ and RFLOW are set, RKEY is not set,
// exactly one of RVAL / RNXT is set, and the current indref is not npos.
//
// After handling a possible line beginning and skipping whitespace, the
// function dispatches on the first remaining character of the line:
//  - RVAL state: a value is expected. Quoted and plain scalars are scanned,
//    filtered and reported through set_val_scalar_*(); '[' and '{' open
//    child flow containers; ']' ends the sequence; '*', '&' and '!' handle
//    references, anchors and tags; ':' and '?' open a flow map inside the
//    sequence; ',' reports an empty value if annotations are pending.
//  - RNXT state: a separator is expected. ',' adds a sibling, ']' ends the
//    sequence, and ':' may turn the previous value into the first key of a
//    new flow map.
5453template<
class EventHandler>
5454void ParseEngine<EventHandler>::_handle_seq_flow()
5457 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5459 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5460 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5461 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5462 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
5463 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5464 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
5466 if(m_evt_handler->m_curr->at_line_beginning())
5468 _handle_flow_line_beginning();
5471 _handle_flow_skip_whitespace();
5472 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL state: a value is expected; dispatch on the first character ----
5477 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5478 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5482 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5483 sc = _scan_scalar_squot();
5484 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5485 _handle_annotations_before_blck_val_scalar();
5486 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5488 _mark_seqflow_val_end();
5490 else if(first ==
'"')
5492 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5493 sc = _scan_scalar_dquot();
5494 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5495 _handle_annotations_before_blck_val_scalar();
5496 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5498 _mark_seqflow_val_end();
5501 else if(_scan_scalar_plain_seq_flow(&sc))
5503 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5504 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5505 _handle_annotations_before_blck_val_scalar();
5506 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5508 _mark_seqflow_val_end();
5510 else if(first ==
'[')
5512 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5514 _handle_annotations_before_blck_val_scalar();
5515 m_evt_handler->begin_seq_val_flow();
5516 _set_indentation(m_evt_handler->m_parent->indref);
5518 _line_progressed(1);
5520 else if(first ==
'{')
5522 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5524 _handle_annotations_before_blck_val_scalar();
5525 m_evt_handler->begin_map_val_flow();
5526 _set_indentation(m_evt_handler->m_parent->indref);
5528 _line_progressed(1);
5529 goto seqflow_finish;
5531 else if(first ==
']')
5533 _c4dbgp(
"seqflow[RVAL]: end!");
// NOTE(review): this test uses bitwise '|' while the ',' branch further down
// performs the same "any pending annotation" test with logical '||'.
// Consider unifying the two forms.
5534 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5536 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5537 _handle_annotations_before_blck_val_scalar();
5538 m_evt_handler->set_val_scalar_plain_empty();
5540 _line_progressed(1);
5542 goto seqflow_finish;
5544 else if(first ==
'*')
5546 csubstr ref = _scan_ref_seq();
5547 _c4dbgpf(
"seqflow[RVAL]: ref! {}", prs_(ref));
5548 _handle_valref(ref);
5551 else if(first ==
'&')
5553 csubstr anchor = _scan_anchor();
5554 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", prs_(anchor));
5555 _add_annotation(&m_pending_anchors, anchor);
5557 else if(first ==
'!')
5560 _c4dbgpf(
"seqflow[RVAL]: tag! {}", prs_(tag));
5561 _add_annotation(&m_pending_tags, tag);
5563 else if(first ==
':')
5565 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5567 m_evt_handler->begin_map_val_flow();
5568 _set_indentation(m_evt_handler->m_parent->indref);
5569 _handle_annotations_before_blck_key_scalar();
5570 m_evt_handler->set_key_scalar_plain_empty();
5572 _line_progressed(1);
5573 goto seqflow_finish;
5575 else if(first ==
'?')
5577 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5579 m_evt_handler->begin_map_val_flow();
5580 _set_indentation(m_evt_handler->m_parent->indref);
5582 _line_progressed(1);
5583 _maybe_skip_whitespace_tokens();
5584 goto seqflow_finish;
5586 else if(first ==
',')
5588 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5590 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5591 _handle_annotations_before_blck_val_scalar();
5592 m_evt_handler->set_val_scalar_plain_empty();
5594 _mark_seqflow_val_end();
// ---- RNXT state: a value was read; expect ',', ']' or ':' ----
5608 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5609 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5610 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5613 _c4dbgp(
"seqflow[RNXT]: expect next val");
5615 m_evt_handler->add_sibling();
5616 _line_progressed(1);
// A '#' immediately after the comma is rejected with a parse error.
5617 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5619 _c4err(
"parse error: invalid comment after comma");
5621 _mark_seqflow_val_end();
5623 else if(first ==
']')
5625 _c4dbgp(
"seqflow[RNXT]: end!");
5626 _line_progressed(1);
5628 goto seqflow_finish;
5630 else if(first ==
':')
5632 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
// The previous value becomes the first key of a new flow map only when the
// colon is on the line recorded in m_prev_val_end.
// NOTE(review): the sentinel compared here is NONE, whereas the constructors
// in this file initialise m_prev_val_end to npos. Confirm the two constants
// are interchangeable for this member's type.
5633 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5635 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5636 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5637 _set_indentation(m_evt_handler->m_parent->indref);
5638 _line_progressed(1);
5640 goto seqflow_finish;
// ---- No finish condition reached on this pass ----
// presumably the error below is raised when the file ends before ']' is seen;
// the surrounding control flow is not fully visible here -- TODO confirm.
5654 _c4dbgt(
"seqflow: go again", 0);
5655 if(_finished_line())
5657 if C4_LIKELY(!_finished_file())
5665 _c4err(
"missing terminating ]");
5671 _c4dbgp(
"seqflow: finish");
// Handles parsing while inside a flow map.
//
// Preconditions (asserted below): RMAP and RFLOW are set, and exactly one of
// the state flags RKEY, RKCL, RVAL, RNXT, QMRK is set.
//
// After handling a possible line beginning and skipping whitespace, the
// function dispatches on the active state and then on the first remaining
// character of the line:
//  - RKEY: a key is expected (scalar, '?', ':', ',', '}', '&', '!', '*',
//          or a '[' / '{' container used as key).
//  - RKCL: a key was read; expects ':', or '}' / ',' with a missing value.
//  - RVAL: a value is expected (scalar, '[' / '{' container, '}', ',',
//          '*', '&', '!').
//  - RNXT: a key/value pair was read; expects ',' or '}'.
//  - QMRK: an explicit key is expected.
5677template<
class EventHandler>
5678void ParseEngine<EventHandler>::_handle_map_flow()
5681 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5683 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5684 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5685 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
5686 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
5688 if(m_evt_handler->m_curr->at_line_beginning())
5690 _handle_flow_line_beginning();
5693 _handle_flow_skip_whitespace();
5694 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY state: a key is expected ----
5699 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5700 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5701 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5702 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5703 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5704 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
5708 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5709 sc = _scan_scalar_squot();
5710 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5711 _handle_annotations_before_blck_key_scalar();
5712 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5715 else if(first ==
'"')
5717 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5718 sc = _scan_scalar_dquot();
5719 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5720 _handle_annotations_before_blck_key_scalar();
5721 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5725 else if(_scan_scalar_plain_map_flow(&sc))
5727 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5728 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5729 _handle_annotations_before_blck_key_scalar();
5730 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5733 else if(first ==
'?')
5735 _c4dbgp(
"mapflow[RKEY]: explicit key");
5736 _handle_annotations_before_blck_key_scalar();
5738 _line_progressed(1);
5739 _maybe_skip_whitespace_tokens();
5741 else if(first ==
':')
5743 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5744 _handle_annotations_before_blck_key_scalar();
5745 m_evt_handler->set_key_scalar_plain_empty();
5747 _line_progressed(1);
5748 _maybe_skip_whitespace_tokens();
5750 else if(first ==
',')
5752 _c4dbgp(
"mapflow[RKEY]: comma!");
// A comma in key position is an error unless the helper below returns true.
5753 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5754 _c4err(
"unexpected comma");
5758 else if(first ==
'}')
5760 _c4dbgp(
"mapflow[RKEY]: end!");
// Here the helper's result is deliberately discarded (cast to void).
5761 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5762 _line_progressed(1);
5764 goto mapflow_finish;
5766 else if(first ==
'&')
5768 csubstr anchor = _scan_anchor();
5769 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", prs_(anchor));
5770 _add_annotation(&m_pending_anchors, anchor);
5772 else if(first ==
'!')
5775 _c4dbgpf(
"mapflow[RKEY]: tag! {}", prs_(tag));
5776 _add_annotation(&m_pending_tags, tag);
5778 else if(first ==
'*')
5780 csubstr ref = _scan_ref_map();
5781 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", prs_(ref));
5782 _handle_keyref(ref);
5785 else if(first ==
'[')
5790 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5791 _handle_annotations_before_blck_key_scalar();
5793 m_evt_handler->begin_seq_key_flow();
5795 _set_indentation(m_evt_handler->m_parent->indref);
5796 _line_progressed(1);
5797 goto mapflow_finish;
5799 else if(first ==
'{')
5804 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5805 _handle_annotations_before_blck_key_scalar();
5807 m_evt_handler->begin_map_key_flow();
5809 _set_indentation(m_evt_handler->m_parent->indref);
5810 _line_progressed(1);
// ---- RKCL state: a key was read; expect ':' (or '}' / ',' with no value) ----
5818 else if(has_any(
RKCL))
5820 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5821 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5822 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5823 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5824 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5825 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5828 _c4dbgp(
"mapflow[RKCL]: found the colon");
5830 _line_progressed(1);
5832 else if(first ==
'}')
5834 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5836 m_evt_handler->set_val_scalar_plain_empty();
5837 _line_progressed(1);
5839 goto mapflow_finish;
5841 else if(first ==
',')
5843 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5844 m_evt_handler->set_val_scalar_plain_empty();
5845 m_evt_handler->add_sibling();
5847 _line_progressed(1);
5848 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5850 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL state: a value is expected ----
5858 else if(has_any(
RVAL))
5860 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5861 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5862 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5863 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5864 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5865 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
5869 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5870 sc = _scan_scalar_squot();
5871 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5872 _handle_annotations_before_blck_val_scalar();
5873 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5876 else if(first ==
'"')
5878 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5879 sc = _scan_scalar_dquot();
5880 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5881 _handle_annotations_before_blck_val_scalar();
5882 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5886 else if(_scan_scalar_plain_map_flow(&sc))
5888 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5889 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5890 _handle_annotations_before_blck_val_scalar();
5891 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5894 else if(first ==
'[')
5896 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5898 _handle_annotations_before_blck_val_scalar();
5899 m_evt_handler->begin_seq_val_flow();
5900 _set_indentation(m_evt_handler->m_parent->indref);
5902 _line_progressed(1);
5903 goto mapflow_finish;
5905 else if(first ==
'{')
5907 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5909 _handle_annotations_before_blck_val_scalar();
5910 m_evt_handler->begin_map_val_flow();
5911 _set_indentation(m_evt_handler->m_parent->indref);
5913 _line_progressed(1);
5916 else if(first ==
'}')
5918 _c4dbgp(
"mapflow[RVAL]: end!");
5919 _handle_annotations_before_blck_val_scalar();
5920 m_evt_handler->set_val_scalar_plain_empty();
5921 _line_progressed(1);
5923 goto mapflow_finish;
5925 else if(first ==
',')
5927 _c4dbgp(
"mapflow[RVAL]: empty val!");
5928 _handle_annotations_before_blck_val_scalar();
5929 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the debug strings in the '*' and '&' branches below say
// "key ref" / "key anchor" although this is the RVAL state and the '*' branch
// calls _handle_valref(). They look like copy/paste labels -- confirm.
5933 else if(first ==
'*')
5935 csubstr ref = _scan_ref_map();
5936 _c4dbgpf(
"mapflow[RVAL]: key ref! {}", prs_(ref));
5937 _handle_valref(ref);
5940 else if(first ==
'&')
5942 csubstr anchor = _scan_anchor();
5943 _c4dbgpf(
"mapflow[RVAL]: key anchor! {}", prs_(anchor));
5944 _add_annotation(&m_pending_anchors, anchor);
5946 else if(first ==
'!')
5949 _c4dbgpf(
"mapflow[RVAL]: tag! {}", prs_(tag));
5950 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: a key/value pair was read; expect ',' or '}' ----
5957 else if(has_any(
RNXT))
5959 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5960 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5961 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5962 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5963 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5964 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5966 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5967 m_evt_handler->add_sibling();
5969 _line_progressed(1);
5970 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5972 _c4err(
"parse error: invalid comment after comma");
5975 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5977 _c4dbgp(
"mapflow[RNXT]: end!");
5978 _line_progressed(1);
5980 goto mapflow_finish;
// ---- QMRK state: an explicit key is expected ----
5987 else if(has_any(
QMRK))
5989 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5990 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5991 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5992 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5993 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5994 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
5998 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5999 sc = _scan_scalar_squot();
6000 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6001 _handle_annotations_before_blck_key_scalar();
6002 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6005 else if(first ==
'"')
6007 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6008 sc = _scan_scalar_dquot();
6009 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6010 _handle_annotations_before_blck_key_scalar();
6011 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6015 else if(_scan_scalar_plain_map_flow(&sc))
6017 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6018 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6019 _handle_annotations_before_blck_key_scalar();
6020 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6023 else if(first ==
':')
6025 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6026 _handle_annotations_before_blck_key_scalar();
6027 m_evt_handler->set_key_scalar_plain_empty();
6029 _line_progressed(1);
6030 _maybe_skip_whitespace_tokens();
6032 else if(first ==
'}')
6034 _c4dbgp(
"mapflow[QMRK]: end!");
6035 _handle_annotations_before_blck_key_scalar();
6036 m_evt_handler->set_key_scalar_plain_empty();
6037 m_evt_handler->set_val_scalar_plain_empty();
6039 _line_progressed(1);
6040 goto mapflow_finish;
6042 else if(first ==
',')
6044 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6045 _handle_annotations_before_blck_key_scalar();
6046 m_evt_handler->set_key_scalar_plain_empty();
6047 m_evt_handler->set_val_scalar_plain_empty();
6050 else if(first ==
'&')
6052 csubstr anchor = _scan_anchor();
6053 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", prs_(anchor));
6054 _add_annotation(&m_pending_anchors, anchor);
6056 else if(first ==
'*')
6058 csubstr ref = _scan_ref_map();
6059 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", prs_(ref));
6060 _handle_keyref(ref);
6063 else if(first ==
'[')
6068 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6070 _handle_annotations_before_blck_key_scalar();
6071 m_evt_handler->begin_seq_key_flow();
6073 _set_indentation(m_evt_handler->m_parent->indref);
6074 _line_progressed(1);
6075 goto mapflow_finish;
6077 else if(first ==
'{')
6082 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6084 _handle_annotations_before_blck_key_scalar();
6085 m_evt_handler->begin_map_key_flow();
6086 _set_indentation(m_evt_handler->m_parent->indref);
6088 _line_progressed(1);
6091 else if(first ==
'!')
6094 _c4dbgpf(
"mapflow[QMRK]: tag! {}", prs_(tag));
6095 _add_annotation(&m_pending_tags, tag);
// ---- No finish condition reached on this pass ----
// presumably the error below is raised when the file ends before '}' is seen;
// the surrounding control flow is not fully visible here -- TODO confirm.
6104 _c4dbgt(
"mapflow: go again", 0);
6105 if(_finished_line())
6107 if C4_LIKELY(!_finished_file())
6115 _c4err(
"missing terminating }");
6121 _c4dbgp(
"mapflow: finish");
// Handles parsing while inside a block sequence.
//
// Preconditions (asserted below): RSEQ and RBLCK are set, and exactly one of
// RVAL / RNXT is set.
//
//  - RVAL state: a value is expected. At the beginning of a line the
//    indentation is compared against the current reference first: larger
//    indentation is skipped, equal indentation yields an empty value, and
//    smaller indentation pops out of this sequence. Otherwise the first
//    character selects the handling: quoted / block / plain scalars, flow
//    containers, a nested '-' sequence, ':' or '?' starting a block map,
//    and '&' / '*' / '!' for anchors, references and tags. A scalar or
//    reference followed by a colon starts a block map with that item as
//    its first key.
//  - RNXT state: the next entry is expected. Indentation is checked at line
//    beginning; then a '-' token adds a sibling, a document-begin token
//    starts a new document, ':' may end a sequence that was the '?' key of
//    a parent map, a document-end token ends the document, and a sequence
//    sharing its parent block map's indentation is popped back to that map.
6127template<
class EventHandler>
6128void ParseEngine<EventHandler>::_handle_seq_block()
6131 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6133 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6134 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6135 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
6136 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
6138 _maybe_skip_comment_strict();
6139 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL state: a value is expected ----
6144 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6145 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// At line beginning, settle indentation before looking at any token.
6146 if(m_evt_handler->m_curr->at_line_beginning())
6148 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6149 if(m_evt_handler->m_curr->indentation_ge_extra())
6151 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6152 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6153 if(!m_evt_handler->m_curr->line_contents.rem.len)
6156 else if(m_evt_handler->m_curr->indentation_lt_extra())
6158 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// Equal indentation: the pending entry gets an empty plain scalar.
6159 if(m_evt_handler->m_curr->indentation_eq())
6161 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6162 _handle_annotations_before_blck_val_scalar();
6163 m_evt_handler->set_val_scalar_plain_empty();
6169 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6170 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6171 _handle_indentation_pop_from_block_seq();
6172 goto seqblck_finish;
6175 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6177 _c4dbgp(
"seqblck[RVAL]: empty line!");
6178 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6182 RYML_ASSERT_PARSE_CB_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6183 const size_t startmark = _handle_block_skip_leading_whitespace();
6184 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6185 if(startmark ==
npos)
6187 _c4dbgp(
"seqblck[RVAL]: whitespace only");
// Capture the token's line and column (minus the BOM length) before scanning;
// both are passed to the map-start helpers in the branches below.
6190 const size_t tabmark = _handle_block_get_whitespace_mark();
6191 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6192 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6193 const size_t startline = m_evt_handler->m_curr->pos.line;
6194 _c4assert(m_evt_handler->m_curr->line_contents.current_col() >= m_bom_len);
6195 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6199 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6200 sc = _scan_scalar_squot();
// No colon follows: the scalar is this entry's value. Otherwise it becomes
// the first key of a new block map.
6201 if(!_maybe_scan_following_colon())
6203 _c4dbgp(
"seqblck[RVAL]: set as val");
6204 _handle_annotations_before_blck_val_scalar();
6205 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6206 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6211 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
// NOTE(review): this path and the double-quoted one call
// _handle_block_check_leading_tabs(startmark), while the plain-scalar path
// further down also passes tabmark -- confirm the difference is intended.
6212 _handle_block_check_leading_tabs(startmark);
6214 _handle_annotations_before_start_mapblck(startline);
6216 m_evt_handler->begin_map_val_block();
6217 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6218 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6219 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6221 _maybe_skip_whitespace_tokens();
6222 goto seqblck_finish;
6225 else if(first ==
'"')
6227 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6228 sc = _scan_scalar_dquot();
6229 if(!_maybe_scan_following_colon())
6231 _c4dbgp(
"seqblck[RVAL]: set as val");
6232 _handle_annotations_before_blck_val_scalar();
6233 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6234 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6239 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6241 _handle_block_check_leading_tabs(startmark);
6242 _handle_annotations_before_start_mapblck(startline);
6244 m_evt_handler->begin_map_val_block();
6245 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6246 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6247 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6249 _maybe_skip_whitespace_tokens();
6250 goto seqblck_finish;
6256 else if(first ==
'|')
6258 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6260 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6261 _handle_annotations_before_blck_val_scalar();
6262 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6263 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6266 else if(first ==
'>')
6268 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6270 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6271 _handle_annotations_before_blck_val_scalar();
6272 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6273 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6276 else if(_scan_scalar_plain_seq_blck(&sc))
6278 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6279 if(!_maybe_scan_following_colon())
6281 _c4dbgp(
"seqblck[RVAL]: set as val");
6282 _handle_annotations_before_blck_val_scalar();
6283 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6284 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6289 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6290 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6291 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6292 _handle_block_check_leading_tabs(startmark, tabmark);
6294 _handle_annotations_before_start_mapblck(startline);
6296 m_evt_handler->begin_map_val_block();
6297 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6298 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6299 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6301 _maybe_skip_whitespace_tokens();
6302 goto seqblck_finish;
6305 else if(first ==
'[')
6307 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6309 _handle_annotations_before_blck_val_scalar();
6310 m_evt_handler->begin_seq_val_flow();
6312 _line_progressed(1);
6313 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6314 goto seqblck_finish;
6316 else if(first ==
'{')
6318 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6320 _handle_annotations_before_blck_val_scalar();
6321 m_evt_handler->begin_map_val_flow();
6323 _line_progressed(1);
6324 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6325 goto seqblck_finish;
6327 else if(first ==
'-')
6329 _c4dbgp(
"seqblck[RVAL]: dash");
6330 _handle_block_check_leading_tabs(startmark);
6331 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6332 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6333 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
6334 RYML_ASSERT_PARSE_CB_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6336 _handle_annotations_before_blck_val_scalar();
6337 m_evt_handler->begin_seq_val_block();
6339 _set_indentation(startindent);
6341 _line_progressed(1);
6343 else if(first ==
':')
6345 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6347 _handle_annotations_before_start_mapblck(startline);
6349 m_evt_handler->begin_map_val_block();
6350 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6351 m_evt_handler->set_key_scalar_plain_empty();
6353 _line_progressed(1);
6354 _maybe_skip_whitespace_tokens();
6355 goto seqblck_finish;
6357 else if(first ==
'&')
6359 const csubstr anchor = _scan_anchor();
6360 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", prs_(anchor));
6363 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6365 else if(first ==
'*')
6367 csubstr ref = _scan_ref_seq();
6368 _c4dbgpf(
"seqblck[RVAL]: ref! {}", prs_(ref));
6369 if(!_maybe_scan_following_colon())
6371 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6372 _handle_valref(ref);
6377 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6379 _handle_annotations_before_start_mapblck(startline);
6380 m_evt_handler->begin_map_val_block();
6381 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6382 _handle_keyref(ref);
6384 _set_indentation(startindent);
6385 _maybe_skip_whitespace_tokens();
6386 goto seqblck_finish;
6389 else if(first ==
'!')
6392 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", prs_(tag));
6395 _add_annotation(&m_pending_tags, tag, startindent, startline);
6397 else if(first ==
'?')
6399 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6401 m_evt_handler->begin_map_val_block();
6403 _set_indentation(startindent);
6404 _line_progressed(1);
6405 _maybe_skipchars(
' ');
// A sequence token right after '?' makes the explicit key itself a block
// sequence.
6406 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6408 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6410 m_evt_handler->begin_seq_key_block();
6412 _save_indentation();
6413 _line_progressed(1);
6414 _maybe_skipchars(
' ');
6416 goto seqblck_finish;
// ---- RNXT state: the next sequence entry is expected ----
6425 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6426 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6430 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6431 if C4_LIKELY(m_evt_handler->m_curr->at_line_beginning())
6433 _c4dbgp(
"seqblck[RNXT]: at line begin");
6434 if(m_evt_handler->m_curr->indentation_ge())
6436 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6437 _line_progressed(m_evt_handler->m_curr->indref);
6438 if(!m_evt_handler->m_curr->line_contents.rem.len)
6441 else if(m_evt_handler->m_curr->indentation_lt())
6443 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6444 _handle_indentation_pop_from_block_seq();
6447 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6448 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6449 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6450 if(!m_evt_handler->m_curr->line_contents.rem.len)
6455 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6456 goto seqblck_finish;
6459 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6461 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6462 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6463 if(!m_evt_handler->m_curr->line_contents.rem.len)
6469 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6470 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6477 if(!m_evt_handler->m_curr->line_contents.rem.len)
6479 _c4dbgp(
"seqblck[RNXT]: again");
6487 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6488 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6489 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// Guard for the sequence-entry path: it is taken unless the line sits at
// zero indentation (both indref and the line's indentation are 0) and begins
// with a document-begin token.
6492 if(m_evt_handler->m_curr->indref > 0
6493 || m_evt_handler->m_curr->line_contents.indentation > 0
6494 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6496 if C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6498 _c4dbgp(
"seqblck[RNXT]: expect next val");
6500 m_evt_handler->add_sibling();
6501 _line_progressed(1);
6510 _c4dbgp(
"seqblck[RNXT]: start doc");
6511 _start_doc_suddenly();
6512 _line_progressed(3);
6513 _maybe_skip_whitespace_tokens();
6514 goto seqblck_finish;
6517 else if(first ==
':')
6523 if C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP))
6525 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6526 m_evt_handler->end_seq_block();
6527 goto seqblck_finish;
6534 else if(first ==
'.')
6536 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6537 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6539 _c4dbgp(
"seqblck[RNXT]: end doc");
6540 _end_doc_suddenly();
6541 _line_progressed(3);
6542 _maybe_skip_whitespace_tokens();
6543 _check_doc_end_tokens();
6544 goto seqblck_finish;
6555 _print_state_stack();
// A sequence whose indref equals that of its parent block map ends here:
// pop back to the parent and add a sibling there.
6557 if(m_evt_handler->m_parent
6558 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6559 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6561 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6562 RYML_ASSERT_PARSE_CB_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6563 _handle_indentation_pop(m_evt_handler->m_parent);
6564 RYML_ASSERT_PARSE_CB_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6565 m_evt_handler->add_sibling();
6567 goto seqblck_finish;
6569 else if(first ==
'\t')
6571 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6574 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// ---- No finish condition reached on this pass ----
6583 _c4dbgt(
"seqblck: go again", 0);
6584 if(_finished_line())
6589 if(_finished_file())
6591 _c4dbgp(
"seqblck: finish!");
6593 goto seqblck_finish;
6600 _c4dbgp(
"seqblck: finish");
// Handles parsing while the current state is a block-style map.
// The state must carry RMAP and RBLCK plus exactly one of RKEY, RKCL, RVAL,
// RNXT or QMRK (asserted below). The debug tags ("mapblck[RKEY]", ...) show
// which sub-state each section serves; QMRK and RKCL are delegated to
// _handle_map_block_qmrk() and _handle_map_block_rkcl().
// NOTE(review): this listing shows no braces, labels or loop constructs, and
// some branch conditions and declarations are not visible; the comments below
// describe only the statements that are present.
6606template<
class EventHandler>
6607void ParseEngine<EventHandler>::_handle_map_block()
6610 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// State invariants: RMAP and RBLCK are set, and exactly one of the five
// sub-state flags is set (the sum of the has_any() results must equal 1).
6614 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6615 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6616 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6617 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// Calls _maybe_skip_comment(), then tests whether the rest of the line is empty.
6619 _maybe_skip_comment();
6620 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY section (per the debug tags): the other four sub-state flags must be clear.
6625 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6626 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6627 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6628 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// At the beginning of a line the indentation is compared against indref.
// Equal indentation: indref characters are consumed.
6632 if(m_evt_handler->m_curr->at_line_beginning())
6634 if(m_evt_handler->m_curr->indentation_eq())
6636 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6637 _line_progressed(m_evt_handler->m_curr->indref);
6638 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Smaller indentation: _handle_indentation_pop_from_block_map() is called and
// the line's indentation is consumed. Per the debug messages the result is
// either "still mapblck" (RKEY asserted) or "no longer mapblck" (jump to
// mapblck_finish); the condition separating the two is not visible here.
6641 else if(m_evt_handler->m_curr->indentation_lt())
6643 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6644 _handle_indentation_pop_from_block_map();
6645 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6648 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6649 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6650 if(!m_evt_handler->m_curr->line_contents.rem.len)
6655 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6656 goto mapblck_finish;
// Remaining case: asserted to be larger indentation, reported as an error.
6661 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6662 _c4err(
"invalid indentation");
// The key is dispatched on the first remaining character. startline and
// startindent record the line and current_col() where the token begins; they
// are passed to _add_annotation() and _set_indentation() further down.
6668 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6669 const size_t startline = m_evt_handler->m_curr->pos.line;
6670 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6671 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
// Single-quoted key (the branch condition and the declaration of sc are not
// visible): scan, filter, handle pending annotations, set the key; it is an
// error if no ':' follows.
// NOTE(review): the key scalars in this RKEY section are filtered with the
// _maybe_filter_val_scalar_*() helpers, whereas the RVAL section of this same
// function uses _maybe_filter_key_scalar_*() when a scalar becomes a key --
// confirm this is intended.
6675 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6676 sc = _scan_scalar_squot();
6677 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6678 _handle_annotations_before_blck_key_scalar();
6679 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6681 if(!_maybe_scan_following_colon())
6682 _c4err(
"could not find ':' colon after key");
6684 _maybe_skip_whitespace_tokens();
// Double-quoted key: same sequence as the single-quoted case.
6686 else if(first ==
'"')
6688 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6689 sc = _scan_scalar_dquot();
6690 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6691 _handle_annotations_before_blck_key_scalar();
6692 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6694 if(!_maybe_scan_following_colon())
6695 _c4err(
"could not find ':' colon after key");
6697 _maybe_skip_whitespace_tokens();
// Block scalar indicators are rejected here: per the error text, literal and
// folded keys must be enclosed in '?'.
6701 else if C4_UNLIKELY(first ==
'|')
6703 _c4err(
"block map: literal keys must be enclosed in '?'");
6705 else if C4_UNLIKELY(first ==
'>')
6707 _c4err(
"block map: folded keys must be enclosed in '?'");
// Plain scalar key: filtered with indref, then a ':' is required.
6709 else if(_scan_scalar_plain_map_blck(&sc))
6711 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6712 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6713 _handle_annotations_before_blck_key_scalar();
6714 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6716 if(!_maybe_scan_following_colon())
6717 _c4err(
"could not find ':' colon after key");
6719 _maybe_skip_whitespace_tokens();
// '?' key token: consume it and following spaces. If what follows may be a
// block-sequence token, a block sequence is begun as the key, the indentation
// is saved, and control jumps to mapblck_finish.
6721 else if(first ==
'?')
6723 _c4dbgp(
"mapblck[RKEY]: key token!");
6725 _line_progressed(1);
6726 _maybe_skipchars(
' ');
6727 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6729 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6731 m_evt_handler->begin_seq_key_block();
6733 _save_indentation();
6734 _line_progressed(1);
6735 _maybe_skipchars(
' ');
6736 goto mapblck_finish;
// ':' with no preceding key text: the key is set to an empty plain scalar.
6740 else if(first ==
':')
6742 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6743 _handle_annotations_before_blck_key_scalar();
6744 m_evt_handler->set_key_scalar_plain_empty();
6746 _line_progressed(1);
6748 _maybe_skip_whitespace_tokens();
// '*': the scanned reference is used as the key; a ':' must follow.
6750 else if(first ==
'*')
6752 csubstr ref = _scan_ref_map();
6753 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", prs_(ref));
6754 _handle_keyref(ref);
6756 if(!_maybe_scan_following_colon())
6757 _c4err(
"could not find ':' colon after key");
6759 _maybe_skip_whitespace_tokens();
// '&' and '!': the anchor / tag is stored as a pending annotation together
// with the column and line where it started. The declaration of `tag` is not
// visible in this listing.
6761 else if(first ==
'&')
6763 csubstr anchor = _scan_anchor();
6764 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", prs_(anchor));
6765 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6767 else if(first ==
'!')
6770 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", prs_(tag));
6771 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '[' and '{': a flow sequence / flow map is begun as the key, the opening
// character is consumed, the indentation is set to startindent, and control
// jumps to mapblck_finish.
6773 else if(first ==
'[')
6778 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6779 _handle_annotations_before_blck_key_scalar();
6780 m_evt_handler->begin_seq_key_flow();
6782 _line_progressed(1);
6783 _set_indentation(startindent);
6784 goto mapblck_finish;
6786 else if(first ==
'{')
6791 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6792 _handle_annotations_before_blck_key_scalar();
6793 m_evt_handler->begin_map_key_flow();
6795 _line_progressed(1);
6796 _set_indentation(startindent);
6797 goto mapblck_finish;
// '-' and '.': only treated as document begin / end tokens when the line has
// zero indentation and the corresponding token test succeeds; three
// characters are then consumed.
6799 else if(first ==
'-')
6801 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6802 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6804 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6805 _start_doc_suddenly();
6806 _line_progressed(3);
6807 _maybe_skip_whitespace_tokens();
6808 goto mapblck_finish;
6815 else if(first ==
'.')
6817 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6818 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6820 _c4dbgp(
"mapblck[RKEY]: end doc");
6821 _end_doc_suddenly();
6822 _line_progressed(3);
6823 _maybe_skip_whitespace_tokens();
6824 _check_doc_end_tokens();
6825 goto mapblck_finish;
// ---- RVAL section: the other four sub-state flags must be clear.
6837 else if(has_any(
RVAL))
6839 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6840 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6841 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6842 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// At the beginning of a line, more_indented is reset and the indentation is
// classified with the *_extra() predicates (the debug message prints
// indref+1 as the reference value).
6846 if(m_evt_handler->m_curr->at_line_beginning())
6848 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6849 m_evt_handler->m_curr->more_indented =
false;
6850 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
// eq_extra: consume indref + 1 characters.
6851 if(m_evt_handler->m_curr->indentation_eq_extra())
6853 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6854 _line_progressed(m_evt_handler->m_curr->indref + 1);
6855 if(!m_evt_handler->m_curr->line_contents.rem.len)
// gt_extra: flag the state as more_indented and consume the line's indentation.
6858 else if(m_evt_handler->m_curr->indentation_gt_extra())
6860 _c4dbgp(
"mapblck[RVAL]: more indented!");
6861 m_evt_handler->m_curr->more_indented =
true;
6862 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6863 if(!m_evt_handler->m_curr->line_contents.rem.len)
// lt_extra with indentation equal to indref: unless the text after the
// indentation may be a block-sequence token, the value is set to an empty
// plain scalar.
// NOTE(review): the same debug message is emitted twice in a row below.
6866 else if(m_evt_handler->m_curr->indentation_lt_extra())
6868 if(m_evt_handler->m_curr->indentation_eq())
6870 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6872 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6874 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6875 _handle_annotations_before_blck_val_scalar();
6876 m_evt_handler->set_val_scalar_plain_empty();
// Indentation below indref (asserted): pop via
// _handle_indentation_pop_from_block_map(). Per the debug messages the result
// is either "still mapblck" (indentation consumed, add_sibling() called) or
// "no longer mapblck" (jump to mapblck_finish).
6883 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6884 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6885 _handle_indentation_pop_from_block_map();
6888 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6889 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6892 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6893 m_evt_handler->add_sibling();
6900 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6901 goto mapblck_finish;
// Leading whitespace is skipped; a result of npos is logged as "whitespace
// only". tabmark is later passed to _handle_block_check_leading_tabs().
6906 const size_t startcol = _handle_block_skip_leading_whitespace();
6907 if(startcol ==
npos)
6909 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6912 const size_t tabmark = _handle_block_get_whitespace_mark();
// The value is dispatched on the first remaining character (non-empty
// remainder asserted).
6916 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6917 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6918 const size_t startline = m_evt_handler->m_curr->pos.line;
6919 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6920 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
// Single-quoted scalar (branch condition not visible). If no ':' follows, it
// is set as the value. Otherwise (per the debug message) a new block map is
// started as the value and the scalar becomes that map's key; startindent is
// asserted to exceed indref.
6924 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6925 sc = _scan_scalar_squot();
6926 if(!_maybe_scan_following_colon())
6928 _c4dbgp(
"mapblck[RVAL]: set as val");
6929 _handle_annotations_before_blck_val_scalar();
6930 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6931 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6937 _c4assert(startindent > m_evt_handler->m_curr->indref);
6938 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6939 _handle_block_check_leading_tabs(startcol);
6940 _handle_annotations_before_start_mapblck(startline);
6943 m_evt_handler->begin_map_val_block();
6944 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6945 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6946 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6947 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value-or-nested-key handling as single-quoted.
6952 else if(first ==
'"')
6954 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6955 sc = _scan_scalar_dquot();
6956 if(!_maybe_scan_following_colon())
6958 _c4dbgp(
"mapblck[RVAL]: set as val");
6959 _handle_annotations_before_blck_val_scalar();
6960 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6961 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6967 _c4assert(startindent > m_evt_handler->m_curr->indref);
6968 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6969 _handle_block_check_leading_tabs(startcol);
6970 _handle_annotations_before_start_mapblck(startline);
6973 m_evt_handler->begin_map_val_block();
6974 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6975 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6976 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6977 _maybe_skip_whitespace_tokens();
// '|' and '>': block scalars are scanned with indref + 1 as the second
// argument of _scan_block(), filtered, and set as the value. The declaration
// of sb is not visible.
6984 else if(first ==
'|')
6986 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6988 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6989 _handle_annotations_before_blck_val_scalar();
6990 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6991 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6994 else if(first ==
'>')
6996 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6998 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6999 _handle_annotations_before_blck_val_scalar();
7000 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7001 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: value if no ':' follows, otherwise key of a new nested block
// map. Here the leading-tabs check also receives tabmark.
7004 else if(_scan_scalar_plain_map_blck(&sc))
7006 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7007 if(!_maybe_scan_following_colon())
7009 _c4dbgp(
"mapblck[RVAL]: set as val");
7010 _handle_annotations_before_blck_val_scalar();
7011 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7012 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7018 _c4assert(startindent > m_evt_handler->m_curr->indref);
7019 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7020 _handle_block_check_leading_tabs(startcol, tabmark);
7022 _handle_annotations_before_start_mapblck(startline);
7024 m_evt_handler->begin_map_val_block();
7025 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7026 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7027 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7028 _maybe_skip_whitespace_tokens();
// '-' that may be a block-sequence token: a block sequence is begun as the
// value and the indentation is set to startindent. The body of the
// at_first_token() check is not visible in this listing.
7033 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7035 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7037 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7038 _handle_block_check_leading_tabs(startcol);
7040 _handle_annotations_before_blck_val_scalar();
7041 m_evt_handler->begin_seq_val_block();
7043 _set_indentation(startindent);
7044 _line_progressed(1);
7045 _maybe_skip_whitespace_tokens();
7046 goto mapblck_finish;
// '[' and '{': a flow container is begun as the value; the indentation is set
// to the parent's indref + 1. For '{' the line indentation is also stored in
// scalar_col.
7048 else if(first ==
'[')
7050 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7052 _handle_annotations_before_blck_val_scalar();
7053 m_evt_handler->begin_seq_val_flow();
7055 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7056 _line_progressed(1);
7057 goto mapblck_finish;
7059 else if(first ==
'{')
7061 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7063 _handle_annotations_before_blck_val_scalar();
7064 m_evt_handler->begin_map_val_flow();
7066 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7067 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7068 _line_progressed(1);
7069 goto mapblck_finish;
// '*': if a ':' follows, a child block map is begun with the reference as its
// key; otherwise the reference is handled as the value.
7071 else if(first ==
'*')
7073 csubstr ref = _scan_ref_map();
7074 _c4dbgpf(
"mapblck[RVAL]: ref! {}", prs_(ref));
7075 if(_maybe_scan_following_colon())
7077 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7079 _handle_annotations_before_blck_val_scalar();
7080 m_evt_handler->begin_map_val_block();
7081 _handle_keyref(ref);
7082 _set_indentation(startindent);
7088 _c4dbgp(
"mapblck[RVAL]: was val ref");
7089 _handle_valref(ref);
7092 _maybe_skip_whitespace_tokens();
// '&' and '!': stored as pending annotations with their start column and line.
7094 else if(first ==
'&')
7096 csubstr anchor = _scan_anchor();
7097 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", prs_(anchor));
7100 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7102 else if(first ==
'!')
7105 _c4dbgpf(
"mapblck[RVAL]: tag! {}", prs_(tag));
7108 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': a block map is begun as the value; if a possible block-sequence token
// follows the '?', a block sequence is additionally begun as the key.
7110 else if(first ==
'?')
7112 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7114 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7116 _handle_annotations_before_blck_val_scalar();
7117 m_evt_handler->begin_map_val_block();
7119 _set_indentation(startindent);
7120 _line_progressed(1);
7121 _maybe_skipchars(
' ');
7122 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7124 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7126 m_evt_handler->begin_seq_key_block();
7128 _save_indentation();
7129 _line_progressed(1);
7130 _maybe_skipchars(
' ');
7131 goto mapblck_finish;
// ':': a block map is begun as the value and its first key is set to an empty
// plain scalar.
7135 else if(first ==
':')
7137 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7139 _handle_annotations_before_start_mapblck(startline);
7141 m_evt_handler->begin_map_val_block();
7142 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7143 m_evt_handler->set_key_scalar_plain_empty();
7146 _line_progressed(1);
7147 _maybe_skip_whitespace_tokens();
// ---- RNXT section: the other four sub-state flags must be clear.
7155 else if(has_any(
RNXT))
7157 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7158 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7159 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7160 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// At the beginning of a line with equal indentation: consume indref characters
// and call add_sibling() (logged as "speculatively expect next keyval").
7164 if(m_evt_handler->m_curr->at_line_beginning())
7166 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7167 if(m_evt_handler->m_curr->indentation_eq())
7169 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7170 _line_progressed(m_evt_handler->m_curr->indref);
7171 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7172 m_evt_handler->add_sibling();
// Smaller indentation: pop, consume the line's indentation and call
// add_sibling(); a jump to mapblck_finish is also present (its guarding
// condition is not visible).
7176 else if(m_evt_handler->m_curr->indentation_lt())
7178 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7179 _handle_indentation_pop_from_block_map();
7182 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7185 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7186 m_evt_handler->add_sibling();
7193 goto mapblck_finish;
// Not at the beginning of a line. The bodies of the two checks below are not
// visible in this listing.
// NOTE(review): the "again" message is tagged "seqblck[RNXT]" although this
// is the map-block handler -- looks like a copy-paste leftover; confirm.
7199 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7200 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
7207 if(!m_evt_handler->m_curr->line_contents.rem.len)
7209 _c4dbgp(
"seqblck[RNXT]: again");
7217 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7218 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7219 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7222 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7223 _maybe_skip_whitespace_tokens();
// ---- QMRK section: after asserting the other flags are clear, the work is
// delegated to _handle_map_block_qmrk(); its bool result is tested here.
7230 else if(has_any(
QMRK))
7232 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7233 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7234 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7235 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7236 if(_handle_map_block_qmrk())
7239 goto mapblck_finish;
// ---- RKCL section: same pattern, delegated to _handle_map_block_rkcl().
7241 else if(has_any(
RKCL))
7243 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7244 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7245 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7246 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7247 if(_handle_map_block_rkcl())
7250 goto mapblck_finish;
// "again" path: _finished_line() and _finished_file() are checked; when the
// file is finished, control jumps to mapblck_finish.
7254 _c4dbgt(
"mapblck: again", 0);
7255 if(_finished_line())
7259 if(_finished_file())
7261 _c4dbgp(
"mapblck: file finished!");
7263 goto mapblck_finish;
7270 _c4dbgp(
"mapblck: finish");
// Handles the QMRK sub-state of a block map (debug tag "mapblck[QMRK]"); it is
// called from _handle_map_block() when the QMRK flag is set.
// Returns bool, but the return statements are not visible in this listing, so
// the meaning of the result cannot be stated from here -- TODO confirm.
// NOTE(review): braces and some branch conditions / declarations are also not
// visible; the comments below describe only the statements that are present.
7277template<
class EventHandler>
7278bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// At the beginning of a line the indentation is classified with the *_extra()
// predicates.
7283 if(m_evt_handler->m_curr->at_line_beginning())
7285 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7286 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
// eq_extra: consume indref + 1 characters.
7287 if(m_evt_handler->m_curr->indentation_eq_extra())
7289 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7290 _line_progressed(m_evt_handler->m_curr->indref + 1);
7291 if(!m_evt_handler->m_curr->line_contents.rem.len)
// gt_extra: consume the line's whole indentation.
7295 else if(m_evt_handler->m_curr->indentation_gt_extra())
7297 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7298 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7299 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Remaining case (asserted lt_extra, non-empty remainder). If the indentation
// equals indref and the first character is not '-', the '?' key is finished
// and is set to an empty plain scalar.
7304 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7305 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7306 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7307 if(m_evt_handler->m_curr->indentation_eq()
7309 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7311 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7312 _handle_annotations_before_blck_key_scalar();
7313 m_evt_handler->set_key_scalar_plain_empty();
// Indentation below indref: pop and consume the line's indentation; the debug
// messages distinguish "still mapblck" from "no longer mapblck".
7317 else if(m_evt_handler->m_curr->indentation_lt())
7319 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7320 _handle_indentation_pop_from_block_map();
7321 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7324 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7329 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// The key is dispatched on the first remaining character (non-empty remainder
// asserted). startline/startindent record where the token begins.
7338 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7339 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7340 const size_t startline = m_evt_handler->m_curr->pos.line;
7341 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7342 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
// Single-quoted scalar (branch condition not visible). If no ':' follows, it
// is set as the key. Otherwise a new block map is begun *as the key*, the
// scalar becomes that map's first key, and the indentation is set to
// startindent.
7346 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7347 sc = _scan_scalar_squot();
7348 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7350 if(!_maybe_scan_following_colon())
7352 _c4dbgp(
"mapblck[QMRK]: set as key");
7353 _handle_annotations_before_blck_key_scalar();
7354 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7358 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7359 _handle_annotations_before_start_mapblck_as_key();
7360 m_evt_handler->begin_map_key_block();
7361 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7362 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7363 _maybe_skip_whitespace_tokens();
7364 _set_indentation(startindent);
// Double-quoted scalar: same handling as single-quoted.
7369 else if(first ==
'"')
7371 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7372 sc = _scan_scalar_dquot();
7373 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7375 if(!_maybe_scan_following_colon())
7377 _c4dbgp(
"mapblck[QMRK]: set as key");
7378 _handle_annotations_before_blck_key_scalar();
7379 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7383 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7384 _handle_annotations_before_start_mapblck_as_key();
7385 m_evt_handler->begin_map_key_block();
7386 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7387 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7388 _maybe_skip_whitespace_tokens();
7389 _set_indentation(startindent);
// '|' and '>': block scalars are accepted as keys here; they are scanned with
// indref + 1 as the second argument of _scan_block().
// NOTE(review): the '>' branch logs "block-literal" although it calls the
// *_folded helpers -- the message text looks copied from the '|' branch.
7394 else if(first ==
'|')
7396 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7398 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7399 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7400 _handle_annotations_before_blck_key_scalar();
7401 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7404 else if(first ==
'>')
7406 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7408 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7409 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7410 _handle_annotations_before_blck_key_scalar();
7411 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// Plain scalar: key if no ':' follows, otherwise first key of a new block map
// that is itself the key.
7414 else if(_scan_scalar_plain_map_blck(&sc))
7416 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7417 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7419 if(!_maybe_scan_following_colon())
7421 _c4dbgp(
"mapblck[QMRK]: set as key");
7422 _handle_annotations_before_blck_key_scalar();
7423 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7427 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7428 _handle_annotations_before_start_mapblck_as_key();
7429 m_evt_handler->begin_map_key_block();
7430 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7431 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7432 _maybe_skip_whitespace_tokens();
7433 _set_indentation(startindent);
// ':': a block map is begun as the key, with an empty plain scalar as its
// first key.
7438 else if(first ==
':')
7440 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7442 _handle_annotations_before_start_mapblck_as_key();
7443 m_evt_handler->begin_map_key_block();
7444 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7445 m_evt_handler->set_key_scalar_plain_empty();
7446 _line_progressed(1);
7447 _maybe_skip_whitespace_tokens();
7448 _set_indentation(startindent);
// '*': the reference is the key if no ':' follows, otherwise the first key of
// a new block map that is itself the key.
7452 else if(first ==
'*')
7454 csubstr ref = _scan_ref_map();
7455 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", prs_(ref));
7457 if(!_maybe_scan_following_colon())
7459 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7460 _handle_keyref(ref);
7464 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7465 _handle_annotations_before_start_mapblck_as_key();
7466 m_evt_handler->begin_map_key_block();
7467 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7468 _handle_keyref(ref);
7469 _set_indentation(startindent);
7473 _maybe_skip_whitespace_tokens();
// '&' and '!': stored as pending annotations with their start column and
// line. The declaration of `tag` is not visible in this listing.
7475 else if(first ==
'&')
7477 csubstr anchor = _scan_anchor();
7478 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", prs_(anchor));
7479 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7481 else if(first ==
'!')
7484 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", prs_(tag));
7485 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '-': either a block sequence begun as the key (one character consumed), or
// -- asserted via _is_doc_begin_token() -- a document-begin token (three
// characters consumed).
7487 else if(first ==
'-')
7489 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7490 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7492 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7494 _handle_annotations_before_blck_key_scalar();
7495 m_evt_handler->begin_seq_key_block();
7497 _set_indentation(startindent);
7498 _line_progressed(1);
7502 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7503 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7504 _start_doc_suddenly();
7505 _line_progressed(3);
7507 _maybe_skip_whitespace_tokens();
// '[' and '{': a flow container is begun as the key; the indentation is set
// to the parent's indref + 1.
7510 else if(first ==
'[')
7512 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7514 _handle_annotations_before_blck_key_scalar();
7515 m_evt_handler->begin_seq_key_flow();
7517 _set_indentation(m_evt_handler->m_parent->indref + 1);
7518 _line_progressed(1);
7521 else if(first ==
'{')
7523 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7525 _handle_annotations_before_blck_key_scalar();
7526 m_evt_handler->begin_map_key_flow();
7528 _set_indentation(m_evt_handler->m_parent->indref + 1);
7529 _line_progressed(1);
// A further '?' (asserted to be at a column beyond indref): a child block map
// is begun as the key; if a possible block-sequence token follows, a block
// sequence is additionally begun as that map's key.
// NOTE(review): the last debug message is tagged "mapblck[RVAL]" inside the
// QMRK handler -- looks like a copy-paste leftover; confirm.
7532 else if(first ==
'?')
7534 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7535 RYML_ASSERT_PARSE_CB_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7536 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7538 _handle_annotations_before_blck_key_scalar();
7539 m_evt_handler->begin_map_key_block();
7541 _set_indentation(startindent);
7543 _line_progressed(1);
7544 _maybe_skipchars(
' ');
7545 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7547 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7549 m_evt_handler->begin_seq_key_block();
7551 _save_indentation();
7552 _line_progressed(1);
7553 _maybe_skipchars(
' ');
// Handles the RKCL sub-state of a block map (debug tag "mapblck[RKCL]"); it is
// called from _handle_map_block() when the RKCL flag is set. Judging by the
// messages ("found the colon", "missing :"), this is where the ':' after a
// key is looked for.
// Returns bool, but the return statements are not visible in this listing, so
// the meaning of the result cannot be stated from here -- TODO confirm.
7568template<
class EventHandler>
7569bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// At the beginning of a line: equal indentation consumes indref characters;
// smaller indentation is an error.
7574 if(m_evt_handler->m_curr->at_line_beginning())
7576 if(m_evt_handler->m_curr->indentation_eq())
7578 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7579 _line_progressed(m_evt_handler->m_curr->indref);
7580 if(!m_evt_handler->m_curr->line_contents.rem.len)
7583 else if C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())
7585 _c4err(
"invalid indentation");
// Dispatch on the first remaining character.
7588 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7589 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
// Colon found (branch condition not visible): consume it and any following
// spaces.
7592 _c4dbgp(
"mapblck[RKCL]: found the colon");
7593 _line_progressed(1);
7594 _maybe_skipchars(
' ');
// Compiler-specific guard: for the GCC version / target combinations matched
// below, the remainder is passed through C4_DONT_OPTIMIZE. The reason is not
// stated in the visible code -- presumably an optimizer workaround; confirm.
7595 #if defined(__GNUC__) && ( \
7596 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7598 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7599 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
// After the colon: when the remainder may be a block-sequence token, a block
// sequence is begun as the value and the indentation is saved. The body of
// the negative check is not visible.
7602 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7609 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7611 m_evt_handler->begin_seq_val_block();
7613 _save_indentation();
7614 _line_progressed(1);
7615 _maybe_skipchars(
' ');
// '?' instead of ':': the pending value is set to an empty plain scalar and a
// sibling is added; then the '?' is consumed and, if a possible block-sequence
// token follows, a block sequence is begun as the next key.
7619 else if(first ==
'?')
7621 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7622 m_evt_handler->set_val_scalar_plain_empty();
7623 m_evt_handler->add_sibling();
7625 _line_progressed(1);
7626 _maybe_skipchars(
' ');
7627 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7629 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7631 m_evt_handler->begin_seq_key_block();
7633 _save_indentation();
7634 _line_progressed(1);
7635 _maybe_skipchars(
' ');
// '-': the branch is entered when indref is 0, the line indentation is 0, or
// the remainder is a document-begin token; inside, RYML_CHECK_PARSE_CB_
// requires that it really is a document-begin token before starting a new
// document.
7639 else if(first ==
'-')
7641 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7643 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7644 RYML_CHECK_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7645 _start_doc_suddenly();
7646 _line_progressed(3);
7647 _maybe_skip_whitespace_tokens();
// '.': the text after the first '.' is compared with ".." / ".. " to detect a
// document-end token, which ends the document.
// NOTE(review): the message below says "end+start doc" although the code
// calls _end_doc_suddenly() -- message text looks copied from the '-' branch.
7655 else if(first ==
'.')
7657 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7658 csubstr rs = m_evt_handler->m_curr->line_contents.rem.
sub(1);
7659 if(rs ==
".." || rs.begins_with(
".. "))
7661 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7662 _end_doc_suddenly();
7663 _line_progressed(3);
7664 _maybe_skip_whitespace_tokens();
7665 _check_doc_end_tokens();
// No colon: the indentation is checked for equality with indref (the body of
// the failing case is not visible), then the value is set to an empty plain
// scalar and a sibling is added.
7675 _c4dbgp(
"mapblck[RKCL]: missing :");
7676 if C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq())
7678 m_evt_handler->set_val_scalar_plain_empty();
7679 m_evt_handler->add_sibling();
// Handler for the top-level "unknown" state; the debug tags ("handle_unk_json",
// "runk_json") suggest it is the JSON-oriented variant.
// NOTE(review): the function signature line is not visible in this listing
// (the name is taken from the debug message), nor are braces and some branch
// conditions / declarations; the comments below describe only the statements
// that are present.
7688template<class EventHandler>
7691 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// State invariants: none of RNXT, RSEQ, RMAP may be set, and RTOP must be set.
7693 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7694 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7696 _maybe_skip_comment();
7697 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Indentation skip: pos (declared on a line not visible here) falls back to
// rem.len when it is npos; that many characters are consumed and rem is
// refreshed.
7704 pos = pos !=
npos ? pos : rem.len;
7705 _c4dbgpf(
"skipping indentation of {}", pos);
7706 _line_progressed(pos);
7707 rem = m_evt_handler->m_curr->line_contents.rem;
7710 _c4dbgpf(
"rem is now {}", prs_(rem));
// '[': a flow sequence is begun as the value, the indentation is set to the
// current column of rem, and the document is marked non-empty.
7713 if(rem.begins_with(
'['))
7715 _c4dbgp(
"it's a seq");
7716 _check_trailing_doc_token();
7718 m_evt_handler->begin_seq_val_flow();
7720 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7721 m_doc_empty =
false;
7722 _line_progressed(1);
// '{': same for a flow map.
7724 else if(rem.begins_with(
'{'))
7726 _c4dbgp(
"it's a map");
7727 _check_trailing_doc_token();
7729 m_evt_handler->begin_map_val_flow();
7731 m_doc_empty =
false;
7732 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7733 _line_progressed(1);
// A byte order mark is handled by _handle_bom().
7735 else if(_handle_bom())
7737 _c4dbgp(
"byte order mark");
// Otherwise a scalar is expected: SSCL must not be set; whitespace tokens are
// skipped and the first character of the remainder is inspected.
7741 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7742 _maybe_skip_whitespace_tokens();
7743 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7746 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7747 const char first = s.
str[0];
// Double-quoted scalar (branch condition not visible): scanned and filtered;
// when no ':' follows it is set as the value. The handling of the
// colon-follows case is not visible.
7751 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7752 _check_trailing_doc_token();
7755 m_doc_empty =
false;
7756 sc = _scan_scalar_dquot();
7757 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7758 if(!_maybe_scan_following_colon())
7760 _c4dbgp(
"runk_json: set as val");
7761 _handle_annotations_before_blck_val_scalar();
7762 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain scalar: when no ':' follows it is filtered (with startindent) and set
// as the value.
7769 else if(_scan_scalar_plain_unk(&sc))
7771 _c4dbgp(
"runk_json: got a plain scalar");
7772 _check_trailing_doc_token();
7775 m_doc_empty =
false;
7776 if(!_maybe_scan_following_colon())
7778 _c4dbgp(
"runk_json: set as val");
7779 _handle_annotations_before_blck_val_scalar();
7780 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7781 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ParseEngine::_handle_unk(): processes the current line while the parser
// state has RTOP set and none of RNXT|RSEQ|RMAP (see the assertions below),
// i.e. before the kind of the next node is known.
// NOTE: this listing omits some original lines (braces and a few
// conditions); the comments describe only the statements that are visible.
7798template<
class EventHandler>
7799void ParseEngine<EventHandler>::_handle_unk()
7801 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// State preconditions for this handler.
7803 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7804 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
// Skip leading spaces and a possible comment before looking at the line.
7806 _maybe_skipchars(
' ');
7807 _maybe_skip_comment();
// Guard for a line with nothing left in `rem` (the guarded body is not visible here).
7809 if(!m_evt_handler->m_curr->line_contents.rem.len)
7812 _c4dbgpf(
"runk: rem is now {}", prs_(m_evt_handler->m_curr->line_contents.rem));
// Stream-level tokens (BOM, document markers, directives) are only looked for
// at zero indentation, either at the line beginning or on the line where a
// BOM was previously recorded.
7814 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7816 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7817 _c4dbgp(
"runk: check BOM");
// Remember the line on which the byte order mark was seen.
7820 m_bom_line = m_evt_handler->m_curr->pos.line;
7821 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7824 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7825 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
7828 _c4dbgp(
"runk: rtop: suspecting doc");
// Document-begin token: reset indentation to 0 and consume the 3-character token.
7829 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7831 _c4dbgp(
"runk: rtop: begin doc");
7834 _set_indentation(0);
7836 _line_progressed(3u);
7837 _maybe_skip_whitespace_tokens();
// Possible document-end token: consume the 3-character token, then check what follows it.
7841 else if(first ==
'.')
7843 _c4dbgp(
"runk: rtop: suspecting doc end");
7844 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7846 _c4dbgp(
"runk: rtop: end doc");
7853 _c4dbgp(
"runk: rtop: ignore end doc");
7856 _line_progressed(3u);
7857 _maybe_skip_whitespace_tokens();
7858 _check_doc_end_tokens();
// Directive line: rejected with an error when RDOC is set, or when the
// document is not empty and NDOC is not set; otherwise handed to _handle_directive().
7862 else if(first ==
'%')
7864 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
7865 if C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC)))
7866 _c4err(
"need document footer before directives");
7867 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// Indentation bookkeeping used by the token branches that follow:
//  - startindent: indentation recorded for the current line
//  - remindent:   column of the remaining (unparsed) text
7874 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7875 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7878 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
// On the line that carried the BOM, discount the BOM length from the
// column (after asserting that the subtraction cannot underflow).
7879 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7881 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7882 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7883 remindent -= m_bom_len;
// startcol is later passed to _handle_block_check_leading_tabs(); `first`
// is the character all following branches dispatch on.
7891 size_t startcol = _handle_block_skip_leading_whitespace();
7892 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Flow sequence branch. The opening condition is not visible in this
// listing; presumably it tests first == '[' (mirrors the '{' branch below) - confirm.
7896 _c4dbgp(
"runk: flow seq?");
7897 _handle_unk_begin_doc();
// Usual case: pending annotations do not require a key container, so the
// flow sequence is opened as a value.
7898 if C4_LIKELY( ! _annotations_require_key_container())
7900 _c4dbgp(
"runk: it's a seq, flow");
7901 _handle_annotations_before_blck_val_scalar();
7902 m_evt_handler->begin_seq_val_flow();
7904 _set_indentation(0);
// Otherwise a block map is started and the flow sequence becomes its key.
7908 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7909 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7910 m_evt_handler->begin_map_val_block();
7912 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7913 m_evt_handler->begin_seq_key_flow();
7915 _set_indentation(0);
// Consume the opening bracket character.
7917 _line_progressed(1);
// Flow map branch: same structure as the flow sequence branch above.
7919 else if(first ==
'{')
7921 _c4dbgp(
"runk: flow map?");
7922 _handle_unk_begin_doc();
7923 if C4_LIKELY( ! _annotations_require_key_container())
7925 _c4dbgp(
"runk: it's a map, flow");
7926 _handle_annotations_before_blck_val_scalar();
7927 m_evt_handler->begin_map_val_flow();
7929 _set_indentation(0);
// Flow map used as the key of a newly started block map.
7933 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7934 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7935 m_evt_handler->begin_map_val_block();
7937 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7938 m_evt_handler->begin_map_key_flow();
7940 _set_indentation(0);
// Consume the opening brace character.
7942 _line_progressed(1);
// Block sequence entry: '-' recognised as a block token.
7944 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7946 _c4dbgp(
"runk: it's a seq, block");
// When other tokens precede this one on the line, recompute the
// indentation from what is to the left (annotation skipping disabled).
7947 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7948 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, false);
7949 _handle_unk_begin_doc();
7950 _handle_annotations_before_blck_val_scalar();
7951 m_evt_handler->begin_seq_val_block();
7953 _set_indentation(startindent);
// Consume the '-' and the spaces after it.
7954 _line_progressed(1);
7955 _maybe_skipchars(
' ');
// Explicit ("complex") key: '?' recognised as a block token starts a block map.
7957 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7959 _c4dbgp(
"runk: it's a map + this key is complex");
7960 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7961 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, false);
7962 _handle_block_check_leading_tabs(startcol);
7963 _handle_unk_begin_doc();
7964 _handle_annotations_before_blck_val_scalar();
7965 m_evt_handler->begin_map_val_block();
7967 _set_indentation(startindent);
7968 _line_progressed(1);
7969 _maybe_skipchars(
' ');
// A block sequence may start right after the '?'; it is then opened as the key.
7970 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7972 _c4dbgp(
"runk: seqblck key starts after ?");
7974 m_evt_handler->begin_seq_key_block();
7976 _save_indentation();
7977 _line_progressed(1);
7978 _maybe_skipchars(
' ');
// ':' recognised as a block token: accepted as a map with an empty key only
// when the document is still empty or annotations are pending; the error
// below is presumably the alternative path (its `else` is not visible here).
7981 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7983 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7985 _c4dbgp(
"runk: it's a map with an empty key");
// Two-argument call: skip_annotations takes its declared default here.
7986 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7987 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7988 _handle_block_check_leading_tabs(startcol);
7989 const
size_t startline = m_evt_handler->m_curr->pos.line;
7990 _handle_unk_begin_doc();
7991 _handle_annotations_before_start_mapblck(startline);
7993 m_evt_handler->begin_map_val_block();
7994 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7995 m_evt_handler->set_key_scalar_plain_empty();
7996 _set_indentation(startindent);
8000 _c4err(
"block colon cannot occur on a new line unless ? is used");
// Consume the ':' and any following whitespace tokens.
8003 _line_progressed(1);
8004 _maybe_skip_whitespace_tokens();
// Anchor: scanned and stored as a pending annotation together with the
// BOM-adjusted column (remindent) and the line it was found on.
8006 else if(first ==
'&')
8008 csubstr anchor = _scan_anchor();
8009 _c4dbgpf(
"anchor! {}", prs_(anchor));
// The line is captured before _handle_unk_begin_doc() is called.
8010 const size_t line = m_evt_handler->m_curr->pos.line;
8011 _handle_unk_begin_doc();
8012 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8013 _set_indentation(0);
// Alias/reference: either a value by itself, or (when a colon follows) the
// key of a newly started block map.
8015 else if(first ==
'*')
8017 csubstr ref = _scan_ref_map();
8018 _c4dbgpf(
"runk: ref! {}", prs_(ref));
8019 _handle_unk_begin_doc();
8020 if(!_maybe_scan_following_colon())
8022 _c4dbgp(
"runk: set val ref");
8023 _handle_valref(ref);
// A colon followed: start a block map with the reference as its key.
8027 _c4dbgp(
"runk: start new block map, set ref as key");
8028 _handle_block_check_leading_tabs(startcol);
8029 const size_t startline = m_evt_handler->m_curr->pos.line;
8030 _handle_annotations_before_start_mapblck(startline);
8031 m_evt_handler->begin_map_val_block();
8032 _handle_keyref(ref);
8033 _maybe_skip_whitespace_tokens();
8034 _set_indentation(0);
// Tag: scanned (tag_orig is declared on a line not visible in this listing)
// and stored as a pending annotation with the column/line after the scan.
8038 else if(first ==
'!')
8041 csubstr tag = _scan_tag(&tag_orig);
8042 _c4dbgpf(
"runk: val tag! {}", prs_(tag));
8045 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8046 const size_t line = m_evt_handler->m_curr->pos.line;
8047 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// Scalar handling prelude: SSCL must not be set at this point.
8051 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
// Position captured before the scalar is scanned; both values are used by
// the helper lambdas below.
8052 const size_t startscalar = _handle_block_get_whitespace_mark();
8053 const size_t startline = m_evt_handler->m_curr->pos.line;
// beginmap(indent): starts a block map whose first key is the scalar that
// was just scanned. Errors out if the scan moved past the starting line,
// checks leading tabs, and handles pending annotations around the
// begin_map_val_block() event.
8054 auto beginmap = [&](
size_t startindent_){
8055 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8056 _c4err(
"multiline scalars cannot be used as implicit keys");
8057 _handle_block_check_leading_tabs(startcol, startscalar);
8058 _handle_annotations_before_start_mapblck(startline);
8060 m_evt_handler->begin_map_val_block();
8061 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// after_beginmap(indent): called once the key scalar has been set; skips
// whitespace tokens and records the map indentation.
8063 auto after_beginmap = [&](
size_t startindent_){
8064 _maybe_skip_whitespace_tokens();
8065 _set_indentation(startindent_);
// Block-literal scalar. The opening condition is not visible in this
// listing; presumably first == '|' (mirrors the '>' branch below) - confirm.
// `sb` is declared on a line that is not visible here.
8070 _c4dbgp(
"runk: block-literal scalar");
8071 _handle_unk_begin_doc();
8073 _scan_block(&sb, startindent);
8074 _handle_annotations_before_blck_val_scalar();
8075 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8076 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar: same sequence, using the folded filter and event.
8078 else if(first ==
'>')
8080 _c4dbgp(
"runk: block-folded scalar");
8081 _handle_unk_begin_doc();
8083 _scan_block(&sb, startindent);
8084 _handle_annotations_before_blck_val_scalar();
8085 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8086 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Single-quoted scalar: it is either a value by itself or, when a colon
// follows, the first key of a new block map.
8088 else if(first ==
'\'')
8090 _c4dbgp(
"runk: single-quoted scalar");
8091 _handle_unk_begin_doc();
// Captured before scanning: whether this is the first token on the line,
// and the column at which the scalar starts.
8092 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8093 size_t col = m_evt_handler->m_curr->pos.col;
8094 ScannedScalar sc = _scan_scalar_squot();
// No colon after the scalar: emit it as a value (value filter).
8095 if(!_maybe_scan_following_colon())
8097 _c4dbgp(
"runk: set as val");
8098 _handle_annotations_before_blck_val_scalar();
8099 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8100 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// A colon followed: the scalar is the key of a new block map.
8104 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8105 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8108 startindent = _handle_unk_check_left_tokens(startindent, col);
8109 beginmap(startindent);
// The scalar is emitted through set_key_scalar_squoted(), so it is filtered
// with the key filter (as the same case in _handle_usty() does) rather than
// the value filter.
8110 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8111 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8112 after_beginmap(startindent);
// Double-quoted scalar: it is either a value by itself or, when a colon
// follows, the first key of a new block map.
8115 else if(first ==
'"')
8117 _c4dbgp(
"runk: double-quoted scalar");
8118 _handle_unk_begin_doc();
// Captured before scanning: whether this is the first token on the line,
// and the column at which the scalar starts.
8119 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8120 size_t col = m_evt_handler->m_curr->pos.col;
8121 ScannedScalar sc = _scan_scalar_dquot();
// No colon after the scalar: emit it as a value (value filter).
8122 if(!_maybe_scan_following_colon())
8124 _c4dbgp(
"runk: set as val");
8125 _handle_annotations_before_blck_val_scalar();
8126 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8127 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// A colon followed: the scalar is the key of a new block map.
8131 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8132 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8135 startindent = _handle_unk_check_left_tokens(startindent, col);
8136 beginmap(startindent);
// The scalar is emitted through set_key_scalar_dquoted(), so it is filtered
// with the key filter (as the same case in _handle_usty() does) rather than
// the value filter.
8137 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8138 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8139 after_beginmap(startindent);
// Fallback: try to scan a plain scalar. `sc` is declared on a line that is
// not visible in this listing.
// Captured before scanning: whether this is the first token on the line,
// and the column at which the scalar starts.
8144 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8145 size_t col = m_evt_handler->m_curr->pos.col;
8147 if(_scan_scalar_plain_unk(&sc))
8149 _c4dbgp(
"runk: plain scalar");
8150 _handle_unk_begin_doc();
// No colon after the scalar: emit it as a value (value filter).
8151 if(!_maybe_scan_following_colon())
8153 _c4dbgp(
"runk: set as val");
8154 _handle_annotations_before_blck_val_scalar();
8155 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8156 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// A colon followed: the scalar is the key of a new block map; it must have
// been scanned entirely on the starting line.
8160 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8165 _c4assert(m_evt_handler->m_curr->pos.line == startline);
8167 startindent = _handle_unk_check_left_tokens(startindent, col);
8168 beginmap(startindent);
// The scalar is emitted through set_key_scalar_plain(), so it is filtered
// with the key filter (as the same case in _handle_usty() does) rather than
// the value filter.
8169 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8170 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8171 after_beginmap(startindent);
// Epilogue: a BOM length is still recorded and the RUNK flag is no longer set.
8181 if(m_bom_len && has_none(
RUNK))
8183 _c4dbgpf(
"runk: BOMlen={} BOMline={} now={} at_end={}", m_bom_len, m_bom_line, m_evt_handler->m_curr->pos.line, !m_evt_handler->m_curr->line_contents.rem.len);
// Once the parser left the BOM line, or consumed it entirely, the BOM length
// is cleared (per the debug text; the clearing statement is not visible here).
8184 if(m_evt_handler->m_curr->pos.line != m_bom_line || !m_evt_handler->m_curr->line_contents.rem.len)
8186 _c4dbgp(
"runk: clear BOMlen");
// ParseEngine::_handle_unk_begin_doc(): called by the _handle_unk() branches
// before they emit content. Visible effects: runs
// _check_trailing_doc_token() and marks the document as non-empty.
// NOTE: two original lines between those statements are not visible in this
// listing, so any further effect cannot be documented from here.
8192template<
class EventHandler>
8193void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8195 _c4dbgp(
"runk: begin doc");
8196 _check_trailing_doc_token();
// From here on the current document has content.
8199 m_doc_empty =
false;
// ParseEngine::_handle_unk_check_left_tokens(): inspects the text to the left
// of column `col` on the current line (excluding the first m_bom_len
// characters) and derives the indentation to use.
//   realindent       - indentation computed so far; may be replaced below
//   col              - column where the token being handled starts
//   skip_annotations - when true, pending annotations found on the left are
//                      skipped first; callers in this file also call with two
//                      arguments, so the declaration provides a default
// The return statement is not visible in this listing; callers assign the
// result to their indentation variable.
8202template<
class EventHandler>
8203size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
// Text between the end of the BOM and the token column.
8208 csubstr s = m_evt_handler->m_curr->line_contents.full.
range(m_bom_len, col);
8210 _c4dbgpf(
"runk: check left tokens: s={}", prs_(s,
true));
// `pos` is declared on a line not visible here; with skip_annotations it is
// filled in by the helper, which may also update realindent.
8211 if(skip_annotations)
8213 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8214 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// First character at or after `pos` that is not a space.
8216 size_t firstns = s.first_not_of(
' ', pos);
8219 _c4dbgpf(
"runk: check left tokens:\n"
8220 " tokens={} skipped={}\n"
8221 " bomlen={} first={} col={}\n"
8222 " (bomlen+first)={} vs {}=col\n"
8223 " startindent={} lineindent={}"
8224 , prs_(s,
true), prs_(s.sub(firstns),
true)
8225 , m_bom_len, firstns, col
8226 , m_bom_len+firstns, col,
8227 realindent, m_evt_handler->m_curr->line_contents.indentation);
// Mismatch between (BOM length + first non-space) and the token column; the
// statement guarded by this condition is not visible in this listing.
8228 if(m_bom_len + firstns != col)
// Without annotation skipping, the indentation is the first non-space position.
8230 if(!skip_annotations)
8231 realindent = firstns;
8232 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
// ParseEngine::_handle_unk_get_first_non_pending_token_pos(): given the text
// `s` to the left of the current token, skips leading blanks and up to two
// pending annotations found on the same line.
//   indent              - out: set to the position of the first non-blank character
//   first_non_token_pos - out: set to the position reached after the skipped annotations
// NOTE: the statements that advance `pos` past each annotation, and the
// declarations of `first`, `second` and `more`, are not visible in this
// listing; the steps are documented from the assertions and debug texts.
8238template<
class EventHandler>
8239void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(
csubstr s,
size_t *indent,
size_t *first_non_token_pos)
// Obtain the (up to two) annotations located on the current line.
8242 uint32_t total = _get_annotations_same_line(s, &first, &second);
8243 _c4dbgpf(
"runk: before skip: {}", prs_(s,
true));
// Skip leading spaces and tabs.
8244 size_t pos = s.first_not_of(
" \t");
// Both outputs start at the first non-blank position.
8249 *indent = *first_non_token_pos = pos;
8252 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8253 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, prs_(s.sub(pos),
true));
8254 _c4dbgpf(
"runk: first annotation: {}", first);
// The text at `pos` is expected to start with the first annotation.
8258 _c4assert(s.sub(pos).begins_with(first));
8261 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, prs_(s.sub(pos),
true));
8264 _c4dbgpf(
"runk: second annotation: {}", second);
8272 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8274 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, prs_(s.sub(pos),
true));
// After the whitespace between annotations, the second one is expected at `pos`.
8275 _c4assert(s.sub(pos).begins_with(second));
8277 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, prs_(s.sub(pos),
true));
// Report the position reached after the skipped annotations.
8279 *first_non_token_pos = pos;
// ParseEngine::_get_annotations_same_line(): looks through the pending
// anchors and tags for entries recorded on the current line and reports up
// to two of them as strings.
//   token_soup - text of the current line in which the annotations occur
//   first_     - out: string for the first same-line annotation, or null
//   second_    - out: string for the second same-line annotation, or null
// Must not be called at the first token of a line (asserted below). The
// return statement is not visible in this listing; the caller stores the
// returned uint32_t as its annotation total.
8283template<
class EventHandler>
8284uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(
csubstr token_soup,
csubstr *first_,
csubstr *second_)
const
8286 _c4assert(!m_evt_handler->m_curr->at_first_token());
8288 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8289 EntryPtr first =
nullptr;
8290 EntryPtr second =
nullptr;
// Number of all pending annotations, regardless of line.
// NOTE(review): the loops below add same-line entries to this same variable;
// presumably `total` is reset on a line not visible in this listing - confirm.
8291 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8294 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// Returns the entry itself when it was recorded on the current line, else null.
8295 auto valid_if_same_line = [
this](EntryPtr entry){
8296 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", maybe_null_str_(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8297 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// Count the same-line entries among anchors, then among tags.
8301 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8302 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8303 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8304 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8305 _c4dbgpf(
"{} annotations on same line", total);
// Returns the first entry on the current line other than `not_this_one`,
// searching anchors before tags; null when there is none.
8310 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8311 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8312 if(&m_pending_anchors.annotations[i] != not_this_one
8313 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8314 return &m_pending_anchors.annotations[i];
8315 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8316 if(&m_pending_tags.annotations[i] != not_this_one
8317 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8318 return &m_pending_tags.annotations[i];
8320 return (EntryPtr)
nullptr;
8324 first = get_first_on_same_line(
nullptr);
8326 _c4dbgpf(
"first annotation: {} indent={} line={}", maybe_null_str_(first->str), first->indentation, first->line);
// The second annotation is the first same-line entry that is not `first`.
8331 second = get_first_on_same_line(first);
8333 _c4dbgpf(
"second annotation: {} indent={} line={}", maybe_null_str_(second->str), second->indentation, second->line);
// Converts an entry into the string reported to the caller. Only fragments
// of this lambda are visible here: a branch for entries with a null string
// or starting with '!' or '<' (logged as "tag"), and a branch logged as
// "anchor" which asserts the anchor lies after the start of token_soup.
8335 auto extract_string = [&](EntryPtr e){
8337 if(!e->str.str || e->str.begins_with_any(
"!<"))
8343 _c4dbgpf(
"tag: {} -> {}", maybe_null_str_(e->str), tag);
8351 _c4assert(anchor.str - token_soup.str > 0);
8356 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8359 *first_ = first ? extract_string(first) : nullptr;
8360 *second_ = second ? extract_string(second) : nullptr;
// With more than one annotation, the outputs are ordered by address (the
// swap statements themselves are not visible in this listing).
8361 if(total > 1 && (first_->str > second_->str))
8366 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
// ParseEngine::_handle_usty(): cold-path handler (C4_COLD) for a line parsed
// while neither RBLCK nor RFLOW is set (asserted below). The branches that
// follow this prologue dispatch on RSEQ / RMAP / neither.
// NOTE: this listing omits some original lines (braces and a few
// conditions); the comments describe only the statements that are visible.
8375template<
class EventHandler>
8376C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// Both indref and node_id are passed, so the format names both (same
// layout as the corresponding message in _handle_unk()).
8378 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8380 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
// Section compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined
// (its matching #endif is not visible in this listing).
8382 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8385 _c4dbgp(
"usty[RNXT]: finishing!");
8390 _maybe_skip_comment();
8391 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// `pos` is computed on lines not visible here; npos is mapped to the whole
// remainder, and that many characters are consumed as indentation.
8398 pos = pos !=
npos ? pos : rem.len;
8399 _c4dbgpf(
"skipping indentation of {}", pos);
8400 _line_progressed(pos);
// Refresh the local view of the remaining text after the skip.
8401 rem = m_evt_handler->m_curr->line_contents.rem;
8404 _c4dbgpf(
"rem is now {}", prs_(rem));
// There must be something left to parse; `first` drives the dispatch below.
8407 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8408 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8409 char first = rem.str[0];
// RSEQ case (per the debug texts; the opening condition is not visible in
// this listing): RMAP must not be set at the same time.
8412 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8413 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow sequence: a new level is pushed on the handler (no begin event is
// issued in the visible lines) and the opening character is consumed.
8416 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8418 m_evt_handler->_push();
8420 _set_indentation(startindent);
8421 _line_progressed(1);
8422 _maybe_skip_whitespace_tokens();
// Block sequence: handled the same way for a '-' block token.
8424 else if(first ==
'-' && _is_blck_token(rem))
8426 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8428 m_evt_handler->_push();
8430 _set_indentation(startindent);
8431 _line_progressed(1);
8432 _maybe_skip_whitespace_tokens();
// Error raised from the RSEQ case (its guarding `else` is not visible here).
8436 _c4err(
"can only parse a seq into an existing seq");
// RMAP case: RSEQ must not be set at the same time.
8439 else if(has_any(
RMAP))
8441 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8442 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map (opening condition not visible in this listing): pending
// annotations are handled, then a level is pushed on the handler.
8445 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8447 _handle_annotations_before_blck_val_scalar();
8448 m_evt_handler->_push();
8450 _set_indentation(startindent);
8451 _line_progressed(1);
8452 _maybe_skip_whitespace_tokens();
// Explicit ("complex") key introduced by a '?' block token.
8454 else if(first ==
'?' && _is_blck_token(rem))
8456 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8458 _handle_annotations_before_blck_val_scalar();
8459 m_evt_handler->_push();
8461 _save_indentation();
8462 _line_progressed(1);
8463 _maybe_skip_whitespace_tokens();
// ':' block token with nothing before it: the key is set as an empty plain scalar.
8465 else if(first ==
':' && _is_blck_token(rem))
8467 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8469 _handle_annotations_before_blck_val_scalar();
8470 m_evt_handler->_push();
8471 m_evt_handler->set_key_scalar_plain_empty();
8473 _save_indentation();
8474 _line_progressed(1);
8475 _maybe_skip_whitespace_tokens();
// Anchor: scanned and stored as a pending annotation with the column of
// `rem` and the current line; the same column becomes the indentation.
8477 else if(rem.begins_with(
'&'))
8479 csubstr anchor = _scan_anchor();
8480 _c4dbgpf(
"usty[RMAP]: anchor! {}", prs_(anchor));
8481 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8482 const size_t line = m_evt_handler->m_curr->pos.line;
8483 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8484 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Alias/reference: inside a map it is only accepted as a key, i.e. when a
// colon follows; otherwise it is an error.
8486 else if(first ==
'*')
8488 csubstr ref = _scan_ref_map();
8489 _c4dbgpf(
"usty[RMAP]: ref! {}", prs_(ref));
8490 if(!_maybe_scan_following_colon())
8492 _c4err(
"cannot read a VAL to a map");
// A colon followed: push a level and set the reference as its key.
8496 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8497 const size_t startline = m_evt_handler->m_curr->pos.line;
8499 _handle_annotations_before_start_mapblck(startline);
8500 m_evt_handler->_push();
8501 _handle_keyref(ref);
8502 _maybe_skip_whitespace_tokens();
8503 _set_indentation(startindent);
// Tag: `tag` is scanned on a line not visible in this listing, then stored
// as a pending annotation.
8507 else if(first ==
'!')
8510 _c4dbgpf(
"usty[RMAP]: val tag! {}", prs_(tag));
8513 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8514 const size_t line = m_evt_handler->m_curr->pos.line;
8515 _add_annotation(&m_pending_tags, tag, indentation, line);
// Sequences (flow '[' or block '-') are rejected in the RMAP case.
8517 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8519 _c4err(
"cannot parse a seq into an existing map");
// RMAP case, scalars: every scalar must be followed by a colon and becomes
// the key of a newly pushed level; a scalar without a colon is an error.
// `sc` is declared on a line not visible in this listing.
8523 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8524 startindent = m_evt_handler->m_curr->line_contents.indentation;
8525 const size_t startline = m_evt_handler->m_curr->pos.line;
8527 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (opening condition not visible in this listing).
8530 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8531 sc = _scan_scalar_squot();
8532 if(!_maybe_scan_following_colon())
8534 _c4err(
"cannot read a VAL to a map");
8538 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8540 _handle_annotations_before_start_mapblck(startline);
8541 m_evt_handler->_push();
8542 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8543 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8544 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8545 _set_indentation(startindent);
8547 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same sequence with the dquot scanner/filter/event.
8550 else if(first ==
'"')
8552 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8553 sc = _scan_scalar_dquot();
8554 if(!_maybe_scan_following_colon())
8556 _c4err(
"cannot read a VAL to a map");
8560 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8562 _handle_annotations_before_start_mapblck(startline);
8563 m_evt_handler->_push();
8564 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8565 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8566 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8567 _set_indentation(startindent);
8569 _maybe_skip_whitespace_tokens();
// Block scalars cannot be used directly as keys here.
8572 else if(first ==
'|')
8574 _c4err(
"block literal keys must be enclosed in '?'");
// NOTE(review): the '>' indicator reuses the "block literal" wording of the
// '|' branch - confirm whether a distinct message was intended.
8576 else if(first ==
'>')
8578 _c4err(
"block literal keys must be enclosed in '?'");
// Plain scalar: same key-only handling as the quoted scalars above.
8580 else if(_scan_scalar_plain_unk(&sc))
8582 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8583 if(!_maybe_scan_following_colon())
8585 _c4err(
"cannot read a VAL to a map");
8589 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8591 _handle_annotations_before_start_mapblck(startline);
8592 m_evt_handler->_push();
8593 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8594 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8595 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8596 _set_indentation(startindent);
8598 _maybe_skip_whitespace_tokens();
// Remaining case, labelled "UNK" in the debug texts (its opening `else` is
// not visible in this listing): RSEQ must not be set. Containers are opened
// here with explicit begin_* events.
8609 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8610 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow sequence (opening condition not visible in this listing).
8613 _c4dbgp(
"usty[UNK]: it's a flow seq");
8615 _handle_annotations_before_blck_val_scalar();
8616 m_evt_handler->begin_seq_val_flow();
8618 _set_indentation(startindent);
8619 _line_progressed(1);
8620 _maybe_skip_whitespace_tokens();
// Block sequence introduced by a '-' block token.
8622 else if(first ==
'-' && _is_blck_token(rem))
8624 _c4dbgp(
"usty[UNK]: it's a block seq");
8626 _handle_annotations_before_blck_val_scalar();
8627 m_evt_handler->begin_seq_val_block();
8629 _set_indentation(startindent);
8630 _line_progressed(1);
8631 _maybe_skip_whitespace_tokens();
// Flow map.
8633 else if(first ==
'{')
8635 _c4dbgp(
"usty[UNK]: it's a flow map");
8637 _handle_annotations_before_blck_val_scalar();
8638 m_evt_handler->begin_map_val_flow();
8640 _set_indentation(startindent);
8641 _line_progressed(1);
8642 _maybe_skip_whitespace_tokens();
// Block map with an explicit ("complex") key introduced by '?'.
8644 else if(first ==
'?' && _is_blck_token(rem))
8646 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8648 _handle_annotations_before_blck_val_scalar();
8649 m_evt_handler->begin_map_val_block();
8651 _save_indentation();
8652 _line_progressed(1);
8653 _maybe_skip_whitespace_tokens();
// Block map whose first key is empty (line starts with a ':' block token).
8655 else if(first ==
':' && _is_blck_token(rem))
8657 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8659 _handle_annotations_before_blck_val_scalar();
8660 m_evt_handler->begin_map_val_block();
8661 m_evt_handler->set_key_scalar_plain_empty();
8663 _save_indentation();
8664 _line_progressed(1);
8665 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation with the column of `rem` and the
// current line; the same column becomes the indentation.
8667 else if(first ==
'&')
8669 csubstr anchor = _scan_anchor();
8670 _c4dbgpf(
"usty[UNK]: anchor! {}", prs_(anchor));
8671 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8672 const size_t line = m_evt_handler->m_curr->pos.line;
8673 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8674 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Alias/reference: a value by itself, or the key of a new block map when a
// colon follows.
8676 else if(first ==
'*')
8678 csubstr ref = _scan_ref_map();
8679 _c4dbgpf(
"usty[UNK]: ref! {}", prs_(ref));
8680 if(!_maybe_scan_following_colon())
8682 _c4dbgp(
"usty[UNK]: set val ref");
8683 _handle_valref(ref);
// A colon followed: begin a block map with the reference as its key.
8687 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8688 const size_t startline = m_evt_handler->m_curr->pos.line;
8690 _handle_annotations_before_start_mapblck(startline);
8691 m_evt_handler->begin_map_val_block();
8692 _handle_keyref(ref);
8693 _maybe_skip_whitespace_tokens();
8694 _set_indentation(startindent);
// Tag: `tag` is scanned on a line not visible in this listing, then stored
// as a pending annotation.
8698 else if(first ==
'!')
8701 _c4dbgpf(
"usty[UNK]: val tag! {}", prs_(tag));
8704 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8705 const size_t line = m_evt_handler->m_curr->pos.line;
8706 _add_annotation(&m_pending_tags, tag, indentation, line);
// "UNK" case, scalars: a scalar not followed by a colon is emitted as a
// value; a scalar followed by a colon starts a block map and becomes its
// key. `sc` and `sb` are declared on lines not visible in this listing.
8710 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8711 startindent = m_evt_handler->m_curr->line_contents.indentation;
8712 const size_t startline = m_evt_handler->m_curr->pos.line;
8715 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (opening condition not visible in this listing).
8718 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8719 sc = _scan_scalar_squot();
8720 if(!_maybe_scan_following_colon())
8722 _c4dbgp(
"usty[UNK]: set as val");
8723 _handle_annotations_before_blck_val_scalar();
8724 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8725 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Key path: value of the scalar goes through the key filter.
8730 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8732 _handle_annotations_before_start_mapblck(startline);
8733 m_evt_handler->begin_map_val_block();
8734 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8735 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8736 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8737 _set_indentation(startindent);
8739 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value/key split.
8742 else if(first ==
'"')
8744 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8745 sc = _scan_scalar_dquot();
8746 if(!_maybe_scan_following_colon())
8748 _c4dbgp(
"usty[UNK]: set as val");
8749 _handle_annotations_before_blck_val_scalar();
8750 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8751 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8756 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8758 _handle_annotations_before_start_mapblck(startline);
8759 m_evt_handler->begin_map_val_block();
8760 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8761 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8762 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8763 _set_indentation(startindent);
8765 _maybe_skip_whitespace_tokens();
// Block-literal scalar: always a value.
8768 else if(first ==
'|')
8770 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8772 _scan_block(&sb, startindent);
8773 _c4dbgp(
"usty[UNK]: set as val");
8774 _handle_annotations_before_blck_val_scalar();
8775 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8776 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar: always a value.
8779 else if(first ==
'>')
8781 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8783 _scan_block(&sb, startindent);
8784 _c4dbgp(
"usty[UNK]: set as val");
8785 _handle_annotations_before_blck_val_scalar();
8786 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8787 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: same value/key split as the quoted scalars.
8790 else if(_scan_scalar_plain_unk(&sc))
8792 _c4dbgp(
"usty[UNK]: got a plain scalar");
8793 if(!_maybe_scan_following_colon())
8795 _c4dbgp(
"usty[UNK]: set as val");
8796 _handle_annotations_before_blck_val_scalar();
8797 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8798 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8803 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8805 _handle_annotations_before_start_mapblck(startline);
8806 m_evt_handler->begin_map_val_block();
8807 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8808 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8809 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8810 _set_indentation(startindent);
8812 _maybe_skip_whitespace_tokens();
// Parse driver (the signature line is not visible in this listing; the body
// uses `filename` and `src`). Visible flow: require a non-empty handler
// stack, start the parse and the stream, then loop over the file line by
// line, dispatching each step on the parser state flags, and finally call
// finish_parse().
// NOTE(review): presumably the JSON-oriented entry point, given it precedes
// the fuller driver below and dispatches on fewer states - confirm.
8826template<
class EventHandler>
8829 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8831 m_evt_handler->start_parse(filename.str, src);
8832 m_evt_handler->begin_stream();
// Outer loop: until the whole file is consumed; inner loop: until the
// current line is consumed.
8834 while( ! _finished_file())
8837 while( ! _finished_line())
// Each step must have something left to parse on the line.
8840 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// State dispatch; the handler calls for each state are not visible here.
8845 else if(has_any(
RMAP))
8849 else if(has_any(
RUNK))
// No known state flag matched.
8855 _c4err(
"internal error");
8858 if(_finished_file())
8863 m_evt_handler->finish_parse();
// Parse driver (the signature line is not visible in this listing; the body
// uses `filename` and `src`). Visible flow: require a non-empty handler
// stack, start the parse and the stream, then loop over the file line by
// line, dispatching each step on the parser state flags (RBLCK, RUNK, USTY
// and others on lines not visible here), and finally call finish_parse().
8869template<
class EventHandler>
8872 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8874 m_evt_handler->start_parse(filename.str, src);
8875 m_evt_handler->begin_stream();
// Outer loop: until the whole file is consumed; inner loop: until the
// current line is consumed.
8877 while( ! _finished_file())
8880 while( ! _finished_line())
// Each step must have something left to parse on the line.
8883 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// When the container is not a sequence it must be a map (the dispatch
// around this assertion is not visible here).
8894 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// Block containers: sequences go to _handle_seq_block(), maps to
// _handle_map_block().
8903 else if(has_any(
RBLCK))
8907 _handle_seq_block();
8911 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8912 _handle_map_block();
8915 else if(has_any(
RUNK))
8919 else if(has_any(
USTY))
// No known state flag matched.
8925 _c4err(
"internal error");
8928 if(_finished_file())
8933 m_evt_handler->finish_parse();
8942#undef _c4dbgnextline
8946C4_SUPPRESS_WARNING_MSVC_POP
8947C4_SUPPRESS_WARNING_GCC_CLANG_POP