1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
7 #ifndef _C4_CHARCONV_HPP_
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
16 #ifndef _C4_YML_TAG_HPP_
19 #ifndef _C4_YML_NODE_TYPE_HPP_
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
29 #include <c4/dump.hpp>
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
/** Parse-time assertion shorthand: forwards its arguments to
 * _RYML_ASSERT_PARSE_ together with the event handler's callbacks and the
 * current parser position. Requires m_evt_handler to be in scope. */
37 #define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
/* Code-selection helpers keyed on RYML_WITH_TAB_TOKENS. Two sets of
 * definitions are provided:
 *  - in the first set _RYML_WITH_TAB_TOKENS(...) expands to its arguments,
 *    _RYML_WITHOUT_TAB_TOKENS(...) expands to nothing, and
 *    _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) expands to `with`;
 *  - in the second set the roles are swapped.
 * The first set is guarded by defined(RYML_WITH_TAB_TOKENS). */
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
/* Hooks for saving parsed sources as test cases. When RYML_SAVE_TEST_YAML
 * is not defined, _RYML_SAVE_TEST_YAML / _RYML_SAVE_TEST_JSON expand to
 * nothing; the alternative definitions forward (filename, src) to
 * c4::yml::ryml_save_test_yaml() / c4::yml::ryml_save_test_json(). */
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
// Declarations only: these are the functions the _RYML_SAVE_TEST_YAML and
// _RYML_SAVE_TEST_JSON macros forward to. Both take a file name and the
// source text; their definitions are not in this file section.
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
/** Debug helper: prints a separator line followed by the line number and
 * byte offset of the current parser position. Requires m_evt_handler to
 * be in scope. */
68 #define _c4dbgnextline() \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
77 C4_SUPPRESS_WARNING_MSVC_PUSH
78 C4_SUPPRESS_WARNING_MSVC(4296)
79 C4_SUPPRESS_WARNING_MSVC(4702)
80 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81 C4_SUPPRESS_WARNING_GCC_CLANG("-Wtype-limits")
82 C4_SUPPRESS_WARNING_GCC_CLANG("-Wformat-nonliteral")
83 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
84 #if defined(__GNUC__) && (__GNUC__ >= 6)
85 C4_SUPPRESS_WARNING_GCC(
"-Wnull-dereference")
87 #if defined(__GNUC__) && (__GNUC__ >= 7)
88 C4_SUPPRESS_WARNING_GCC(
"-Wduplicated-branches")
/** Shorten @p subject in place to its first @p pos characters. When
 * @p pos is npos the length is left unchanged. Only the length is
 * written; the string pointer is not touched. */
98 C4_HOT C4_ALWAYS_INLINE
void _set_first(substr &C4_RESTRICT subject,
size_t pos) noexcept
101 subject.len = pos !=
npos ? pos : subject.len;
/** Read-only-string overload: shorten @p subject in place to its first
 * @p pos characters. When @p pos is npos the length is left unchanged. */
103 C4_HOT C4_ALWAYS_INLINE
void _set_first(csubstr &C4_RESTRICT subject,
size_t pos) noexcept
106 subject.len = pos !=
npos ? pos : subject.len;
/** Strict variant of _set_first() for writable strings: @p pos must not
 * be npos (asserted).
 * NOTE(review): presumably then sets subject.len to pos -- confirm. */
108 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(substr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
111 _RYML_ASSERT_BASIC(pos !=
npos);
/** Strict variant of _set_first() for read-only strings: @p pos must not
 * be npos (asserted).
 * NOTE(review): presumably then sets subject.len to pos -- confirm. */
114 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(csubstr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
117 _RYML_ASSERT_BASIC(pos !=
npos);
/** Test applied to a string that starts with a block indicator.
 * Preconditions (asserted): @p s is non-empty and its first character is
 * one of '-', ':' or '?'.
 * NOTE(review): presumably decides on the character following the
 * indicator -- confirm against the return statement. */
121 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s)
RYML_NOEXCEPT
123 _RYML_ASSERT_BASIC(s.len > 0);
124 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
/** @return true when @p s starts with '-' and that dash is either the
 * whole string or is followed by a space (or by a tab, when
 * RYML_WITH_TAB_TOKENS is defined). Unlike _is_blck_token() this has no
 * preconditions: an empty string simply yields false. */
128 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_seq_token_maybe(csubstr
const& C4_RESTRICT s) noexcept
130 return ((s.len >= 1) && (s.str[0] ==
'-') && ((s.len == 1) || ((s.str[1] ==
' ')
_RYML_WITH_TAB_TOKENS( || (s.str[1] ==
'\t')))));
135 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
136 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
137 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
138 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
144 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
145 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
146 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
147 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
/** Checks whether @p s starts with a document marker. The two return
 * statements test the second and third characters for "--" and ".."
 * respectively.
 * NOTE(review): both index s.str[1] and s.str[2] without a visible
 * length check nearby; presumably the length and first character are
 * validated before reaching them -- confirm. */
151 inline bool _is_doc_token(csubstr s) noexcept
159 return (s.str[1] ==
'-' && s.str[2] ==
'-')
163 return (s.str[1] ==
'.' && s.str[2] ==
'.')
/** Detects the JSON literals false / true / null at the start of @p s.
 * @p s must be non-empty (asserted).
 * @return the length of the matched literal (5 for "false", 4 for "true"
 * and for "null"), or 0 when the tested literal does not match. */
170 inline size_t _begins_with_special_json_scalar(csubstr s)
RYML_NOEXCEPT
172 _RYML_ASSERT_BASIC(s.len);
176 return s.begins_with(
"false") ? 5u : 0u;
178 return s.begins_with(
"true") ? 4u : 0u;
180 return s.begins_with(
"null") ? 4u : 0u;
/** Tells whether the newline character @p nl and the character
 * @p following it form a two-character line break ("\n\r" or "\r\n").
 * The boolean result is converted to the size_t return type, so the
 * function yields 1 for a combined break and 0 otherwise. */
188 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
190 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
/** Works on the text following the first line break in @p rem: locates
 * the first '\r' or '\n', keeps what lies to its right, and checks via
 * _extend_from_combined_newline() whether the break is a two-character
 * one.
 * NOTE(review): rem[nlpos] requires a line break to have been found;
 * presumably the npos case is handled before that line -- confirm. */
194 inline substr _from_next_line(substr rem)
196 size_t nlpos = rem.first_of(
"\r\n");
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
203 if(_extend_from_combined_newline(nl, rem.front()))
/** Scans @p r forward from position *i, which must hold a '\n'
 * (asserted), advancing *i in place as it goes.
 * The loop distinguishes '\n' from the blank characters ' ', '\t' and
 * '\r'.
 * @return the counter numnl_following (presumably the number of newlines
 * seen after the starting one -- confirm). */
211 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
213 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
214 size_t numnl_following = 0;
216 for( ; *i < r.len; ++(*i))
218 if(r.str[*i] ==
'\n')
221 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
226 return numnl_following;
/** Indentation-aware overload: scans @p r forward from position *i, which
 * must hold a '\n' (asserted), advancing *i in place.
 * Besides the blank characters ' ', '\t' and '\r', it uses
 * @p indentation to compute a stop position (*i + indentation) and
 * asserts that a character other than ' ' or '\r' is met before that
 * position is reached.
 * @return the counter numnl_following. */
231 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
233 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
234 size_t numnl_following = 0;
238 for( ; *i < r.len; ++(*i))
240 const char c = r.str[*i];
244 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
250 for( ; *i < r.len; ++(*i))
// upper bound for the indentation run scanned by the loop below
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
261 if(c !=
' ' && c !=
'\r')
263 _RYML_ASSERT_BASIC(*i < stop);
268 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
274 return numnl_following;
284 template<
class EventHandler>
291 template<
class EventHandler>
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
301 , m_prev_val_end(
npos)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
307 _RYML_CHECK_BASIC(evt_handler);
310 template<
class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
320 , m_prev_val_end(
npos)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
329 template<
class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
339 , m_prev_val_end(
npos)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
345 if(that.m_newline_offsets_capacity)
347 _resize_locations(that.m_newline_offsets_capacity);
348 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
354 template<
class EventHandler>
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
375 template<
class EventHandler>
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
401 template<
class EventHandler>
406 m_pending_anchors = {};
408 m_has_directives_yaml =
false;
409 m_has_directives =
false;
412 m_prev_val_end =
npos;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
/** Releases the newline-offsets buffer. If m_newline_offsets is non-null
 * it is freed through the event handler's callbacks (as an array of
 * m_newline_offsets_capacity size_t elements), and the pointer, size and
 * capacity members are reset to null / 0. */
419 template<
class EventHandler>
420 void ParseEngine<EventHandler>::_free()
422 if(m_newline_offsets)
424 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
425 m_newline_offsets =
nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
/** Resets per-parse state: clears the pending anchors, clears both
 * directive flags, and sets m_prev_val_end back to npos. When the
 * parser options request locations, _prepare_locations() is called. */
434 template<
class EventHandler>
435 void ParseEngine<EventHandler>::_reset()
437 m_pending_anchors = {};
439 m_has_directives_yaml =
false;
440 m_has_directives =
false;
443 m_prev_val_end =
npos;
447 if(m_options.locations())
449 _prepare_locations();
/** Re-points strings after the arena moved from @p prev_arena to
 * @p next_arena.
 *
 * The local macro _ryml_relocate(s) rebases s.str onto next_arena,
 * keeping its offset, whenever s.str lies within [pb, pe] -- note that
 * the end pointer pe is included in the range.
 *
 * The loops visit the handler's state stack, the pending tags, the
 * pending anchors, the handler's tag directives and the entries of its
 * tag cache (presumably applying _ryml_relocate to the strings each of
 * them holds -- confirm). The macro is undefined again at the end.
 * NOTE(review): the use of @p other is not visible in these lines. */
456 template<
class EventHandler>
457 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
459 _c4dbgp(
"relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
465 (s).str = next_arena.str + ((s).str - pb); \
467 for(ParserState &st : m_evt_handler->m_stack)
473 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
478 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
484 TagDirectives &tds = m_evt_handler->tag_directives();
485 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
492 TagCache &tch = m_evt_handler->tag_cache();
493 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
503 #undef _ryml_relocate
/** Requests @p len characters from the event handler's arena. The arena
 * is sampled before and after the allocation; if its base pointer
 * changed, _relocate_arena() is called with the old and new arena and
 * @p other so that existing strings can follow the move. */
507 template<
class EventHandler>
508 substr ParseEngine<EventHandler>::_alloc_arena(
size_t len, substr *other)
510 csubstr prev = m_evt_handler->arena();
511 substr out = m_evt_handler->alloc_arena(len);
512 substr curr = m_evt_handler->arena();
// a different base pointer means the arena storage was moved
513 if(curr.str != prev.str)
514 _relocate_arena(prev, curr, other);
/** Emits a description of the current parser position through @p dumpfn.
 *
 * Output, in order:
 *  - the file name followed by ':' (the name is taken from the current
 *    position), then "line:col: ";
 *  - the current line's contents, escaped and cut to at most 80
 *    characters with a trailing "..." when cut, plus its size;
 *  - a marker line: spaces, then '^' followed by '~' characters (at most
 *    80 marks in total), and the 1-based column range of the remaining
 *    part of the line;
 *  - a "top state:" line with the current state's flags as text.
 *
 * offs starts at 3 plus the to_chars() results for the line and column
 * numbers and grows by the file-name width; it appears to track the
 * width of the printed prefix so the marker line can be indented
 * beneath the contents -- confirm. */
523 template<
class EventHandler>
524 template<
class DumpFn>
525 C4_NO_INLINE
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
527 ParserState
const *
const C4_RESTRICT st = m_evt_handler->m_curr;
528 LineContents
const& C4_RESTRICT lc = st->line_contents;
529 csubstr contents = lc.full.first(lc.num_cols);
533 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
534 csubstr m_file = m_evt_handler->m_curr->pos.name;
537 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
538 offs += m_file.len + 1;
540 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// show at most 80 characters of the line; flag the cut with "..."
541 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
542 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
543 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
// 0-based columns delimiting the unparsed remainder within the full line
545 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
546 size_t lastcol = firstcol + lc.rem.len;
549 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
550 std::forward<DumpFn>(dumpfn)(
" ");
551 std::forward<DumpFn>(dumpfn)(
"^");
552 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
553 std::forward<DumpFn>(dumpfn)(
"~");
554 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
558 std::forward<DumpFn>(dumpfn)(
"\n");
563 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
/** Debug aid: prints one line per state on the handler's stack, giving
 * its level, indentation reference, node id and flags. @p buf is the
 * scratch buffer handed to detail::_parser_flags_to_str() for the flags
 * text. */
567 template<
class EventHandler>
568 void ParseEngine<EventHandler>::_print_state_stack(substr buf)
const
572 for(ParserState
const& s : m_evt_handler->m_stack)
573 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
/** Convenience overload: forwards to _print_state_stack(substr) with a
 * buffer named buf supplied by this function. */
577 template<
class EventHandler>
578 void ParseEngine<EventHandler>::_print_state_stack()
const
581 _print_state_stack(buf);
/** Reports a parse error with an explicit YAML location. Declared
 * C4_NORETURN.
 * First tells the event handler to cancel the parse, then calls
 * err_parse() with the handler's callbacks, the C++ source location
 * @p cpploc, the YAML location @p ymlloc, and the format string with
 * its arguments. */
588 template<
class EventHandler>
589 template<
class ...Args>
590 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc, Location
const& ymlloc,
const char* fmt, Args
const& ...args)
const
592 m_evt_handler->cancel_parse();
593 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
/** Reports a parse error at the current parser position. Declared
 * C4_NORETURN.
 * Same as the overload taking an explicit YAML location, except that the
 * location passed to err_parse() is the current state's position. */
596 template<
class EventHandler>
597 template<
class ...Args>
598 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const
600 m_evt_handler->cancel_parse();
601 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
/** Debug print: outputs the formatted message via _dbg_printf() and then
 * the description of the current position via _fmt_msg(), using
 * _dbg_dumper as the sink. */
607 template<
class EventHandler>
608 template<
class ...Args>
609 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& ...args)
const
613 _dbg_printf(fmt, args...);
615 _fmt_msg(_dbg_dumper);
/** Tells whether the whole buffer has been consumed, i.e. whether the
 * current offset is at or beyond the buffer length. A debug message is
 * printed (presumably only when finished -- confirm). */
622 template<
class EventHandler>
623 bool ParseEngine<EventHandler>::_finished_file()
const
625 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
628 _c4dbgp(
"finished file!!!");
/** @return true when nothing is left to parse on the current line, i.e.
 * the remainder (line_contents.rem) is empty. */
633 template<
class EventHandler>
634 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
636 return m_evt_handler->m_curr->line_contents.rem.empty();
/** Skips leading whitespace on the current line, if any. Acts only when
 * the remainder is non-empty and starts with a space (or a tab, when
 * RYML_WITH_TAB_TOKENS is defined); it then advances the line by pos
 * characters.
 * NOTE(review): pos is set to the full remaining length on one path,
 * presumably when the remainder is whitespace only -- confirm. */
642 template<
class EventHandler>
643 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
645 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
649 pos = m_evt_handler->m_curr->line_contents.rem.len;
650 _c4dbgpf(
"skip {} whitespace characters", pos);
651 _line_progressed(pos);
/** Skips a leading run of the character @p c on the current line, if the
 * remainder starts with it. The run length is found with
 * first_not_of(c) and the line is advanced by that many characters.
 * NOTE(review): pos is set to the full remaining length on one path,
 * presumably when first_not_of() finds nothing -- confirm. */
655 template<
class EventHandler>
656 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
658 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
660 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
662 pos = m_evt_handler->m_curr->line_contents.rem.len;
663 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
664 _line_progressed(pos);
/** Skips a leading run of any of the characters in @p chars. Unlike
 * _maybe_skipchars(), the remainder is required (asserted) to start with
 * one of them. The line is advanced up to the first character that is
 * not in @p chars.
 * NOTE(review): pos is set to the full remaining length on one path,
 * presumably when first_not_of() finds nothing -- confirm. */
668 template<
class EventHandler>
670 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
672 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
673 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
675 pos = m_evt_handler->m_curr->line_contents.rem.len;
676 _c4dbgpf(
"skip {} characters", pos);
677 _line_progressed(pos);
/** Consumes a comment that starts at the current position.
 *
 * Asserted preconditions: the remainder starts with '#', lies inside the
 * full line, the current column is at least 1, and column - 1 equals the
 * offset of the remainder within the full line.
 *
 * If the comment does not start at the beginning of the line, the
 * character just before it must be a space or a tab; otherwise the error
 * "comment not preceded by whitespace" is raised. Finally the rest of
 * the line is marked as consumed. */
680 template<
class EventHandler>
681 void ParseEngine<EventHandler>::_skip_comment()
683 LineContents
const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
// 0-based column of the '#'
684 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
685 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
686 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
687 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
688 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
// only a comment that is not at the very start of the line has a preceding character to check
690 if(lc.rem.str != lc.full.str)
692 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
693 const char prev = lc.full.str[col - 1u];
694 if(C4_UNLIKELY(prev !=
' ' && prev !=
'\t'))
695 _c4err(
"comment not preceded by whitespace");
697 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
698 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Looks past leading spaces and tabs on the current line; when the first
 * other character is '#', advances the line by the number of blanks
 * skipped (so the position ends up on the '#').
 * NOTE(review): rem[pos] needs pos to be valid; presumably the npos case
 * is handled before that test -- confirm. */
701 template<
class EventHandler>
702 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
704 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
707 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
709 _line_progressed(pos);
/** Looks past leading spaces and tabs on the current line; when the first
 * other character is '#', advances the line by the number of blanks
 * skipped. On another path the whole remainder of the line is consumed
 * (presumably when the remainder is blank only -- confirm).
 * NOTE(review): rem[pos] needs pos to be valid; presumably the npos case
 * is handled before that test -- confirm. */
715 template<
class EventHandler>
716 void ParseEngine<EventHandler>::_maybe_skip_comment()
718 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
721 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
723 _line_progressed(pos);
729 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Looks past leading spaces and tabs on the current line for a ':'.
 * When one is found, the character after it (if any) is inspected, and
 * the line is advanced by pos characters. On another path the whole
 * remainder of the line is consumed.
 * NOTE(review): the conditions deciding the return value are not
 * visible in these lines; presumably true means a key-terminating colon
 * was consumed -- confirm. */
733 template<
class EventHandler>
734 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
736 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
739 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
// step past the ':' and look at what follows it, if anything
743 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
745 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
751 _line_progressed(pos);
757 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Scans an anchor at the current position. The remainder must start
 * with '&' (asserted). The anchor name is the text after the '&' up to
 * the first space, ',', ']', '}' or tab. The line is advanced past the
 * '&' and the name, and any spaces that follow are skipped. */
765 template<
class EventHandler>
766 csubstr ParseEngine<EventHandler>::_scan_anchor()
768 csubstr s = m_evt_handler->m_curr->line_contents.rem;
769 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
770 csubstr anchor = s.range(1, s.first_of(
" ,]}\t"));
// 1u accounts for the leading '&', which is not part of the name
771 _line_progressed(1u + anchor.len);
772 _maybe_skipchars(
' ');
/** Scans an alias/reference inside a flow sequence. The remainder must
 * start with '*' (asserted). The token, including the '*', ends at the
 * first space, ',', ']' or tab, or at the end of the line when none is
 * found. The line is advanced past the token. */
776 template<
class EventHandler>
777 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
779 csubstr s = m_evt_handler->m_curr->line_contents.rem;
780 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
781 _set_first(s, s.first_of(
" ,]\t"));
782 _line_progressed(s.len);
/** Scans an alias/reference inside a flow map. The remainder must start
 * with '*' (asserted). The token, including the '*', ends at the first
 * space, ',', '}' or tab, or at the end of the line when none is found.
 * The line is advanced past the token. */
786 template<
class EventHandler>
787 csubstr ParseEngine<EventHandler>::_scan_ref_map()
789 csubstr s = m_evt_handler->m_curr->line_contents.rem;
790 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
791 _set_first(s, s.first_of(
" ,}\t"));
792 _line_progressed(s.len);
/** Scans a tag at the current position. The remainder must start with
 * '!' (asserted). Two forms are handled:
 *
 *  - tags not starting with "!<": the tag ends at the first space, ',',
 *    ']', '}' or tab; a '[' or '{' inside it takes a branch marked
 *    unlikely (presumably an error -- confirm). After consuming the tag,
 *    a further step is taken when the options ask to resolve all tags,
 *    or to resolve tags and the tag is a custom one.
 *  - tags starting with "!<": the tag extends through the closing '>';
 *    a missing '>' takes a branch marked unlikely (presumably an error
 *    -- confirm).
 *
 * In both forms the line is advanced past the tag; whitespace following
 * it is then skipped. */
796 template<
class EventHandler>
797 csubstr ParseEngine<EventHandler>::_scan_tag()
799 csubstr t = m_evt_handler->m_curr->line_contents.rem;
800 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
801 if(!t.begins_with(
"!<"))
803 _c4dbgp(
"begins with '!'");
804 _set_first(t, t.first_of(
" ,]}\t"));
805 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
807 _line_progressed(t.len);
808 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
813 _c4dbgp(
"begins with '!<'");
814 size_t pos = t.find(
'>');
815 if(C4_UNLIKELY(pos ==
npos))
// pos+1 keeps the closing '>' as part of the tag
817 _set_first_strict(t, pos+1);
818 _line_progressed(t.len);
821 _maybe_skip_whitespace_tokens();
/** Overload of _scan_tag() taking an output pointer @p orig (presumably
 * receiving the tag as written in the source -- confirm; its use is not
 * visible in these lines). The remainder must start with '!' (asserted).
 *
 *  - tags not starting with "!<": the tag ends at the first space, ','
 *    or tab; a '[' or '{' inside it takes a branch marked unlikely.
 *  - tags starting with "!<": the tag extends through the closing '>';
 *    a missing '>' takes a branch marked unlikely.
 *
 * NOTE(review): the terminator set here is " ,\t", whereas the
 * no-argument overload also stops at ']' and '}' -- confirm that the
 * difference is intended. */
825 template<
class EventHandler>
826 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
828 csubstr t = m_evt_handler->m_curr->line_contents.rem;
829 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
830 if(!t.begins_with(
"!<"))
832 _c4dbgp(
"begins with '!'");
833 _set_first(t, t.first_of(
" ,\t"));
834 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
836 _line_progressed(t.len);
838 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
843 _c4dbgp(
"begins with '!<'");
844 size_t pos = t.find(
'>');
845 if(C4_UNLIKELY(pos ==
npos))
// pos+1 keeps the closing '>' as part of the tag
847 _set_first_strict(t, pos+1);
848 _line_progressed(t.len);
852 _maybe_skip_whitespace_tokens();
/** Helper of _is_valid_start_scalar_plain_flow() for strings that start
 * with ':' or '-'.
 * Asserted preconditions: @p s is non-empty, begins with ':' or '-', and
 * contains neither '\n' nor '\r'.
 * Depending on the second character, the token is either rejected as a
 * non-scalar token (debug message) or reported as an invalid token
 * starting with ':' (parse error). */
859 template<
class EventHandler>
860 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
862 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
863 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
864 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
877 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
887 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
/** Helper of _is_valid_start_scalar_plain_flow() for strings that start
 * with '?'.
 * Asserted preconditions: @p s is non-empty, its first character is '?',
 * and it contains neither '\n' nor '\r'.
 * Depending on the second character, the token is either rejected as a
 * non-scalar token (debug message) or reported as an invalid token
 * starting with '?' (parse error). */
902 template<
class EventHandler>
903 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
905 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
906 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
907 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
915 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
921 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
/** Decides whether a plain scalar may start at the beginning of @p s in
 * flow context. @p s must be non-empty (asserted).
 * Some first characters are rejected outright as non-scalar tokens; the
 * "suspicious" ones are delegated to
 * _is_valid_start_scalar_plain_flow_check_block_token(), and a leading
 * question mark to _is_valid_start_scalar_plain_flow_check_qmrk(). */
935 template<
class EventHandler>
936 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
938 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
954 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
959 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
960 return _is_valid_start_scalar_plain_flow_check_block_token(s);
962 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
963 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
/** Called by the plain-scalar flow scanners when a '\n' is found at
 * offset @p offs of @p s, to examine the line that follows.
 *
 * Steps visible here:
 *  - the next line is taken from offs + 1 and its indentation is the
 *    index of its first non-space character;
 *  - an indentation of at least the current indref is the expected case
 *    and the indentation is stripped; a smaller indentation on a line
 *    that still has non-space content is treated as an error case;
 *  - the first character after spaces/tabs on the next line is then
 *    examined: ',', ']' and '#' are terminating characters, and ':' is
 *    terminating when it ends the line or forms a block token.
 *
 * The callers in this file test the result with `if(!...)`.
 * NOTE(review): which outcome maps to true/false is not visible in
 * these lines -- confirm. */
971 template<
class EventHandler>
972 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s,
size_t offs)
974 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs),
true));
977 _c4dbgp(
"newl[PLAIN]: buffer continues");
978 csubstr next_line = s.sub(offs + 1);
979 size_t next_line_indentation = next_line.first_not_of(
' ');
980 if(next_line_indentation !=
npos)
982 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
// restrict next_line to the text before its own line break
983 next_line = next_line.first(next_line.first_of(
"\n\r"));
984 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
985 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
986 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
988 _c4dbgp(
"newl[PLAIN]: larger indentation");
989 next_line = next_line.sub(next_line_indentation);
991 else if(C4_UNLIKELY(next_line.len && next_line.triml(
' ').len))
993 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
994 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
997 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
998 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1001 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1004 size_t fno = next_line.first_not_of(
" \t");
1008 switch(next_line.str[fno])
1010 case ',':
case ']':
case '#':
1011 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1014 _c4dbgp(
"newl[PLAIN]: found :");
1015 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1017 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1026 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Scans a plain (unquoted) scalar that is a value in a flow sequence.
 *
 * Asserted state: not in a map, not in block mode; in a sequence (or an
 * implicit map within a sequence), in flow mode, expecting a value. The
 * remainder must not start with a space or a newline.
 *
 * Nothing is scanned when the remainder is empty or does not validly
 * start a plain flow scalar. Otherwise the scan runs over the buffer
 * from the current offset -- so it can cross line breaks -- examining
 * one character at a time:
 *  - a newline is handed to _scan_scalar_plain_handle_newline();
 *  - a ':' is "suspicious": followed (after any '\r') by a space, ',',
 *    newline or ']' (or a tab, with tab tokens) it starts a map;
 *  - a '#' is "suspicious" and judged by the character before it;
 *  - some characters raise "invalid character", and one case raises
 *    "missing termination";
 *  - at the very start of a line, three repetitions of the same
 *    document-token character are checked for.
 *
 * On completion the scalar is cut at the stopping offset and
 * sc->needs_filter is set from the flag accumulated during the scan. */
1032 template<
class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1035 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1036 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1037 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1044 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// scan over the whole buffer from the current offset, not just the current line
1047 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1048 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1050 _c4dbgp(
"scanning seqflow scalar...");
1052 bool needs_filter =
false;
1055 for( ; offs < s.len; ++offs, ++col)
1057 const char c = s.str[offs];
1062 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1063 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1066 _c4dbgpf(
"found '\\n' at col={}", col);
1067 if(!_scan_scalar_plain_handle_newline(s, offs))
1070 needs_filter =
true;
1074 needs_filter =
true;
1077 _c4dbgp(
"found suspicious ':'");
1078 if(s.len > offs + 1)
1080 char next = s.str[offs + 1];
1081 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1084 csubstr after = s.sub(offs + 1).triml(
'\r');
1087 next = after.str[0];
1088 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1092 if(next ==
' ' _RYML_WITH_TAB_TOKENS(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1094 _c4dbgp(
"map starting!");
1099 _c4dbgp(
"':' nothing to see here");
1104 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1105 _line_progressed(col);
1106 _c4err(
"missing termination: '{}'", c);
1111 _c4dbgp(
"found suspicious '#'");
1112 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1113 char prev = s.str[offs - 1];
1116 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1124 _line_progressed(col);
1125 _c4err(
"invalid character: '{}'", c);
1128 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1129 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1131 _c4dbgp(
"at line beginning");
1132 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1144 _line_progressed(col);
1145 _set_first(s, offs);
1147 sc->needs_filter = needs_filter;
1149 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
/** Scans a plain (unquoted) scalar that is a key or value in a flow map.
 *
 * Asserted state: not in a plain sequence (an implicit map within a
 * sequence is allowed), not in block mode; in a map or implicit map, in
 * flow mode, and expecting a key, a value, or after a question mark.
 * The remainder must not start with a space or a newline.
 *
 * Nothing is scanned when the remainder is empty or does not validly
 * start a plain flow scalar. Otherwise the scan runs over the buffer
 * from the current offset -- so it can cross line breaks -- examining
 * one character at a time:
 *  - a newline is handed to _scan_scalar_plain_handle_newline();
 *  - a ':' followed by a space, ',', '}', '\n' or '\r' (or a tab, with
 *    tab tokens) terminates the scalar;
 *  - some characters raise "invalid character".
 *
 * @return true when the scanned scalar is non-empty; sc->needs_filter is
 * set from the flag accumulated during the scan. */
1154 template<
class EventHandler>
1155 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1157 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1158 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1164 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1166 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// scan over the whole buffer from the current offset, not just the current line
1169 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1170 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1172 _c4dbgp(
"scanning mapflow scalar...");
1174 bool needs_filter =
false;
1177 for( ; offs < s.len; ++offs, ++col)
1179 const char c = s.str[offs];
1184 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1185 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1188 _c4dbgpf(
"found '\\n' at col={}", col);
1189 if(!_scan_scalar_plain_handle_newline(s, offs))
1192 needs_filter =
true;
1196 needs_filter =
true;
// NOTE(review): the format string has no placeholder for the argument c -- confirm intent
1199 _c4dbgpf(
"found ':'", c);
1203 const char next = s.str[offs+1];
// NOTE(review): this message is labelled next= but prints c, not next -- confirm intent
1204 _c4dbgpf(
"next='{}'", c);
1205 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' _RYML_WITH_TAB_TOKENS(|| next ==
'\t'))
1207 _c4dbgpf(
"found terminating character: '{}'", c);
1214 _line_progressed(col);
1215 _c4err(
"invalid character: '{}'", c);
1218 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1227 _line_progressed(col);
1230 sc->needs_filter = needs_filter;
1232 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1234 return sc->scalar.len > 0u;
/** Scans an unquoted JSON scalar that is a value in a flow sequence.
 *
 * Asserted state: not in a map, not in block mode; in a sequence and in
 * flow mode. The remainder must be non-empty and must not start with a
 * space.
 *
 * Two paths are visible:
 *  - special scalars (false / true / null, detected with
 *    _begins_with_special_json_scalar()): accepted when the character
 *    right after the literal is ',', ']', a space, '\n', '\t' or '\r';
 *    the end of the line counts as ','. The line is advanced by the
 *    literal's length.
 *  - other scalars: characters are scanned until a terminating
 *    character; when at least one character was scanned, the line is
 *    advanced and the scalar is the scanned prefix.
 *
 * In both paths sc->needs_filter is set to false. */
1237 template<
class EventHandler>
1238 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1240 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1241 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1242 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1243 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1245 substr s = m_evt_handler->m_curr->line_contents.rem;
1246 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1247 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1249 _c4dbgp(
"seq_json: scanning scalar...");
1256 _c4dbgp(
"seq_json: not a scalar.");
1261 const size_t len = _begins_with_special_json_scalar(s);
// character following the literal; a literal ending the line is treated as if followed by ','
1264 char c = s.len > len ? s.str[len] :
',';
1265 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1267 sc->scalar = s.first(len);
1268 sc->needs_filter =
false;
1269 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1270 _line_progressed(len);
1282 for( ; i < s.len; ++i)
1284 const char c = s.str[i];
1291 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
1300 if(C4_LIKELY(i > 0))
1302 _line_progressed(i);
1303 sc->scalar = s.first(i);
1304 sc->needs_filter =
false;
1305 _c4dbgpf(
"seq_json: scalar was {}", _prs(sc->scalar,
true));
/** Scans an unquoted JSON scalar that is a key or value in a flow map.
 *
 * Asserted state: not in a sequence, not in block mode; in a map, in
 * flow mode, and expecting a key or a value. The remainder must be
 * non-empty and must not start with a space.
 *
 * Two paths are visible:
 *  - special scalars (false / true / null, detected with
 *    _begins_with_special_json_scalar()): accepted when the character
 *    right after the literal is ',', '}', a space, '\n', '\t' or '\r';
 *    the end of the line counts as ','. The line is advanced by the
 *    literal's length.
 *  - other scalars: characters are scanned until a terminating
 *    character; when at least one character was scanned, the line is
 *    advanced and the scalar is the scanned prefix.
 *
 * In both paths sc->needs_filter is set to false. */
1311 template<
class EventHandler>
1312 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1314 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1315 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1316 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1317 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1320 substr s = m_evt_handler->m_curr->line_contents.rem;
1321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1324 _c4dbgp(
"scanning scalar...");
1327 const size_t len = _begins_with_special_json_scalar(s);
// character following the literal; a literal ending the line is treated as if followed by ','
1330 char c = s.len > len ? s.str[len] :
',';
1331 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1332 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1334 sc->scalar = s.first(len);
1335 sc->needs_filter =
false;
1336 _c4dbgpf(
"special json scalar: '{}'", _prs(sc->scalar));
1337 _line_progressed(len);
1349 for( ; i < s.len; ++i)
1351 const char c = s.str[i];
1358 _c4dbgpf(
"found terminating character: '{}'", c);
1367 if(C4_LIKELY(i > 0))
1369 _line_progressed(i);
1370 sc->scalar = s.first(i);
1371 sc->needs_filter =
false;
1372 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
/** Tells whether @p s, which must start with '-' (asserted), is a
 * document-begin marker at the current position.
 * @return true only when the current line has zero indentation, the
 * parser is at the beginning of the line, and _is_doc_begin_token()
 * accepts @p s. */
1379 template<
class EventHandler>
1380 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1382 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1383 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1386 template<
class EventHandler>
1387 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1389 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1390 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context, possibly spanning
// several lines. The assertions require that the parser is not in flow
// mode (no RFLOW, no RSEQIMAP) and is in one of RBLCK, RUNK or USTY.
//
// @param sc receives the result: sc->scalar is the buffer range from
//        the position where scanning started up to the position where
//        it stopped, right-trimmed of " \n\r\t"; sc->needs_filter is
//        set when scanning continued onto a further line.
// @param indentation reference indentation: a following line indented
//        less than this ends the scalar.
// NOTE(review): the return statements are not visible in this view;
// presumably the function reports whether a scalar was found -- confirm.
1393 template<
class EventHandler>
1394 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1396 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1397 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1398 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
// s is the unread remainder of the current line
1400 substr s = m_evt_handler->m_curr->line_contents.rem;
1401 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1402 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
// before treating s as a plain scalar, check whether it is actually a
// block token or a document start/end marker
1407 if(_is_blck_token(s))
1411 else if(_is_doc_begin(s))
1413 _c4dbgp(
"token is doc start");
1419 if(_is_blck_token(s))
1434 _c4dbgp(
"token is doc end");
1440 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// remember where the scalar starts, both to build the final range and
// to detect that scanning moved on to a later line
1442 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1443 const size_t start_line = m_evt_handler->m_curr->pos.line;
1445 bool needs_filter =
false;
1448 _c4dbgpf(
"plain scalar line: {}", _prs(s));
// walk the current line looking for characters that may end the scalar
// (the debug messages below name ':' and '#')
1449 for(
size_t i = 0; i < s.len; ++i)
1451 const char curr = s.str[i];
1456 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1460 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1461 _line_progressed(i);
// a scalar ended by ':' is accepted only while still on the line where
// the scalar started; on a later line it is reported as an error
1463 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1465 _c4dbgp(
"start line. scalar ends here");
1470 _c4err(
"multiline scalars cannot be used as implicit keys");
// consume a run of consecutive ':' characters
1476 while(j + 1 < s.len && s.str[j+1] ==
':')
1478 _c4dbgp(
"skip colon");
1481 i = j > i ? j-1 : i;
1482 _c4dbgp(
"nothing to see here");
1486 _c4dbgp(
"got suspicious '#'");
// '#' starts a comment only at the start of s or after a space/tab
1487 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1489 _c4dbgp(
"comment! scalar ends here");
1490 _line_progressed(i);
1495 _c4dbgp(
"nothing to see here");
// the whole line was consumed: peek at the next line to decide whether
// the scalar continues there
1500 _line_progressed(s.len);
1501 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1502 next_peeked = next_peeked.trimr(
"\n\r");
1503 const size_t next_indentation = next_peeked.first_not_of(
' ');
1504 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1505 if(next_indentation < indentation)
1507 _c4dbgp(
"smaller indentation! scalar ended");
// an unindented, non-empty next line may be a document marker
1510 else if(next_indentation == 0 && next_peeked.len > 0)
1512 const char first = next_peeked.str[0];
1516 _c4dbgpf(
"doc begin? peeked={}", _prs(next_peeked,
size_t(3)));
1517 if(_is_doc_begin_token(next_peeked))
1519 _c4dbgp(
"doc begin! scalar ended");
1524 _c4dbgpf(
"doc end? peeked={}", _prs(next_peeked,
size_t(3)));
1525 if(_is_doc_end_token(next_peeked))
1527 _c4dbgp(
"doc end! scalar ended");
1534 _c4dbgp(
"next line!");
1535 if(!_finished_file())
1537 _c4dbgp(
"next line!");
1543 _c4dbgp(
"file finished!");
// continue scanning on the newly loaded line; from here on the scalar
// is flagged as needing filtering
1546 s = m_evt_handler->m_curr->line_contents.rem;
1547 needs_filter =
true;
// the scalar is everything consumed since start_offset, minus trailing
// whitespace and newlines
1552 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1553 sc->needs_filter = needs_filter;
1555 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
1560 template<
class EventHandler>
1561 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1563 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1564 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1565 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1566 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1569 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1572 template<
class EventHandler>
1573 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1575 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1576 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1577 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1578 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1579 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1580 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1583 template<
class EventHandler>
1584 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1586 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1587 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns a view of the line that follows buffer position `pos`. The
// function is const: it does not touch the parser position.
//
// @param pos buffer offset to start from; npos means "use the current
//        parser offset".
// NOTE(review): the branch taken when pos is at or past the end of the
// buffer is not visible here; the "(len=0)" debug message suggests an
// empty result -- confirm.
1593 template<
class EventHandler>
1594 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1598 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1599 if(pos >= _buf().len)
// move to the beginning of the following line
1603 rem = _from_next_line(_buf().sub(pos));
// find the end of that line
1608 nlpos = rem.first_of(
"\r\n");
// NOTE(review): rem[nlpos+1] is read here; the guard that keeps
// nlpos+1 within rem is not visible in this view -- confirm.
1610 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
// keep the line up to and including position nlpos
1611 rem = rem.left_of(nlpos,
true);
1613 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1617 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
1623 template<
class EventHandler>
1624 void ParseEngine<EventHandler>::_scan_line()
1626 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1627 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1629 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1632 template<
class EventHandler>
1633 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1635 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1636 m_evt_handler->m_curr->pos.line,
1637 m_evt_handler->m_curr->line_contents.full.len,
1638 ahead, m_evt_handler->m_curr->pos.col,
1639 m_evt_handler->m_curr->pos.col+ahead,
1640 m_evt_handler->m_curr->pos.offset,
1641 m_evt_handler->m_curr->pos.offset+ahead);
1642 m_evt_handler->m_curr->pos.offset += ahead;
1643 m_evt_handler->m_curr->pos.col += ahead;
1644 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1645 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1648 template<
class EventHandler>
1649 void ParseEngine<EventHandler>::_line_ended()
1651 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1652 m_evt_handler->m_curr->pos.line,
1653 m_evt_handler->m_curr->line_contents.full.len,
1654 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1655 m_evt_handler->m_curr->pos.col, 1);
1656 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1657 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1658 ++m_evt_handler->m_curr->pos.line;
1659 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position changes made by _line_ended(): the offset moves
// back by (full.len - num_cols), the line counter is decremented, and
// the column is set to one past the last column of the line. Requires
// the parser to be at column 1 of a line other than line 0.
1662 template<
class EventHandler>
1663 void ParseEngine<EventHandler>::_line_ended_undo()
1665 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1666 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
// guards the unsigned subtraction from the offset below
1667 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
// same amount that _line_ended() adds to the offset
1668 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1669 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1670 m_evt_handler->m_curr->pos.offset -= delta;
1671 --m_evt_handler->m_curr->pos.line;
1672 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// the unread remainder becomes an empty view at the restored offset
1675 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1680 template<
class EventHandler>
1681 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation) noexcept
1683 m_evt_handler->m_curr->indref = indentation;
1684 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1687 template<
class EventHandler>
1688 void ParseEngine<EventHandler>::_save_indentation()
1690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1691 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1692 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1695 template<
class EventHandler>
1696 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1698 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1699 m_prev_val_end = m_evt_handler->m_curr->pos.line;
// Handles a flow container that turned out to be followed by a colon:
// tells the event handler that the value just produced is the first
// key of a new block map, restores the given indentation as the
// reference indentation, and skips any following whitespace tokens.
//
// @param orig_indent indentation to store as the reference indentation.
1705 template<
class EventHandler>
1706 void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1708 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1709 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1711 _set_indentation(orig_indent);
1712 _maybe_skip_whitespace_tokens();
// Common tail for closing a flow map or flow sequence; decides what the
// closed container was in its parent context.
//
// @param orig_indent indentation in effect for the container, forwarded
//        to _flow_container_was_a_key() when the container was a key.
// @param multiline whether the container spanned more than one line; a
//        multiline container used as a key is reported as an error.
// NOTE(review): the condition selecting the first branch is not visible
// in this view.
1715 template<
class EventHandler>
1716 void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
// first case: the container ends as a key of a block map, so a ':'
// must follow
1722 _c4dbgp(
"flow container: end as vanilla block map key!");
1723 if(C4_UNLIKELY(multiline))
1724 _c4err(
"multiline key is invalid");
1725 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1726 _c4err(
"could not find ':' colon after key");
1727 _maybe_skip_whitespace_tokens();
// second case: after closing, the parser is no longer in flow mode
1730 else if(has_none(
RFLOW))
1732 _c4dbgp(
"end_flow_container: now not in flow!");
// in these states a following ':' turns the container into a key
1733 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1735 if(C4_UNLIKELY(multiline))
1736 _c4err(
"multiline key is invalid");
1737 _flow_container_was_a_key(orig_indent);
1741 _c4dbgp(
"end_flow_container: end map as key!");
// third case: still in flow mode, inside a sequence
1744 else if(has_any(
RSEQ))
1746 _c4dbgp(
"end_flow_container: now in a flow seq");
1747 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1748 _mark_seqflow_val_end();
1752 template<
class EventHandler>
1753 void ParseEngine<EventHandler>::_end_map_flow()
1755 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1756 size_t orig_indent = m_evt_handler->m_curr->indref;
1757 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1758 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1759 _end_flow_container(orig_indent, multiline);
1762 template<
class EventHandler>
1763 void ParseEngine<EventHandler>::_end_seq_flow()
1765 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1766 size_t orig_indent = m_evt_handler->m_curr->indref;
1767 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1768 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1769 _end_flow_container(orig_indent, multiline);
// Closes the current block map. Before telling the handler to end the
// map, fills in whatever is still missing: an empty plain value, or
// (in QMRK state) an empty plain key followed by an empty plain value.
// Pending annotations are handled before each empty scalar is set.
// NOTE(review): the condition of the first branch is not visible in
// this view.
1772 template<
class EventHandler>
1773 void ParseEngine<EventHandler>::_end_map_blck()
1775 _c4dbgp(
"mapblck: end");
1778 _c4dbgp(
"mapblck: set missing val");
1779 _handle_annotations_before_blck_val_scalar();
1780 m_evt_handler->set_val_scalar_plain_empty();
1782 else if(has_any(
QMRK))
1784 _c4dbgp(
"mapblck: set missing keyval");
1785 _handle_annotations_before_blck_key_scalar();
1786 m_evt_handler->set_key_scalar_plain_empty();
1787 _handle_annotations_before_blck_val_scalar();
1788 m_evt_handler->set_val_scalar_plain_empty();
1790 m_evt_handler->end_map_block();
// Closes the current block sequence. When a value is still missing, an
// empty plain value is set first (after handling pending annotations);
// then the handler is told to end the sequence.
// NOTE(review): the condition guarding the "missing val" part is not
// visible in this view.
1793 template<
class EventHandler>
1794 void ParseEngine<EventHandler>::_end_seq_blck()
1798 _c4dbgp(
"seqblck: set missing val");
1799 _handle_annotations_before_blck_val_scalar();
1800 m_evt_handler->set_val_scalar_plain_empty();
1802 m_evt_handler->end_seq_block();
// Ends the current map; the parser must be in RMAP state.
// NOTE(review): the dispatch between the ways of ending a map is not
// visible in this view; the visible tail asserts no RFLOW and USTY set,
// then pops the handler state directly.
1805 template<
class EventHandler>
1806 void ParseEngine<EventHandler>::_end2_map()
1808 _c4dbgp(
"map: end");
1809 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1816 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1817 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1818 m_evt_handler->_pop();
// Ends the current sequence; the parser must be in RSEQ state.
// NOTE(review): the dispatch between the ways of ending a sequence is
// not visible in this view; the visible tail asserts no RFLOW and USTY
// set, then pops the handler state directly.
1822 template<
class EventHandler>
1823 void ParseEngine<EventHandler>::_end2_seq()
1825 _c4dbgp(
"seq: end");
1826 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1833 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1834 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1835 m_evt_handler->_pop();
// Begins a document: clears the per-document directive flags, tells
// the handler to begin a document, and resets the reference
// indentation of the resulting current state to 0.
1839 template<
class EventHandler>
1840 void ParseEngine<EventHandler>::_begin2_doc()
1842 _c4dbgp(
"begin_doc");
1843 m_has_directives_yaml =
false;
1844 m_has_directives =
false;
1847 m_evt_handler->begin_doc();
1848 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document: clears the per-document directive
// flags, tells the handler to begin an explicit document, and resets
// the reference indentation of the resulting current state to 0.
1851 template<
class EventHandler>
1852 void ParseEngine<EventHandler>::_begin2_doc_expl()
1854 _c4dbgp(
"begin_doc_expl");
1855 m_has_directives_yaml =
false;
1856 m_has_directives =
false;
1859 m_evt_handler->begin_doc_expl();
1860 m_evt_handler->m_curr->indref = 0;
// Ends the current document; the parser must be in RDOC state. When
// the document is empty, or tags/anchors are still pending, an empty
// plain value is emitted first so that the pending annotations are
// handled.
1863 template<
class EventHandler>
1864 void ParseEngine<EventHandler>::_end2_doc()
1866 _c4dbgp(
"doc: end");
1867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1868 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1870 _c4dbgp(
"doc was empty; add empty val");
1871 _handle_annotations_before_blck_val_scalar();
1872 m_evt_handler->set_val_scalar_plain_empty();
1874 m_evt_handler->end_doc();
// Ends the current document explicitly. When the document is empty, or
// tags/anchors are still pending, an empty plain value is emitted
// first so that the pending annotations are handled.
1878 template<
class EventHandler>
1879 void ParseEngine<EventHandler>::_end2_doc_expl()
1881 _c4dbgp(
"doc: end");
1882 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1884 _c4dbgp(
"doc: no children; add empty val");
1885 _handle_annotations_before_blck_val_scalar();
1886 m_evt_handler->set_val_scalar_plain_empty();
1888 m_evt_handler->end_doc_expl();
// Starts a document when one is needed.
// NOTE(review): neither the condition nor the action is visible in
// this view; only the debug message is.
1892 template<
class EventHandler>
1893 void ParseEngine<EventHandler>::_maybe_begin_doc()
1897 _c4dbgp(
"doc must be started");
// Finishes the current document when there is one to finish.
// Otherwise, when the document is empty but tags or anchors are still
// pending, emits a complete document holding a single empty plain
// value so that the pending annotations are handled.
// NOTE(review): the condition and action of the first branch are not
// visible in this view.
1901 template<
class EventHandler>
1902 void ParseEngine<EventHandler>::_maybe_end_doc()
1906 _c4dbgp(
"doc must be finished");
1909 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1911 _c4dbgp(
"no doc to finish, but pending annotations");
1912 m_evt_handler->begin_doc();
1913 _handle_annotations_before_blck_val_scalar();
1914 m_evt_handler->set_val_scalar_plain_empty();
1915 m_evt_handler->end_doc();
// Pops the parser state back to the bottom of the state stack, which
// must carry the RDOC flag. When the current level is already 0 nothing
// is popped. Afterwards the current state must be in RDOC.
1919 template<
class EventHandler>
1920 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1922 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1923 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1924 _c4dbgp(
"root is RDOC");
1925 if(m_evt_handler->m_curr->level != 0)
1926 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1927 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
// Checks the situation around a trailing document token. The condition
// holds when the state stack has a single entry or the handler reports
// DOC, the handler also reports any of MAP|SEQ|VAL, and the current
// state does not carry NDOC.
// NOTE(review): the action taken when the condition holds is not
// visible in this view.
1932 template<
class EventHandler>
1933 void ParseEngine<EventHandler>::_check_trailing_doc_token()
1935 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1936 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
1937 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1938 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1939 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
// Ends the current document from whatever nesting level the parser is
// at, starting by popping the state back to the document level.
// NOTE(review): the steps after the pop are not visible in this view.
1943 template<
class EventHandler>
1944 void ParseEngine<EventHandler>::_end_doc_suddenly()
1946 _c4dbgp(
"end doc suddenly");
1947 _end_doc_suddenly__pop();
// Inspects what remains on the line after a document-end marker. The
// remainder must not start with '.', space or tab. The unlikely branch
// is taken when the remainder is non-empty and does not start a
// comment ('#').
// NOTE(review): the body of that branch is not visible in this view;
// presumably it reports an error -- confirm.
1952 template<
class EventHandler>
1953 void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1955 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1956 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
1957 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
// Starts a new document from whatever nesting level the parser is at,
// beginning by popping the state back to the document level.
// NOTE(review): the steps after the pop are not visible in this view.
1963 template<
class EventHandler>
1964 void ParseEngine<EventHandler>::_start_doc_suddenly()
1966 _c4dbgp(
"start doc suddenly");
1967 _end_doc_suddenly__pop();
// Finishes parsing of the whole stream. Reports unterminated flow
// containers, pops any remaining nested states back to the bottom of
// the stack, emits a document with an empty value when tags or anchors
// are still pending, and finally tells the handler to end the stream.
// Directives left over at this point are reported as an error.
// NOTE(review): several conditions are not visible in this view,
// including the one for the "missing terminating ]" error.
1972 template<
class EventHandler>
1973 void ParseEngine<EventHandler>::_end_stream()
1975 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1977 _c4err(
"missing terminating ]");
// still inside a flow map at end of input
1978 else if(C4_UNLIKELY(has_all(
RMAP|
RFLOW)))
1979 _c4err(
"missing terminating }");
// unwind every nested state down to the first stack entry
1980 if(m_evt_handler->m_stack.size() > 1)
1981 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// pending annotations get a document holding one empty plain value
1988 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1992 m_evt_handler->begin_doc();
1993 _handle_annotations_before_blck_val_scalar();
1994 m_evt_handler->set_val_scalar_plain_empty();
1995 m_evt_handler->end_doc();
1999 m_evt_handler->end_stream();
2000 if(C4_UNLIKELY(m_has_directives))
2001 _c4err(
"directives cannot be used without a document");
// Pops parser states one at a time until the current state is `popto`.
// The debug messages distinguish sequences from maps.
//
// @param popto state to stop at; it becomes the current state.
// NOTE(review): the calls that actually end each container are not
// visible in this view.
2005 template<
class EventHandler>
2006 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
2008 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2009 while(m_evt_handler->m_curr != popto)
2013 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2016 else if(has_any(
RMAP))
2018 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2026 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when the current line is less indented:
// searches the state stack, from the state just below the current one
// down to the first entry, for a state whose reference indentation
// equals the indentation of the current line, then pops to it. An
// error is reported when no suitable state is found.
2029 template<
class EventHandler>
2030 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2033 using state_type =
typename EventHandler::state;
2034 state_type
const* popto =
nullptr;
2035 auto &stack = m_evt_handler->m_stack;
// the pointer walk below relies on contiguous stack storage
2036 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2037 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2038 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2040 _print_state_stack();
// NOTE(review): when no state matches, the loop ends by decrementing s
// to one before stack.begin() (and m_curr-1 is already there when
// m_curr is the first entry). A pointer before the start of an array
// is undefined behaviour in ISO C++ -- consider an index-based loop.
2042 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2044 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2045 if(s->indref == ind)
2047 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist and be strictly below the current state
2052 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2054 _c4err(
"parse error: incorrect indentation?");
2056 _handle_indentation_pop(popto);
// Called from a block map when the current line is less indented:
// searches the state stack downwards for the state to pop to, using
// the indentation of the current line, then pops to it. The search
// starts just below the current state and stops before the first stack
// entry. An error is reported when no suitable state is found.
// NOTE(review): the assignments to popto and the first branch of the
// search loop are not visible in this view.
2059 template<
class EventHandler>
2060 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2063 using state_type =
typename EventHandler::state;
2064 auto &stack = m_evt_handler->m_stack;
// the pointer walk below relies on contiguous stack storage
2065 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2066 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2067 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2068 state_type
const* popto =
nullptr;
2071 _print_state_stack(flagbuf_);
2073 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2075 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2080 else if(s->indref == ind)
2082 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
// a candidate exists already and s is a top-level state that is
// neither a map nor a sequence
2083 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at the current line past its leading spaces, to detect a
// sequence entry at the same indentation as the map key
2090 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2091 const size_t first = rem.first_not_of(
' ');
2092 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2093 rem = rem.right_of(first,
true);
2094 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2095 if(rem.begins_with(
'-') && _is_blck_token(rem))
2097 _c4dbgp(
"parent was indentless seq");
// the target must exist and be strictly below the current state
2103 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2105 _c4err(
"parse error: incorrect indentation?");
2107 _handle_indentation_pop(popto);
// Validates the line on which a quoted scalar continues after a line
// break. Multiline quoted keys are rejected. A continuation line
// indented less than the minimum is an error ("bad indentation")
// unless nothing is left after its indentation.
// NOTE(review): the condition guarding the "multiline quoted keys"
// error is not visible in this view.
2112 template<
class EventHandler>
2113 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2117 _c4err(
"multiline quoted keys are invalid");
// inside a block map or block sequence the continuation must be
// indented one more than the reference indentation (the boolean
// expression contributes 0 or 1)
2121 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2122 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2123 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2125 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2126 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2127 m_evt_handler->m_curr->pos);
// what is left on the line after its leading indentation
2128 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2129 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem,
true));
2130 if(C4_UNLIKELY(!!trimmed.len))
2132 _c4err(
"bad indentation");
// Scans a single-quoted scalar. The buffer at the current offset must
// start with the opening quote. The scan may run over several lines
// until the closing quote is found; reaching the end of the file first
// is an error, and so is a document token at the beginning of a line
// inside the scalar.
//
// @return the scanned scalar (a view into the buffer, starting right
//         after the opening quote) and whether it needs filtering.
2140 template<
class EventHandler>
2141 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2146 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; its length is fixed once the
// closing quote has been located
2149 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
// step over the opening quote
2150 _line_progressed(1);
2151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2153 bool needs_filter =
false;
2155 while( ! _finished_file())
2157 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2158 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2159 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2160 _c4err(
"token can not appear at line begin");
2161 for(
size_t i = 0; i < line.len; ++i)
2163 const char curr = line.str[i];
// lookahead character; '~' stands in at the end of the line
2166 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// consume up to and including character i, and express its position
// relative to the start of s
2169 _line_progressed(i + 1);
2170 pos = i + (size_t)(line.str - s.str);
2175 needs_filter =
true;
// the scalar continues past the end of this line
2181 needs_filter =
true;
2182 _line_progressed(line.len);
2185 _check_valid_newline_in_quoted_scalar();
2188 _c4err(
"reached end of file while looking for closing quote");
2192 _c4dbgpf(
"found closing quote at: {}", pos);
2193 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): pos appears to be unsigned (it is assigned a size_t
// expression above), which would make this check always true -- confirm.
2194 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2195 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2196 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
2197 _set_first_strict(s, pos);
2199 _c4prscalar(
"scanned squoted scalar", s,
true);
2201 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar. The buffer at the current offset must
// start with the opening quote. The scan may run over several lines
// until the closing quote is found; reaching the end of the file first
// is an error, and so is a document token at the beginning of a line
// inside the scalar.
//
// @return the scanned scalar (a view into the buffer, starting right
//         after the opening quote) and whether it needs filtering.
2206 template<
class EventHandler>
2207 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2212 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; its length is fixed once the
// closing quote has been located
2215 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
// step over the opening quote
2216 _line_progressed(1);
2217 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2219 bool needs_filter =
false;
2221 while( ! _finished_file())
2223 #if defined(__GNUC__) && (__GNUC__ == 13)
2224 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2226 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2227 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2228 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2229 _c4err(
"token can not appear at line begin");
2230 for(
size_t i = 0; i < rem.len; ++i)
2232 const char curr = rem.str[i];
// lookahead character; '~' stands in at the end of the line
2236 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2237 needs_filter =
true;
// lookahead is a quote or a backslash
2238 if(next ==
'"' || next ==
'\\')
2241 else if(curr ==
'"')
// closing quote: consume up to and including it, and express its
// position relative to the start of s
2243 _line_progressed(i + 1);
2244 pos = i + (size_t)(rem.str - s.str);
// the scalar continues past the end of this line
2250 needs_filter =
true;
2251 _line_progressed(rem.len);
2254 _check_valid_newline_in_quoted_scalar();
2257 _c4err(
"reached end of file while looking for closing quote");
2261 _c4dbgpf(
"found closing quote at: {}", pos);
2262 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): pos appears to be unsigned (it is assigned a size_t
// expression above), which would make this check always true -- confirm.
2263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2264 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2265 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
2266 _set_first_strict(s, pos);
2268 _c4prscalar(
"scanned dquoted scalar", s,
true);
2270 return ScannedScalar{s, needs_filter};
// Scans a block scalar (literal '|' or folded '>'). First parses the
// header on the current line (optional chomping indicator '-' or '+'
// and an optional single-digit indentation indicator), then collects
// the following lines that belong to the block.
//
// @param sb receives the result: sb->scalar is the raw block (a view
//        into the buffer, not yet filtered) and sb->indentation is the
//        indentation determined for the block.
// @param indref reference indentation of the enclosing context; must
//        not be npos.
2275 template<
class EventHandler>
2276 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2278 _c4dbgpf(
"blck: indref={}", indref);
2279 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
// s is the block header: the rest of the current line, starting at the
// '|' or '>' indicator
2282 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2283 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2285 _c4dbgpf(
"blck: specs={}", _prs(s));
// defaults: clip chomping, indentation not yet known
2288 BlockChomp_e chomp = CHOMP_CLIP;
2289 size_t indentation =
npos;
2292 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
// t is the header without the style indicator
2293 csubstr t = s.sub(1);
2294 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2295 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
// chomping indicator, if any
2296 size_t pos = t.first_of(
"-+");
2297 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2301 chomp = CHOMP_STRIP;
2302 else if(t[pos] ==
'+')
// indentation indicator: the leading run of decimal digits
2310 pos = t.first_not_of(
"0123456789");
2311 csubstr digits = t.first(pos);
2312 if( ! digits.empty())
// only a single digit is accepted
2314 if(C4_UNLIKELY(digits.len > 1))
2315 _c4err(
"parse error: invalid indentation");
2316 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2317 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2318 _c4err(
"parse error: could not read indentation as decimal");
2319 if(C4_UNLIKELY( ! indentation))
2320 _c4err(
"parse error: null indentation");
// NOTE(review): the debug message computes the result with the indref
// parameter, while the code below adds m_curr->indref; confirm that
// the two are always equal here.
2321 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
// the indicator is relative to the current reference indentation
2322 indentation += m_evt_handler->m_curr->indref;
// anything left in the header must be whitespace followed by a comment
2326 if(C4_UNLIKELY(t.len && (!t.begins_with_any(
" \t") || !t.sub(pos).triml(
" \t").begins_with(
'#'))))
2327 _c4err(
"parse error: invalid token");
2331 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// consume the header
2334 _line_progressed(s.len);
// raw_block starts empty at the current offset and grows by one full
// line each time a line is accepted
2339 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2340 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2348 size_t num_lines = 0;
2349 size_t first = m_evt_handler->m_curr->pos.line;
// indentation guessed from blank lines seen before the first
// non-empty line
2350 size_t provisional_indentation =
npos;
2352 while(( ! _finished_file()))
// lc holds the candidate line; it becomes the current line only once
// it is accepted into the block
2355 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2356 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2357 C4_DONT_OPTIMIZE(lc.rem);
2359 _c4dbgpf(
"blck: peeking at {}", _prs(lc.rem.trimr(
"\r\n"),
true));
// case 1: the block indentation is already known
2361 if(indentation !=
npos)
2363 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line indented less than the block
2365 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2369 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2373 _c4err(
"indentation decreased without any scalar");
// at zero indentation only a document token can end the block
2377 else if(indentation == 0)
2379 _c4dbgpf(
"blck: noindent. lc.rem={}", _prs(lc.rem));
2380 if(_is_doc_token(lc.rem))
2382 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation is still unknown
2389 const size_t fns = lc.rem.first_not_of(
' ');
2390 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2393 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2394 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2396 if(provisional_indentation ==
npos)
2398 if(lc.indentation < indref)
2400 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2401 if(raw_block.len == 0)
2403 _c4dbgp(
"blck: was empty, undo next line");
2408 else if(lc.indentation == m_evt_handler->m_curr->indref)
2412 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// the first non-empty line fixes the block indentation
2416 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2417 indentation = lc.indentation;
2421 if(lc.indentation >= provisional_indentation)
2423 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2425 indentation = lc.indentation;
2429 if(lc.indentation >= indref)
2430 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2431 _c4dbgp(
"blck: finished");
// blank line (empty or spaces only): update the provisional indentation
2438 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2439 if(provisional_indentation !=
npos)
2441 if(lc.rem.len >= provisional_indentation)
2443 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2444 provisional_indentation = lc.rem.len;
// has_any() contributes 0 or 1 here
2449 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2450 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2451 if(provisional_indentation ==
npos)
2453 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2454 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// never guess below the reference indentation
2456 if(provisional_indentation < indref)
2458 provisional_indentation = indref;
2459 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the line belongs to the block: make it current, append it whole, and
// consume it
2465 m_evt_handler->m_curr->line_contents = lc;
2466 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2467 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2468 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2472 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2473 C4_UNUSED(num_lines);
// no non-empty line fixed the indentation: use the provisional value
2476 if(indentation ==
npos)
2478 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2479 indentation = provisional_indentation;
2485 _c4prscalar(
"scanned block", raw_block,
true);
2487 sb->scalar = raw_block;
2488 sb->indentation = indentation;
2500 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2502 #define _c4dbgfws(...)
// Helper for whitespace filtering. The processor must be positioned on
// a space or tab. Looks for the first non-whitespace character at or
// after the read position and acts on what it finds there: whitespace
// directly followed by a newline or carriage return is trailing and is
// skipped.
//
// @param proc filter processor; the code uses its src, rpos, curr(),
//        skip() and sofar() members.
// NOTE(review): the return statements are not visible in this view.
// The callers treat a false result as "everything left is trailing
// whitespace" -- confirm.
2505 template<
class EventHandler>
2506 template<
class FilterProcessor>
2507 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2509 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2510 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
// at the very start of the source only spaces are searched past;
// elsewhere both spaces and tabs are
2512 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2513 if(first_pos !=
npos)
2515 const char first_char = proc.src[first_pos];
2516 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// the whitespace run ends at a line break: drop it
2517 if(first_char ==
'\n' || first_char ==
'\r')
2519 _c4dbgfws(
"whitespace is trailing on line",
"");
2520 proc.skip(first_pos - proc.rpos);
2525 _c4dbgfws(
"legit whitespace. sofar={}", _prs(proc.sofar()));
// nothing but whitespace up to the end of the source
2529 _c4dbgfws(
"whitespace is trailing on line",
"");
// Whitespace handler that keeps trailing whitespace: delegates to
// _filter_ws_handle_to_first_non_space(), and when that returns false copies
// all remaining characters (proc.src.len - proc.rpos) to the output.
2533 template<
class EventHandler>
2534 template<
class FilterProcessor>
2535 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2537 if(!_filter_ws_handle_to_first_non_space(proc))
2539 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2540 proc.copy(proc.src.len - proc.rpos);
// Whitespace handler that drops trailing whitespace: delegates to
// _filter_ws_handle_to_first_non_space(), and when that returns false skips
// all remaining characters (proc.src.len - proc.rpos) instead of copying them.
2544 template<
class EventHandler>
2545 template<
class FilterProcessor>
2546 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2548 if(!_filter_ws_handle_to_first_non_space(proc))
2550 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2551 proc.skip(proc.src.len - proc.rpos);
// Debug-trace helper for the plain-scalar filter: prefixes the message with
// proc.rpos and proc.wpos and forwards to _c4dbgpf. The second definition
// expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
2565 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2567 #define _c4dbgfps(fmt, ...)
// Handles a newline found while filtering a plain scalar. Asserts that the
// current character is '\n', then asks _count_following_newlines() (given the
// source, a cursor initialised to the current read position, and the
// indentation) how many further newlines follow.
// Visible actions: proc.set('\n', numnl_following) for a run of empty lines;
// per the trace messages, a single newline is converted to a space, and a
// final newline followed only by whitespace is treated separately.
// NOTE(review): the branch conditions and the remaining processor calls are
// on lines not present in this listing.
2570 template<
class EventHandler>
2571 template<
class FilterProcessor>
2572 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2574 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2576 _c4dbgfps(
"found newline. sofar={}", _prs(proc.sofar()));
2577 size_t ii = proc.rpos;
2578 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2581 proc.set(
'\n', numnl_following);
2582 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is neither space nor tab
2586 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message labels the value "ret" but the argument passed is ii
2590 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2594 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar through the given filter processor and returns
// proc.result(). Asserts that indentation is not npos, then walks the input
// while the processor has more characters, dispatching on the current one:
//  - whitespace      -> _filter_ws_skip_trailing()
//  - newline         -> _filter_nl_plain()
//  - carriage return -> ignored (per the trace message)
// NOTE(review): the dispatch statement itself (and the handling of ordinary
// characters) is on lines not present in this listing.
2601 template<
class EventHandler>
2602 template<
class FilterProcessor>
2603 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2605 _RYML_ASSERT_PARSE_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2606 _c4dbgfps(
"before={}", _prs(proc.src));
2608 while(proc.has_more_chars())
2610 const char curr = proc.curr();
2611 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2616 _c4dbgfps(
"whitespace", curr);
2617 _filter_ws_skip_trailing(proc);
2620 _c4dbgfps(
"newline", curr);
2621 _filter_nl_plain(proc, indentation);
2624 _c4dbgfps(
"carriage return, ignore", curr);
2633 _c4dbgfps(
"after={}", _prs(proc.sofar()));
2635 return proc.result();
// Plain-scalar filtering from a source buffer into a separate destination:
// wraps (scalar, dst) in a FilterProcessorSrcDst and runs _filter_plain().
// NOTE(review): the signature line is not visible in this listing; the
// function name is presumably filter_scalar_plain -- confirm.
2641 template<
class EventHandler>
2644 FilterProcessorSrcDst proc(scalar, dst);
2645 return _filter_plain(proc, indentation);
// Plain-scalar filtering using a single buffer: wraps (dst, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_plain().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_plain_in_place, which _filter_scalar_plain() calls
// with (s, s.len, indentation) -- confirm.
2648 template<
class EventHandler>
2651 FilterProcessorInplaceEndExtending proc(dst, cap);
2652 return _filter_plain(proc, indentation);
// Debug-trace helper for the single-quoted filter: prefixes the message with
// proc.rpos and proc.wpos and forwards to _c4dbgpf. The second definition
// expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
2663 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2665 #define _c4dbgfsq(fmt, ...)
// Handles a newline found while filtering a single-quoted scalar. Asserts
// that the current character is '\n' and counts the newlines that follow via
// _count_following_newlines() (no indentation argument is passed here).
// Visible actions: proc.set('\n', numnl_following) for a run of empty lines;
// per the trace messages, a single newline is converted to a space.
// NOTE(review): the branch conditions and the remaining processor calls are
// on lines not present in this listing.
2668 template<
class EventHandler>
2669 template<
class FilterProcessor>
2670 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2672 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2674 _c4dbgfsq(
"found newline. sofar={}", _prs(proc.sofar()));
2675 size_t ii = proc.rpos;
2676 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2679 proc.set(
'\n', numnl_following);
2680 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is neither space nor tab
2684 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message labels the value "ret" but the argument passed is ii
2688 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2693 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// Filters a single-quoted scalar through the given filter processor and
// returns proc.result(). Walks the input while the processor has more
// characters, dispatching on the current one:
//  - whitespace      -> _filter_ws_copy_trailing()
//  - newline         -> _filter_nl_squoted()
//  - carriage return -> skipped (per the trace message)
//  - single quote    -> checks whether the next character is also a quote
// NOTE(review): the dispatch statement and the action taken for two
// consecutive quotes are on lines not present in this listing.
2699 template<
class EventHandler>
2700 template<
class FilterProcessor>
2701 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2703 _c4dbgfsq(
"before={}", _prs(proc.src));
2707 while(proc.has_more_chars())
2709 const char curr = proc.curr();
2710 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2715 _c4dbgfsq(
"whitespace", curr);
2716 _filter_ws_copy_trailing(proc);
2719 _c4dbgfsq(
"newline", curr);
2720 _filter_nl_squoted(proc);
2723 _c4dbgfsq(
"skip cr", curr);
2727 _c4dbgfsq(
"squote", curr);
2728 if(proc.next() ==
'\'')
2730 _c4dbgfsq(
"two consecutive squotes", curr);
2745 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2747 return proc.result();
// Single-quoted filtering from a source buffer into a separate destination:
// wraps (scalar, dst) in a FilterProcessorSrcDst and runs _filter_squoted().
// NOTE(review): the signature line is not visible in this listing; the
// function name is presumably filter_scalar_squoted -- confirm.
2752 template<
class EventHandler>
2755 FilterProcessorSrcDst proc(scalar, dst);
2756 return _filter_squoted(proc);
// Single-quoted filtering using a single buffer: wraps (dst, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_squoted().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_squoted_in_place, which _filter_scalar_squot()
// calls with (s, s.len) -- confirm.
2759 template<
class EventHandler>
2762 FilterProcessorInplaceEndExtending proc(dst, cap);
2763 return _filter_squoted(proc);
// Debug-trace helper for the double-quoted filter: prefixes the message with
// proc.rpos and proc.wpos and forwards to _c4dbgpf. The second definition
// expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
2774 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2776 #define _c4dbgfdq(...)
// Handles a newline found while filtering a double-quoted scalar. Asserts
// that the current character is '\n' and counts the newlines that follow via
// _count_following_newlines().
// Visible actions: proc.set('\n', numnl_following) for a run of empty lines;
// per the trace messages, a single newline is converted to a space. Finally,
// if the character at the cursor ii is a backslash followed by a space or a
// tab, the skip is extended up to that backslash (per the trace message).
// NOTE(review): the branch conditions and the remaining processor calls are
// on lines not present in this listing.
2779 template<
class EventHandler>
2780 template<
class FilterProcessor>
2781 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2783 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2785 _c4dbgfdq(
"found newline. sofar={}", _prs(proc.sofar()));
2786 size_t ii = proc.rpos;
2787 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2790 proc.set(
'\n', numnl_following);
2791 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is neither space nor tab
2795 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message labels the value "ret" but the argument passed is ii
2799 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2804 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// look at what follows the newline run: a backslash gets special treatment
2806 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2808 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for "no character" when the backslash is the last one
2809 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2810 if(next ==
' ' || next ==
'\t')
2812 _c4dbgfdq(
"extend skip to backslash",
"");
// Decodes a hexadecimal code point escape in a double-quoted scalar.
// sz is the number of hex digits expected; the callers in this file pass
// 2, 4 and 8 for the 'x', 'u' and 'U' escapes respectively.
// Steps: bounds-check, take sz characters starting two past the read
// position (i.e. after the backslash and the escape letter), parse them with
// read_hex(), encode the value with decode_code_point() into readbuf, and
// hand the resulting bytes to proc.translate_esc_bulk().
// Each failure is reported through _c4err.
// NOTE(review): the declaration of readbuf is on a line not present in this
// listing.
2820 template<
class EventHandler>
2821 template<
class FilterProcessor>
2822 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc,
size_t sz)
// sz hex digits plus the escape letter
2824 const size_t szp1 = sz + 1u;
2825 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2826 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2828 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2829 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2830 uint32_t codepoint_val = {};
2831 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2832 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
2833 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message reads "code point={}" but the value passed is
// proc.rpos (a position), not codepoint_val -- confirm which is intended
2834 if(C4_UNLIKELY(numbytes == 0))
2835 _c4err(
"failed to decode code point={}", proc.rpos);
2836 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2837 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2838 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// Handles a backslash escape in a double-quoted scalar, dispatching on the
// character that follows the backslash (proc.next()):
//  - '"' '/' ' ' '\t'  -> the character itself (proc.translate_esc(next))
//  - n r t \ 0 b f a v -> the matching C escape character
//  - e                 -> 0x1b
//  - x / u / U         -> hex code point with 2 / 4 / 8 digits, via
//                         _filter_dquoted_backslash_decode()
//  - _                 -> bytes c2 a0    (UTF-8 encoding of U+00A0)
//  - N                 -> bytes c2 85    (UTF-8 encoding of U+0085)
//  - L                 -> bytes e2 80 a8 (UTF-8 encoding of U+2028)
//  - P                 -> bytes e2 80 a9 (UTF-8 encoding of U+2029)
// The two-byte payloads go through translate_esc_bulk(), the three-byte ones
// through translate_esc_extending(). Any other character is reported with
// _c4err as an unknown escape.
// NOTE(review): this listing omits lines of the body, including the start of
// the if/else chain (the line-break cases before the first visible
// "else if"), the bodies of the '\r' and '\0' branches, and the closing
// braces of the payload arrays.
// NOTE(review): _RYML_CHCONST is defined elsewhere; it presumably selects
// between the signed and unsigned spelling of the same byte -- confirm.
2841 template<
class EventHandler>
2842 template<
class FilterProcessor>
2843 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2845 char next = proc.next();
2846 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// per the trace message below: a backslash followed by \r\n is treated like backslash + \n
2849 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2853 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan forward from two past the read position, looking at spaces and tabs
2859 size_t ii = proc.rpos + 2;
2860 for( ; ii < proc.src.len; ++ii)
2863 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2868 proc.skip(ii - proc.rpos);
2870 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2873 proc.translate_esc(next);
2874 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2876 else if(next ==
'\r')
2880 else if(next ==
'n')
2882 proc.translate_esc(
'\n');
2884 else if(next ==
'r')
2886 proc.translate_esc(
'\r');
2888 else if(next ==
't')
2890 proc.translate_esc(
'\t');
2892 else if(next ==
'\\')
2894 proc.translate_esc(
'\\');
2896 else if(next ==
'x')
2898 _filter_dquoted_backslash_decode(proc, 2u);
2900 else if(next ==
'u')
2902 _filter_dquoted_backslash_decode(proc, 4u);
2904 else if(next ==
'U')
2906 _filter_dquoted_backslash_decode(proc, 8u);
2909 else if(next ==
'0')
2911 proc.translate_esc(
'\0');
2913 else if(next ==
'b')
2915 proc.translate_esc(
'\b');
2917 else if(next ==
'f')
2919 proc.translate_esc(
'\f');
2921 else if(next ==
'a')
2923 proc.translate_esc(
'\a');
2925 else if(next ==
'v')
2927 proc.translate_esc(
'\v');
2929 else if(next ==
'e')
2931 proc.translate_esc(
'\x1b');
2933 else if(next ==
'_')
2936 const char payload[] = {
2937 _RYML_CHCONST(-0x3e, 0xc2),
2938 _RYML_CHCONST(-0x60, 0xa0),
2940 proc.translate_esc_bulk(payload, 2, 1);
2942 else if(next ==
'N')
2945 const char payload[] = {
2946 _RYML_CHCONST(-0x3e, 0xc2),
2947 _RYML_CHCONST(-0x7b, 0x85),
2949 proc.translate_esc_bulk(payload, 2, 1);
2951 else if(next ==
'L')
2954 const char payload[] = {
2955 _RYML_CHCONST(-0x1e, 0xe2),
2956 _RYML_CHCONST(-0x80, 0x80),
2957 _RYML_CHCONST(-0x58, 0xa8),
2959 proc.translate_esc_extending(payload, 3, 1);
2961 else if(next ==
'P')
2964 const char payload[] = {
2965 _RYML_CHCONST(-0x1e, 0xe2),
2966 _RYML_CHCONST(-0x80, 0x80),
2967 _RYML_CHCONST(-0x57, 0xa9),
2969 proc.translate_esc_extending(payload, 3, 1);
2971 else if(next ==
'\0')
2977 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2979 _c4dbgfdq(
"backslash...sofar={}", _prs(proc.sofar()));
// Filters a double-quoted scalar through the given filter processor and
// returns proc.result(). Walks the input while the processor has more
// characters, dispatching on the current one:
//  - whitespace      -> _filter_ws_copy_trailing()
//  - newline         -> _filter_nl_dquoted()
//  - carriage return -> ignored (per the trace message)
//  - backslash       -> _filter_dquoted_backslash() (presumably; the case
//                       label is not visible)
// NOTE(review): the dispatch statement and the handling of ordinary
// characters are on lines not present in this listing.
2983 template<
class EventHandler>
2984 template<
class FilterProcessor>
2985 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2987 _c4dbgfdq(
"before={}", _prs(proc.src));
2990 while(proc.has_more_chars())
2992 const char curr = proc.curr();
2993 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2999 _c4dbgfdq(
"whitespace", curr);
3000 _filter_ws_copy_trailing(proc);
3005 _c4dbgfdq(
"newline", curr);
3006 _filter_nl_dquoted(proc);
3011 _c4dbgfdq(
"carriage return, ignore", curr);
3017 _filter_dquoted_backslash(proc);
3027 _c4dbgfdq(
"after={}", _prs(proc.sofar()));
3028 return proc.result();
// Double-quoted filtering from a source buffer into a separate destination:
// wraps (scalar, dst) in a FilterProcessorSrcDst and runs _filter_dquoted().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_dquoted, which _filter_scalar_dquot() calls with
// (s, dst) -- confirm.
3034 template<
class EventHandler>
3037 FilterProcessorSrcDst proc(scalar, dst);
3038 return _filter_dquoted(proc);
// Double-quoted filtering using a single buffer: wraps (dst, cap) in a
// FilterProcessorInplaceMidExtending (a different processor type from the
// one used by the plain and single-quoted in-place wrappers) and runs
// _filter_dquoted().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_dquoted_in_place -- confirm.
3041 template<
class EventHandler>
3044 FilterProcessorInplaceMidExtending proc(dst, cap);
3045 return _filter_dquoted(proc);
// Scans s backwards for a newline whose following line starts with more than
// `indentation` spaces (a line made only of spaces counts with its full
// length). The scan starts at s.len-indentation-1 and walks down to index 0.
// NOTE(review): the return statements are on lines not present in this
// listing. Judging by how _filter_chomp() uses the result, the function
// presumably returns the index of that newline, or npos when s is too short
// or no such newline exists -- confirm.
3054 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
// too short to hold a newline followed by more than `indentation` characters
3056 if(indentation + 1 > s.len)
// i is unsigned: the loop ends when decrementing past 0 wraps to size_t(-1)
3058 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
3060 if(s.str[i] ==
'\n')
3062 csubstr rem = s.sub(i + 1);
3063 size_t first = rem.first_not_of(
' ');
3064 first = (first !=
npos) ? first : rem.len;
3065 if(first > indentation)
// Applies block-scalar chomping to the trailing part of the input that
// remains in the processor. Preconditions (asserted): chomp is one of
// CHOMP_CLIP / CHOMP_KEEP / CHOMP_STRIP, and the remaining input consists
// only of spaces, '\n' and '\r'.
// Phase 1: if _find_last_newline_and_larger_indentation() finds a trailing
// line indented beyond `indentation`, walk up to that point; on each such
// line skip `indentation` spaces and copy the spaces beyond it.
// Phase 2, by chomp mode (per the trace messages):
//  - CLIP:  keeps a newline and skips the rest, adding a newline if one is
//           missing
//  - KEEP:  copies all remaining newlines
//  - STRIP: strips the remaining characters
// NOTE(review): the mode dispatch and most processor calls in phase 2 are on
// lines not present in this listing.
3072 template<
class EventHandler>
3073 template<
class FilterProcessor>
3074 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
3076 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3077 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
// local trace helper; the second definition expands to nothing
3081 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3083 #define _c4dbgchomp(...)
3088 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3091 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// convert the offset within rem() into a position in proc.src, one past the
// newline and past the indentation
3092 last = proc.rpos + last + size_t(1) + indentation;
3093 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3095 while((proc.rpos < last) && proc.has_more_chars())
3097 const char curr = proc.curr();
3098 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3103 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3106 csubstr at_next_line = proc.rem();
3107 if(at_next_line.begins_with(
' '))
3109 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3111 size_t first_non_space = at_next_line.first_not_of(
' ');
3112 _c4dbgchomp(
"first_non_space={}", first_non_space);
// a line made only of spaces counts with its full length
3113 if(first_non_space ==
npos)
3115 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3116 first_non_space = at_next_line.len;
3118 if(first_non_space <= indentation)
3120 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3121 proc.skip(first_non_space);
3125 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3126 proc.skip(indentation);
// spaces beyond the indentation are content: keep them
3128 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3129 proc.copy(first_non_space - indentation);
3147 bool had_one =
false;
3148 while(proc.has_more_chars())
3150 const char curr = proc.curr();
3151 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3156 _c4dbgchomp(
"copy newline!", curr);
3164 _c4dbgchomp(
"skip!", curr);
3171 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3178 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3179 while(proc.has_more_chars())
3181 const char curr = proc.curr();
3182 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3186 _c4dbgchomp(
"copy newline!", curr);
3191 _c4dbgchomp(
"skip!", curr);
3200 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Debug-trace helper shared by the block-scalar filter helpers: prefixes the
// message with proc.rpos and proc.wpos and forwards to _c4dbgpf. The second
// definition expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
3212 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3214 #define _c4dbgfb(...)
// Skips the indentation at the start of a block-scalar line. Counts the
// leading spaces in the remaining input; when there are fewer than
// `indentation` the trace message says that many are skipped, otherwise
// exactly `indentation` characters are skipped.
// The section after the RYML_NO_COVERAGE__TO_BE_DELETED guard handles input
// that is spaces all the way to the end: it skips either everything that
// remains or `indentation` characters.
// NOTE(review): the conditions separating the "non-space found" and "all
// spaces" cases, and the skip call in the first `first < indentation`
// branch, are on lines not present in this listing.
3217 template<
class EventHandler>
3218 template<
class FilterProcessor>
3219 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3221 csubstr rem = proc.rem();
3224 size_t first = rem.first_not_of(
' ');
3227 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3228 if(first < indentation)
3230 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3235 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3236 proc.skip(indentation);
3239 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3242 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3246 if(first < indentation)
3248 _c4dbgfb(
"skip everything", first);
3249 proc.skip(proc.src.len - proc.rpos);
3253 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3254 proc.skip(indentation);
// Measures the real contents of a block scalar by right-trimming spaces,
// '\n' and '\r' from the source, and returns the trimmed length
// (contents.len). For an all-whitespace scalar with chomp == CHOMP_KEEP and a
// non-empty source, it additionally walks the input character by character.
// NOTE(review): the condition detecting the all-whitespace case and the loop
// body are on lines not present in this listing; per the trace message the
// loop presumably keeps the newlines -- confirm.
3262 template<
class EventHandler>
3263 template<
class FilterProcessor>
3264 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3266 csubstr contents = proc.src.trimr(
" \n\r");
3267 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3270 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3271 if(chomp == CHOMP_KEEP && proc.src.len)
3273 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3274 while(proc.has_more_chars())
3276 const char curr = proc.curr();
3288 return contents.len;
// Extends the contents length up to the first newline found at or after
// position contents_len in the source; if no newline is found, extends it to
// the whole source. Asserts contents_len > 0 and returns the new length.
3291 template<
class EventHandler>
3292 template<
class FilterProcessor>
3293 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3295 _c4dbgfb(
"contents_len={}", contents_len);
3297 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3301 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3302 if(firstnewl !=
npos)
3304 contents_len = firstnewl;
3305 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3309 contents_len = proc.src.len;
3310 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3313 return contents_len;
// Debug-trace helper for the block-literal filter: prefixes the message with
// proc.rpos and proc.wpos and forwards to _c4dbgpf. The second definition
// expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
3325 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3327 #define _c4dbgfbl(...)
// Filters a literal block scalar through the given filter processor and
// returns proc.result(). Sequence:
//  1. _handle_all_whitespace() measures the contents; there is an early
//     return of proc.result() (its guarding condition is not visible).
//  2. _extend_to_chomp() extends the contents length to the line end.
//  3. The indentation of the first line is skipped, then the contents are
//     walked; after each newline the indentation of the next line is skipped.
//  4. _filter_chomp() processes the trailing part according to `chomp`.
// NOTE(review): the dispatch inside the loop and the handling of ordinary
// characters are on lines not present in this listing.
3330 template<
class EventHandler>
3331 template<
class FilterProcessor>
3332 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3334 _c4dbgfbl(
"indentation={} before={}", indentation, _prs(proc.src));
3336 size_t contents_len = _handle_all_whitespace(proc, chomp);
3338 return proc.result();
3340 contents_len = _extend_to_chomp(proc, contents_len);
3342 _c4dbgfbl(
"to filter={}", _prs(proc.src.first(contents_len)));
3344 _filter_block_indentation(proc, indentation);
3347 while(proc.has_more_chars(contents_len))
3349 const char curr = proc.curr();
3350 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3355 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3357 _filter_block_indentation(proc, indentation);
3369 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3371 _filter_chomp(proc, chomp, indentation);
3373 _c4dbgfbl(
"final={}", _prs(proc.sofar()));
3375 return proc.result();
// Literal-block filtering from a source buffer into a separate destination:
// wraps (scalar, dst) in a FilterProcessorSrcDst and runs
// _filter_block_literal().
// NOTE(review): the signature line is not visible in this listing; the
// function name is presumably filter_scalar_block_literal -- confirm.
3380 template<
class EventHandler>
3383 FilterProcessorSrcDst proc(scalar, dst);
3384 return _filter_block_literal(proc, indentation, chomp);
// Literal-block filtering using a single buffer: wraps (scalar, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_block_literal().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_block_literal_in_place, which
// _filter_scalar_literal() calls with (s, s.len, indentation, chomp) --
// confirm.
3387 template<
class EventHandler>
3390 FilterProcessorInplaceEndExtending proc(scalar, cap);
3391 return _filter_block_literal(proc, indentation, chomp);
// Debug-trace helper for the folded-block filter: prefixes the message with
// proc.rpos and proc.wpos and forwards to _c4dbgpf. The second definition
// expands to nothing.
// NOTE(review): the conditional selecting between the two is not visible here.
3401 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3403 #define _c4dbgfbf(...)
// Handles the leading part of a folded block scalar, up to position `len`.
// Skips the indentation of the first line, then loops over the characters:
//  - newline: skips the indentation of the following line
//  - space:   measures the extra indentation and hands over to
//             _filter_block_folded_indented_block()
//  - other:   stops (per the trace message)
// NOTE(review): the dispatch statement, the condition under which `first` is
// replaced by proc.rem().len, and the copy/skip calls are on lines not
// present in this listing.
3407 template<
class EventHandler>
3408 template<
class FilterProcessor>
3409 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3411 _filter_block_indentation(proc, indentation);
3412 while(proc.has_more_chars(len))
3414 const char curr = proc.curr();
3415 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3419 _c4dbgfbf(
"newline.", curr);
3421 _filter_block_indentation(proc, indentation);
3429 size_t first = proc.rem().first_not_of(
" \t");
3430 _c4dbgfbf(
"space. first={}", first);
3432 first = proc.rem().len;
3433 _c4dbgfbf(
"... indentation increased to {}", first);
3434 _filter_block_folded_indented_block(proc, indentation, len, first);
3438 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Applies folding to one newline of a run, given how many newlines of the
// run have been seen (num_newl) and the write position recorded at the first
// one (wpos_at_first_newl). Per the trace messages:
//  - first newline:  becomes a space; the current write position is recorded
//  - second newline: the space written for the first one is overwritten with
//                    '\n' via proc.set_at()
//  - later newlines: copied
// Returns the (possibly updated) wpos_at_first_newl.
// NOTE(review): the conditions on num_newl and the processor calls that
// write the space / copy the newline are on lines not present in this listing.
3444 template<
class EventHandler>
3445 template<
class FilterProcessor>
3446 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3451 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3452 wpos_at_first_newl = proc.wpos;
3457 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// sanity checks: the recorded position holds the space written for the first
// newline, and it is the last character written so far
3458 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3459 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3460 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3462 proc.set_at(wpos_at_first_newl,
'\n');
3463 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3466 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3470 return wpos_at_first_newl;
// Handles a run of newlines inside a folded block scalar, up to position
// `len`. Asserts that the current character is '\n', then loops:
//  - newline: counts it, folds it via
//             _filter_block_folded_newlines_compress(), and skips the
//             indentation of the next line
//  - space:   a more-indented line follows. The space written for the first
//             newline is turned back into '\n' (or a missing newline is
//             added, per the trace message), then
//             _filter_block_folded_indented_block() takes over and the
//             recorded write position is reset to npos
//  - other:   stops (per the trace message)
// NOTE(review): the dispatch statement and several conditions are on lines
// not present in this listing.
3473 template<
class EventHandler>
3474 template<
class FilterProcessor>
3475 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3477 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
3478 size_t num_newl = 0;
3479 size_t wpos_at_first_newl =
npos;
3480 while(proc.has_more_chars(len))
3482 const char curr = proc.curr();
3483 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3488 _c4dbgfbf(
"newline. sofar={}", num_newl);
3524 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3525 _filter_block_indentation(proc, indentation);
3531 size_t first = proc.rem().first_not_of(
" \t");
3532 _c4dbgfbf(
"space. first={}", first);
3534 first = proc.rem().len;
3535 _c4dbgfbf(
"... indentation increased to {}", first);
3538 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3539 proc.set_at(wpos_at_first_newl,
'\n');
3543 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3546 _filter_block_folded_indented_block(proc, indentation, len, first);
3548 wpos_at_first_newl =
npos;
3555 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Processes a more-indented stretch inside a folded block scalar, up to
// position `len`. Asserts that curr_indentation matches the leading
// whitespace of the remaining input (or that only whitespace remains),
// copies those curr_indentation characters, then loops over the characters.
// After a newline it skips the block indentation and inspects the first
// non-space character of the next line to decide whether the indented block
// continues.
// NOTE(review): the dispatch statement, the guard around `rem[first]`, and
// the copy calls are on lines not present in this listing.
// NOTE(review): the function is declared noexcept yet invokes
// _RYML_ASSERT_PARSE_; whether that macro can propagate an exception depends
// on its definition and the callbacks -- confirm.
3562 template<
class EventHandler>
3563 template<
class FilterProcessor>
3564 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3566 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
// the extra indentation is content: keep it
3567 if(curr_indentation)
3568 proc.copy(curr_indentation);
3569 while(proc.has_more_chars(len))
3571 const char curr = proc.curr();
3572 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3578 _filter_block_indentation(proc, indentation);
3579 csubstr rem = proc.rem();
3580 const size_t first = rem.first_not_of(
' ');
3581 _c4dbgfbf(
"newline. firstns={}", first);
3584 const char c = rem[first];
3585 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3586 if(c ==
'\n' || c ==
'\r')
3592 _c4dbgfbf(
"done with indented block", first);
3596 else if(first !=
npos)
3599 _c4dbgfbf(
"copy all {} spaces", first);
// Filters a folded block scalar through the given filter processor and
// returns proc.result(). Sequence:
//  1. _handle_all_whitespace() measures the contents; there is an early
//     return of proc.result() (its guarding condition is not visible).
//  2. _extend_to_chomp() extends the contents length to the line end.
//  3. _filter_block_folded_newlines_leading() handles the leading lines.
//  4. The contents are walked; each newline is handed to
//     _filter_block_folded_newlines().
//  5. _filter_chomp() processes the trailing part according to `chomp`.
// NOTE(review): the dispatch inside the loop and the handling of ordinary
// characters are on lines not present in this listing.
3617 template<
class EventHandler>
3618 template<
class FilterProcessor>
3619 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3621 _c4dbgfbf(
"indentation={} before={}", indentation, _prs(proc.src));
3623 size_t contents_len = _handle_all_whitespace(proc, chomp);
3625 return proc.result();
3627 contents_len = _extend_to_chomp(proc, contents_len);
3629 _c4dbgfbf(
"to filter={}", _prs(proc.src.first(contents_len)));
3631 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3634 while(proc.has_more_chars(contents_len))
3636 const char curr = proc.curr();
3637 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3642 _c4dbgfbf(
"found newline", curr);
3643 _filter_block_folded_newlines(proc, indentation, contents_len);
3655 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3657 _filter_chomp(proc, chomp, indentation);
// NOTE(review): the format string has one placeholder but two arguments are
// passed (the length and the text) -- confirm which one should be printed
3659 _c4dbgfbf(
"final={}", proc.sofar().len, _prs(proc.sofar()));
3661 return proc.result();
// Folded-block filtering from a source buffer into a separate destination:
// wraps (scalar, dst) in a FilterProcessorSrcDst and runs
// _filter_block_folded().
// NOTE(review): the signature line is not visible in this listing; the
// function name is presumably filter_scalar_block_folded -- confirm.
3666 template<
class EventHandler>
3669 FilterProcessorSrcDst proc(scalar, dst);
3670 return _filter_block_folded(proc, indentation, chomp);
// Folded-block filtering using a single buffer: wraps (scalar, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_block_folded().
// NOTE(review): the signature line is not visible in this listing; this is
// presumably filter_scalar_block_folded_in_place, which
// _filter_scalar_folded() calls with (s, s.len, indentation, chomp) --
// confirm.
3673 template<
class EventHandler>
3676 FilterProcessorInplaceEndExtending proc(scalar, cap);
3677 return _filter_block_folded(proc, indentation, chomp);
// Filters a plain scalar in place, using s itself as the buffer with
// capacity s.len, and asserts that the filter result is valid.
// NOTE(review): the return statement is on a line not present in this
// listing; presumably the filtered string r.get() is returned -- confirm.
3685 template<
class EventHandler>
3686 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3688 _c4dbgpf(
"filtering plain scalar: s={}", _prs(s));
3689 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3691 _c4dbgpf(
"filtering plain scalar: success! s={}", _prs(r.get()));
// Filters a single-quoted scalar in place, using s itself as the buffer with
// capacity s.len, and asserts that the filter result is valid.
// NOTE(review): the return statement is on a line not present in this
// listing; presumably the filtered string r.get() is returned -- confirm.
3697 template<
class EventHandler>
3698 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3700 _c4dbgpf(
"filtering squo scalar: s={}", _prs(s));
3701 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3702 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3703 _c4dbgpf(
"filtering squo scalar: success! s={}", _prs(r.get()));
// Filters a double-quoted scalar. First tries in place, with s as the buffer
// and s.len as capacity. If that result is not valid, the required length is
// read from the result, a buffer of that size is obtained from
// _alloc_arena(), and the scalar is filtered again from s into that buffer;
// that second result is checked with _RYML_CHECK_PARSE_.
// NOTE(review): the return statements are on lines not present in this
// listing; presumably r.get() / rsd.get() are returned -- confirm.
3710 template<
class EventHandler>
3711 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3713 _c4dbgpf(
"filtering dquo scalar: s={}", _prs(s));
3714 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3715 if(C4_LIKELY(r.valid()))
3717 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(r.get()));
// the in-place attempt did not fit: fall back to a separate arena buffer
3722 const size_t len = r.required_len();
3723 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// &s is passed along with the size
// NOTE(review): presumably so that s can be relocated if the arena moves -- confirm
3724 substr dst = _alloc_arena(len, &s);
3725 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3728 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3729 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3730 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3731 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
3732 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3733 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(rsd.get()));
// Makes room for one extra trailing '\n' on a scalar.
// If s lies inside the parse buffer (_buf()), its characters are moved one
// position to the left -- asserting that s does not start at the very
// beginning of the buffer -- and a '\n' is stored at index s.len.
// Otherwise a buffer of s.len + 1 characters is obtained from _alloc_arena()
// and the scalar is copied into it.
// NOTE(review): the pointer/length adjustments, the write of the newline in
// the arena branch, and the return statements are on lines not present in
// this listing.
3743 template<
class EventHandler>
3744 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3746 if(s.is_sub(_buf()))
3748 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3749 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
// source and destination overlap, hence memmove rather than memcpy
3751 memmove(s.str - 1, s.str, s.len);
3753 s.str[s.len] =
'\n';
3759 substr dst = _alloc_arena(s.len + 1, &s);
3761 memcpy(dst.str, s.str, s.len);
// Filters a literal block scalar in place, with s as the buffer and s.len as
// capacity. If the result is not valid, the code asserts that exactly one
// more character is required (s.len + 1) and obtains the result from
// _move_scalar_left_and_add_newline(s).
// NOTE(review): the declaration of `result`, the valid-case assignment and
// the return statement are on lines not present in this listing.
3767 template<
class EventHandler>
3768 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3770 _c4dbgpf(
"filtering block literal scalar: s={}", _prs(s));
3771 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3773 if(C4_LIKELY(r.valid()))
3779 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3780 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3783 result = _move_scalar_left_and_add_newline(s);
3785 _c4dbgpf(
"filtering block literal scalar: success! s={}", _prs(result));
// Filters a folded block scalar in place, with s as the buffer and s.len as
// capacity. If the result is not valid, the code asserts that exactly one
// more character is required (s.len + 1) and obtains the result from
// _move_scalar_left_and_add_newline(s).
// NOTE(review): the declaration of `result`, the valid-case assignment and
// the return statement are on lines not present in this listing.
3791 template<
class EventHandler>
3792 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3794 _c4dbgpf(
"filtering block folded scalar: s={}", _prs(s));
3795 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3797 if(C4_LIKELY(r.valid()))
3803 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3804 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3807 result = _move_scalar_left_and_add_newline(s);
3809 _c4dbgpf(
"filtering block folded scalar: success! s={}", _prs(result));
// Filters a scanned plain scalar used as a key, when filtering is enabled in
// the parser options (m_options.scalar_filtering()). Otherwise the scalar is
// left as is and the event handler is told that the key is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3816 template<
class EventHandler>
3817 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3821 if(m_options.scalar_filtering())
3823 return _filter_scalar_plain(sc.scalar, indentation);
3827 _c4dbgp(
"plain scalar left unfiltered");
3828 m_evt_handler->mark_key_scalar_unfiltered();
3833 _c4dbgp(
"plain scalar doesn't need filtering");
// Filters a scanned plain scalar used as a value, when filtering is enabled
// in the parser options (m_options.scalar_filtering()). Otherwise the scalar
// is left as is and the event handler is told that the value is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3838 template<
class EventHandler>
3839 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3843 if(m_options.scalar_filtering())
3845 return _filter_scalar_plain(sc.scalar, indentation);
3849 _c4dbgp(
"plain scalar left unfiltered");
3850 m_evt_handler->mark_val_scalar_unfiltered();
3855 _c4dbgp(
"plain scalar doesn't need filtering");
// Filters a scanned single-quoted scalar used as a key, when filtering is
// enabled in the parser options. Otherwise the scalar is left as is and the
// event handler is told that the key is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3863 template<
class EventHandler>
3864 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3868 if(m_options.scalar_filtering())
3870 return _filter_scalar_squot(sc.scalar);
3874 _c4dbgp(
"squo key scalar left unfiltered");
3875 m_evt_handler->mark_key_scalar_unfiltered();
3880 _c4dbgp(
"squo key scalar doesn't need filtering");
// Filters a scanned single-quoted scalar used as a value, when filtering is
// enabled in the parser options. Otherwise the scalar is left as is and the
// event handler is told that the value is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3885 template<
class EventHandler>
3886 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3890 if(m_options.scalar_filtering())
3892 return _filter_scalar_squot(sc.scalar);
3896 _c4dbgp(
"squo val scalar left unfiltered");
3897 m_evt_handler->mark_val_scalar_unfiltered();
3902 _c4dbgp(
"squo val scalar doesn't need filtering");
// Filters a scanned double-quoted scalar used as a key, when filtering is
// enabled in the parser options. Otherwise the scalar is left as is and the
// event handler is told that the key is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3910 template<
class EventHandler>
3911 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3915 if(m_options.scalar_filtering())
3917 return _filter_scalar_dquot(sc.scalar);
3921 _c4dbgp(
"dquo scalar left unfiltered");
3922 m_evt_handler->mark_key_scalar_unfiltered();
3927 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filters a scanned double-quoted scalar used as a value, when filtering is
// enabled in the parser options. Otherwise the scalar is left as is and the
// event handler is told that the value is unfiltered.
// A third outcome exists, "doesn't need filtering" per the trace message.
// NOTE(review): the condition selecting that outcome and the non-filtering
// return statements are on lines not present in this listing.
3932 template<
class EventHandler>
3933 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3937 if(m_options.scalar_filtering())
3939 return _filter_scalar_dquot(sc.scalar);
3943 _c4dbgp(
"dquo scalar left unfiltered");
3944 m_evt_handler->mark_val_scalar_unfiltered();
3949 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filters a scanned literal block scalar used as a key, using the block's
// own indentation and chomp mode, when filtering is enabled in the parser
// options. Otherwise the scalar is left as is and the event handler is told
// that the key is unfiltered.
// NOTE(review): the return statement of the unfiltered path is on a line not
// present in this listing.
3957 template<
class EventHandler>
3958 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3960 if(m_options.scalar_filtering())
3962 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3966 _c4dbgp(
"literal scalar left unfiltered");
3967 m_evt_handler->mark_key_scalar_unfiltered();
// Filters a scanned literal block scalar used as a value, using the block's
// own indentation and chomp mode, when filtering is enabled in the parser
// options. Otherwise the scalar is left as is and the event handler is told
// that the value is unfiltered.
// NOTE(review): the return statement of the unfiltered path is on a line not
// present in this listing.
3972 template<
class EventHandler>
3973 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3975 if(m_options.scalar_filtering())
3977 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3981 _c4dbgp(
"literal scalar left unfiltered");
3982 m_evt_handler->mark_val_scalar_unfiltered();
// Filters a scanned folded block scalar used as a key, using the block's own
// indentation and chomp mode, when filtering is enabled in the parser
// options. Otherwise the scalar is left as is and the event handler is told
// that the key is unfiltered.
// NOTE(review): the return statement of the unfiltered path is on a line not
// present in this listing.
3990 template<
class EventHandler>
3991 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3993 if(m_options.scalar_filtering())
3995 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3999 _c4dbgp(
"folded scalar left unfiltered");
4000 m_evt_handler->mark_key_scalar_unfiltered();
// Filters a scanned folded block scalar used as a value, using the block's
// own indentation and chomp mode, when filtering is enabled in the parser
// options. Otherwise the scalar is left as is and the event handler is told
// that the value is unfiltered.
// NOTE(review): the return statement of the unfiltered path is on a line not
// present in this listing.
4005 template<
class EventHandler>
4006 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
4008 if(m_options.scalar_filtering())
4010 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4014 _c4dbgp(
"folded scalar left unfiltered");
4015 m_evt_handler->mark_val_scalar_unfiltered();
// Adds the flags in `on` to the current parser state. The visible lines only
// build the trace output: the flags being added, the state's flags before,
// and the combined flags after, each rendered to text by
// detail::_parser_flags_to_str() into a 64-char stack buffer.
// NOTE(review): the statement that actually updates the state's flags is on
// a line not present in this listing.
4027 template<
class EventHandler>
4028 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
4030 ParserState *s = m_evt_handler->m_curr;
4031 char buf1_[64], buf2_[64], buf3_[64];
4032 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4033 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4034 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4035 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Adds the flags in `on` and removes the flags in `off` on the current
// parser state. The visible lines only build the trace output; the "after"
// value is computed as (~off) & (flags | on), so a flag present in both
// `on` and `off` ends up cleared.
// NOTE(review): the signature line and the statement that actually updates
// the state's flags are not present in this listing; the function name is
// presumably addrem_flags -- confirm.
4039 template<
class EventHandler>
4042 ParserState *s = m_evt_handler->m_curr;
4043 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4044 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4045 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4046 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4047 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4048 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
/** Trace the removal of parser flags from the current state.
 * Formats three flag sets into stack buffers -- the flags being removed,
 * the state's current flags, and the current flags with the removed bits
 * cleared -- and emits them through the debug-print macro together with
 * the state's level.
 * @param off the flags to turn off */
4054 template<
class EventHandler>
4055 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
4057 ParserState *s = m_evt_handler->m_curr;
// one 64-byte scratch buffer per printed flag set
4058 char buf1_[64], buf2_[64], buf3_[64];
4059 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4060 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
// "after" is the current flags with every bit of `off` cleared
4061 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4062 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
/** Write a textual description of a set of parser flags into a buffer.
 * The local _prflag(fl) macro tests whether all bits of `fl` are set in
 * `flags`, and copies the stringified flag name into the buffer only
 * when it fits (the copy is guarded by pos + fltxt.len <= buf.len).
 * After formatting, pos <= buf.len is checked.
 * @param buf destination buffer
 * @param flags the flags to describe
 * @return the first `pos` characters of `buf` */
4066 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
4069 bool gotone =
false;
4071 #define _prflag(fl) \
4072 if((flags & fl) == (fl)) \
4076 if(pos + 1 < buf.len) \
4080 csubstr fltxt = #fl; \
4081 if(pos + fltxt.len <= buf.len) \
4082 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4112 _RYML_CHECK_BASIC(pos <= buf.len);
4114 return buf.first(pos);
/** Get the source buffer contents starting at a location's offset.
 * Asserts that loc.offset lies inside the buffer, then returns the
 * sub-string of the buffer beginning at that offset. */
4124 template<
class EventHandler>
4127 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4128 return _buf().sub(loc.offset);
/** Compute the source location for a pointer into the parsed buffer.
 * A null `val` short-circuits to a location carrying only the current
 * source name, with the numeric fields zeroed. Otherwise the parser must
 * have been configured with locations enabled, and the table of newline
 * offsets must already be filled. The byte offset of `val` in the source
 * is looked up in that table to find its line, and the column is derived
 * from the offset of the preceding newline. */
4131 template<
class EventHandler>
4134 if(C4_UNLIKELY(val ==
nullptr))
4135 return {m_evt_handler->m_curr->pos.
name, 0, 0, 0};
// locations are opt-in: this check is always active
4136 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
// preconditions on the newline-offsets table
4139 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4140 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4141 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4142 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4144 csubstr src = _buf();
// val must point inside [src.begin(), src.end()], or both must be null
4145 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4146 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4148 using lineptr_type =
size_t const* C4_RESTRICT;
4149 lineptr_type lineptr =
nullptr;
// byte offset of val from the start of the source buffer
4150 size_t offset = (size_t)(val - src.begin());
// linear walk over the newline offsets
4154 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
// search over the table using a step of half the remaining count
// NOTE(review): presumably a lower-bound-style bisection; confirm against the loop body
4169 size_t count = m_newline_offsets_size;
4170 lineptr = m_newline_offsets;
4173 size_t step = count >> 1;
4174 lineptr_type it = lineptr + step;
// lineptr must land inside the table, on an entry greater than offset
4186 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4187 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4188 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4190 loc.name = m_evt_handler->m_curr->pos.name;
4191 loc.offset = offset;
// the line number is the index of the matched entry in the table
4192 loc.line = (size_t)(lineptr - m_newline_offsets);
// not the first entry: the column counts from the character after the previous newline
4193 if(lineptr > m_newline_offsets)
4194 loc.col = (offset - *(lineptr-1) - 1u);
/** Build the table of newline offsets for the source buffer.
 * The table gets one entry for every '\n' in the source (the offset of
 * that character), followed by one final entry holding the source
 * length, so the entry count is 1 + the number of newlines. */
4200 template<
class EventHandler>
4201 void ParseEngine<EventHandler>::_prepare_locations()
4203 csubstr src = _buf();
// one entry per newline plus the terminating entry
4204 size_t numnewlines = 1u + src.count(
'\n');
4205 _resize_locations(numnewlines);
// refill the table from scratch
4206 m_newline_offsets_size = 0;
4207 for(
size_t i = 0; i < src.len; i++)
4208 if(src.str[i] ==
'\n')
4209 m_newline_offsets[m_newline_offsets_size++] = i;
// terminating entry: the end of the source
4210 m_newline_offsets[m_newline_offsets_size++] = src.len;
4211 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
/** Ensure the newline-offsets table can hold the requested entry count.
 * The request is rounded up to a minimum of 16 entries. When it exceeds
 * the current capacity, any existing storage is released through the
 * callbacks and a new buffer is allocated; no copy of the previous
 * contents is made here.
 * @param numnewlines the number of entries required */
4214 template<
class EventHandler>
4215 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
// never allocate fewer than 16 entries
4217 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4218 if(numnewlines > m_newline_offsets_capacity)
4220 if(m_newline_offsets)
4221 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// the previous pointer is passed as the allocation hint
4222 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4223 m_newline_offsets_capacity = numnewlines;
/** Query whether the newline-offsets table is empty.
 * @return true when m_newline_offsets_size is zero */
4227 template<
class EventHandler>
4228 bool ParseEngine<EventHandler>::_locations_dirty()
const
4230 return !m_newline_offsets_size;
/** Deal with leading whitespace and comments in flow context.
 * Only acts when the remainder of the current line is not empty. A
 * leading space or tab is detected and traced; when the remainder
 * begins with '#', the rest of the line is a comment and is consumed
 * whole. */
4238 template<
class EventHandler>
4239 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4242 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
// leading blank (space or tab)
4244 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4246 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
// comment: extends to the end of the line
4250 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4252 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4253 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Validate the indentation at the beginning of a line in flow context.
 * Must be called with the state at the beginning of a line. When the
 * state reports indentation_lt(), the line contents past the indentation
 * are inspected: if anything other than spaces or tabs is found there,
 * the indentation is consumed and a "bad indentation" error is raised. */
4259 template<
class EventHandler>
4260 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4262 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4264 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
// what follows the indentation on this line
4266 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4267 _c4dbgpf(
"flow: after indentation={}", _prs(trimmed));
// a whitespace-only line is tolerated; real content is an error
4268 if(trimmed.len && trimmed.triml(
" \t").len)
// advance first so the error is reported past the indentation
4270 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4271 _c4err(
"bad indentation");
/** Skip the leading spaces and tabs of the current line in block context.
 * The offset at entry is captured in `mark`. When the remainder of the
 * line has a non-blank character, the line is advanced up to it;
 * otherwise the whole remainder is whitespace and is consumed.
 * NOTE(review): the returned size_t is presumably `mark`; confirm. */
4276 template<
class EventHandler>
4277 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4279 const size_t mark = m_evt_handler->m_curr->pos.offset;
// position of the first character that is neither space nor tab
4280 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4281 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4282 if(firstpos !=
npos)
4284 _c4dbgp(
"block: non empty line");
4285 _line_progressed(firstpos);
4290 _c4dbgp(
"block: rest of line is whitespace");
4291 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Reject tab characters in the leading whitespace of a block line.
 * Examines the source range [start_mark, end_mark). An error is raised
 * when the range consists only of spaces and tabs, or when a tab occurs
 * before the first character that is neither space nor tab.
 * @param start_mark source offset where the leading range begins
 * @param end_mark source offset where the leading range ends (>= start_mark) */
4296 template<
class EventHandler>
4297 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4299 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4300 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
// nothing to check for an empty range
4301 if(end_mark != start_mark)
4303 csubstr leading = _buf().range(start_mark, end_mark);
4304 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading,
true));
// position of the first tab in the range
4305 size_t pos = leading.find(
'\t');
// position of the first character that is neither space nor tab
4308 size_t fno = leading.first_not_of(
" \t");
4309 if(fno ==
npos || pos < fno)
4310 _c4err(
"invalid tab character to the left");
/** Record that a colon was seen on the current line.
 * Raises "two colons on same line" when m_prev_colon is set (not npos)
 * and equals the current line; otherwise stores the current line in
 * m_prev_colon. */
4320 template<
class EventHandler>
4321 void ParseEngine<EventHandler>::_handle_colon()
4323 size_t curr = m_evt_handler->m_curr->pos.line;
// npos means no colon line has been recorded
4324 if(C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon))
4326 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4327 _c4err(
"two colons on same line");
4329 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
4330 m_prev_colon = curr;
/** Store a pending annotation that has no position information.
 * Writes into the slot dst->annotations[dst->num_entries]: the string is
 * stored and the indentation, line and orig fields are value-initialized.
 * Asserts that the slot index is within the fixed-size annotations array.
 * @param dst the annotation set to append to
 * @param str the annotation text */
4333 template<
class EventHandler>
4334 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4336 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, _prs(str));
4337 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4338 dst->annotations[dst->num_entries].str = str;
4339 dst->annotations[dst->num_entries].indentation = {};
4340 dst->annotations[dst->num_entries].line = {};
4341 dst->annotations[dst->num_entries].orig = {};
/** Store a pending annotation together with its indentation and line.
 * Writes into the slot dst->annotations[dst->num_entries]; the orig field
 * is value-initialized. Asserts that the slot index is within the
 * fixed-size annotations array. The case where entries already exist and
 * the first entry is on the same line is singled out as unlikely.
 * @param dst the annotation set to append to
 * @param str the annotation text
 * @param indentation the indentation to record for the annotation
 * @param line the line to record for the annotation */
4345 template<
class EventHandler>
4346 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4348 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4349 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// a previous annotation of this kind already sits on this same line
4350 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4354 dst->annotations[dst->num_entries].str = str;
4355 dst->annotations[dst->num_entries].indentation = indentation;
4356 dst->annotations[dst->num_entries].line = line;
4357 dst->annotations[dst->num_entries].orig = {};
/** Store a pending annotation with its indentation, line and original text.
 * Writes into the slot dst->annotations[dst->num_entries]. Asserts that
 * the slot index is within the fixed-size annotations array. The case
 * where entries already exist and the first entry is on the same line is
 * singled out as unlikely.
 * @param dst the annotation set to append to
 * @param str the annotation text
 * @param indentation the indentation to record for the annotation
 * @param line the line to record for the annotation
 * @param orig the text stored in the orig field (traced as "orig->str") */
4361 template<
class EventHandler>
4362 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line, csubstr orig)
4364 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4365 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// a previous annotation of this kind already sits on this same line
4366 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4370 dst->annotations[dst->num_entries].str = str;
4371 dst->annotations[dst->num_entries].indentation = indentation;
4372 dst->annotations[dst->num_entries].line = line;
4373 dst->annotations[dst->num_entries].orig = orig;
/** Query whether more than one tag or more than one anchor is pending.
 * @return true when either pending set holds more than one entry */
4377 template<
class EventHandler>
4378 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4380 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
/** Flush pending annotations onto an empty key/val pair in flow context.
 * Acts only when at least one tag or anchor is pending. A single pending
 * tag is applied as the key tag, and a single pending anchor as the key
 * anchor; each pending set is cleared after being applied. More than one
 * pending anchor is an error ("too many anchors"). The handler then
 * receives an empty plain key scalar and an empty plain val scalar. */
4383 template<
class EventHandler>
4384 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
// bitwise-or of the two counts: zero only when nothing is pending
4386 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4388 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4389 if(m_pending_tags.num_entries)
4391 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4392 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4394 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4395 _clear_annotations(&m_pending_tags);
4402 if(m_pending_anchors.num_entries)
// report the anchor count (not the tag count) under the #anchors label
4404 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_anchors.num_entries);
4405 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4407 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4408 _clear_annotations(&m_pending_anchors);
4412 _c4err(
"too many anchors");
// the annotated node has no content: emit empty key and val scalars
4415 m_evt_handler->set_key_scalar_plain_empty();
4416 m_evt_handler->set_val_scalar_plain_empty();
/** Apply pending annotations to a key scalar in block context.
 * A single pending tag is set as the key tag, and a single pending
 * anchor as the key anchor; each pending set is cleared after being
 * applied. More than one pending anchor is an error ("too many
 * anchors"). */
4420 template<
class EventHandler>
4421 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4423 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4424 if(m_pending_tags.num_entries)
4426 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4427 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4429 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4430 _clear_annotations(&m_pending_tags);
4437 if(m_pending_anchors.num_entries)
4439 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4440 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4442 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4443 _clear_annotations(&m_pending_anchors);
4447 _c4err(
"too many anchors");
/** Apply pending annotations to a val scalar in block context.
 * A single pending tag is set as the val tag, and a single pending
 * anchor as the val anchor; each pending set is cleared after being
 * applied. More than one pending anchor is an error ("too many
 * anchors"). */
4452 template<
class EventHandler>
4453 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4455 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4456 if(m_pending_tags.num_entries)
4458 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4459 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4461 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4462 _clear_annotations(&m_pending_tags);
4469 if(m_pending_anchors.num_entries)
4471 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4472 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4474 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4475 _clear_annotations(&m_pending_anchors);
4479 _c4err(
"too many anchors");
/** Assign pending annotations to a block map that is about to start.
 * Tags and anchors follow the same rule. With two pending entries, the
 * first one is set on the map (as val tag / val anchor). With one
 * pending entry, it is set on the map and cleared only when it was
 * recorded on a line before `current_line`.
 * @param current_line the line against which annotation lines are compared */
4484 template<
class EventHandler>
4485 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4487 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4488 if(m_pending_tags.num_entries == 2)
4490 _c4dbgp(
"2 tags, setting entry 0");
4491 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4493 else if(m_pending_tags.num_entries == 1)
4495 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
// a tag from an earlier line belongs to the map itself
4496 if(m_pending_tags.annotations[0].line < current_line)
4498 _c4dbgp(
"...tag is for the map. setting it.");
4499 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4500 _clear_annotations(&m_pending_tags);
4504 if(m_pending_anchors.num_entries == 2)
4506 _c4dbgp(
"2 anchors, setting entry 0");
4507 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4509 else if(m_pending_anchors.num_entries == 1)
4511 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
// an anchor from an earlier line belongs to the map itself
4512 if(m_pending_anchors.annotations[0].line < current_line)
4514 _c4dbgp(
"...anchor is for the map. setting it.");
4515 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4516 _clear_annotations(&m_pending_anchors);
/** Assign pending annotations to a block map that starts as a key.
 * Dispatches on the number of pending tags, then on the number of
 * pending anchors. With one pending entry, it is set on the map (as key
 * tag / key anchor) and cleared only when it was recorded on a line
 * different from the current one. With two pending entries, the first
 * one is set on the map. */
4521 template<
class EventHandler>
4522 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4524 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4525 switch(m_pending_tags.num_entries)
4528 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline={}", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
// a tag from another line belongs to the map itself
4529 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4531 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4532 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4533 _clear_annotations(&m_pending_tags);
4537 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4538 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4541 switch(m_pending_anchors.num_entries)
4544 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline={}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
// an anchor from another line belongs to the map itself
4545 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4547 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4548 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4549 _clear_annotations(&m_pending_anchors);
4553 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4554 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** Apply pending annotations to the first key of a block map that was
 * just started, and set the map's indentation.
 * At most two tags and two anchors may be pending. When any are pending,
 * the indentation is re-selected from the annotations. With one pending
 * entry, that entry is set on the key; with two, the second entry is set
 * on the key. The pending set is cleared in both cases.
 * @param key_indentation the indentation of the key
 * @param key_line the line of the key */
4559 template<
class EventHandler>
4560 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4562 _c4dbgp(
"annotations_after_start_mapblck");
4563 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4564 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4565 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// the annotations may dictate a different indentation than the key
4567 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4568 switch(m_pending_tags.num_entries)
4571 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4572 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4573 _clear_annotations(&m_pending_tags);
4576 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
// with two tags, the second one goes to the key
4577 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4578 _clear_annotations(&m_pending_tags);
4581 switch(m_pending_anchors.num_entries)
4584 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchor: {}", m_pending_anchors.annotations[0].str);
4585 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4586 _clear_annotations(&m_pending_anchors);
4589 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
// with two anchors, the second one goes to the key
4590 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4591 _clear_annotations(&m_pending_anchors);
4595 _set_indentation(key_indentation);
/** Choose an indentation based on the pending tags and anchors.
 * Requires at least one pending tag or anchor. A current annotation is
 * seeded with the first anchor if any is pending, otherwise with the
 * first tag. All pending anchors and tags are then compared against it
 * by line and by indentation.
 * @param val_indentation the indentation to use when the chosen
 *        annotation is on a line before `val_line`
 * @param val_line the line compared against the chosen annotation's line
 * @return `val_indentation` when the chosen annotation is on an earlier
 *         line than `val_line`, else the chosen annotation's indentation */
4598 template<
class EventHandler>
4599 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4601 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
// seed with the first anchor when there is one, else the first tag
4603 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4604 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4606 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4607 if(ann.line > curr->line)
4609 else if(ann.indentation < curr->indentation)
4612 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4614 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4615 if(ann.line > curr->line)
4617 else if(ann.indentation < curr->indentation)
4620 return curr->line < val_line ? val_indentation : curr->indentation;
/** Set an alias as the key reference.
 * The alias is forwarded to the event handler when no tags or anchors
 * are pending; the error "aliases cannot have anchors or tags" is the
 * alternative outcome.
 * @param alias the alias text */
4623 template<
class EventHandler>
4624 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
// bitwise-or of the two counts: zero only when nothing is pending
4626 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4627 m_evt_handler->set_key_ref(alias);
4629 _c4err(
"aliases cannot have anchors or tags");
/** Set an alias as the val reference.
 * The alias is forwarded to the event handler when no tags or anchors
 * are pending; the error "aliases cannot have anchors or tags" is the
 * alternative outcome.
 * @param alias the alias text */
4632 template<
class EventHandler>
4633 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
// bitwise-or of the two counts: zero only when nothing is pending
4635 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4636 m_evt_handler->set_val_ref(alias);
4638 _c4err(
"aliases cannot have anchors or tags");
/** Resolve a tag against the tag directives of the current document.
 * The handler's tag cache is consulted first, and a cached resolution is
 * returned directly. Otherwise the tag is resolved into the remaining
 * space of the handler's arena; `bufsz` receives the required size. When
 * that space was too small, the arena is grown and the tag is resolved
 * again. When the handler requires strings on buffers, a resolved tag
 * that lies neither in the arena nor in the source buffer is copied into
 * the arena. The resolution is then added to the cache.
 * @param tag the tag to resolve */
4641 template<
class EventHandler>
4642 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4644 _c4dbgpf(
"resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
// fast path: a previous resolution of this tag for this document
4646 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4649 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4650 return ret.resolved;
4652 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
// first attempt: resolve into whatever space is left in the arena
4654 substr buf = m_evt_handler->arena_rem();
4655 TagDirectives
const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4656 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4657 m_evt_handler->m_curr->pos,
4658 m_evt_handler->m_stack.m_callbacks);
4659 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
// a null result string signals that the buffer was too small
4660 _c4assert((bufsz > buf.len) == (!ttag.str));
4661 _c4assert(!!bufsz == (ttag.len == bufsz));
// arena.len is the whole arena; arena.slack is what remains of it
4665 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena().len, m_evt_handler->arena_rem().len, ttag.len);
// tag is passed by address to the arena allocation
// NOTE(review): presumably so it can be relocated if the arena moves; confirm
4667 buf = _alloc_arena(bufsz, &tag);
// second attempt, now with enough room
4670 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4671 m_evt_handler->m_curr->pos,
4672 m_evt_handler->m_stack.m_callbacks);
4675 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4679 _c4dbgp(
"tag required arena. update size");
4682 (void)_alloc_arena(bufsz);
4684 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4685 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4687 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
// the resolved tag lives outside both buffers: copy it into the arena
4689 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4691 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4692 buf = _alloc_arena(ttag.len, &tag);
4694 memcpy(buf.str, ttag.str, ttag.len);
4696 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4699 C4_SUPPRESS_WARNING_MSVC_POP
4700 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
// remember the resolution at the position reported by the lookup
4703 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
/** Validate a %YAML directive and extract its version.
 * The directive must begin with "%YAML". The version starts at the first
 * character after position 5 that is neither space nor tab. It is
 * scanned as a run of digits, a '.', and another run of digits which may
 * extend to the end of the directive. On this path the directive is
 * narrowed via _set_first_strict() to end at the version, and `*version`
 * receives the version text.
 * @param directive [in,out] the directive text
 * @param version [out] the version text */
4707 template<
class EventHandler>
4708 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT
version)
const
4710 _c4assert(directive->begins_with(
"%YAML"));
// 5 == length of "%YAML"
4711 size_t version_start = directive->first_not_of(
" \t", 5);
4712 if(version_start !=
npos)
4714 csubstr digits =
"0123456789";
// end of the major number: it must be followed by a dot
4715 size_t major_end = directive->first_not_of(digits, version_start);
4716 if(major_end !=
npos && directive->str[major_end] ==
'.')
// end of the minor number: it may run to the end of the directive
4718 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4719 if(minor_end ==
npos)
4720 minor_end = directive->len;
4721 _set_first_strict(*directive, minor_end);
4722 *
version = directive->range(version_start, minor_end);
4723 _c4dbgpf(
"%YAML: version={} full={}", *
version, _prs(*directive,
true));
/** Validate a %TAG directive and extract its handle and prefix.
 * The directive must begin with "%TAG". The handle starts at the first
 * non-blank character after position 4 and must begin with '!'; it ends
 * at the next space or tab. The prefix starts at the next non-blank
 * character and ends at the next space or tab, or at the end of the
 * directive. On this path the directive is narrowed via
 * _set_first_strict() to end at the prefix, and the outputs are set.
 * @param directive [in,out] the directive text
 * @param handle [out] the tag handle
 * @param prefix [out] the tag prefix */
4730 template<
class EventHandler>
4731 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix)
const
4733 _c4assert(directive->begins_with(
"%TAG"));
4734 csubstr whitespace =
" \t";
// 4 == length of "%TAG"
4735 size_t handle_start = directive->first_not_of(whitespace, 4);
4736 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
// the handle must be followed by whitespace
4738 size_t handle_end = directive->first_of(whitespace, handle_start);
4739 if(handle_end !=
npos)
4741 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4742 if(prefix_start !=
npos)
// the prefix may run to the end of the directive
4744 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4745 if(prefix_end ==
npos)
4746 prefix_end = directive->len;
4747 _set_first_strict(*directive, prefix_end);
4748 *handle = directive->range(handle_start, handle_end);
4749 *prefix = directive->range(prefix_start, prefix_end);
4750 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive,
true));
/** Handle a directive line (%TAG or %YAML).
 * The directive must start at the remainder of the current line, which
 * begins with '%'. A directive name is recognized only when followed by
 * a space, a tab, or the end of the string. A %TAG directive is
 * validated and its handle and prefix are added to the handler. A %YAML
 * directive is validated, must not have been seen before, and its
 * version is added to the handler. The line is then advanced past the
 * directive and any following blanks; whatever remains must be empty or
 * a comment. Failures set an error message and jump to the
 * directive_error label.
 * @param directive the directive text */
4759 template<
class EventHandler>
4760 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4762 _c4dbgpf(
"handle_directive: rem={}", _prs(directive,
true));
4763 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4764 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
// set before each jump to directive_error
4765 const char *err =
nullptr;
// true when str starts with dir followed by a blank or by nothing
4768 auto isdirective = [](csubstr str, csubstr dir) {
4769 if(str.begins_with(dir))
4771 csubstr rest = str.sub(dir.len);
4772 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4776 if(isdirective(directive,
"%TAG"))
4780 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4782 err =
"invalid %TAG directive";
4783 goto directive_error;
4785 m_evt_handler->add_directive_tag(handle, prefix);
4787 else if(isdirective(directive,
"%YAML"))
4790 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version)))
4792 err =
"invalid %YAML directive";
4793 goto directive_error;
// only one %YAML directive is accepted
4795 if(C4_UNLIKELY(m_has_directives_yaml))
4797 err =
"multiple %YAML directives";
4798 goto directive_error;
4800 m_has_directives_yaml =
true;
4801 m_evt_handler->add_directive_yaml(
version);
4803 m_has_directives =
true;
// skip the directive and the blanks after it
4804 rem = m_evt_handler->m_curr->line_contents.rem;
4805 pos = rem.first_not_of(
" \t", directive.len);
4806 pos = pos !=
npos ? pos : rem.len;
4807 _line_progressed(pos);
4809 _c4dbgpf(
"handle_directive: rest={}", _prs(rem));
// only a comment may follow a directive on the same line
4810 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
4812 err =
"invalid tokens after directive";
4813 goto directive_error;
4816 if(C4_UNLIKELY(err !=
nullptr))
/** Detect a byte order mark (or an encoding signature) at the current
 * position of the line.
 * The checks are ordered from the longest pattern to the shortest:
 *  - UTF32BE: 00 00 FE FF, or three NUL bytes followed by an ASCII char
 *  - UTF32LE: FF FE 00 00, or an ASCII char followed by three NUL bytes
 *  - UTF16BE: FE FF, or a NUL byte followed by an ASCII char
 *  - UTF16LE: FF FE, or an ASCII char followed by a NUL byte
 *  - UTF8: EF BB BF
 * On a match the line is advanced by 4, 2 or 3 bytes respectively. */
4820 template<
class EventHandler>
4821 bool ParseEngine<EventHandler>::_handle_bom()
4823 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// the remainder after the first byte, used by the little-endian checks
4826 const csubstr rest = rem.sub(1);
// non-NUL 7-bit character
4828 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
4829 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4831 _c4dbgp(
"byte order mark: UTF32BE");
4833 _line_progressed(4);
// tested before UTF16LE, whose mark FF FE is a prefix of this one
4837 else if(rem.begins_with(csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4839 _c4dbgp(
"byte order mark: UTF32LE");
4841 _line_progressed(4);
4845 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4847 _c4dbgp(
"byte order mark: UTF16BE");
4849 _line_progressed(2);
4853 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4855 _c4dbgp(
"byte order mark: UTF16LE");
4857 _line_progressed(2);
4861 else if(rem.begins_with(
"\xef\xbb\xbf"))
4863 _c4dbgp(
"byte order mark: UTF8");
4865 _line_progressed(3);
/** Register the encoding announced by a byte order mark.
 * While no encoding has been recorded (m_encoding is NOBOM), the
 * condition checked is that the mark is UTF8 or that the current line
 * starts at the very beginning of the buffer; the error message
 * indicates that other marks are only accepted at the start of the
 * file. Once an encoding has been recorded, a different one is an error.
 * @param enc the encoding announced by the mark */
4874 template<
class EventHandler>
4875 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4877 if(m_encoding ==
NOBOM)
// the pointer comparison tests whether we are at the start of the buffer
4879 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4882 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4884 else if(enc != m_encoding)
4886 _c4err(
"byte order mark can only be set once");
/** Parse the contents of a JSON-style flow sequence.
 * The state must be a flow sequence (RSEQ and RFLOW) outside of RKEY,
 * and in exactly one of RVAL (a value is expected) or RNXT (a separator
 * or the end of the sequence is expected). In RVAL the visible code
 * handles double-quoted scalars, plain scalars, and nested flow
 * sequences and maps. In RNXT it adds a sibling for the next value or
 * ends the sequence. Reaching the end of the file without a closing
 * bracket raises "missing terminating ]". */
4893 template<
class EventHandler>
4894 void ParseEngine<EventHandler>::_handle_seq_json()
4897 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions
4899 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4900 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4901 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4902 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of RVAL / RNXT
4903 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4905 _handle_flow_skip_whitespace();
4906 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected
4912 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4913 const char first = rem.str[0];
4914 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4919 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4920 ScannedScalar sc = _scan_scalar_dquot();
4921 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4922 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4928 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4930 m_evt_handler->begin_seq_val_flow();
// consume the opening character
4932 _line_progressed(1);
4937 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4939 m_evt_handler->begin_map_val_flow();
// consume the opening character
4941 _line_progressed(1);
4942 goto seqjson_finish;
4946 _c4dbgp(
"seqjson[RVAL]: end!");
4949 _line_progressed(1);
4951 goto seqjson_finish;
4957 if(_scan_scalar_seq_json(&sc))
4959 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4960 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4961 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RNXT: a separator or the end of the sequence is expected
4973 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4974 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4975 const char first = rem.str[0];
4976 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4981 _c4dbgp(
"seqjson[RNXT]: expect next val");
4983 m_evt_handler->add_sibling();
4984 _line_progressed(1);
4989 _c4dbgp(
"seqjson[RNXT]: end!");
4991 _line_progressed(1);
4992 goto seqjson_finish;
// ---- continue with the next token, moving to the next line if needed
5000 _c4dbgt(
"seqjson: go again", 0);
5001 if(_finished_line())
5003 if(C4_LIKELY(!_finished_file()))
// the input ended inside the sequence
5011 _c4err(
"missing terminating ]");
5017 _c4dbgp(
"seqjson: finish");
/** Parse the contents of a JSON-style flow map.
 * The state must be a flow map (RMAP and RFLOW) without QMRK, and in
 * exactly one of RKEY, RKCL, RVAL or RNXT. In RKEY the visible code
 * handles a double-quoted key or the end of the map. In RKCL it expects
 * the colon. In RVAL it handles double-quoted scalars, plain scalars,
 * and nested flow sequences and maps. In RNXT it adds a sibling on ','
 * or ends the map on '}'. Reaching the end of the file without a
 * closing brace raises "missing terminating }". */
5023 template<
class EventHandler>
5024 void ParseEngine<EventHandler>::_handle_map_json()
5027 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions
5029 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5030 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5031 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5032 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of the four sub-states is active
5033 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5035 _handle_flow_skip_whitespace();
5036 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected
5042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5043 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5044 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5045 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5046 const char first = rem.str[0];
5047 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5052 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5053 ScannedScalar sc = _scan_scalar_dquot();
5054 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5055 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5061 _c4dbgp(
"mapjson[RKEY]: end!");
5063 _line_progressed(1);
5064 goto mapjson_finish;
// ---- RVAL: a value is expected
5070 else if(has_any(
RVAL))
5072 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5073 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5074 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5075 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5076 const char first = rem.str[0];
5077 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5082 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5083 ScannedScalar sc = _scan_scalar_dquot();
5084 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5085 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5091 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5093 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation of its parent state
5094 _set_indentation(m_evt_handler->m_parent->indref);
5096 _line_progressed(1);
5097 goto mapjson_finish;
5101 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5103 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation of its parent state
5104 _set_indentation(m_evt_handler->m_parent->indref);
5106 _line_progressed(1);
5113 if(_scan_scalar_map_json(&sc))
5115 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5116 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5117 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RKCL: the colon after a key is expected
5128 else if(has_any(
RKCL))
5130 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5131 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5132 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5133 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5134 const char first = rem.str[0];
5135 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5138 _c4dbgp(
"mapjson[RKCL]: found the colon");
5140 _line_progressed(1);
// ---- RNXT: a separator or the end of the map is expected
5147 else if(has_any(
RNXT))
5149 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5153 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5154 if(rem.begins_with(
','))
5156 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5157 m_evt_handler->add_sibling();
5159 _line_progressed(1);
5161 else if(rem.begins_with(
'}'))
5163 _c4dbgp(
"mapjson[RNXT]: end!");
5165 _line_progressed(1);
5166 goto mapjson_finish;
// ---- continue with the next token, moving to the next line if needed
5175 _c4dbgt(
"mapjson: go again", 0);
5176 if(_finished_line())
5178 if(C4_LIKELY(!_finished_file()))
// the input ended inside the map
5186 _c4err(
"missing terminating }");
5192 _c4dbgp(
"mapjson: finish");
// Handles parsing while the engine is in the RSEQIMAP state (asserted below).
// Exactly one of the sub-states RVAL, RNXT, QMRK or RKCL must be active, RKEY
// must be clear, and the handler stack must hold at least three entries.
// After skipping flow whitespace, the routine dispatches on the first
// character of the remaining line contents:
//   - RVAL: reads a value (quoted/plain scalar, nested '[' or '{' container,
//           empty value on ',' or ']', or a '*' ref / '&' anchor / '!' tag).
//   - RNXT: on ',' or ']' the map is done.
//   - QMRK: reads a key (scalar, nested container, empty key+val on ',' or
//           ']', or a '&' anchor / '*' ref).
//   - RKCL: consumes the ':' after a key, or finishes with an empty value on
//           ',' or ']'.
// NOTE(review): presumably RSEQIMAP denotes an implicit map nested inside a
// flow sequence -- inferred from the name and trace messages; confirm.
// NOTE(review): the embedded source line numbers in this listing are not
// contiguous, so some statements (braces, labels, some branch heads, the
// declaration of `sc`) are not visible here. The comments below describe
// only what is shown.
5198 template<
class EventHandler>
5199 void ParseEngine<EventHandler>::_handle_seq_imap()
5202 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Entry invariants: RSEQIMAP set, RKEY clear, exactly one sub-state active.
5204 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5205 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5206 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
5207 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5208 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
// Skip whitespace, then take a view of what is left of the current line.
5210 _handle_flow_skip_whitespace();
5211 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected -------------------------------------------
5217 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5218 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5219 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5220 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5221 const char first = rem.str[0];
5222 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// Single-quoted value: scan, optionally filter, run the annotation hook,
// set the value, then leave through seqimap_finish.
5226 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5227 sc = _scan_scalar_squot();
5228 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5229 _handle_annotations_before_blck_val_scalar();
5230 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5232 goto seqimap_finish;
// Double-quoted value: same sequence with the dquot scanner/filter.
5234 else if(first ==
'"')
5236 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5237 sc = _scan_scalar_dquot();
5238 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5239 _handle_annotations_before_blck_val_scalar();
5240 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5242 goto seqimap_finish;
// Plain value: the scanner reports via its return value whether a plain
// scalar was found; the plain filter also receives the current indref.
5245 else if(_scan_scalar_plain_map_flow(&sc))
5247 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5248 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5249 _handle_annotations_before_blck_val_scalar();
5250 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5252 goto seqimap_finish;
// '[': the value is a nested flow sequence. The new level takes the
// parent's indref and the opening bracket is consumed.
5254 else if(first ==
'[')
5256 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5258 _handle_annotations_before_blck_val_scalar();
5259 m_evt_handler->begin_seq_val_flow();
5261 _set_indentation(m_evt_handler->m_parent->indref);
5262 _line_progressed(1);
5263 goto seqimap_finish;
// '{': the value is a nested flow map; handled like the '[' case.
5265 else if(first ==
'{')
5267 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5269 _handle_annotations_before_blck_val_scalar();
5270 m_evt_handler->begin_map_val_flow();
5272 _set_indentation(m_evt_handler->m_parent->indref);
5273 _line_progressed(1);
5274 goto seqimap_finish;
// ',' or ']': no value was given, so an empty plain scalar is set. The
// delimiter itself is not consumed in the visible lines.
5276 else if(first ==
',' || first ==
']')
5278 _c4dbgp(
"seqimap[RVAL]: finish without val.");
5279 _handle_annotations_before_blck_val_scalar();
5280 m_evt_handler->set_val_scalar_plain_empty();
5282 goto seqimap_finish;
// '*': the value is a reference, handed to _handle_valref().
5284 else if(first ==
'*')
5286 csubstr ref = _scan_ref_seq();
5287 _c4dbgpf(
"seqimap[RVAL]: ref! {}", _prs(ref));
5288 _handle_valref(ref);
// '&' / '!': anchors and tags are queued in the pending-annotation lists
// rather than being set directly on the handler.
5291 else if(first ==
'&')
5293 csubstr anchor = _scan_anchor();
5294 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", _prs(anchor));
5295 _add_annotation(&m_pending_anchors, anchor);
5297 else if(first ==
'!')
5299 csubstr tag = _scan_tag();
5300 _c4dbgpf(
"seqimap[RVAL]: tag! {}", _prs(tag));
5301 _add_annotation(&m_pending_tags, tag);
// ---- RNXT: the pair is complete; only ',' or ']' is handled here ----------
5308 else if(has_any(
RNXT))
5310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5311 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5312 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5313 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5314 const char first = rem.str[0];
5315 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5316 if(first ==
',' || first ==
']')
5320 _c4dbgp(
"seqimap: done");
5322 goto seqimap_finish;
// ---- QMRK: a key is expected ----------------------------------------------
// NOTE(review): presumably entered after an explicit '?' key indicator --
// inferred from the flag name; confirm.
5329 else if(has_any(
QMRK))
5331 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5332 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5333 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5334 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5335 const char first = rem.str[0];
5336 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// Key scalars use the *key* filter/setter variants. Unlike the RVAL
// branches, no annotation hook is called in the visible lines.
5340 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5341 sc = _scan_scalar_squot();
5342 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5343 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5347 else if(first ==
'"')
5349 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5350 sc = _scan_scalar_dquot();
5351 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5352 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5357 else if(_scan_scalar_plain_map_flow(&sc))
5359 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5360 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5361 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '[' / '{': the key itself is a flow container.
5365 else if(first ==
'[')
5367 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5369 m_evt_handler->begin_seq_key_flow();
5371 _set_indentation(m_evt_handler->m_parent->indref);
5372 _line_progressed(1);
5373 goto seqimap_finish;
5375 else if(first ==
'{')
5377 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5379 m_evt_handler->begin_map_key_flow();
5381 _set_indentation(m_evt_handler->m_parent->indref);
5382 _line_progressed(1);
5383 goto seqimap_finish;
// ',' or ']': neither key nor value was given; both are set empty.
5385 else if(first ==
',' || first ==
']')
5387 _c4dbgp(
"seqimap[QMRK]: finish without key.");
5388 m_evt_handler->set_key_scalar_plain_empty();
5389 m_evt_handler->set_val_scalar_plain_empty();
5391 goto seqimap_finish;
// '&': the key anchor is set directly on the handler here (the RVAL branch
// queues anchors in m_pending_anchors instead).
5393 else if(first ==
'&')
5395 csubstr anchor = _scan_anchor();
5396 _c4dbgp(
"seqimap[QMRK]: anchor!");
5397 m_evt_handler->set_key_anchor(anchor);
5399 else if(first ==
'*')
5401 csubstr ref = _scan_ref_seq();
5402 _c4dbgp(
"seqimap[QMRK]: ref!");
5403 _handle_keyref(ref);
// ---- RKCL: a key was read; expect the ':' separator -----------------------
5411 else if(has_any(
RKCL))
5413 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5414 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5415 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5416 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5417 const char first = rem.str[0];
5418 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
// Colon found: consume the single ':' character.
5421 _c4dbgp(
"seqimap[RKCL]: found ':'");
5423 _line_progressed(1);
// ',' or ']' instead of ':': the key has no value; set an empty one.
// (The trace text mentions only ',' although ']' also lands here.)
5426 else if(first ==
',' || first ==
']')
5428 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5429 m_evt_handler->set_val_scalar_plain_empty();
5431 goto seqimap_finish;
// ---- tail: decide whether to continue with more input ----------------------
// NOTE(review): the statements guarded by these conditions are not visible
// in this listing.
5440 _c4dbgt(
"seqimap: go again", 0);
5441 if(_finished_line())
5443 if(C4_LIKELY(!_finished_file()))
5457 _c4dbgp(
"seqimap: finish");
5463 template<
class EventHandler>
5464 void ParseEngine<EventHandler>::_handle_seq_flow()
5467 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5469 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5470 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5471 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5472 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
5473 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5474 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
5476 if(m_evt_handler->m_curr->at_line_beginning())
5478 _handle_flow_line_beginning();
5481 _handle_flow_skip_whitespace();
5482 if(!m_evt_handler->m_curr->line_contents.rem.len)
5487 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5488 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5492 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5493 sc = _scan_scalar_squot();
5494 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5495 _handle_annotations_before_blck_val_scalar();
5496 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5498 _mark_seqflow_val_end();
5500 else if(first ==
'"')
5502 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5503 sc = _scan_scalar_dquot();
5504 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5505 _handle_annotations_before_blck_val_scalar();
5506 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5508 _mark_seqflow_val_end();
5511 else if(_scan_scalar_plain_seq_flow(&sc))
5513 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5514 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5515 _handle_annotations_before_blck_val_scalar();
5516 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5518 _mark_seqflow_val_end();
5520 else if(first ==
'[')
5522 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5524 _handle_annotations_before_blck_val_scalar();
5525 m_evt_handler->begin_seq_val_flow();
5526 _set_indentation(m_evt_handler->m_parent->indref);
5528 _line_progressed(1);
5530 else if(first ==
'{')
5532 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5534 _handle_annotations_before_blck_val_scalar();
5535 m_evt_handler->begin_map_val_flow();
5536 _set_indentation(m_evt_handler->m_parent->indref);
5538 _line_progressed(1);
5539 goto seqflow_finish;
5541 else if(first ==
']')
5543 _c4dbgp(
"seqflow[RVAL]: end!");
5544 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5546 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5547 _handle_annotations_before_blck_val_scalar();
5548 m_evt_handler->set_val_scalar_plain_empty();
5550 _line_progressed(1);
5552 goto seqflow_finish;
5554 else if(first ==
'*')
5556 csubstr ref = _scan_ref_seq();
5557 _c4dbgpf(
"seqflow[RVAL]: ref! {}", _prs(ref));
5558 _handle_valref(ref);
5561 else if(first ==
'&')
5563 csubstr anchor = _scan_anchor();
5564 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", _prs(anchor));
5565 _add_annotation(&m_pending_anchors, anchor);
5567 else if(first ==
'!')
5569 csubstr tag = _scan_tag();
5570 _c4dbgpf(
"seqflow[RVAL]: tag! {}", _prs(tag));
5571 _add_annotation(&m_pending_tags, tag);
5573 else if(first ==
':')
5575 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5577 m_evt_handler->begin_map_val_flow();
5578 _set_indentation(m_evt_handler->m_parent->indref);
5579 _handle_annotations_before_blck_key_scalar();
5580 m_evt_handler->set_key_scalar_plain_empty();
5582 _line_progressed(1);
5583 goto seqflow_finish;
5585 else if(first ==
'?')
5587 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5589 m_evt_handler->begin_map_val_flow();
5590 _set_indentation(m_evt_handler->m_parent->indref);
5592 _line_progressed(1);
5593 _maybe_skip_whitespace_tokens();
5594 goto seqflow_finish;
5596 else if(first ==
',')
5598 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5600 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5601 _handle_annotations_before_blck_val_scalar();
5602 m_evt_handler->set_val_scalar_plain_empty();
5604 _mark_seqflow_val_end();
5618 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5619 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5620 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5623 _c4dbgp(
"seqflow[RNXT]: expect next val");
5625 m_evt_handler->add_sibling();
5626 _line_progressed(1);
5627 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5629 _c4err(
"parse error: invalid comment after comma");
5631 _mark_seqflow_val_end();
5633 else if(first ==
']')
5635 _c4dbgp(
"seqflow[RNXT]: end!");
5636 _line_progressed(1);
5638 goto seqflow_finish;
5640 else if(first ==
':')
5642 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5643 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5645 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5646 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5647 _set_indentation(m_evt_handler->m_parent->indref);
5648 _line_progressed(1);
5650 goto seqflow_finish;
5664 _c4dbgt(
"seqflow: go again", 0);
5665 if(_finished_line())
5667 if(C4_LIKELY(!_finished_file()))
5675 _c4err(
"missing terminating ]");
5681 _c4dbgp(
"seqflow: finish");
// Handles parsing inside a flow map. On entry (asserted below) the state must
// have RMAP and RFLOW set and exactly one of RKEY, RKCL, RVAL, RNXT or QMRK
// active. The routine dispatches on the first remaining character:
//   - RKEY: reads a key (scalar, '?' explicit key, ':' empty key, nested
//           '[' / '{' key container, '&' anchor, '!' tag, '*' ref), or
//           handles ',' / '}'.
//   - RKCL: consumes the ':' after a key, or emits an empty value on
//           '}' / ','.
//   - RVAL: reads a value (scalar, nested container, empty value on
//           '}' / ',', '*' ref, '&' anchor, '!' tag).
//   - RNXT: handles ',' (add a sibling) and '}' (end).
//   - QMRK: reads a key like RKEY does; '}' / ',' produce an empty key and
//           an empty value.
// The tail section raises "missing terminating }" via _c4err.
// NOTE(review): the embedded source line numbers in this listing are not
// contiguous, so some statements (braces, labels, some branch heads, the
// declaration of `sc`) are not visible here. The comments below describe
// only what is shown.
5687 template<
class EventHandler>
5688 void ParseEngine<EventHandler>::_handle_map_flow()
5691 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Entry invariants.
5693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5694 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5695 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
5696 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// Line-start handling happens only when the cursor is at the beginning of
// a line; whitespace is skipped in every case.
5698 if(m_evt_handler->m_curr->at_line_beginning())
5700 _handle_flow_line_beginning();
5703 _handle_flow_skip_whitespace();
// Nothing left on this line (the guarded statement is not visible here).
5704 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY: a key is expected -----------------------------------------------
5709 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5710 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5711 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5712 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5713 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5714 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// Key scalars: scan, optionally filter, run the key annotation hook, then
// set the key.
5718 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5719 sc = _scan_scalar_squot();
5720 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5721 _handle_annotations_before_blck_key_scalar();
5722 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5725 else if(first ==
'"')
5727 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5728 sc = _scan_scalar_dquot();
5729 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5730 _handle_annotations_before_blck_key_scalar();
5731 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5735 else if(_scan_scalar_plain_map_flow(&sc))
5737 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5738 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5739 _handle_annotations_before_blck_key_scalar();
5740 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit key indicator; consume it and skip whitespace tokens.
5743 else if(first ==
'?')
5745 _c4dbgp(
"mapflow[RKEY]: explicit key");
5746 _handle_annotations_before_blck_key_scalar();
5748 _line_progressed(1);
5749 _maybe_skip_whitespace_tokens();
// ':' with no key text: the key is an empty plain scalar.
5751 else if(first ==
':')
5753 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5754 _handle_annotations_before_blck_key_scalar();
5755 m_evt_handler->set_key_scalar_plain_empty();
5757 _line_progressed(1);
5758 _maybe_skip_whitespace_tokens();
// ',' where a key was expected: an error unless the annotation helper
// reports (by returning true) that it handled the situation.
5760 else if(first ==
',')
5762 _c4dbgp(
"mapflow[RKEY]: comma!");
5763 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5764 _c4err(
"unexpected comma");
// '}': end of the map. The helper's result is deliberately ignored here.
5768 else if(first ==
'}')
5770 _c4dbgp(
"mapflow[RKEY]: end!");
5771 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5772 _line_progressed(1);
5774 goto mapflow_finish;
// '&' / '!': anchors and tags are queued in the pending-annotation lists.
5776 else if(first ==
'&')
5778 csubstr anchor = _scan_anchor();
5779 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", _prs(anchor));
5780 _add_annotation(&m_pending_anchors, anchor);
5782 else if(first ==
'!')
5784 csubstr tag = _scan_tag();
5785 _c4dbgpf(
"mapflow[RKEY]: tag! {}", _prs(tag));
5786 _add_annotation(&m_pending_tags, tag);
// '*': the key is a reference, handed to _handle_keyref().
5788 else if(first ==
'*')
5790 csubstr ref = _scan_ref_map();
5791 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", _prs(ref));
5792 _handle_keyref(ref);
// '[' / '{': the key itself is a flow container. The new level takes the
// parent's indref and the opening character is consumed.
5795 else if(first ==
'[')
5800 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5801 _handle_annotations_before_blck_key_scalar();
5803 m_evt_handler->begin_seq_key_flow();
5805 _set_indentation(m_evt_handler->m_parent->indref);
5806 _line_progressed(1);
5807 goto mapflow_finish;
5809 else if(first ==
'{')
5814 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5815 _handle_annotations_before_blck_key_scalar();
5817 m_evt_handler->begin_map_key_flow();
5819 _set_indentation(m_evt_handler->m_parent->indref);
5820 _line_progressed(1);
// ---- RKCL: a key was read; expect the ':' separator ------------------------
5828 else if(has_any(
RKCL))
5830 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5831 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5832 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5833 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5834 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5835 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
// Colon found: consume the single ':' character.
5838 _c4dbgp(
"mapflow[RKCL]: found the colon");
5840 _line_progressed(1);
// '}' right after a key: the value is missing; set an empty one and end.
5842 else if(first ==
'}')
5844 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5846 m_evt_handler->set_val_scalar_plain_empty();
5847 _line_progressed(1);
5849 goto mapflow_finish;
// ',' right after a key: set an empty value and add a sibling. A '#'
// directly after the comma is rejected as an invalid comment.
5851 else if(first ==
',')
5853 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5854 m_evt_handler->set_val_scalar_plain_empty();
5855 m_evt_handler->add_sibling();
5857 _line_progressed(1);
5858 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5860 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL: a value is expected ---------------------------------------------
5868 else if(has_any(
RVAL))
5870 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5871 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5872 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5873 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5874 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5875 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// Value scalars: scan, optionally filter, run the val annotation hook,
// then set the value.
5879 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5880 sc = _scan_scalar_squot();
5881 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5882 _handle_annotations_before_blck_val_scalar();
5883 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5886 else if(first ==
'"')
5888 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5889 sc = _scan_scalar_dquot();
5890 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5891 _handle_annotations_before_blck_val_scalar();
5892 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5896 else if(_scan_scalar_plain_map_flow(&sc))
5898 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5899 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5900 _handle_annotations_before_blck_val_scalar();
5901 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// '[' / '{': the value is a nested flow container.
5904 else if(first ==
'[')
5906 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5908 _handle_annotations_before_blck_val_scalar();
5909 m_evt_handler->begin_seq_val_flow();
5910 _set_indentation(m_evt_handler->m_parent->indref);
5912 _line_progressed(1);
5913 goto mapflow_finish;
5915 else if(first ==
'{')
5917 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5919 _handle_annotations_before_blck_val_scalar();
5920 m_evt_handler->begin_map_val_flow();
5921 _set_indentation(m_evt_handler->m_parent->indref);
5923 _line_progressed(1);
// '}' while a value was expected: set an empty value and end the map.
5926 else if(first ==
'}')
5928 _c4dbgp(
"mapflow[RVAL]: end!");
5929 _handle_annotations_before_blck_val_scalar();
5930 m_evt_handler->set_val_scalar_plain_empty();
5931 _line_progressed(1);
5933 goto mapflow_finish;
// ',' while a value was expected: set an empty value. The comma itself is
// not consumed in the visible lines.
5935 else if(first ==
',')
5937 _c4dbgp(
"mapflow[RVAL]: empty val!");
5938 _handle_annotations_before_blck_val_scalar();
5939 m_evt_handler->set_val_scalar_plain_empty();
// '*': the value is a reference. The trace says "val ref" because this
// branch is on the value side (RKEY is asserted clear and the reference
// goes to _handle_valref()).
5943 else if(first ==
'*')
5945 csubstr ref = _scan_ref_map();
5946 _c4dbgpf(
"mapflow[RVAL]: val ref! {}", _prs(ref));
5947 _handle_valref(ref);
// '&': anchor seen while reading a value; queued as a pending annotation.
// The trace says "val anchor" for the same reason as above.
5950 else if(first ==
'&')
5952 csubstr anchor = _scan_anchor();
5953 _c4dbgpf(
"mapflow[RVAL]: val anchor! {}", _prs(anchor));
5954 _add_annotation(&m_pending_anchors, anchor);
5956 else if(first ==
'!')
5958 csubstr tag = _scan_tag();
5959 _c4dbgpf(
"mapflow[RVAL]: tag! {}", _prs(tag));
5960 _add_annotation(&m_pending_tags, tag);
// ---- RNXT: a key/value pair is complete; expect ',' or '}' ------------------
5967 else if(has_any(
RNXT))
5969 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5970 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5971 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5972 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5973 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
// ',': add a sibling and consume the comma. A '#' directly after it is
// rejected as an invalid comment.
5974 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5976 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5977 m_evt_handler->add_sibling();
5979 _line_progressed(1);
5980 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5982 _c4err(
"parse error: invalid comment after comma");
// '}': end of the map; consume it and finish.
5985 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5987 _c4dbgp(
"mapflow[RNXT]: end!");
5988 _line_progressed(1);
5990 goto mapflow_finish;
// ---- QMRK: a key is expected -------------------------------------------------
// NOTE(review): presumably entered after the '?' explicit-key indicator
// handled in the RKEY branch -- the flag change is not visible here; confirm.
5997 else if(has_any(
QMRK))
5999 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6000 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6001 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6002 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6003 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6004 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// Key scalars, handled as in the RKEY branch.
6008 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
6009 sc = _scan_scalar_squot();
6010 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6011 _handle_annotations_before_blck_key_scalar();
6012 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6015 else if(first ==
'"')
6017 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6018 sc = _scan_scalar_dquot();
6019 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6020 _handle_annotations_before_blck_key_scalar();
6021 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6025 else if(_scan_scalar_plain_map_flow(&sc))
6027 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6028 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6029 _handle_annotations_before_blck_key_scalar();
6030 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' with no key text: the key is an empty plain scalar.
6033 else if(first ==
':')
6035 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6036 _handle_annotations_before_blck_key_scalar();
6037 m_evt_handler->set_key_scalar_plain_empty();
6039 _line_progressed(1);
6040 _maybe_skip_whitespace_tokens();
// '}': neither key nor value was given; set both empty and end the map.
6042 else if(first ==
'}')
6044 _c4dbgp(
"mapflow[QMRK]: end!");
6045 _handle_annotations_before_blck_key_scalar();
6046 m_evt_handler->set_key_scalar_plain_empty();
6047 m_evt_handler->set_val_scalar_plain_empty();
6049 _line_progressed(1);
6050 goto mapflow_finish;
// ',': neither key nor value was given; set both empty. The comma itself
// is not consumed in the visible lines.
6052 else if(first ==
',')
6054 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6055 _handle_annotations_before_blck_key_scalar();
6056 m_evt_handler->set_key_scalar_plain_empty();
6057 m_evt_handler->set_val_scalar_plain_empty();
// '&' / '*': key anchor is queued; key reference goes to _handle_keyref().
6060 else if(first ==
'&')
6062 csubstr anchor = _scan_anchor();
6063 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", _prs(anchor));
6064 _add_annotation(&m_pending_anchors, anchor);
6066 else if(first ==
'*')
6068 csubstr ref = _scan_ref_map();
6069 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", _prs(ref));
6070 _handle_keyref(ref);
// '[' / '{': the key itself is a flow container.
6073 else if(first ==
'[')
6078 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6080 _handle_annotations_before_blck_key_scalar();
6081 m_evt_handler->begin_seq_key_flow();
6083 _set_indentation(m_evt_handler->m_parent->indref);
6084 _line_progressed(1);
6085 goto mapflow_finish;
6087 else if(first ==
'{')
6092 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6094 _handle_annotations_before_blck_key_scalar();
6095 m_evt_handler->begin_map_key_flow();
6096 _set_indentation(m_evt_handler->m_parent->indref);
6098 _line_progressed(1);
// '!': tag is queued as a pending annotation.
6101 else if(first ==
'!')
6103 csubstr tag = _scan_tag();
6104 _c4dbgpf(
"mapflow[QMRK]: tag! {}", _prs(tag));
6105 _add_annotation(&m_pending_tags, tag);
// ---- tail: continue with more input or report an unterminated map ----------
// NOTE(review): presumably the error fires when the file ends before '}'
// is seen; the exact nesting is not visible in this listing.
6114 _c4dbgt(
"mapflow: go again", 0);
6115 if(_finished_line())
6117 if(C4_LIKELY(!_finished_file()))
6125 _c4err(
"missing terminating }");
6131 _c4dbgp(
"mapflow: finish");
// Parses content of a block-style sequence for the current parser state.
// Preconditions (asserted below): the state has RSEQ and RBLCK set, and
// exactly one of RVAL or RNXT. The debug tags "seqblck[RVAL]" and
// "seqblck[RNXT]" identify which of the two sub-states a section serves.
// Events are reported through m_evt_handler; most branches that open a
// child container leave via the seqblck_finish label.
6137 template<
class EventHandler>
6138 void ParseEngine<EventHandler>::_handle_seq_block()
6141 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6143 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6144 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6145 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// has_any() results are summed so that exactly one of the two flags may be set.
6146 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
// Skip a comment if there is one, then test whether anything is left on the line.
6148 _maybe_skip_comment_strict();
6149 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: the value of a sequence entry is expected ----
6154 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// At the start of a line the line's indentation is compared against this
// sequence's reference indentation (indref) before any token is read.
6156 if(m_evt_handler->m_curr->at_line_beginning())
6158 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
// Enough indentation for a value (presumably >= indref+1 -- confirm against
// indentation_ge_extra()): consume the indentation.
6159 if(m_evt_handler->m_curr->indentation_ge_extra())
6161 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6162 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6163 if(!m_evt_handler->m_curr->line_contents.rem.len)
6166 else if(m_evt_handler->m_curr->indentation_lt_extra())
6168 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// Same indentation as the sequence itself: the pending entry receives an
// empty plain scalar as its value.
6169 if(m_evt_handler->m_curr->indentation_eq())
6171 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6172 _handle_annotations_before_blck_val_scalar();
6173 m_evt_handler->set_val_scalar_plain_empty();
// Otherwise the indentation must be smaller than the sequence's (asserted):
// pop out of this sequence and finish.
6179 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6180 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6181 _handle_indentation_pop_from_block_seq();
6182 goto seqblck_finish;
// Indentation recorded as npos: treated as an empty line and consumed whole.
6185 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6187 _c4dbgp(
"seqblck[RVAL]: empty line!");
6188 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Token handling for RVAL. Leading whitespace is skipped first; startmark
// and tabmark are kept because later branches pass them to
// _handle_block_check_leading_tabs().
6192 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6193 const size_t startmark = _handle_block_skip_leading_whitespace();
6194 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6195 if(startmark ==
npos)
6197 _c4dbgp(
"seqblck[RVAL]: whitespace only");
6200 const size_t tabmark = _handle_block_get_whitespace_mark();
6201 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6202 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
// Line and column at which the token starts. They are handed to the
// annotation helpers and used as the indentation of child containers.
// m_bom_len is subtracted from the column.
6203 const size_t startline = m_evt_handler->m_curr->pos.line;
6204 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// Single-quoted scalar: without a following ':' it is the entry's value;
// with one, it becomes the first key of a new child block map.
6208 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6209 sc = _scan_scalar_squot();
6210 if(!_maybe_scan_following_colon())
6212 _c4dbgp(
"seqblck[RVAL]: set as val");
6213 _handle_annotations_before_blck_val_scalar();
6214 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6215 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6220 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6221 _handle_block_check_leading_tabs(startmark);
6223 _handle_annotations_before_start_mapblck(startline);
6225 m_evt_handler->begin_map_val_block();
6226 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
// The scalar is a key here, so the key variant of the filter is used.
6227 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6228 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6230 _maybe_skip_whitespace_tokens();
6231 goto seqblck_finish;
// Double-quoted scalar: same value-or-key decision as for single quotes.
6234 else if(first ==
'"')
6236 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6237 sc = _scan_scalar_dquot();
6238 if(!_maybe_scan_following_colon())
6240 _c4dbgp(
"seqblck[RVAL]: set as val");
6241 _handle_annotations_before_blck_val_scalar();
6242 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6243 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6248 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6250 _handle_block_check_leading_tabs(startmark);
6251 _handle_annotations_before_start_mapblck(startline);
6253 m_evt_handler->begin_map_val_block();
6254 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6255 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6256 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6258 _maybe_skip_whitespace_tokens();
6259 goto seqblck_finish;
// '|' block literal scalar: scanned with indref+1 as the indentation
// argument and emitted as the entry's value.
6265 else if(first ==
'|')
6267 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6269 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6270 _handle_annotations_before_blck_val_scalar();
6271 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6272 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>' block folded scalar: handled like the literal case, with the folded filter.
6275 else if(first ==
'>')
6277 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6279 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6280 _handle_annotations_before_blck_val_scalar();
6281 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6282 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: the value of the entry, or (when a ':' follows) the first
// key of a new child block map.
6285 else if(_scan_scalar_plain_seq_blck(&sc))
6287 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6288 if(!_maybe_scan_following_colon())
6290 _c4dbgp(
"seqblck[RVAL]: set as val");
6291 _handle_annotations_before_blck_val_scalar();
6292 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6293 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// A child map must start at a column greater than the sequence's indref.
6298 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6299 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6300 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
// Unlike the quoted cases, the plain-scalar case also passes tabmark.
6301 _handle_block_check_leading_tabs(startmark, tabmark);
6303 _handle_annotations_before_start_mapblck(startline);
6305 m_evt_handler->begin_map_val_block();
6306 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6307 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6308 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6310 _maybe_skip_whitespace_tokens();
6311 goto seqblck_finish;
// '[' opens a flow sequence as the entry's value. The child's indentation
// is set from the parent state's indref + 1.
6314 else if(first ==
'[')
6316 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6318 _handle_annotations_before_blck_val_scalar();
6319 m_evt_handler->begin_seq_val_flow();
6321 _line_progressed(1);
6322 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6323 goto seqblck_finish;
// '{' opens a flow map as the entry's value, with the same indentation rule.
6325 else if(first ==
'{')
6327 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6329 _handle_annotations_before_blck_val_scalar();
6330 m_evt_handler->begin_map_val_flow();
6332 _line_progressed(1);
6333 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6334 goto seqblck_finish;
// '-' opens a nested block sequence whose indentation is the dash's column.
6336 else if(first ==
'-')
6338 _c4dbgp(
"seqblck[RVAL]: dash");
6339 _handle_block_check_leading_tabs(startmark);
6340 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6341 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6342 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
// NOTE(review): this repeats the startindent > indref assertion made two statements above.
6343 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6345 _handle_annotations_before_blck_val_scalar();
6346 m_evt_handler->begin_seq_val_block();
6348 _set_indentation(startindent);
6350 _line_progressed(1);
// ':' with no preceding scalar: child block map whose first key is empty.
6352 else if(first ==
':')
6354 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6356 _handle_annotations_before_start_mapblck(startline);
6358 m_evt_handler->begin_map_val_block();
6359 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6360 m_evt_handler->set_key_scalar_plain_empty();
6362 _line_progressed(1);
6363 _maybe_skip_whitespace_tokens();
6364 goto seqblck_finish;
// '&' anchor: stored in m_pending_anchors together with its start position.
6366 else if(first ==
'&')
6368 const csubstr anchor = _scan_anchor();
6369 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", _prs(anchor));
6372 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*' reference: used as the entry's value, or as the first key of a new
// child block map when a ':' follows.
6374 else if(first ==
'*')
6376 csubstr ref = _scan_ref_seq();
6377 _c4dbgpf(
"seqblck[RVAL]: ref! {}", _prs(ref));
6378 if(!_maybe_scan_following_colon())
6380 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6381 _handle_valref(ref);
6386 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6388 _handle_annotations_before_start_mapblck(startline);
6389 m_evt_handler->begin_map_val_block();
6390 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6391 _handle_keyref(ref);
6393 _set_indentation(startindent);
6394 _maybe_skip_whitespace_tokens();
6395 goto seqblck_finish;
// '!' tag: stored in m_pending_tags together with its start position.
6398 else if(first ==
'!')
6400 csubstr tag = _scan_tag();
6401 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", _prs(tag));
6404 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' explicit key: starts a child block map at the '?' column. If a block
// sequence token follows the '?', the key itself is opened as a block sequence.
6406 else if(first ==
'?')
6408 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6410 m_evt_handler->begin_map_val_block();
6412 _set_indentation(startindent);
6413 _line_progressed(1);
6414 _maybe_skipchars(
' ');
6415 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6417 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6419 m_evt_handler->begin_seq_key_block();
6421 _save_indentation();
6422 _line_progressed(1);
6423 _maybe_skipchars(
' ');
6425 goto seqblck_finish;
// ---- RNXT: the next sequence entry is expected ----
6434 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6435 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6439 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// Indentation handling: expected to happen at the start of a line (C4_LIKELY).
6440 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6442 _c4dbgp(
"seqblck[RNXT]: at line begin");
// Indentation is sufficient (indentation_ge): consume indref columns.
6443 if(m_evt_handler->m_curr->indentation_ge())
6445 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6446 _line_progressed(m_evt_handler->m_curr->indref);
6447 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Smaller indentation: pop enclosing levels. Afterwards the state is either
// still a block sequence (continue here) or not (finish).
6450 else if(m_evt_handler->m_curr->indentation_lt())
6452 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6453 _handle_indentation_pop_from_block_seq();
6456 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6457 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6458 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6459 if(!m_evt_handler->m_curr->line_contents.rem.len)
6464 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6465 goto seqblck_finish;
// Indentation recorded as npos: blank line, consumed whole.
6468 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6470 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6471 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6472 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Not at the start of a line: the remainder is tested for leading space or tab.
6478 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6479 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6486 if(!m_evt_handler->m_curr->line_contents.rem.len)
6488 _c4dbgp(
"seqblck[RNXT]: again");
// Token handling for RNXT.
6496 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6497 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6498 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// Tells the next sequence entry apart from a document-begin token. The
// document-begin case is reached only when indref and the line indentation
// are both zero and the remainder is a document-begin token.
6501 if(m_evt_handler->m_curr->indref > 0
6502 || m_evt_handler->m_curr->line_contents.indentation > 0
6503 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6505 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6507 _c4dbgp(
"seqblck[RNXT]: expect next val");
6509 m_evt_handler->add_sibling();
6510 _line_progressed(1);
// Document-begin token: start a new document and consume its 3 characters.
6519 _c4dbgp(
"seqblck[RNXT]: start doc");
6520 _start_doc_suddenly();
6521 _line_progressed(3);
6522 _maybe_skip_whitespace_tokens();
6523 goto seqblck_finish;
// ':' while expecting the next entry: when the parent state is a map, this
// sequence was that map's explicit ('?') key, so the sequence is closed.
6526 else if(first ==
':')
6532 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP)))
6534 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6535 m_evt_handler->end_seq_block();
6536 goto seqblck_finish;
// '.': possibly a document-end token (3 characters are consumed when it is).
6543 else if(first ==
'.')
6545 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6546 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6548 _c4dbgp(
"seqblck[RNXT]: end doc");
6549 _end_doc_suddenly();
6550 _line_progressed(3);
6551 _maybe_skip_whitespace_tokens();
6552 _check_doc_end_tokens();
6553 goto seqblck_finish;
// End of an "indentless" sequence: the parent is a block map with the same
// indref as this sequence. Pop back to the parent and prepare its next entry.
6564 _print_state_stack();
6566 if(m_evt_handler->m_parent
6567 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6568 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6570 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6571 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6572 _handle_indentation_pop(m_evt_handler->m_parent);
6573 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6574 m_evt_handler->add_sibling();
6576 goto seqblck_finish;
// Leading tab: finds the first non-tab position; the visible path then
// consumes the whole remainder of the line.
6578 else if(first ==
'\t')
6580 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6583 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Loop tail: checks whether the current line, and then the file, is finished.
6592 _c4dbgt(
"seqblck: go again", 0);
6593 if(_finished_line())
6598 if(_finished_file())
6600 _c4dbgp(
"seqblck: finish!");
6602 goto seqblck_finish;
6609 _c4dbgp(
"seqblck: finish");
6615 template<
class EventHandler>
6616 void ParseEngine<EventHandler>::_handle_map_block()
6619 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6623 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6624 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6625 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6626 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
6628 _maybe_skip_comment();
6629 if(!m_evt_handler->m_curr->line_contents.rem.len)
6634 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6635 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6636 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6637 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6641 if(m_evt_handler->m_curr->at_line_beginning())
6643 if(m_evt_handler->m_curr->indentation_eq())
6645 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6646 _line_progressed(m_evt_handler->m_curr->indref);
6647 if(!m_evt_handler->m_curr->line_contents.rem.len)
6650 else if(m_evt_handler->m_curr->indentation_lt())
6652 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6653 _handle_indentation_pop_from_block_map();
6654 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6657 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6658 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6659 if(!m_evt_handler->m_curr->line_contents.rem.len)
6664 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6665 goto mapblck_finish;
6670 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6671 _c4err(
"invalid indentation");
6677 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6678 const size_t startline = m_evt_handler->m_curr->pos.line;
6679 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6680 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
6684 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6685 sc = _scan_scalar_squot();
6686 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6687 _handle_annotations_before_blck_key_scalar();
6688 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6690 if(!_maybe_scan_following_colon())
6691 _c4err(
"could not find ':' colon after key");
6693 _maybe_skip_whitespace_tokens();
6695 else if(first ==
'"')
6697 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6698 sc = _scan_scalar_dquot();
6699 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6700 _handle_annotations_before_blck_key_scalar();
6701 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6703 if(!_maybe_scan_following_colon())
6704 _c4err(
"could not find ':' colon after key");
6706 _maybe_skip_whitespace_tokens();
6710 else if(C4_UNLIKELY(first ==
'|'))
6712 _c4err(
"block map: literal keys must be enclosed in '?'");
6714 else if(C4_UNLIKELY(first ==
'>'))
6716 _c4err(
"block map: folded keys must be enclosed in '?'");
6718 else if(_scan_scalar_plain_map_blck(&sc))
6720 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6721 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6722 _handle_annotations_before_blck_key_scalar();
6723 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6725 if(!_maybe_scan_following_colon())
6726 _c4err(
"could not find ':' colon after key");
6728 _maybe_skip_whitespace_tokens();
6730 else if(first ==
'?')
6732 _c4dbgp(
"mapblck[RKEY]: key token!");
6734 _line_progressed(1);
6735 _maybe_skipchars(
' ');
6736 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6738 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6740 m_evt_handler->begin_seq_key_block();
6742 _save_indentation();
6743 _line_progressed(1);
6744 _maybe_skipchars(
' ');
6745 goto mapblck_finish;
6749 else if(first ==
':')
6751 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6752 _handle_annotations_before_blck_key_scalar();
6753 m_evt_handler->set_key_scalar_plain_empty();
6755 _line_progressed(1);
6757 _maybe_skip_whitespace_tokens();
6759 else if(first ==
'*')
6761 csubstr ref = _scan_ref_map();
6762 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", _prs(ref));
6763 _handle_keyref(ref);
6765 if(!_maybe_scan_following_colon())
6766 _c4err(
"could not find ':' colon after key");
6768 _maybe_skip_whitespace_tokens();
6770 else if(first ==
'&')
6772 csubstr anchor = _scan_anchor();
6773 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", _prs(anchor));
6774 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6776 else if(first ==
'!')
6778 csubstr tag = _scan_tag();
6779 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", _prs(tag));
6780 _add_annotation(&m_pending_tags, tag, startindent, startline);
6782 else if(first ==
'[')
6787 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6788 _handle_annotations_before_blck_key_scalar();
6789 m_evt_handler->begin_seq_key_flow();
6791 _line_progressed(1);
6792 _set_indentation(startindent);
6793 goto mapblck_finish;
6795 else if(first ==
'{')
6800 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6801 _handle_annotations_before_blck_key_scalar();
6802 m_evt_handler->begin_map_key_flow();
6804 _line_progressed(1);
6805 _set_indentation(startindent);
6806 goto mapblck_finish;
6808 else if(first ==
'-')
6810 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6811 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6813 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6814 _start_doc_suddenly();
6815 _line_progressed(3);
6816 _maybe_skip_whitespace_tokens();
6817 goto mapblck_finish;
6824 else if(first ==
'.')
6826 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6827 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6829 _c4dbgp(
"mapblck[RKEY]: end doc");
6830 _end_doc_suddenly();
6831 _line_progressed(3);
6832 _maybe_skip_whitespace_tokens();
6833 _check_doc_end_tokens();
6834 goto mapblck_finish;
6846 else if(has_any(
RVAL))
6848 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6849 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6850 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6851 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6855 if(m_evt_handler->m_curr->at_line_beginning())
6857 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6858 m_evt_handler->m_curr->more_indented =
false;
6859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6860 if(m_evt_handler->m_curr->indentation_eq_extra())
6862 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6863 _line_progressed(m_evt_handler->m_curr->indref + 1);
6864 if(!m_evt_handler->m_curr->line_contents.rem.len)
6867 else if(m_evt_handler->m_curr->indentation_gt_extra())
6869 _c4dbgp(
"mapblck[RVAL]: more indented!");
6870 m_evt_handler->m_curr->more_indented =
true;
6871 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6872 if(!m_evt_handler->m_curr->line_contents.rem.len)
6875 else if(m_evt_handler->m_curr->indentation_lt_extra())
6877 if(m_evt_handler->m_curr->indentation_eq())
6879 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6881 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6883 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6884 _handle_annotations_before_blck_val_scalar();
6885 m_evt_handler->set_val_scalar_plain_empty();
6892 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6893 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6894 _handle_indentation_pop_from_block_map();
6897 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6898 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6901 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6902 m_evt_handler->add_sibling();
6909 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6910 goto mapblck_finish;
6915 const size_t startcol = _handle_block_skip_leading_whitespace();
6916 if(startcol ==
npos)
6918 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6921 const size_t tabmark = _handle_block_get_whitespace_mark();
6925 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6926 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6927 const size_t startline = m_evt_handler->m_curr->pos.line;
6928 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6929 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
6933 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6934 sc = _scan_scalar_squot();
6935 if(!_maybe_scan_following_colon())
6937 _c4dbgp(
"mapblck[RVAL]: set as val");
6938 _handle_annotations_before_blck_val_scalar();
6939 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6940 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6946 _c4assert(startindent > m_evt_handler->m_curr->indref);
6947 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6948 _handle_block_check_leading_tabs(startcol);
6949 _handle_annotations_before_start_mapblck(startline);
6952 m_evt_handler->begin_map_val_block();
6953 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6954 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6955 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6956 _maybe_skip_whitespace_tokens();
6961 else if(first ==
'"')
6963 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6964 sc = _scan_scalar_dquot();
6965 if(!_maybe_scan_following_colon())
6967 _c4dbgp(
"mapblck[RVAL]: set as val");
6968 _handle_annotations_before_blck_val_scalar();
6969 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6970 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6976 _c4assert(startindent > m_evt_handler->m_curr->indref);
6977 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6978 _handle_block_check_leading_tabs(startcol);
6979 _handle_annotations_before_start_mapblck(startline);
6982 m_evt_handler->begin_map_val_block();
6983 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6984 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6985 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6986 _maybe_skip_whitespace_tokens();
6993 else if(first ==
'|')
6995 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6997 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6998 _handle_annotations_before_blck_val_scalar();
6999 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7000 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7003 else if(first ==
'>')
7005 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
7007 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7008 _handle_annotations_before_blck_val_scalar();
7009 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7010 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7013 else if(_scan_scalar_plain_map_blck(&sc))
7015 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7016 if(!_maybe_scan_following_colon())
7018 _c4dbgp(
"mapblck[RVAL]: set as val");
7019 _handle_annotations_before_blck_val_scalar();
7020 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7021 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7027 _c4assert(startindent > m_evt_handler->m_curr->indref);
7028 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7029 _handle_block_check_leading_tabs(startcol, tabmark);
7031 _handle_annotations_before_start_mapblck(startline);
7033 m_evt_handler->begin_map_val_block();
7034 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7035 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7036 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7037 _maybe_skip_whitespace_tokens();
7042 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7044 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7046 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7047 _handle_block_check_leading_tabs(startcol);
7049 _handle_annotations_before_blck_val_scalar();
7050 m_evt_handler->begin_seq_val_block();
7052 _set_indentation(startindent);
7053 _line_progressed(1);
7054 _maybe_skip_whitespace_tokens();
7055 goto mapblck_finish;
7057 else if(first ==
'[')
7059 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7061 _handle_annotations_before_blck_val_scalar();
7062 m_evt_handler->begin_seq_val_flow();
7064 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7065 _line_progressed(1);
7066 goto mapblck_finish;
7068 else if(first ==
'{')
7070 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7072 _handle_annotations_before_blck_val_scalar();
7073 m_evt_handler->begin_map_val_flow();
7075 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7076 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7077 _line_progressed(1);
7078 goto mapblck_finish;
7080 else if(first ==
'*')
7082 csubstr ref = _scan_ref_map();
7083 _c4dbgpf(
"mapblck[RVAL]: ref! {}", _prs(ref));
7084 if(_maybe_scan_following_colon())
7086 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7088 _handle_annotations_before_blck_val_scalar();
7089 m_evt_handler->begin_map_val_block();
7090 _handle_keyref(ref);
7091 _set_indentation(startindent);
7097 _c4dbgp(
"mapblck[RVAL]: was val ref");
7098 _handle_valref(ref);
7101 _maybe_skip_whitespace_tokens();
7103 else if(first ==
'&')
7105 csubstr anchor = _scan_anchor();
7106 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", _prs(anchor));
7109 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7111 else if(first ==
'!')
7113 csubstr tag = _scan_tag();
7114 _c4dbgpf(
"mapblck[RVAL]: tag! {}", _prs(tag));
7117 _add_annotation(&m_pending_tags, tag, startindent, startline);
7119 else if(first ==
'?')
7121 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7123 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7125 _handle_annotations_before_blck_val_scalar();
7126 m_evt_handler->begin_map_val_block();
7128 _set_indentation(startindent);
7129 _line_progressed(1);
7130 _maybe_skipchars(
' ');
7131 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7133 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7135 m_evt_handler->begin_seq_key_block();
7137 _save_indentation();
7138 _line_progressed(1);
7139 _maybe_skipchars(
' ');
7140 goto mapblck_finish;
7144 else if(first ==
':')
7146 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7148 _handle_annotations_before_start_mapblck(startline);
7150 m_evt_handler->begin_map_val_block();
7151 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7152 m_evt_handler->set_key_scalar_plain_empty();
7155 _line_progressed(1);
7156 _maybe_skip_whitespace_tokens();
7164 else if(has_any(
RNXT))
7166 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7167 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7168 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7169 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7173 if(m_evt_handler->m_curr->at_line_beginning())
7175 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7176 if(m_evt_handler->m_curr->indentation_eq())
7178 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7179 _line_progressed(m_evt_handler->m_curr->indref);
7180 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7181 m_evt_handler->add_sibling();
7185 else if(m_evt_handler->m_curr->indentation_lt())
7187 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7188 _handle_indentation_pop_from_block_map();
7191 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7194 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7195 m_evt_handler->add_sibling();
7202 goto mapblck_finish;
7208 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7209 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
7216 if(!m_evt_handler->m_curr->line_contents.rem.len)
7218 _c4dbgp(
"seqblck[RNXT]: again");
7226 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7227 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7228 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7231 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7232 _maybe_skip_whitespace_tokens();
7239 else if(has_any(
QMRK))
7241 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7242 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7243 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7244 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7245 if(_handle_map_block_qmrk())
7248 goto mapblck_finish;
7250 else if(has_any(
RKCL))
7252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7253 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7254 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7256 if(_handle_map_block_rkcl())
7259 goto mapblck_finish;
7263 _c4dbgt(
"mapblck: again", 0);
7264 if(_finished_line())
7268 if(_finished_file())
7270 _c4dbgp(
"mapblck: file finished!");
7272 goto mapblck_finish;
7279 _c4dbgp(
"mapblck: finish");
/** Handle a block map that is reading an explicit key, i.e. the state
 * traced as `mapblck[QMRK]`.
 *
 * When the cursor is at the beginning of a line, the line's indentation is
 * first compared with the reference indentation (`indref`). After that the
 * first remaining character of the line selects what is consumed: a quoted,
 * block or plain scalar, a reference, an anchor, a tag, or a nested
 * container that becomes the key. When a scalar or reference is directly
 * followed by a colon, a new block map is started *as the key* and the
 * scalar/reference becomes the first key of that nested map.
 *
 * @return a bool consumed by the map-block loop. NOTE(review): the caller
 *         appears to jump to `mapblck_finish` on a true result - confirm. */
7286 template<
class EventHandler>
7287 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// At the start of a line: position the cursor according to the indentation.
7292 if(m_evt_handler->m_curr->at_line_beginning())
7294 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7295 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
// indentation_eq_extra(): consume indref + 1 columns.
7296 if(m_evt_handler->m_curr->indentation_eq_extra())
7298 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7299 _line_progressed(m_evt_handler->m_curr->indref + 1);
7300 if(!m_evt_handler->m_curr->line_contents.rem.len)
// indentation_gt_extra(): consume the whole indentation of the line.
7304 else if(m_evt_handler->m_curr->indentation_gt_extra())
7306 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7307 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7308 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Remaining case: indentation_lt_extra() (asserted below).
7313 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7314 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7315 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
// Same indentation as the map and the line does not start with '-':
// the explicit key is over and is recorded as an empty plain scalar.
7316 if(m_evt_handler->m_curr->indentation_eq()
7318 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7320 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7321 _handle_annotations_before_blck_key_scalar();
7322 m_evt_handler->set_key_scalar_plain_empty();
// Less indentation than the map: pop out of this block map.
7326 else if(m_evt_handler->m_curr->indentation_lt())
7328 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7329 _handle_indentation_pop_from_block_map();
7330 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7333 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7338 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// Dispatch on the first remaining character. The starting line and column
// are remembered for annotation handling and for the indentation of any
// container that is opened below.
7347 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7348 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7349 const size_t startline = m_evt_handler->m_curr->pos.line;
7350 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7351 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
// --- single-quoted scalar ---
7355 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7356 sc = _scan_scalar_squot();
7357 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
// No colon follows: the scalar is the key of this map.
7359 if(!_maybe_scan_following_colon())
7361 _c4dbgp(
"mapblck[QMRK]: set as key");
7362 _handle_annotations_before_blck_key_scalar();
7363 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// A colon follows: open a nested block map as the key and make the scalar
// the first key of that nested map.
7367 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7368 _handle_annotations_before_start_mapblck_as_key();
7369 m_evt_handler->begin_map_key_block();
7370 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7371 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7372 _maybe_skip_whitespace_tokens();
7373 _set_indentation(startindent);
// --- double-quoted scalar: same two cases as for single-quoted ---
7378 else if(first ==
'"')
7380 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7381 sc = _scan_scalar_dquot();
7382 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7384 if(!_maybe_scan_following_colon())
7386 _c4dbgp(
"mapblck[QMRK]: set as key");
7387 _handle_annotations_before_blck_key_scalar();
7388 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7392 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7393 _handle_annotations_before_start_mapblck_as_key();
7394 m_evt_handler->begin_map_key_block();
7395 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7396 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7397 _maybe_skip_whitespace_tokens();
7398 _set_indentation(startindent);
// --- block literal scalar ('|'), scanned relative to indref + 1 ---
7403 else if(first ==
'|')
7405 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7407 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7408 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7409 _handle_annotations_before_blck_key_scalar();
7410 m_evt_handler->set_key_scalar_literal(maybe_filtered);
// --- block folded scalar ('>'), scanned relative to indref + 1 ---
7413 else if(first ==
'>')
7415 _c4dbgp(
"mapblck[QMRK]: scanning block-folded scalar");
7417 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7418 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7419 _handle_annotations_before_blck_key_scalar();
7420 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// --- plain scalar: same two cases as for the quoted scalars ---
7423 else if(_scan_scalar_plain_map_blck(&sc))
7425 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7426 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7428 if(!_maybe_scan_following_colon())
7430 _c4dbgp(
"mapblck[QMRK]: set as key");
7431 _handle_annotations_before_blck_key_scalar();
7432 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7436 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7437 _handle_annotations_before_start_mapblck_as_key();
7438 m_evt_handler->begin_map_key_block();
7439 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7440 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7441 _maybe_skip_whitespace_tokens();
7442 _set_indentation(startindent);
// --- ':' right away: nested block map as key, whose first key is empty ---
7447 else if(first ==
':')
7449 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7451 _handle_annotations_before_start_mapblck_as_key();
7452 m_evt_handler->begin_map_key_block();
7453 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7454 m_evt_handler->set_key_scalar_plain_empty();
7455 _line_progressed(1);
7456 _maybe_skip_whitespace_tokens();
7457 _set_indentation(startindent);
// --- '*': reference used as key, or as first key of a nested map ---
7461 else if(first ==
'*')
7463 csubstr ref = _scan_ref_map();
7464 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", _prs(ref));
7466 if(!_maybe_scan_following_colon())
7468 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7469 _handle_keyref(ref);
7473 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7474 _handle_annotations_before_start_mapblck_as_key();
7475 m_evt_handler->begin_map_key_block();
7476 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7477 _handle_keyref(ref);
7478 _set_indentation(startindent);
7482 _maybe_skip_whitespace_tokens();
// --- '&': anchor, stored as pending together with its column and line ---
7484 else if(first ==
'&')
7486 csubstr anchor = _scan_anchor();
7487 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", _prs(anchor));
7488 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// --- '!': tag, stored as pending together with its column and line ---
7490 else if(first ==
'!')
7492 csubstr tag = _scan_tag();
7493 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", _prs(tag));
7494 _add_annotation(&m_pending_tags, tag, startindent, startline);
// --- '-': either a block sequence used as key, or a document start ---
7496 else if(first ==
'-')
7498 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7499 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7501 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7503 _handle_annotations_before_blck_key_scalar();
7504 m_evt_handler->begin_seq_key_block();
7506 _set_indentation(startindent);
7507 _line_progressed(1);
// Not a sequence token: it must be a document start token, 3 chars long.
7511 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7512 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7513 _start_doc_suddenly();
7514 _line_progressed(3);
7516 _maybe_skip_whitespace_tokens();
// --- '[': flow sequence as key; indentation taken from the parent state ---
7519 else if(first ==
'[')
7521 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7523 _handle_annotations_before_blck_key_scalar();
7524 m_evt_handler->begin_seq_key_flow();
7526 _set_indentation(m_evt_handler->m_parent->indref + 1);
7527 _line_progressed(1);
// --- '{': flow map as key; indentation taken from the parent state ---
7530 else if(first ==
'{')
7532 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7534 _handle_annotations_before_blck_key_scalar();
7535 m_evt_handler->begin_map_key_flow();
7537 _set_indentation(m_evt_handler->m_parent->indref + 1);
7538 _line_progressed(1);
// --- another '?': must be more indented than this map; opens a nested
// block map as the key ---
7541 else if(first ==
'?')
7543 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7544 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7545 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7547 _handle_annotations_before_blck_key_scalar();
7548 m_evt_handler->begin_map_key_block();
7550 _set_indentation(startindent);
7552 _line_progressed(1);
7553 _maybe_skipchars(
' ');
// A sequence token right after the '?' starts a block sequence as the key.
7554 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7556 _c4dbgp(
"mapblck[QMRK]: seqblck starts after ?");
7558 m_evt_handler->begin_seq_key_block();
7560 _save_indentation();
7561 _line_progressed(1);
7562 _maybe_skipchars(
' ');
/** Handle a block map that has read a key and now expects the colon that
 * separates it from the value, i.e. the state traced as `mapblck[RKCL]`.
 *
 * Besides the colon itself, this also deals with tokens that show the value
 * is missing: a '?' starting the next explicit key, a document start or
 * document end token, or any other content at the map's indentation. In
 * those cases the value is set to an empty plain scalar.
 *
 * @return a bool consumed by the map-block loop. NOTE(review): the caller
 *         appears to jump to `mapblck_finish` on a true result - confirm. */
7577 template<
class EventHandler>
7578 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// At the start of a line: skip the map's indentation; less indentation than
// the map is an error in this state.
7583 if(m_evt_handler->m_curr->at_line_beginning())
7585 if(m_evt_handler->m_curr->indentation_eq())
7587 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7588 _line_progressed(m_evt_handler->m_curr->indref);
7589 if(!m_evt_handler->m_curr->line_contents.rem.len)
7592 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7594 _c4err(
"invalid indentation");
// Dispatch on the first remaining character.
7597 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7598 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
// --- the colon: consume it and any spaces after it ---
7601 _c4dbgp(
"mapblck[RKCL]: found the colon");
7602 _line_progressed(1);
7603 _maybe_skipchars(
' ');
// Only compiled for GCC >= 12 when C4_WORDSIZE == 4 or on s390x / ppc64.
// NOTE(review): presumably a workaround for an optimizer problem on those
// targets - confirm.
7604 #if defined(__GNUC__) && (__GNUC__ >= 12) \
7605 && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7606 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7609 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
// A sequence token follows the colon: the value is a block sequence.
7616 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7618 m_evt_handler->begin_seq_val_block();
7620 _save_indentation();
7621 _line_progressed(1);
7622 _maybe_skipchars(
' ');
// --- '?': the previous key had no value; close it with an empty value and
// move on to the next sibling, whose explicit key starts here ---
7626 else if(first ==
'?')
7628 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7629 m_evt_handler->set_val_scalar_plain_empty();
7630 m_evt_handler->add_sibling();
7632 _line_progressed(1);
7633 _maybe_skipchars(
' ');
// A sequence token right after the '?' starts a block sequence as the key.
7634 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7636 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7638 m_evt_handler->begin_seq_key_block();
7640 _save_indentation();
7641 _line_progressed(1);
7642 _maybe_skipchars(
' ');
// --- '-': in this branch only a document start token is accepted; the
// check below raises a parse error for anything else ---
7646 else if(first ==
'-')
7648 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7650 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7651 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7652 _start_doc_suddenly();
7653 _line_progressed(3);
7654 _maybe_skip_whitespace_tokens();
// --- '.': a document end token when the rest of the line after the first
// dot is exactly ".." or begins with ".. " ---
7662 else if(first ==
'.')
7664 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7665 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7666 if(rs ==
".." || rs.begins_with(
".. "))
7668 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7669 _end_doc_suddenly();
7670 _line_progressed(3);
7671 _maybe_skip_whitespace_tokens();
7672 _check_doc_end_tokens();
// --- anything else: the colon is missing; the value is recorded as empty
// and the map moves on to the next sibling ---
7682 _c4dbgp(
"mapblck[RKCL]: missing :");
7683 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7685 m_evt_handler->set_val_scalar_plain_empty();
7686 m_evt_handler->add_sibling();
/** Handle the top-level "unknown" state when parsing JSON: nothing has been
 * opened yet (asserted: none of RNXT/RSEQ/RMAP, and RTOP set).
 *
 * After skipping comments and leading spaces/tabs, the first character
 * decides what is started: '[' opens a flow sequence, '{' opens a flow map,
 * a byte order mark is consumed, and otherwise a double-quoted or plain
 * scalar is scanned and set as the value. */
7695 template<
class EventHandler>
7696 void ParseEngine<EventHandler>::_handle_unk_json()
7698 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7700 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7701 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7703 _maybe_skip_comment();
7704 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces and tabs; when the line holds nothing else
// (first_not_of returns npos), the whole remainder is skipped.
7708 size_t pos = rem.first_not_of(
" \t");
7711 pos = pos !=
npos ? pos : rem.len;
7712 _c4dbgpf(
"skipping indentation of {}", pos);
7713 _line_progressed(pos);
7714 rem = m_evt_handler->m_curr->line_contents.rem;
7717 _c4dbgpf(
"rem is now {}", _prs(rem));
// --- '[': flow sequence; its indentation is the column of the bracket ---
7720 if(rem.begins_with(
'['))
7722 _c4dbgp(
"it's a seq");
7723 _check_trailing_doc_token();
7725 m_evt_handler->begin_seq_val_flow();
7727 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7728 m_doc_empty =
false;
7729 _line_progressed(1);
// --- '{': flow map; its indentation is the column of the brace ---
7731 else if(rem.begins_with(
'{'))
7733 _c4dbgp(
"it's a map");
7734 _check_trailing_doc_token();
7736 m_evt_handler->begin_map_val_flow();
7738 m_doc_empty =
false;
7739 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7740 _line_progressed(1);
// --- byte order mark ---
7742 else if(_handle_bom())
7744 _c4dbgp(
"byte order mark");
// --- otherwise: a scalar ---
7748 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7749 _maybe_skip_whitespace_tokens();
7750 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7753 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7754 const char first = s.str[0];
// Double-quoted scalar: set as the value when no colon follows it.
7758 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7759 _check_trailing_doc_token();
7762 m_doc_empty =
false;
7763 sc = _scan_scalar_dquot();
7764 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7765 if(!_maybe_scan_following_colon())
7767 _c4dbgp(
"runk_json: set as val");
7768 _handle_annotations_before_blck_val_scalar();
7769 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain scalar: set as the value when no colon follows it.
7776 else if(_scan_scalar_plain_unk(&sc))
7778 _c4dbgp(
"runk_json: got a plain scalar");
7779 _check_trailing_doc_token();
7782 m_doc_empty =
false;
7783 if(!_maybe_scan_following_colon())
7785 _c4dbgp(
"runk_json: set as val");
7786 _handle_annotations_before_blck_val_scalar();
7787 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7788 m_evt_handler->set_val_scalar_plain(maybe_filtered);
/** Handle the top-level "unknown" state for YAML: no container has been
 * opened yet (asserted: none of RNXT/RSEQ/RMAP, and RTOP set), so the next
 * token decides what the document contains.
 *
 * In order, this deals with:
 *  - tokens only looked for at zero indentation at the start of a line (or
 *    on the line holding a byte order mark): the byte order mark, document
 *    start and end tokens, and '%' directives;
 *  - container starts: '[' and '{' (flow), '-' (block sequence), '?'
 *    (block map with explicit key), ':' (block map with empty key);
 *  - annotations: '&' anchors and '!' tags, stored as pending;
 *  - '*' references;
 *  - scalars: block literal/folded, single/double-quoted and plain.
 *
 * A scalar, reference or flow container that turns out to be a key causes a
 * block map to be started, with that item as its first key. */
7805 template<
class EventHandler>
7806 void ParseEngine<EventHandler>::_handle_unk()
7808 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7810 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7811 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7813 _maybe_skipchars(
' ');
7814 _maybe_skip_comment();
7816 if(!m_evt_handler->m_curr->line_contents.rem.len)
7819 _c4dbgpf(
"runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
// Tokens that are only looked for at zero indentation, either at the start
// of the line or on the line where a byte order mark was found.
7821 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7823 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7824 _c4dbgp(
"runk: check BOM");
// Remember the line of the byte order mark; it is compared against the
// current line in the condition above and in the column adjustment below.
7827 m_bom_line = m_evt_handler->m_curr->pos.line;
7828 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7831 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7832 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
// --- document start token: 3 characters, indentation reset to 0 ---
7835 _c4dbgp(
"runk: rtop: suspecting doc");
7836 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7838 _c4dbgp(
"runk: rtop: begin doc");
7841 _set_indentation(0);
7843 _line_progressed(3u);
7844 _maybe_skip_whitespace_tokens();
// --- document end token: 3 characters ---
7848 else if(first ==
'.')
7850 _c4dbgp(
"runk: rtop: suspecting doc end");
7851 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7853 _c4dbgp(
"runk: rtop: end doc");
7860 _c4dbgp(
"runk: rtop: ignore end doc");
7863 _line_progressed(3u);
7864 _maybe_skip_whitespace_tokens();
7865 _check_doc_end_tokens();
// --- '%' directive: rejected while a document is open (RDOC), or when the
// document already has content and NDOC is not set ---
7869 else if(first ==
'%')
7871 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
7872 if(C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC))))
7873 _c4err(
"need document footer before directives");
7874 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// startindent is the indentation of the line; remindent is the column of
// the remaining text.
7881 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7882 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7885 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
// On the line holding the byte order mark, its length is subtracted from
// the column of the remaining text.
7886 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7888 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7889 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7890 remindent -= m_bom_len;
// Dispatch on the first character after the leading whitespace.
7898 size_t startcol = _handle_block_skip_leading_whitespace();
7899 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// --- flow sequence: a plain value, unless the pending annotations require
// a key container, in which case a block map is started with the flow
// sequence as its key ---
7903 _c4dbgp(
"runk: flow seq?");
7904 _handle_unk_begin_doc();
7905 if(C4_LIKELY( ! _annotations_require_key_container()))
7907 _c4dbgp(
"runk: it's a seq, flow");
7908 _handle_annotations_before_blck_val_scalar();
7909 m_evt_handler->begin_seq_val_flow();
7911 _set_indentation(0);
7915 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7916 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7917 m_evt_handler->begin_map_val_block();
7919 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7920 m_evt_handler->begin_seq_key_flow();
7922 _set_indentation(0);
7924 _line_progressed(1);
// --- '{': flow map, handled like the flow sequence above ---
7926 else if(first ==
'{')
7928 _c4dbgp(
"runk: flow map?");
7929 _handle_unk_begin_doc();
7930 if(C4_LIKELY( ! _annotations_require_key_container()))
7932 _c4dbgp(
"runk: it's a map, flow");
7933 _handle_annotations_before_blck_val_scalar();
7934 m_evt_handler->begin_map_val_flow();
7936 _set_indentation(0);
7940 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7941 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7942 m_evt_handler->begin_map_val_block();
7944 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7945 m_evt_handler->begin_map_key_flow();
7947 _set_indentation(0);
7949 _line_progressed(1);
// --- '-' as a block token: block sequence. When other tokens precede it on
// the line, the indentation is recomputed from them ---
7951 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7953 _c4dbgp(
"runk: it's a seq, block");
7954 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7955 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7956 _handle_unk_begin_doc();
7957 _handle_annotations_before_blck_val_scalar();
7958 m_evt_handler->begin_seq_val_block();
7960 _set_indentation(startindent);
7961 _line_progressed(1);
7962 _maybe_skipchars(
' ');
// --- '?' as a block token: block map whose first key is explicit ---
7964 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7966 _c4dbgp(
"runk: it's a map + this key is complex");
7967 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7968 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7969 _handle_block_check_leading_tabs(startcol);
7970 _handle_unk_begin_doc();
7971 _handle_annotations_before_blck_val_scalar();
7972 m_evt_handler->begin_map_val_block();
7974 _set_indentation(startindent);
7975 _line_progressed(1);
7976 _maybe_skipchars(
' ');
// A sequence token right after the '?' starts a block sequence as the key.
7977 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7979 _c4dbgp(
"runk: seqblck key starts after ?");
7981 m_evt_handler->begin_seq_key_block();
7983 _save_indentation();
7984 _line_progressed(1);
7985 _maybe_skipchars(
' ');
// --- ':' as a block token: block map with an empty key. Only accepted when
// the document is still empty or annotations are pending; otherwise it is
// an error ---
7988 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7990 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7992 _c4dbgp(
"runk: it's a map with an empty key");
7993 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7994 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7995 _handle_block_check_leading_tabs(startcol);
7996 const size_t startline = m_evt_handler->m_curr->pos.line;
7997 _handle_unk_begin_doc();
7998 _handle_annotations_before_start_mapblck(startline);
8000 m_evt_handler->begin_map_val_block();
8001 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8002 m_evt_handler->set_key_scalar_plain_empty();
8003 _set_indentation(startindent);
8007 _c4err(
"block colon cannot occur on a new line unless ? is used");
8010 _line_progressed(1);
8011 _maybe_skip_whitespace_tokens();
// --- '&': anchor, stored as pending with the column of the remaining text
// and the line read just after scanning it ---
8013 else if(first ==
'&')
8015 csubstr anchor = _scan_anchor();
8016 _c4dbgpf(
"anchor! {}", _prs(anchor));
8017 const size_t line = m_evt_handler->m_curr->pos.line;
8018 _handle_unk_begin_doc();
8019 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8020 _set_indentation(0);
// --- '*': reference; a value when no colon follows, otherwise the first
// key of a new block map ---
8022 else if(first ==
'*')
8024 csubstr ref = _scan_ref_map();
8025 _c4dbgpf(
"runk: ref! {}", _prs(ref));
8026 _handle_unk_begin_doc();
8027 if(!_maybe_scan_following_colon())
8029 _c4dbgp(
"runk: set val ref");
8030 _handle_valref(ref);
8034 _c4dbgp(
"runk: start new block map, set ref as key");
8035 _handle_block_check_leading_tabs(startcol);
8036 const size_t startline = m_evt_handler->m_curr->pos.line;
8037 _handle_annotations_before_start_mapblck(startline);
8038 m_evt_handler->begin_map_val_block();
8039 _handle_keyref(ref);
8040 _maybe_skip_whitespace_tokens();
8041 _set_indentation(0);
// --- '!': tag, stored as pending together with its original spelling. The
// column and line are read after the tag has been scanned ---
8045 else if(first ==
'!')
8048 csubstr tag = _scan_tag(&tag_orig);
8049 _c4dbgpf(
"runk: val tag! {}", _prs(tag));
8052 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8053 const size_t line = m_evt_handler->m_curr->pos.line;
8054 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// --- otherwise: a scalar ---
8058 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8059 const size_t startscalar = _handle_block_get_whitespace_mark();
8060 const size_t startline = m_evt_handler->m_curr->pos.line;
// beginmap: starts the block map whose first key is the scalar just
// scanned. A scalar that ended on a later line than it started on cannot
// be an implicit key.
8061 auto beginmap = [&](
size_t startindent_){
8062 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8063 _c4err(
"multiline scalars cannot be used as implicit keys");
8064 _handle_block_check_leading_tabs(startcol, startscalar);
8065 _handle_annotations_before_start_mapblck(startline);
8067 m_evt_handler->begin_map_val_block();
8068 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// after_beginmap: run once the key has been set on the new map.
8070 auto after_beginmap = [&](
size_t startindent_){
8071 _maybe_skip_whitespace_tokens();
8072 _set_indentation(startindent_);
// Block literal scalar: always a value.
8077 _c4dbgp(
"runk: block-literal scalar");
8078 _handle_unk_begin_doc();
8080 _scan_block(&sb, startindent);
8081 _handle_annotations_before_blck_val_scalar();
8082 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8083 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block folded scalar: always a value.
8085 else if(first ==
'>')
8087 _c4dbgp(
"runk: block-folded scalar");
8088 _handle_unk_begin_doc();
8090 _scan_block(&sb, startindent);
8091 _handle_annotations_before_blck_val_scalar();
8092 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8093 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Single-quoted scalar: a value when no colon follows, otherwise the first
// key of a new block map. The column and first-token status are captured
// before scanning moves the cursor.
8095 else if(first ==
'\'')
8097 _c4dbgp(
"runk: single-quoted scalar");
8098 _handle_unk_begin_doc();
8099 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8100 size_t col = m_evt_handler->m_curr->pos.col;
8101 ScannedScalar sc = _scan_scalar_squot();
8102 if(!_maybe_scan_following_colon())
8104 _c4dbgp(
"runk: set as val");
8105 _handle_annotations_before_blck_val_scalar();
8106 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8107 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8111 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8113 startindent = _handle_unk_check_left_tokens(startindent, col);
8114 beginmap(startindent);
// NOTE(review): the key is filtered with the *val* filter here (likewise
// for the double-quoted and plain keys below), whereas
// _handle_map_block_qmrk uses the _maybe_filter_key_scalar_* functions -
// confirm this is intended.
8115 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8116 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8117 after_beginmap(startindent);
// Double-quoted scalar: same two cases as for single-quoted.
8120 else if(first ==
'"')
8122 _c4dbgp(
"runk: double-quoted scalar");
8123 _handle_unk_begin_doc();
8124 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8125 size_t col = m_evt_handler->m_curr->pos.col;
8126 ScannedScalar sc = _scan_scalar_dquot();
8127 if(!_maybe_scan_following_colon())
8129 _c4dbgp(
"runk: set as val");
8130 _handle_annotations_before_blck_val_scalar();
8131 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8132 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8136 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8138 startindent = _handle_unk_check_left_tokens(startindent, col);
8139 beginmap(startindent);
8140 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8141 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8142 after_beginmap(startindent);
// Plain scalar: same two cases as for the quoted scalars.
8147 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8148 size_t col = m_evt_handler->m_curr->pos.col;
8150 if(_scan_scalar_plain_unk(&sc))
8152 _c4dbgp(
"runk: plain scalar");
8153 _handle_unk_begin_doc();
8154 if(!_maybe_scan_following_colon())
8156 _c4dbgp(
"runk: set as val");
8157 _handle_annotations_before_blck_val_scalar();
8158 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8159 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8163 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8165 startindent = _handle_unk_check_left_tokens(startindent, col);
8166 beginmap(startindent);
8167 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8168 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8169 after_beginmap(startindent);
/** Called from the unknown-state handler when content is found: checks for
 * a trailing document token and marks the current document as no longer
 * empty (`m_doc_empty = false`). */
8180 template<
class EventHandler>
8181 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8183 _c4dbgp(
"runk: begin doc");
8184 _check_trailing_doc_token();
8187 m_doc_empty =
false;
/** Inspect what lies to the left of column `col` on the current line, for
 * a token that is not the first one on its line.
 *
 * @param realindent indentation known so far; it may be updated here
 * @param col column of the token being handled
 * @param skip_annotations when true, the pending annotations found on this
 *        line are skipped over before looking for the first non-space
 *        character
 * @return a size_t that callers assign to their indentation variable.
 *         NOTE(review): presumably the updated `realindent` - confirm. */
8190 template<
class EventHandler>
8191 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
// The part of the line between the byte order mark (if any) and `col`.
8196 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8198 _c4dbgpf(
"runk: check left tokens: s={}", _prs(s,
true));
8199 if(skip_annotations)
8201 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8202 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// First non-space character at or after `pos`.
8204 size_t firstns = s.first_not_of(
' ', pos);
8207 _c4dbgpf(
"runk: check left tokens:\n"
8208 " tokens={} skipped={}\n"
8209 " bomlen={} first={} col={}\n"
8210 " (bomlen+first)={} vs {}=col\n"
8211 " startindent={} lineindent={}"
8212 , _prs(s,
true), _prs(s.sub(firstns),
true)
8213 , m_bom_len, firstns, col
8214 , m_bom_len+firstns, col,
8215 realindent, m_evt_handler->m_curr->line_contents.indentation);
// True when the first non-space position, offset by the byte order mark
// length, does not coincide with `col`.
8216 if(m_bom_len + firstns != col)
// Without annotation skipping, the indentation is simply the position of
// the first non-space character.
8218 if(!skip_annotations)
8219 realindent = firstns;
8220 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
/** Walk over the leading whitespace of `s` and over up to two pending
 * annotations (anchor and/or tag) that sit on the current line.
 *
 * @param s the text to the left of the token being handled
 * @param indent [out] receives the position of the first character of `s`
 *        that is neither space nor tab
 * @param first_non_token_pos [out] initially receives the same position;
 *        at the end it receives the position reached after the
 *        annotations that were skipped */
8226 template<
class EventHandler>
8227 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s,
size_t *indent,
size_t *first_non_token_pos)
// `first` and `second` are the annotations on this line, in buffer order.
8229 csubstr first, second;
8230 uint32_t total = _get_annotations_same_line(s, &first, &second);
8231 _c4dbgpf(
"runk: before skip: {}", _prs(s,
true));
8232 size_t pos = s.first_not_of(
" \t");
8237 *indent = *first_non_token_pos = pos;
8240 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8241 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos),
true));
// The first annotation is expected to start exactly at `pos`.
8242 _c4dbgpf(
"runk: first annotation: {}", first);
8246 _c4assert(s.sub(pos).begins_with(first));
8249 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos),
true));
// The second annotation is expected after the whitespace that separates it
// from the first.
8252 _c4dbgpf(
"runk: second annotation: {}", second);
8257 csubstr spos = s.sub(pos);
8258 size_t more = spos.first_not_of(
" \t");
8260 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8262 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos),
true));
8263 _c4assert(s.sub(pos).begins_with(second));
8265 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos),
true));
8267 *first_non_token_pos = pos;
/** Find the pending annotations (anchors and tags) that were recorded on
 * the current line, and hand back the text of up to two of them.
 *
 * Must not be called for the first token of a line (asserted).
 *
 * @param token_soup text of the line to the left of the current token
 * @param first_ [out] text of the first annotation on this line, or null
 * @param second_ [out] text of the second annotation on this line, or null
 * @return a uint32_t count. NOTE(review): per the debug trace this is the
 *         number of annotations on the current line - confirm. */
8271 template<
class EventHandler>
8272 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_)
const
8274 _c4assert(!m_evt_handler->m_curr->at_first_token());
8276 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8277 EntryPtr first =
nullptr;
8278 EntryPtr second =
nullptr;
8279 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8282 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// Yields the entry when it was recorded on the current line, else nullptr.
8283 auto valid_if_same_line = [
this](EntryPtr entry){
8284 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8285 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// Count the pending anchors and tags that are on the current line.
8289 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8290 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8291 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8292 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8293 _c4dbgpf(
"{} annotations on same line", total);
// Returns the first pending entry on the current line other than
// `not_this_one`, searching anchors before tags; nullptr when none.
8298 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8299 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8300 if(&m_pending_anchors.annotations[i] != not_this_one
8301 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8302 return &m_pending_anchors.annotations[i];
8303 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8304 if(&m_pending_tags.annotations[i] != not_this_one
8305 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8306 return &m_pending_tags.annotations[i];
8308 return (EntryPtr)
nullptr;
8312 first = get_first_on_same_line(
nullptr);
8314 _c4dbgpf(
"first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8319 second = get_first_on_same_line(first);
8321 _c4dbgpf(
"second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
// Maps an entry to its text. Entries whose string is null or begins with
// '!' or '<' are handled as tags, starting from their original spelling
// (`orig`); all others are handled as anchors, starting from `str`.
8323 auto extract_string = [&](EntryPtr e){
8325 if(!e->str.str || e->str.begins_with_any(
"!<"))
8327 csubstr tag = e->orig;
8331 _c4dbgpf(
"tag: {} -> {}", _maybe_null_str(e->str), tag);
8334 csubstr anchor = e->str;
// The anchor text must not start at the very beginning of token_soup.
8339 _c4assert(anchor.str - token_soup.str > 0);
8344 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8347 *first_ = first ? extract_string(first) : nullptr;
8348 *second_ = second ? extract_string(second) : nullptr;
// With more than one annotation, order the two by address in the buffer so
// that `first_` is the one that appears earlier.
8349 if(total > 1 && (first_->str > second_->str))
8351 csubstr tmp = *first_;
8354 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
/** Handle the rest of the current line while the parser is in unknown-style
 * (USTY) state, i.e. while neither block (RBLCK) nor flow (RFLOW) reading has
 * been decided yet.
 *
 * The function skips leading whitespace, looks at the first remaining
 * character and then acts according to the container state, as labelled by
 * the debug traces below:
 *  - usty[RSEQ]: a seq token is merged into the existing seq; anything else
 *    is an error.
 *  - usty[RMAP]: a map token, key annotation (anchor/tag), reference or key
 *    scalar is merged into the existing map; a seq token, or a scalar that is
 *    not followed by a colon, is an error.
 *  - usty[UNK]: a new container is begun, or a scalar value is set.
 *
 * NOTE(review): this listing omits several original lines (see the gaps in
 * the embedded line numbers), including braces and some of the branch
 * conditions. Comments about omitted conditions are inferred from the debug
 * messages and should be confirmed against the real source.
 */
8363 template<
class EventHandler>
8364 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// Debug trace of the current state. Two values are passed, so the format
// string carries one labelled placeholder for each of them.
8366 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Precondition: neither block nor flow mode may be set when entering here.
8368 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
// NOTE(review): only part of this conditional section is visible in this listing.
8370 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8373 _c4dbgp(
"usty[RNXT]: finishing!");
// Skip a comment if there is one, then advance past leading spaces/tabs
// (or past the whole remainder when it has nothing else) and re-read `rem`.
8378 _maybe_skip_comment();
8379 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8383 size_t pos = rem.first_not_of(
" \t");
8386 pos = pos !=
npos ? pos : rem.len;
8387 _c4dbgpf(
"skipping indentation of {}", pos);
8388 _line_progressed(pos);
8389 rem = m_evt_handler->m_curr->line_contents.rem;
8392 _c4dbgpf(
"rem is now {}", _prs(rem));
// Something must remain on the line. Remember the indentation of the line
// and the first remaining character, which drives all the dispatch below.
8395 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8396 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8397 char first = rem.str[0];
// ---- usty[RSEQ] ----
// NOTE(review): presumably guarded by a check for RSEQ; that line is not in
// this listing. RSEQ and RMAP must not be set simultaneously.
8400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8401 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow seq: push a new handler state, set the indentation to that of the
// line, and consume the one-character token plus following whitespace.
8404 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8406 m_evt_handler->_push();
8408 _set_indentation(startindent);
8409 _line_progressed(1);
8410 _maybe_skip_whitespace_tokens();
// Block seq: same steps, triggered by a '-' that is a block token.
8412 else if(first ==
'-' && _is_blck_token(rem))
8414 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8416 m_evt_handler->_push();
8418 _set_indentation(startindent);
8419 _line_progressed(1);
8420 _maybe_skip_whitespace_tokens();
// Anything else cannot be merged into an existing seq.
8424 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] ----
8427 else if(has_any(
RMAP))
8429 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8430 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map: handle pending annotations, push a new handler state, set the
// indentation and consume the one-character token.
8433 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8435 _handle_annotations_before_blck_val_scalar();
8436 m_evt_handler->_push();
8438 _set_indentation(startindent);
8439 _line_progressed(1);
8440 _maybe_skip_whitespace_tokens();
// Block map whose first key is a complex key ('?' block token). Here the
// current indentation is saved instead of being set to startindent.
8442 else if(first ==
'?' && _is_blck_token(rem))
8444 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8446 _handle_annotations_before_blck_val_scalar();
8447 m_evt_handler->_push();
8449 _save_indentation();
8450 _line_progressed(1);
8451 _maybe_skip_whitespace_tokens();
// Block map starting with ':' (block token): the key is an empty plain scalar.
8453 else if(first ==
':' && _is_blck_token(rem))
8455 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8457 _handle_annotations_before_blck_val_scalar();
8458 m_evt_handler->_push();
8459 m_evt_handler->set_key_scalar_plain_empty();
8461 _save_indentation();
8462 _line_progressed(1);
8463 _maybe_skip_whitespace_tokens();
// Anchor: scan it and store it as a pending anchor annotation, recording the
// current column and line; the indentation is then set to that column.
8465 else if(rem.begins_with(
'&'))
8467 csubstr anchor = _scan_anchor();
8468 _c4dbgpf(
"usty[RMAP]: anchor! {}", _prs(anchor));
8469 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8470 const size_t line = m_evt_handler->m_curr->pos.line;
8471 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8472 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: inside a map it is only accepted as a key, so it must be
// followed by a colon; otherwise this is a parse error.
8474 else if(first ==
'*')
8476 csubstr ref = _scan_ref_map();
8477 _c4dbgpf(
"usty[RMAP]: ref! {}", _prs(ref));
8478 if(!_maybe_scan_following_colon())
8480 _c4err(
"cannot read a VAL to a map");
8484 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8485 const size_t startline = m_evt_handler->m_curr->pos.line;
8487 _handle_annotations_before_start_mapblck(startline);
8488 m_evt_handler->_push();
8489 _handle_keyref(ref);
8490 _maybe_skip_whitespace_tokens();
8491 _set_indentation(startindent);
// Tag: scan it and store it as a pending tag annotation (column and line).
8495 else if(first ==
'!')
8497 csubstr tag = _scan_tag();
8498 _c4dbgpf(
"usty[RMAP]: val tag! {}", _prs(tag));
8501 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8502 const size_t line = m_evt_handler->m_curr->pos.line;
8503 _add_annotation(&m_pending_tags, tag, indentation, line);
// A seq token (flow '[' or block '-') cannot be merged into a map.
8505 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8507 _c4err(
"cannot parse a seq into an existing map");
// Remaining candidates are scalars. No scalar may already be stored (SSCL).
// For every scalar kind below, the scalar is only accepted as a key: a
// missing following colon raises "cannot read a VAL to a map".
8511 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8512 startindent = m_evt_handler->m_curr->line_contents.indentation;
8513 const size_t startline = m_evt_handler->m_curr->pos.line;
8515 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted key scalar.
8518 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8519 sc = _scan_scalar_squot();
8520 if(!_maybe_scan_following_colon())
8522 _c4err(
"cannot read a VAL to a map");
8526 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8528 _handle_annotations_before_start_mapblck(startline);
8529 m_evt_handler->_push();
8530 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8531 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8532 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8533 _set_indentation(startindent);
8535 _maybe_skip_whitespace_tokens();
// Double-quoted key scalar.
8538 else if(first ==
'"')
8540 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8541 sc = _scan_scalar_dquot();
8542 if(!_maybe_scan_following_colon())
8544 _c4err(
"cannot read a VAL to a map");
8548 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8550 _handle_annotations_before_start_mapblck(startline);
8551 m_evt_handler->_push();
8552 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8553 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8554 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8555 _set_indentation(startindent);
8557 _maybe_skip_whitespace_tokens();
// Block scalars ('|' and '>') are rejected as keys here; the same error
// message is emitted for both indicators.
8560 else if(first ==
'|')
8562 _c4err(
"block literal keys must be enclosed in '?'");
8564 else if(first ==
'>')
8566 _c4err(
"block literal keys must be enclosed in '?'");
// Plain key scalar.
8568 else if(_scan_scalar_plain_unk(&sc))
8570 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8571 if(!_maybe_scan_following_colon())
8573 _c4err(
"cannot read a VAL to a map");
8577 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8579 _handle_annotations_before_start_mapblck(startline);
8580 m_evt_handler->_push();
8581 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8582 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8583 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8584 _set_indentation(startindent);
8586 _maybe_skip_whitespace_tokens();
// ---- usty[UNK] ----
// Reached when neither of the container states above applies. Unlike the
// branches above, containers are begun through the begin_*() events instead
// of _push(), and scalars without a following colon are accepted as values.
8597 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8598 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow seq.
8601 _c4dbgp(
"usty[UNK]: it's a flow seq");
8603 _handle_annotations_before_blck_val_scalar();
8604 m_evt_handler->begin_seq_val_flow();
8606 _set_indentation(startindent);
8607 _line_progressed(1);
8608 _maybe_skip_whitespace_tokens();
// Block seq.
8610 else if(first ==
'-' && _is_blck_token(rem))
8612 _c4dbgp(
"usty[UNK]: it's a block seq");
8614 _handle_annotations_before_blck_val_scalar();
8615 m_evt_handler->begin_seq_val_block();
8617 _set_indentation(startindent);
8618 _line_progressed(1);
8619 _maybe_skip_whitespace_tokens();
// Flow map.
8621 else if(first ==
'{')
8623 _c4dbgp(
"usty[UNK]: it's a flow map");
8625 _handle_annotations_before_blck_val_scalar();
8626 m_evt_handler->begin_map_val_flow();
8628 _set_indentation(startindent);
8629 _line_progressed(1);
8630 _maybe_skip_whitespace_tokens();
// Block map whose first key is a complex key ('?' block token).
8632 else if(first ==
'?' && _is_blck_token(rem))
8634 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8636 _handle_annotations_before_blck_val_scalar();
8637 m_evt_handler->begin_map_val_block();
8639 _save_indentation();
8640 _line_progressed(1);
8641 _maybe_skip_whitespace_tokens();
// Block map starting with ':' (block token): the key is an empty plain scalar.
8643 else if(first ==
':' && _is_blck_token(rem))
8645 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8647 _handle_annotations_before_blck_val_scalar();
8648 m_evt_handler->begin_map_val_block();
8649 m_evt_handler->set_key_scalar_plain_empty();
8651 _save_indentation();
8652 _line_progressed(1);
8653 _maybe_skip_whitespace_tokens();
// Anchor: store it as a pending anchor annotation (column and line), then
// set the indentation to the current column.
8655 else if(first ==
'&')
8657 csubstr anchor = _scan_anchor();
8658 _c4dbgpf(
"usty[UNK]: anchor! {}", _prs(anchor));
8659 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8660 const size_t line = m_evt_handler->m_curr->pos.line;
8661 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8662 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: without a following colon it is a value reference; with one, a
// new block map is begun and the reference becomes its first key.
8664 else if(first ==
'*')
8666 csubstr ref = _scan_ref_map();
8667 _c4dbgpf(
"usty[UNK]: ref! {}", _prs(ref));
8668 if(!_maybe_scan_following_colon())
8670 _c4dbgp(
"usty[UNK]: set val ref");
8671 _handle_valref(ref);
8675 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8676 const size_t startline = m_evt_handler->m_curr->pos.line;
8678 _handle_annotations_before_start_mapblck(startline);
8679 m_evt_handler->begin_map_val_block();
8680 _handle_keyref(ref);
8681 _maybe_skip_whitespace_tokens();
8682 _set_indentation(startindent);
// Tag: store it as a pending tag annotation (column and line).
8686 else if(first ==
'!')
8688 csubstr tag = _scan_tag();
8689 _c4dbgpf(
"usty[UNK]: val tag! {}", _prs(tag));
8692 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8693 const size_t line = m_evt_handler->m_curr->pos.line;
8694 _add_annotation(&m_pending_tags, tag, indentation, line);
// Remaining candidates are scalars. No scalar may already be stored (SSCL).
// For quoted and plain scalars: when no colon follows, the scalar is set as
// the value; otherwise a block map is begun and the scalar becomes its key.
8698 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8699 startindent = m_evt_handler->m_curr->line_contents.indentation;
8700 const size_t startline = m_evt_handler->m_curr->pos.line;
8703 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar.
8706 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8707 sc = _scan_scalar_squot();
8708 if(!_maybe_scan_following_colon())
8710 _c4dbgp(
"usty[UNK]: set as val");
8711 _handle_annotations_before_blck_val_scalar();
8712 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8713 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8718 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8720 _handle_annotations_before_start_mapblck(startline);
8721 m_evt_handler->begin_map_val_block();
8722 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8723 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8724 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8725 _set_indentation(startindent);
8727 _maybe_skip_whitespace_tokens();
// Double-quoted scalar.
8730 else if(first ==
'"')
8732 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8733 sc = _scan_scalar_dquot();
8734 if(!_maybe_scan_following_colon())
8736 _c4dbgp(
"usty[UNK]: set as val");
8737 _handle_annotations_before_blck_val_scalar();
8738 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8739 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8744 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8746 _handle_annotations_before_start_mapblck(startline);
8747 m_evt_handler->begin_map_val_block();
8748 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8749 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8750 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8751 _set_indentation(startindent);
8753 _maybe_skip_whitespace_tokens();
// Block-literal scalar ('|'): always set as a value.
8756 else if(first ==
'|')
8758 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8760 _scan_block(&sb, startindent);
8761 _c4dbgp(
"usty[UNK]: set as val");
8762 _handle_annotations_before_blck_val_scalar();
8763 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8764 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar ('>'): always set as a value.
8767 else if(first ==
'>')
8769 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8771 _scan_block(&sb, startindent);
8772 _c4dbgp(
"usty[UNK]: set as val");
8773 _handle_annotations_before_blck_val_scalar();
8774 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8775 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar.
8778 else if(_scan_scalar_plain_unk(&sc))
8780 _c4dbgp(
"usty[UNK]: got a plain scalar");
8781 if(!_maybe_scan_following_colon())
8783 _c4dbgp(
"usty[UNK]: set as val");
8784 _handle_annotations_before_blck_val_scalar();
8785 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8786 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8791 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8793 _handle_annotations_before_start_mapblck(startline);
8794 m_evt_handler->begin_map_val_block();
8795 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8796 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8797 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8798 _set_indentation(startindent);
8800 _maybe_skip_whitespace_tokens();
/** Event-emitting parse driver.
 *
 * NOTE(review): the signature line is missing from this listing. Since only
 * RMAP and RUNK dispatch branches are visible here, this is presumably
 * parse_json_in_place_ev(csubstr filename, substr src) -- confirm against the
 * real source.
 *
 * Visible flow: check that the handler stack has at least one entry, notify
 * the handler that parsing starts and that the stream begins, then iterate
 * over the file and, within it, over each line, dispatching on the parser
 * state flags. Finally notify the handler that parsing finished.
 */
8814 template<
class EventHandler>
// The event handler must have at least one state on its stack.
8817 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8819 m_evt_handler->start_parse(filename.str, src);
8820 m_evt_handler->begin_stream();
// Outer loop: until the file is finished. Inner loop: until the line is finished.
8822 while( ! _finished_file())
8825 while( ! _finished_line())
// While a line is being handled, its remainder must not be empty.
8828 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// State dispatch. NOTE(review): the leading `if` of this chain and the
// handler calls of each branch are not visible in this listing.
8833 else if(has_any(
RMAP))
8837 else if(has_any(
RUNK))
// No known state flag matched.
8843 _c4err(
"internal error");
// The file may already be finished once the inner loop ends.
8846 if(_finished_file())
8851 m_evt_handler->finish_parse();
/** Event-emitting parse driver.
 *
 * NOTE(review): the signature line is missing from this listing. Since the
 * dispatch below covers RBLCK, RUNK and USTY and calls the block handlers,
 * this is presumably parse_in_place_ev(csubstr filename, substr src) --
 * confirm against the real source.
 *
 * Visible flow: check that the handler stack has at least one entry, notify
 * the handler that parsing starts and that the stream begins, then iterate
 * over the file and, within it, over each line, dispatching on the parser
 * state flags. Finally notify the handler that parsing finished.
 */
8857 template<
class EventHandler>
// The event handler must have at least one state on its stack.
8860 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8862 m_evt_handler->start_parse(filename.str, src);
8863 m_evt_handler->begin_stream();
// Outer loop: until the file is finished. Inner loop: until the line is finished.
8865 while( ! _finished_file())
8868 while( ! _finished_line())
// While a line is being handled, its remainder must not be empty.
8871 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// NOTE(review): the first branch of the dispatch chain is not visible here
// (presumably the flow-mode branch -- confirm). Within it, a container that
// is not a seq must be a map.
8882 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// Block mode: handled as a block seq, or else (after asserting RMAP) as a block map.
8891 else if(has_any(
RBLCK))
8895 _handle_seq_block();
8899 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8900 _handle_map_block();
// Unknown state, then unknown-style state. The handler calls of these two
// branches are not visible in this listing.
8903 else if(has_any(
RUNK))
8907 else if(has_any(
USTY))
// No known state flag matched.
8913 _c4err(
"internal error");
// The file may already be finished once the inner loop ends.
8916 if(_finished_file())
8921 m_evt_handler->finish_parse();
// End-of-file cleanup: drop the file-local debug helper macro defined near
// the top of this header, then restore the compiler warning state saved by
// the matching C4_SUPPRESS_WARNING_*_PUSH macros at the top of the file.
8930 #undef _c4dbgnextline
8934 C4_SUPPRESS_WARNING_MSVC_POP
8935 C4_SUPPRESS_WARNING_GCC_CLANG_POP
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
bool is_valid_tag_handle(csubstr handle)
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _ryml_relocate(s)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark