1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
7 #ifndef _C4_CHARCONV_HPP_
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
16 #ifndef _C4_YML_TAG_HPP_
19 #ifndef _C4_YML_NODE_TYPE_HPP_
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
29 #include <c4/dump.hpp>
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
37 #define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
68 #define _c4dbgnextline() \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
78 # pragma warning(push)
79 # pragma warning(disable: 4296)
80 # pragma warning(disable: 4702)
81 #elif defined(__clang__)
82 # pragma clang diagnostic push
83 # pragma clang diagnostic ignored "-Wtype-limits"
84 # pragma clang diagnostic ignored "-Wformat-nonliteral"
85 # pragma clang diagnostic ignored "-Wold-style-cast"
86 #elif defined(__GNUC__)
87 # pragma GCC diagnostic push
88 # pragma GCC diagnostic ignored "-Wtype-limits"
89 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
90 # pragma GCC diagnostic ignored "-Wold-style-cast"
92 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Set `subject.len` to `pos`, unless `pos` equals `npos`, in which case the
// length keeps its current value. Only the length field is written here.
103 C4_HOT C4_ALWAYS_INLINE
void _set_first(substr &C4_RESTRICT subject,
size_t pos) noexcept
106 subject.len = pos !=
npos ? pos : subject.len;
// Overload for a read-only csubstr: set `subject.len` to `pos`, unless `pos`
// equals `npos`, in which case the length keeps its current value.
108 C4_HOT C4_ALWAYS_INLINE
void _set_first(csubstr &C4_RESTRICT subject,
size_t pos) noexcept
111 subject.len = pos !=
npos ? pos : subject.len;
// Strict variant for a mutable substr: asserts through _RYML_ASSERT_BASIC
// that `pos` is not `npos`.
// NOTE(review): the statement that updates `subject` is not part of this
// excerpt -- presumably it assigns `pos` to `subject.len`; confirm.
113 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(substr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
116 _RYML_ASSERT_BASIC(pos !=
npos);
// Strict variant for a read-only csubstr: asserts through _RYML_ASSERT_BASIC
// that `pos` is not `npos`.
// NOTE(review): the statement that updates `subject` is not part of this
// excerpt -- presumably it assigns `pos` to `subject.len`; confirm.
119 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(csubstr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
122 _RYML_ASSERT_BASIC(pos !=
npos);
// Predicate on a candidate block token.
// Preconditions (asserted): `s` is not empty and its first character is one
// of '-', ':' or '?'.
// NOTE(review): the return expression is not visible in this excerpt, so the
// exact rule for what follows the first character must be confirmed.
126 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s)
RYML_NOEXCEPT
128 _RYML_ASSERT_BASIC(s.len > 0);
129 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Returns true when `s` starts with '-' and either consists of that single
// character or has a space immediately after it. When RYML_WITH_TAB_TOKENS
// is defined, a tab after the '-' is accepted as well. An empty `s` yields
// false because of the leading length check.
133 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_seq_token_maybe(csubstr
const& C4_RESTRICT s) noexcept
135 return ((s.len >= 1) && (s.str[0] ==
'-') && ((s.len == 1) || ((s.str[1] ==
' ')
_RYML_WITH_TAB_TOKENS( || (s.str[1] ==
'\t')))));
140 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
141 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
142 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
143 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
149 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
150 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
151 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
152 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether `s` is a document marker token.
// Two return paths are visible: one requires characters 1 and 2 to both be
// '-', the other requires them to both be '.'.
// NOTE(review): the dispatch on the first character, any length check, and
// the trailing part of each return expression are not visible here.
156 inline bool _is_doc_token(csubstr s) noexcept
164 return (s.str[1] ==
'-' && s.str[2] ==
'-')
168 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Returns the length of the JSON literal that `s` starts with -- 5 for
// "false", 4 for "true", 4 for "null" -- or 0 when the tested literal does
// not match. Asserts that `s` is not empty.
// NOTE(review): how one of the three returns is selected (presumably a
// dispatch on the first character) is not visible in this excerpt.
175 inline size_t _begins_with_special_json_scalar(csubstr s)
RYML_NOEXCEPT
177 _RYML_ASSERT_BASIC(s.len);
181 return s.begins_with(
"false") ? 5u : 0u;
183 return s.begins_with(
"true") ? 4u : 0u;
185 return s.begins_with(
"null") ? 4u : 0u;
// Tells whether `nl` and `following` form a two-character newline, i.e. the
// pair "\n\r" or "\r\n". The boolean result is converted to the size_t
// return type, so the function yields 1 for a combined newline and 0
// otherwise.
193 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
195 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances `rem` past its first line terminator.
// Visible steps: locate the first '\r' or '\n', remember that character,
// drop everything up to and including it (right_of), then test whether the
// next character completes a combined "\r\n" / "\n\r" newline.
// NOTE(review): handling of "no newline found", the body of the final `if`
// and the return statement are not visible in this excerpt.
199 inline substr _from_next_line(substr rem)
201 size_t nlpos = rem.first_of(
"\r\n");
204 const char nl = rem[nlpos];
205 rem = rem.right_of(nlpos);
208 if(_extend_from_combined_newline(nl, rem.front()))
// Scans `r` forward from index `*i`, which must point at a '\n' (asserted),
// and returns the counter `numnl_following`. `*i` is advanced in place by the
// loop, so the caller observes the new position.
// The loop distinguishes '\n' from the whitespace characters ' ', '\t', '\r'.
// NOTE(review): the branch bodies (the counter increment and the loop exit
// on any other character) are not visible in this excerpt.
216 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
218 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
219 size_t numnl_following = 0;
221 for( ; *i < r.len; ++(*i))
223 if(r.str[*i] ==
'\n')
226 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
231 return numnl_following;
// Indentation-aware overload: scans `r` forward from index `*i`, which must
// point at a '\n' (asserted), advancing `*i` in place, and returns the
// counter `numnl_following`.
// NOTE(review): several branches are elided in this excerpt; what selects
// between the visible loops (presumably whether `indentation` is zero) and
// where the counter is incremented must be confirmed against the full file.
236 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
238 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
239 size_t numnl_following = 0;
243 for( ; *i < r.len; ++(*i))
245 const char c = r.str[*i];
// any character other than space, tab or carriage return
249 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
255 for( ; *i < r.len; ++(*i))
// `stop` is the position `indentation` characters past the current `*i`
262 size_t stop = *i + indentation;
263 for( ; *i < r.len; ++(*i))
// here only space and carriage return are skipped; tab is not in this test
266 if(c !=
' ' && c !=
'\r')
268 _RYML_ASSERT_BASIC(*i < stop);
273 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
279 return numnl_following;
289 template<
class EventHandler>
// Constructor fragment (the signature line is not part of this excerpt).
// The visible initializers store the event-handler pointer, start with empty
// pending anchors, cleared directive flags, `m_prev_val_end` at `npos`, and
// an empty newline-offset buffer (value-initialized pointer, size 0,
// capacity 0). The body then verifies `evt_handler` with _RYML_CHECK_BASIC.
296 template<
class EventHandler>
299 , m_evt_handler(evt_handler)
300 , m_pending_anchors()
302 , m_has_directives_yaml(false)
303 , m_has_directives(false)
306 , m_prev_val_end(
npos)
308 , m_newline_offsets()
309 , m_newline_offsets_size(0)
310 , m_newline_offsets_capacity(0)
312 _RYML_CHECK_BASIC(evt_handler);
// Constructor-from-`that` fragment (signature not visible; presumably the
// move constructor -- confirm). The newline-offset pointer, size and capacity
// are taken directly from `that`; no allocation or copy of the buffer is
// visible here.
// NOTE(review): `m_prev_val_end` is initialized to `npos` rather than to
// `that.m_prev_val_end`, whereas the assignment operators in this file copy
// it from `that`; confirm this difference is intended.
315 template<
class EventHandler>
317 : m_options(that.m_options)
318 , m_evt_handler(that.m_evt_handler)
319 , m_pending_anchors(that.m_pending_anchors)
320 , m_pending_tags(that.m_pending_tags)
321 , m_has_directives_yaml(that.m_has_directives_yaml)
322 , m_has_directives(that.m_has_directives)
323 , m_doc_empty(that.m_doc_empty)
325 , m_prev_val_end(
npos)
327 , m_newline_offsets(that.m_newline_offsets)
328 , m_newline_offsets_size(that.m_newline_offsets_size)
329 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
// Constructor-from-`that` fragment (signature not visible; presumably the
// copy constructor -- confirm). Scalar members are copied from `that`; the
// newline-offset buffer starts empty and is duplicated in the body.
// NOTE(review): `m_prev_val_end` is initialized to `npos` rather than to
// `that.m_prev_val_end`, whereas the assignment operators in this file copy
// it from `that`; confirm this difference is intended.
334 template<
class EventHandler>
336 : m_options(that.m_options)
337 , m_evt_handler(that.m_evt_handler)
338 , m_pending_anchors(that.m_pending_anchors)
339 , m_pending_tags(that.m_pending_tags)
340 , m_has_directives_yaml(that.m_has_directives_yaml)
341 , m_has_directives(that.m_has_directives)
342 , m_doc_empty(that.m_doc_empty)
344 , m_prev_val_end(
npos)
346 , m_newline_offsets()
347 , m_newline_offsets_size()
348 , m_newline_offsets_capacity()
// Only when `that` owns a buffer: size our own buffer to the same capacity,
// verify the resulting capacity, then copy the used entries and their count.
350 if(that.m_newline_offsets_capacity)
352 _resize_locations(that.m_newline_offsets_capacity);
353 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
354 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
355 m_newline_offsets_size = that.m_newline_offsets_size;
// Assignment-from-`that` fragment (signature not visible; presumably the move
// assignment operator -- confirm). Every visible member is assigned from
// `that`, including the newline-offset pointer, size and capacity, which are
// taken over directly rather than copied element by element.
// NOTE(review): release of any buffer previously held by `*this`, resetting
// of `that`, and the return statement are not visible in this excerpt.
359 template<
class EventHandler>
363 m_options = (that.m_options);
364 m_evt_handler = that.m_evt_handler;
365 m_pending_anchors = that.m_pending_anchors;
366 m_pending_tags = that.m_pending_tags;
367 m_has_directives_yaml = that.m_has_directives_yaml;
368 m_has_directives = that.m_has_directives;
369 m_doc_empty = that.m_doc_empty;
370 m_prev_colon = that.m_prev_colon;
371 m_prev_val_end = that.m_prev_val_end;
372 m_encoding = that.m_encoding;
373 m_newline_offsets = (that.m_newline_offsets);
374 m_newline_offsets_size = (that.m_newline_offsets_size);
375 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// Assignment-from-`that` fragment (signature not visible; presumably the copy
// assignment operator -- confirm). Scalar members are assigned from `that`
// and the newline-offset buffer is duplicated rather than shared.
380 template<
class EventHandler>
386 m_options = (that.m_options);
387 m_evt_handler = that.m_evt_handler;
388 m_pending_anchors = that.m_pending_anchors;
389 m_pending_tags = that.m_pending_tags;
390 m_has_directives_yaml = that.m_has_directives_yaml;
391 m_has_directives = that.m_has_directives;
392 m_doc_empty = that.m_doc_empty;
393 m_prev_colon = that.m_prev_colon;
394 m_prev_val_end = that.m_prev_val_end;
395 m_encoding = that.m_encoding;
// Grow our buffer only when `that` has more capacity than we do; an existing
// larger buffer is reused as is.
396 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
397 _resize_locations(that.m_newline_offsets_capacity);
398 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
399 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): unlike the copying constructor, this memcpy is not guarded by
// a capacity test; when both engines have no buffer it is reached with null
// pointers and a zero byte count -- confirm that is acceptable.
400 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
401 m_newline_offsets_size = that.m_newline_offsets_size;
406 template<
class EventHandler>
411 m_pending_anchors = {};
413 m_has_directives_yaml =
false;
414 m_has_directives =
false;
417 m_prev_val_end =
npos;
419 m_newline_offsets = {};
420 m_newline_offsets_size = {};
421 m_newline_offsets_capacity = {};
// Releases the newline-offset buffer, if one is held: the memory is returned
// through _RYML_CB_FREE using the event handler's callbacks, then the
// pointer is nulled and size and capacity are reset to zero.
// NOTE(review): lines after this point in the function are not visible in
// this excerpt; other members may be released there as well.
424 template<
class EventHandler>
425 void ParseEngine<EventHandler>::_free()
427 if(m_newline_offsets)
429 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
430 m_newline_offsets =
nullptr;
431 m_newline_offsets_size = 0u;
432 m_newline_offsets_capacity = 0u;
// Resets per-parse state. Visible effects: pending anchors are cleared, both
// directive flags are set to false, `m_prev_val_end` goes back to `npos`,
// and, when the options request location tracking, _prepare_locations() is
// called.
// NOTE(review): several statements of this function are elided in this
// excerpt, so this list is not exhaustive.
439 template<
class EventHandler>
440 void ParseEngine<EventHandler>::_reset()
442 m_pending_anchors = {};
444 m_has_directives_yaml =
false;
445 m_has_directives =
false;
448 m_prev_val_end =
npos;
452 if(m_options.locations())
454 _prepare_locations();
// Re-points strings that live inside the previous arena so that they refer
// to the same offset inside `next_arena`.
// The helper macro _ryml_relocate(s) rebases `s.str` only when it lies in
// the closed range [pb, pe], i.e. from the first byte of the old arena up to
// and including its one-past-the-end address.
// The visible loops walk the parser state stack, the pending tags, the
// pending anchors, the handler's tag directives and its tag cache entries.
// NOTE(review): the loop bodies and the use of `other` are not visible in
// this excerpt; which fields of each entry are relocated must be confirmed.
461 template<
class EventHandler>
462 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
464 _c4dbgp(
"relocate to new arena");
// bounds of the arena being replaced
465 const char *pb = prev_arena.str;
466 const char *pe = prev_arena.str + prev_arena.len;
467 #define _ryml_relocate(s) \
468 if((s).str >= pb && (s).str <= pe) \
470 (s).str = next_arena.str + ((s).str - pb); \
472 for(ParserState &st : m_evt_handler->m_stack)
478 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
483 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
489 TagDirectives &tds = m_evt_handler->tag_directives();
490 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
497 TagCache &tch = m_evt_handler->tag_cache();
498 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
// the helper macro is local to this function
508 #undef _ryml_relocate
// Requests `len` characters from the event handler's arena. The arena is
// sampled before and after the allocation; if its base pointer changed,
// _relocate_arena() is called with the old and new arenas and `other`.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the freshly allocated `out` is returned -- confirm.
512 template<
class EventHandler>
513 substr ParseEngine<EventHandler>::_alloc_arena(
size_t len, substr *other)
515 csubstr prev = m_evt_handler->arena();
516 substr out = m_evt_handler->alloc_arena(len);
517 substr curr = m_evt_handler->arena();
518 if(curr.str != prev.str)
519 _relocate_arena(prev, curr, other);
// Emits a description of the current parse position through `dumpfn`:
//   - an optional "<name>:" prefix taken from the current position's name,
//   - "<line>:<col>: " followed by the current line's contents (escaped, cut
//     to 80 characters with a trailing "..." when longer) and its size,
//   - a marker line made of spaces, one '^' and a run of '~', followed by the
//     1-based column range of the remaining part of the line,
//   - a "top state:" line with the current parser flags.
// NOTE(review): the conditions guarding the name prefix and the marker line,
// and the definitions of `firstcol_adj`, `len` and `flagbuf_`, are not
// visible in this excerpt.
528 template<
class EventHandler>
529 template<
class DumpFn>
530 C4_NO_INLINE
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
532 ParserState
const *
const C4_RESTRICT st = m_evt_handler->m_curr;
533 LineContents
const& C4_RESTRICT lc = st->line_contents;
534 csubstr contents = lc.full.first(lc.num_cols);
// `offs` accumulates the width of the prefix printed before the line
// contents, so the marker line can be padded to match; presumably to_chars()
// on an empty buffer yields the number of characters needed -- confirm.
538 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
539 csubstr m_file = m_evt_handler->m_curr->pos.name;
542 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
543 offs += m_file.len + 1;
545 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// show at most 80 characters of the line, with an ellipsis when cut
546 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
547 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
548 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
// 0-based column span of the not-yet-consumed remainder within the full line
550 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
551 size_t lastcol = firstcol + lc.rem.len;
554 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
555 std::forward<DumpFn>(dumpfn)(
" ");
556 std::forward<DumpFn>(dumpfn)(
"^");
557 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
558 std::forward<DumpFn>(dumpfn)(
"~");
559 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
563 std::forward<DumpFn>(dumpfn)(
"\n");
568 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Debug helper: prints one line per entry of the handler's state stack with
// its level, reference indentation, node id and flags. `buf` is the scratch
// buffer handed to detail::_parser_flags_to_str() for the flags text.
572 template<
class EventHandler>
573 void ParseEngine<EventHandler>::_print_state_stack(substr buf)
const
577 for(ParserState
const& s : m_evt_handler->m_stack)
578 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
// Convenience overload: forwards to _print_state_stack(substr) with a local
// buffer `buf`.
// NOTE(review): the declaration of `buf` is not visible in this excerpt.
582 template<
class EventHandler>
583 void ParseEngine<EventHandler>::_print_state_stack()
const
586 _print_state_stack(buf);
// Reports a parse error at an explicitly given YAML location.
// First tells the event handler to cancel the parse, then calls err_parse()
// with the handler's callbacks, an ErrorDataParse built from `cpploc` and
// `ymlloc`, and the format string plus its arguments.
// Declared C4_NORETURN, so control is not expected to come back to the
// caller.
593 template<
class EventHandler>
594 template<
class ...Args>
595 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc, Location
const& ymlloc,
const char* fmt, Args
const& ...args)
const
597 m_evt_handler->cancel_parse();
598 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
// Reports a parse error at the parser's current position.
// Same as the overload taking an explicit YAML location, except that the
// location placed in ErrorDataParse is `m_evt_handler->m_curr->pos`.
// Declared C4_NORETURN, so control is not expected to come back to the
// caller.
601 template<
class EventHandler>
602 template<
class ...Args>
603 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const
605 m_evt_handler->cancel_parse();
606 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
// Debug output: prints the formatted message with _dbg_printf(), then emits
// the current-position description by passing `_dbg_dumper` to _fmt_msg().
// NOTE(review): lines between and around the two calls are elided in this
// excerpt (presumably separators or conditional compilation) -- confirm.
612 template<
class EventHandler>
613 template<
class ...Args>
614 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& ...args)
const
618 _dbg_printf(fmt, args...);
620 _fmt_msg(_dbg_dumper);
// Computes whether the current offset has reached or passed the end of the
// source buffer (`pos.offset >= _buf().len`).
// NOTE(review): the guard around the debug print and the return statement
// are not visible in this excerpt; presumably `ret` is returned.
627 template<
class EventHandler>
628 bool ParseEngine<EventHandler>::_finished_file()
const
630 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
633 _c4dbgp(
"finished file!!!");
// True when nothing is left to consume on the current line, i.e. the `rem`
// part of the current state's line contents is empty.
638 template<
class EventHandler>
639 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
641 return m_evt_handler->m_curr->line_contents.rem.empty();
// Consumes leading whitespace on the remainder of the current line, if any.
// The guard fires when the remainder is non-empty and starts with a space
// (or with a tab, when RYML_WITH_TAB_TOKENS is defined); the line is then
// advanced by `pos` characters via _line_progressed().
// NOTE(review): the computation of `pos` is elided in this excerpt; the
// visible assignment looks like the fallback used when the whole remainder
// is whitespace -- confirm.
647 template<
class EventHandler>
648 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
650 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
654 pos = m_evt_handler->m_curr->line_contents.rem.len;
655 _c4dbgpf(
"skip {} whitespace characters", pos);
656 _line_progressed(pos);
// If the remainder of the current line starts with character `c`, consumes
// the leading run of `c`: `pos` is the index of the first character that
// differs from `c`, and the line is advanced by that many characters.
// NOTE(review): the condition guarding the `pos = rem.len` assignment is
// elided; presumably it applies when first_not_of() finds no other
// character (the remainder is all `c`) -- confirm.
660 template<
class EventHandler>
661 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
663 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
665 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
667 pos = m_evt_handler->m_curr->line_contents.rem.len;
668 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
669 _line_progressed(pos);
// Consumes the leading run of characters belonging to the set `chars`.
// Precondition (asserted): the remainder of the current line begins with one
// of those characters. `pos` is the index of the first character outside the
// set, and the line is advanced by that many characters.
// NOTE(review): the template parameter line declaring `N` and the condition
// guarding the `pos = rem.len` fallback are elided in this excerpt.
673 template<
class EventHandler>
675 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
677 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
678 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
680 pos = m_evt_handler->m_curr->line_contents.rem.len;
681 _c4dbgpf(
"skip {} characters", pos);
682 _line_progressed(pos);
// Consumes a comment that starts at the current position and runs to the end
// of the line.
// Preconditions (asserted): the remainder starts with '#', the remainder is
// a sub-range of the full line, the current column is at least 1, and the
// 0-based column equals the remainder's offset within the full line.
// A comment that does not start the line must be preceded by a space or a
// tab; otherwise a parse error is raised.
685 template<
class EventHandler>
686 void ParseEngine<EventHandler>::_skip_comment()
688 LineContents
const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
// pos.col is 1-based; `col` is the matching 0-based index into lc.full
689 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
691 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
692 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
// only check the preceding character when the '#' is not at the line start
695 if(lc.rem.str != lc.full.str)
697 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
698 const char prev = lc.full.str[col - 1u];
699 if(C4_UNLIKELY(prev !=
' ' && prev !=
'\t'))
700 _c4err(
"comment not preceded by whitespace");
702 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
// the comment extends to the end of the line: consume all of the remainder
703 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs on the remainder of the current line;
// when the first other character is '#', advances the line by `pos` so that
// the position sits on the '#'.
// NOTE(review): handling of "only whitespace left" and what happens after
// the advance (presumably the comment itself is skipped) are elided in this
// excerpt -- confirm.
706 template<
class EventHandler>
707 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
709 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
712 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
714 _line_progressed(pos);
// Looks past leading spaces and tabs on the remainder of the current line;
// when the first other character is '#', advances the line by `pos` so that
// the position sits on the '#'. A second visible path consumes the whole
// remainder of the line.
// NOTE(review): the conditions selecting each path are elided in this
// excerpt; presumably the whole-remainder path handles a line that is only
// whitespace -- confirm.
720 template<
class EventHandler>
721 void ParseEngine<EventHandler>::_maybe_skip_comment()
723 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
726 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
728 _line_progressed(pos);
734 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs on the remainder of the current line
// for a ':'. When one is found, the character after it (if any) is read
// into `next`; the line is then advanced by `pos`. A second visible path
// consumes the whole remainder of the line.
// NOTE(review): the test applied to `next`, the conditions selecting each
// path and the returned values are elided in this excerpt.
738 template<
class EventHandler>
739 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
741 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
744 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
// step past the ':' and peek at the next character when there is one
748 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
750 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
756 _line_progressed(pos);
762 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans an anchor at the current position.
// Precondition (asserted): the remainder starts with '&'.
// The anchor name is the text after the '&' up to the first space, ',', ']',
// '}' or tab. The line is advanced past the '&' and the name, and any spaces
// that follow are skipped.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `anchor` (without the '&') is returned -- confirm.
770 template<
class EventHandler>
771 csubstr ParseEngine<EventHandler>::_scan_anchor()
773 csubstr s = m_evt_handler->m_curr->line_contents.rem;
774 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
775 csubstr anchor = s.range(1, s.first_of(
" ,]}\t"));
776 _line_progressed(1u + anchor.len);
777 _maybe_skipchars(
' ');
// Scans an alias/reference token inside a sequence.
// Precondition (asserted): the remainder starts with '*'.
// The token is cut at the first space, ',', ']' or tab (or spans the whole
// remainder when none is found), and the line is advanced by its length,
// which includes the leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
781 template<
class EventHandler>
782 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
784 csubstr s = m_evt_handler->m_curr->line_contents.rem;
785 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
786 _set_first(s, s.first_of(
" ,]\t"));
787 _line_progressed(s.len);
// Scans an alias/reference token inside a map.
// Precondition (asserted): the remainder starts with '*'.
// The token is cut at the first space, ',', '}' or tab (or spans the whole
// remainder when none is found), and the line is advanced by its length,
// which includes the leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
791 template<
class EventHandler>
792 csubstr ParseEngine<EventHandler>::_scan_ref_map()
794 csubstr s = m_evt_handler->m_curr->line_contents.rem;
795 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
796 _set_first(s, s.first_of(
" ,}\t"));
797 _line_progressed(s.len);
// Scans a tag at the current position.
// Precondition (asserted): the remainder starts with '!'.
// Two forms are handled:
//   - a tag that does not start with "!<" ends at the first space, ',', ']',
//     '}' or tab; a '[' or '{' inside it is treated as an unlikely special
//     case;
//   - a "!<...>" tag ends at, and includes, the closing '>'; a missing '>' is
//     treated as an unlikely special case.
// In both forms the line is advanced by the tag length; trailing whitespace
// tokens are skipped afterwards.
// NOTE(review): the bodies of the special cases (presumably errors), the
// body of the tag-resolution branch and the return statement are elided.
801 template<
class EventHandler>
802 csubstr ParseEngine<EventHandler>::_scan_tag()
804 csubstr t = m_evt_handler->m_curr->line_contents.rem;
805 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
806 if(!t.begins_with(
"!<"))
808 _c4dbgp(
"begins with '!'");
809 _set_first(t, t.first_of(
" ,]}\t"));
810 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
812 _line_progressed(t.len);
// resolution applies to every tag, or only to custom tags, per the options
813 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
818 _c4dbgp(
"begins with '!<'");
819 size_t pos = t.find(
'>');
820 if(C4_UNLIKELY(pos ==
npos))
// keep the closing '>' as part of the tag
822 _set_first_strict(t, pos+1);
823 _line_progressed(t.len);
826 _maybe_skip_whitespace_tokens();
// Overload of the tag scanner that also takes an `orig` out-parameter.
// Precondition (asserted): the remainder starts with '!'.
// Two forms are handled:
//   - a tag that does not start with "!<" ends at the first space, ',' or
//     tab (note that ']' and '}' are not terminators in this overload); a
//     '[' or '{' inside it is treated as an unlikely special case;
//   - a "!<...>" tag ends at, and includes, the closing '>'; a missing '>' is
//     treated as an unlikely special case.
// In both forms the line is advanced by the tag length; trailing whitespace
// tokens are skipped afterwards.
// NOTE(review): how `orig` is written, the bodies of the special cases, the
// tag-resolution branch and the return statement are elided in this excerpt.
830 template<
class EventHandler>
831 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
833 csubstr t = m_evt_handler->m_curr->line_contents.rem;
834 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
835 if(!t.begins_with(
"!<"))
837 _c4dbgp(
"begins with '!'");
838 _set_first(t, t.first_of(
" ,\t"));
839 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
841 _line_progressed(t.len);
// resolution applies to every tag, or only to custom tags, per the options
843 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
848 _c4dbgp(
"begins with '!<'");
849 size_t pos = t.find(
'>');
850 if(C4_UNLIKELY(pos ==
npos))
// keep the closing '>' as part of the tag
852 _set_first_strict(t, pos+1);
853 _line_progressed(t.len);
857 _maybe_skip_whitespace_tokens();
// Decides whether a candidate starting with ':' or '-' can begin a plain
// scalar in flow context.
// Preconditions (asserted): `s` is not empty, begins with ':' or '-', and
// contains no '\n' or '\r'.
// Two outcomes are visible: a debug trace for "not a scalar" and a parse
// error for an invalid ":<char>" token.
// NOTE(review): the conditions leading to each outcome and the returned
// values are elided in this excerpt.
864 template<
class EventHandler>
865 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
868 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
869 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
870 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
882 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
892 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
// Decides whether a candidate starting with '?' can begin a plain scalar in
// flow context.
// Preconditions (asserted): `s` is not empty, its first character is '?',
// and it contains no '\n' or '\r'.
// Two outcomes are visible: a debug trace for "not a scalar" and a parse
// error for an invalid "?<char>" token.
// NOTE(review): the conditions leading to each outcome and the returned
// values are elided in this excerpt.
907 template<
class EventHandler>
908 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
912 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
913 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
920 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
926 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Entry point for deciding whether `s` can begin a plain scalar in flow
// context. Precondition (asserted): `s` is not empty.
// Visible paths: a "not a scalar" trace for some first characters, delegation
// to _is_valid_start_scalar_plain_flow_check_block_token() for "suspicious"
// tokens, and delegation to _is_valid_start_scalar_plain_flow_check_qmrk()
// for the question-mark case.
// NOTE(review): the dispatch on the first character (which characters reach
// which path) and the remaining return statements are elided in this excerpt.
940 template<
class EventHandler>
941 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
943 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
959 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
964 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
965 return _is_valid_start_scalar_plain_flow_check_block_token(s);
967 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
968 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
976 template<
class EventHandler>
977 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s,
size_t offs)
979 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs),
true));
982 _c4dbgp(
"newl[PLAIN]: buffer continues");
983 csubstr next_line = s.sub(offs + 1);
984 size_t next_line_indentation = next_line.first_not_of(
' ');
985 if(next_line_indentation !=
npos)
987 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
988 next_line = next_line.first(next_line.first_of(
"\n\r"));
989 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
990 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
991 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
993 _c4dbgp(
"newl[PLAIN]: larger indentation");
994 next_line = next_line.sub(next_line_indentation);
996 else if(C4_UNLIKELY(next_line.len && next_line.triml(
' ').len))
998 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
999 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1002 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
1003 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1006 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1009 next_line = next_line.triml(
" \t");
1010 if(next_line.begins_with_any(
",]#:"))
1012 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[0]);
1018 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1024 template<
class EventHandler>
1025 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1027 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1028 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1029 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1030 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1031 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1033 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1034 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1036 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1039 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1042 _c4dbgp(
"scanning seqflow scalar...");
1044 bool needs_filter =
false;
1047 for( ; offs < s.len; ++offs, ++col)
1049 const char c = s.str[offs];
1054 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1055 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1058 _c4dbgpf(
"found '\\n' at col={}", col);
1059 if(!_scan_scalar_plain_handle_newline(s, offs))
1062 needs_filter =
true;
1066 needs_filter =
true;
1069 _c4dbgp(
"found suspicious ':'");
1070 if(s.len > offs + 1)
1072 char next = s.str[offs + 1];
1073 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1076 csubstr after = s.sub(offs + 1).triml(
'\r');
1079 next = after.str[0];
1080 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1084 if(next ==
' ' _RYML_WITH_TAB_TOKENS(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1086 _c4dbgp(
"map starting!");
1091 _c4dbgp(
"':' nothing to see here");
1096 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1097 _line_progressed(col);
1098 _c4err(
"missing termination: '{}'", c);
1103 _c4dbgp(
"found suspicious '#'");
1104 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1105 char prev = s.str[offs - 1];
1108 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1116 _line_progressed(col);
1117 _c4err(
"invalid character: '{}'", c);
1120 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1121 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1123 _c4dbgp(
"at line beginning");
1124 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1136 _line_progressed(col);
1137 _set_first(s, offs);
1139 sc->needs_filter = needs_filter;
1141 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1146 template<
class EventHandler>
1147 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1149 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1156 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1158 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1161 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1164 _c4dbgp(
"scanning mapflow scalar...");
1166 bool needs_filter =
false;
1169 for( ; offs < s.len; ++offs, ++col)
1171 const char c = s.str[offs];
1176 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1180 _c4dbgpf(
"found '\\n' at col={}", col);
1181 if(!_scan_scalar_plain_handle_newline(s, offs))
1184 needs_filter =
true;
1188 needs_filter =
true;
1191 _c4dbgpf(
"found ':'", c);
1195 const char next = s.str[offs+1];
1196 _c4dbgpf(
"next='{}'", c);
1197 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' _RYML_WITH_TAB_TOKENS(|| next ==
'\t'))
1199 _c4dbgpf(
"found terminating character: '{}'", c);
1206 _line_progressed(col);
1207 _c4err(
"invalid character: '{}'", c);
1210 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1219 _line_progressed(col);
1222 sc->needs_filter = needs_filter;
1224 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1226 return sc->scalar.len > 0u;
1229 template<
class EventHandler>
1230 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1232 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1233 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1234 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1235 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1237 substr s = m_evt_handler->m_curr->line_contents.rem;
1238 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1239 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1241 _c4dbgp(
"seq_json: scanning scalar...");
1248 _c4dbgp(
"seq_json: not a scalar.");
1253 const size_t len = _begins_with_special_json_scalar(s);
1256 char c = s.len > len ? s.str[len] :
',';
1257 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1259 sc->scalar = s.first(len);
1260 sc->needs_filter =
false;
1261 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1262 _line_progressed(len);
1274 for( ; i < s.len; ++i)
1276 const char c = s.str[i];
1283 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
1292 if(C4_LIKELY(i > 0))
1294 _line_progressed(i);
1295 sc->scalar = s.first(i);
1296 sc->needs_filter =
false;
1297 _c4dbgpf(
"seq_json: scalar was {}", _prs(sc->scalar,
true));
1303 template<
class EventHandler>
1304 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1307 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1308 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1309 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1312 substr s = m_evt_handler->m_curr->line_contents.rem;
1313 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1314 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1316 _c4dbgp(
"scanning scalar...");
1319 const size_t len = _begins_with_special_json_scalar(s);
1322 char c = s.len > len ? s.str[len] :
',';
1323 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1324 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1326 sc->scalar = s.first(len);
1327 sc->needs_filter =
false;
1328 _c4dbgpf(
"special json scalar: '{}'", _prs(sc->scalar));
1329 _line_progressed(len);
1341 for( ; i < s.len; ++i)
1343 const char c = s.str[i];
1350 _c4dbgpf(
"found terminating character: '{}'", c);
1359 if(C4_LIKELY(i > 0))
1361 _line_progressed(i);
1362 sc->scalar = s.first(i);
1363 sc->needs_filter =
false;
1364 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// True when `s` is a document-begin marker in the current parser state.
// Precondition (asserted): `s` starts with '-'.
// All three conditions must hold: the current line has zero indentation, the
// parser is at the beginning of the line, and _is_doc_begin_token(s) accepts
// the text.
1371 template<
class EventHandler>
1372 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1374 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1375 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
// Returns true when `s` is a document-end token placed at the very start of an
// unindented line: the current line's indentation is 0, the state reports
// at_line_beginning(), and _is_doc_end_token(s) holds.
// Asserts that the first character of `s` is '.'.
1378 template<
class EventHandler>
1379 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1381 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1382 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context and writes it to *sc.
// The scan can continue over following lines: after a line is fully consumed the next
// line is peeked, and the scalar ends when that line is indented less than `indentation`
// or, at indentation 0, is a document begin/end token.
// On completion sc->scalar is the buffer range from the starting offset to the current
// offset with trailing " \n\r\t" trimmed, and sc->needs_filter is the local flag that is
// set to true where `s` is reloaded from the current line's remainder.
// NOTE(review): return statements, jumps and several conditions are not visible in this
// listing; the comments below describe only the visible statements.
1385 template<
class EventHandler>
1386 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1388 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1389 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1390 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
1392 substr s = m_evt_handler->m_curr->line_contents.rem;
1393 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1394 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
// leading-token checks (block token, document begin, document end); the action taken
// in each case is not visible in this listing
1399 if(_is_blck_token(s))
1403 else if(_is_doc_begin(s))
1405 _c4dbgp(
"token is doc start");
1411 if(_is_blck_token(s))
1426 _c4dbgp(
"token is doc end");
1432 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// remember where the scalar starts: the offset bounds the final range, the line is
// used to detect a scalar that has moved past its first line
1434 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1435 const size_t start_line = m_evt_handler->m_curr->pos.line;
1437 bool needs_filter =
false;
1440 _c4dbgpf(
"plain scalar line: {}", _prs(s));
// walk the current line looking for characters that may end the scalar (':' and '#')
1441 for(
size_t i = 0; i < s.len; ++i)
1443 const char curr = s.str[i];
1448 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1452 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1453 _line_progressed(i);
// still on the line where the scalar started: the scalar ends here; the error below
// covers the case reported as a multiline implicit key
1455 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1457 _c4dbgp(
"start line. scalar ends here");
1462 _c4err(
"multiline scalars cannot be used as implicit keys");
// skip over a run of consecutive ':' characters
1468 while(j + 1 < s.len && s.str[j+1] ==
':')
1470 _c4dbgp(
"skip colon");
1473 i = j > i ? j-1 : i;
1474 _c4dbgp(
"nothing to see here");
1478 _c4dbgp(
"got suspicious '#'");
// '#' begins a comment only at the start of the line or right after a space or tab
1479 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1481 _c4dbgp(
"comment! scalar ends here");
1482 _line_progressed(i);
1487 _c4dbgp(
"nothing to see here");
// the whole line was consumed: peek at the following line to decide whether the scalar continues
1492 _line_progressed(s.len);
1493 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1494 next_peeked = next_peeked.trimr(
"\n\r");
1495 const size_t next_indentation = next_peeked.first_not_of(
' ');
1496 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1497 if(next_indentation < indentation)
1499 _c4dbgp(
"smaller indentation! scalar ended");
// an unindented, non-empty next line may be a document marker, which also ends the scalar
1502 else if(next_indentation == 0 && next_peeked.len > 0)
1504 const char first = next_peeked.str[0];
1508 _c4dbgpf(
"doc begin? peeked={}", _prs(next_peeked,
size_t(3)));
1509 if(_is_doc_begin_token(next_peeked))
1511 _c4dbgp(
"doc begin! scalar ended");
1516 _c4dbgpf(
"doc end? peeked={}", _prs(next_peeked,
size_t(3)));
1517 if(_is_doc_end_token(next_peeked))
1519 _c4dbgp(
"doc end! scalar ended");
1526 _c4dbgp(
"next line!");
1527 if(!_finished_file())
1529 _c4dbgp(
"next line!");
1535 _c4dbgp(
"file finished!");
// continue with the remainder of the (new) current line; the scalar will need filtering
1538 s = m_evt_handler->m_curr->line_contents.rem;
1539 needs_filter =
true;
// result: everything from the start offset up to the current offset, right-trimmed
1544 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1545 sc->needs_filter = needs_filter;
1547 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// Scans a plain scalar that is a value in a block sequence.
// Asserts the state is a block sequence at a value (RSEQ, RBLCK and RVAL set; RMAP, RFLOW
// and RSEQIMAP clear), then forwards to _scan_scalar_plain_blck() using the current
// state's indref + 1 as the indentation argument, and returns its result.
1552 template<
class EventHandler>
1553 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1555 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1556 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1557 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1558 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1559 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1560 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1561 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar inside a block map.
// Asserts the state is a block map (RMAP and RBLCK set; RSEQ and RFLOW clear) with any of
// RKEY, RVAL or QMRK set, then forwards to _scan_scalar_plain_blck() using the current
// state's indref + 1 as the indentation argument, and returns its result.
1564 template<
class EventHandler>
1565 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1572 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the state has RUNK or USTY set.
// Unlike the block seq/map variants, the current indref is passed to
// _scan_scalar_plain_blck() unchanged (no +1). Returns that call's result.
1575 template<
class EventHandler>
1576 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1578 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1579 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Looks ahead in the buffer for the line that follows position `pos`; declared const.
// When `pos` is npos the current state's offset is used instead.
// NOTE(review): the return statements and the guards around the newline handling are not
// visible in this listing; the second trace suggests an empty result when there is no
// following line -- confirm against the full file.
1585 template<
class EventHandler>
1586 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1590 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1591 if(pos >= _buf().len)
// skip to what _from_next_line() yields for the buffer tail starting at pos
1595 rem = _from_next_line(_buf().sub(pos));
// cut at the first newline character; left_of(..., true) keeps that position in the result
1600 nlpos = rem.first_of(
"\r\n");
1602 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1603 rem = rem.left_of(nlpos,
true);
1605 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1609 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the current state's line_contents for the line at the current offset.
// When the offset is inside the buffer, the line is read from _buf() at that offset.
// The second visible call resets line_contents with an empty tail of the buffer
// (_buf().last(0)) at offset 0.
// NOTE(review): the line between the two calls is not visible here; presumably an `else`.
1615 template<
class EventHandler>
1616 void ParseEngine<EventHandler>::_scan_line()
1618 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1619 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1621 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
// Advances the current position by `ahead` characters within the current line:
// pos.offset and pos.col both grow by `ahead`, and line_contents.rem drops its first
// `ahead` characters. Asserts that the new column does not exceed num_cols + 1.
1624 template<
class EventHandler>
1625 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1627 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1628 m_evt_handler->m_curr->pos.line,
1629 m_evt_handler->m_curr->line_contents.full.len,
1630 ahead, m_evt_handler->m_curr->pos.col,
1631 m_evt_handler->m_curr->pos.col+ahead,
1632 m_evt_handler->m_curr->pos.offset,
1633 m_evt_handler->m_curr->pos.offset+ahead);
1634 m_evt_handler->m_curr->pos.offset += ahead;
1635 m_evt_handler->m_curr->pos.col += ahead;
1636 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1637 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the current position past the end of the current line.
// Requires (assert) that the column is exactly num_cols + 1, i.e. the line was fully
// consumed. The offset grows by full.len - num_cols (presumably the line-terminator
// characters -- confirm against the line_contents definition), the line counter is
// incremented and the column is reset to 1.
1640 template<
class EventHandler>
1641 void ParseEngine<EventHandler>::_line_ended()
1643 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1644 m_evt_handler->m_curr->pos.line,
1645 m_evt_handler->m_curr->line_contents.full.len,
1646 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1647 m_evt_handler->m_curr->pos.col, 1);
1648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1649 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1650 ++m_evt_handler->m_curr->pos.line;
1651 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position changes made by _line_ended(): the offset is reduced by
// full.len - num_cols, the line counter is decremented and the column is set to
// num_cols + 1. Requires (asserts) column 1, line > 0 and an offset large enough
// for the subtraction.
// Finally line_contents.rem is set to a zero-length range of the buffer at the restored offset.
1654 template<
class EventHandler>
1655 void ParseEngine<EventHandler>::_line_ended_undo()
1657 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1658 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1659 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
// same quantity that _line_ended() added to the offset
1660 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1661 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1662 m_evt_handler->m_curr->pos.offset -= delta;
1663 --m_evt_handler->m_curr->pos.line;
1664 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// the remainder becomes empty, anchored at the restored offset
1667 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current state
// and traces the new value. Declared noexcept.
1672 template<
class EventHandler>
1673 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation) noexcept
1675 m_evt_handler->m_curr->indref = indentation;
1676 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Sets the reference indentation (indref) of the current state to the value returned by
// line_contents.current_col(). Asserts first that the line remainder is a sub-range of
// the full line.
1679 template<
class EventHandler>
1680 void ParseEngine<EventHandler>::_save_indentation()
1682 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1683 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1684 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Records the current line number in m_prev_val_end (traced as the end of a value in a
// flow sequence). Declared noexcept.
1687 template<
class EventHandler>
1688 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1690 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1691 m_prev_val_end = m_evt_handler->m_curr->pos.line;
// Handles a flow container that turned out to be followed by a colon (see the trace):
// asks the event handler to treat the value as the first key of a new block map,
// restores `orig_indent` as the reference indentation, then skips whitespace tokens.
1697 template<
class EventHandler>
1698 void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1700 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1701 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1703 _set_indentation(orig_indent);
1704 _maybe_skip_whitespace_tokens();
// Shared follow-up after a flow map or flow sequence has been closed.
// `orig_indent` is forwarded to _flow_container_was_a_key(); `multiline` turns the
// key cases below into errors ("multiline key is invalid").
// NOTE(review): the condition that selects the first branch is not visible in this listing.
1707 template<
class EventHandler>
1708 void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
// first branch: the container is a key of a regular block map, so a ':' must follow
1714 _c4dbgp(
"flow container: end as vanilla block map key!");
1715 if(C4_UNLIKELY(multiline))
1716 _c4err(
"multiline key is invalid");
1717 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1718 _c4err(
"could not find ':' colon after key");
1719 _maybe_skip_whitespace_tokens();
// second branch: the state is no longer in flow mode
1722 else if(has_none(
RFLOW))
1724 _c4dbgp(
"end_flow_container: now not in flow!");
// a following colon (tried only when RUNK, RSEQ or RKCL is set) means the container was a key
1725 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1727 if(C4_UNLIKELY(multiline))
1728 _c4err(
"multiline key is invalid");
1729 _flow_container_was_a_key(orig_indent);
1733 _c4dbgp(
"end_flow_container: end map as key!");
// third branch: still in flow mode, inside a sequence; remember where the value ended
1736 else if(has_any(
RSEQ))
1738 _c4dbgp(
"end_flow_container: now in a flow seq");
1739 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1740 _mark_seqflow_val_end();
// Closes a flow map.
// The container counts as multiline when the parent state's line is smaller than the
// current line. The current indref is captured before the handler call and is passed on
// to _end_flow_container() afterwards. The handler receives
// `multiline && m_options.detect_flow_ml()`.
1744 template<
class EventHandler>
1745 void ParseEngine<EventHandler>::_end_map_flow()
1747 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1748 size_t orig_indent = m_evt_handler->m_curr->indref;
1749 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1750 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1751 _end_flow_container(orig_indent, multiline);
// Closes a flow sequence.
// The container counts as multiline when the parent state's line is smaller than the
// current line. The current indref is captured before the handler call and is passed on
// to _end_flow_container() afterwards. The handler receives
// `multiline && m_options.detect_flow_ml()`.
1754 template<
class EventHandler>
1755 void ParseEngine<EventHandler>::_end_seq_flow()
1757 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758 size_t orig_indent = m_evt_handler->m_curr->indref;
1759 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1760 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1761 _end_flow_container(orig_indent, multiline);
// Closes a block map, first emitting empty plain scalars for anything still missing,
// then calling the handler's end_map_block().
// NOTE(review): the condition guarding the "missing val" branch is not visible in this listing.
1764 template<
class EventHandler>
1765 void ParseEngine<EventHandler>::_end_map_blck()
1767 _c4dbgp(
"mapblck: end");
// a value is missing: apply pending annotations and emit an empty value
1770 _c4dbgp(
"mapblck: set missing val");
1771 _handle_annotations_before_blck_val_scalar();
1772 m_evt_handler->set_val_scalar_plain_empty();
// QMRK set: both key and value are missing, so emit an empty key and then an empty value
1774 else if(has_any(
QMRK))
1776 _c4dbgp(
"mapblck: set missing keyval");
1777 _handle_annotations_before_blck_key_scalar();
1778 m_evt_handler->set_key_scalar_plain_empty();
1779 _handle_annotations_before_blck_val_scalar();
1780 m_evt_handler->set_val_scalar_plain_empty();
1782 m_evt_handler->end_map_block();
// Closes a block sequence via the handler's end_seq_block().
// The visible statements before it apply pending annotations and emit an empty plain
// value ("set missing val").
// NOTE(review): the condition guarding that path is not visible in this listing.
1785 template<
class EventHandler>
1786 void ParseEngine<EventHandler>::_end_seq_blck()
1790 _c4dbgp(
"seqblck: set missing val");
1791 _handle_annotations_before_blck_val_scalar();
1792 m_evt_handler->set_val_scalar_plain_empty();
1794 m_evt_handler->end_seq_block();
// Ends a map; asserts RMAP is set on entry.
// The visible tail asserts that RFLOW is clear and USTY is set, then pops the handler state.
// NOTE(review): the lines between the first assertion and that tail are not visible in
// this listing, so the tail may belong to only one branch -- confirm against the full file.
1797 template<
class EventHandler>
1798 void ParseEngine<EventHandler>::_end2_map()
1800 _c4dbgp(
"map: end");
1801 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1808 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1809 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1810 m_evt_handler->_pop();
// Ends a sequence; asserts RSEQ is set on entry.
// The visible tail asserts that RFLOW is clear and USTY is set, then pops the handler state.
// NOTE(review): the lines between the first assertion and that tail are not visible in
// this listing, so the tail may belong to only one branch -- confirm against the full file.
1814 template<
class EventHandler>
1815 void ParseEngine<EventHandler>::_end2_seq()
1817 _c4dbgp(
"seq: end");
1818 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1825 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1826 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1827 m_evt_handler->_pop();
// Begins a document: clears both directive flags, notifies the handler with begin_doc()
// and resets the new current state's reference indentation to 0.
1831 template<
class EventHandler>
1832 void ParseEngine<EventHandler>::_begin2_doc()
1834 _c4dbgp(
"begin_doc");
1835 m_has_directives_yaml =
false;
1836 m_has_directives =
false;
1839 m_evt_handler->begin_doc();
1840 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document: clears both directive flags, notifies the handler with
// begin_doc_expl() and resets the new current state's reference indentation to 0.
1843 template<
class EventHandler>
1844 void ParseEngine<EventHandler>::_begin2_doc_expl()
1846 _c4dbgp(
"begin_doc_expl");
1847 m_has_directives_yaml =
false;
1848 m_has_directives =
false;
1851 m_evt_handler->begin_doc_expl();
1852 m_evt_handler->m_curr->indref = 0;
// Ends the current document through the handler's end_doc(); asserts RDOC is set.
// When the document is empty, or tags/anchors are still pending, the pending annotations
// are applied and an empty plain value is emitted first.
1855 template<
class EventHandler>
1856 void ParseEngine<EventHandler>::_end2_doc()
1858 _c4dbgp(
"doc: end");
1859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1860 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1862 _c4dbgp(
"doc was empty; add empty val");
1863 _handle_annotations_before_blck_val_scalar();
1864 m_evt_handler->set_val_scalar_plain_empty();
1866 m_evt_handler->end_doc();
// Ends the current document through the handler's end_doc_expl().
// When the document is empty, or tags/anchors are still pending, the pending annotations
// are applied and an empty plain value is emitted first.
1870 template<
class EventHandler>
1871 void ParseEngine<EventHandler>::_end2_doc_expl()
1873 _c4dbgp(
"doc: end");
1874 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1876 _c4dbgp(
"doc: no children; add empty val");
1877 _handle_annotations_before_blck_val_scalar();
1878 m_evt_handler->set_val_scalar_plain_empty();
1880 m_evt_handler->end_doc_expl();
// Only the trace of the "doc must be started" path is visible in this listing.
// NOTE(review): the condition and the call that starts the document are not shown here;
// confirm against the full file before relying on this description.
1884 template<
class EventHandler>
1885 void ParseEngine<EventHandler>::_maybe_begin_doc()
1889 _c4dbgp(
"doc must be started");
// Finishes a document if one needs finishing.
// NOTE(review): the condition and the call of the first path ("doc must be finished")
// are not visible in this listing.
1893 template<
class EventHandler>
1894 void ParseEngine<EventHandler>::_maybe_end_doc()
1898 _c4dbgp(
"doc must be finished");
// no document to finish but tags/anchors are pending: emit a whole document holding a
// single empty plain value so the annotations have something to attach to
1901 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1903 _c4dbgp(
"no doc to finish, but pending annotations");
1904 m_evt_handler->begin_doc();
1905 _handle_annotations_before_blck_val_scalar();
1906 m_evt_handler->set_val_scalar_plain_empty();
1907 m_evt_handler->end_doc();
// Pops the state stack back to its bottom entry (stack[0]) unless the current state is
// already at level 0.
// Asserts beforehand that the stack is non-empty and that stack[0] has RDOC set, and
// afterwards that the current state has RDOC set.
1911 template<
class EventHandler>
1912 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1914 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1915 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1916 _c4dbgp(
"root is RDOC");
1917 if(m_evt_handler->m_curr->level != 0)
1918 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1919 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
// Evaluates whether a document token is in a suspicious position.
// The visible condition holds when the state is the root (stack size 1) or the handler
// reports DOC, the handler also reports any of MAP|SEQ|VAL, and NDOC is not set on the
// current state.
// NOTE(review): the action taken when the condition holds is not visible in this listing.
1924 template<
class EventHandler>
1925 void ParseEngine<EventHandler>::_check_trailing_doc_token()
1927 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1928 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
1929 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1930 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1931 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
// Ends the current document abruptly: starts by popping the state stack back to the
// document level via _end_doc_suddenly__pop().
// NOTE(review): any statements after the pop are not visible in this listing.
1935 template<
class EventHandler>
1936 void ParseEngine<EventHandler>::_end_doc_suddenly()
1938 _c4dbgp(
"end doc suddenly");
1939 _end_doc_suddenly__pop();
// Inspects the remainder of the current line; declared const.
// Asserts the remainder does not begin with '.', space or tab. The visible condition holds
// when the remainder is non-empty and does not start with '#'.
// NOTE(review): the action taken under that condition is not visible in this listing.
1944 template<
class EventHandler>
1945 void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1947 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1948 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
1949 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
// Starts a new document abruptly: begins by popping the state stack back to the document
// level via _end_doc_suddenly__pop().
// NOTE(review): any statements after the pop are not visible in this listing.
1955 template<
class EventHandler>
1956 void ParseEngine<EventHandler>::_start_doc_suddenly()
1958 _c4dbgp(
"start doc suddenly");
1959 _end_doc_suddenly__pop();
// Finishes parsing of the stream.
// Visible steps, in order: report an unterminated flow container, pop any nested states
// back to the bottom of the stack, emit a document holding an empty value when tags or
// anchors are still pending, notify the handler with end_stream(), and finally raise an
// error when m_has_directives is still set.
// NOTE(review): several lines, including the condition for the "missing terminating ]"
// error, are not visible in this listing.
1964 template<
class EventHandler>
1965 void ParseEngine<EventHandler>::_end_stream()
1967 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1969 _c4err(
"missing terminating ]");
// still inside a flow map at end of input
1970 else if(C4_UNLIKELY(has_all(
RMAP|
RFLOW)))
1971 _c4err(
"missing terminating }");
1972 if(m_evt_handler->m_stack.size() > 1)
1973 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// pending annotations are attached to an empty value in a document of their own
1980 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1984 m_evt_handler->begin_doc();
1985 _handle_annotations_before_blck_val_scalar();
1986 m_evt_handler->set_val_scalar_plain_empty();
1987 m_evt_handler->end_doc();
1991 m_evt_handler->end_stream();
1992 if(C4_UNLIKELY(m_has_directives))
1993 _c4err(
"directives cannot be used without a document");
// Pops parser states until the current state is `popto`.
// Each iteration distinguishes a sequence from a map (see the traces).
// NOTE(review): the calls that actually end each container are not visible in this listing.
1997 template<
class EventHandler>
1998 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
2000 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2001 while(m_evt_handler->m_curr != popto)
2005 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2008 else if(has_any(
RMAP))
2010 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2018 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Leaves a block sequence after the indentation decreased: searches the state stack,
// from the entry just below the current one downwards, for a state whose indref equals
// the indentation of the current line, then pops to it with _handle_indentation_pop().
// Raises a parse error when no suitable state is found or when the candidate is not
// strictly below the current state.
// The search walks raw pointers, hence the assertion that the stack is contiguous.
2021 template<
class EventHandler>
2022 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2025 using state_type =
typename EventHandler::state;
2026 state_type
const* popto =
nullptr;
2027 auto &stack = m_evt_handler->m_stack;
2028 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2029 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2030 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2032 _print_state_stack();
// NOTE(review): this loop runs while s >= stack.begin(), whereas the block-map variant
// stops at s > stack.begin(). If no state matches, s is decremented to one before
// begin(), which is not well-defined pointer arithmetic -- confirm the intended bound.
2034 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2036 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2037 if(s->indref == ind)
2039 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// reject a missing target or one that is not strictly below the current state
2044 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2046 _c4err(
"parse error: incorrect indentation?");
2048 _handle_indentation_pop(popto);
// Leaves a block map after the indentation decreased: searches the state stack, from the
// entry just below the current one down to (but excluding) the bottom entry, for the
// state to return to, then pops to it with _handle_indentation_pop().
// Raises a parse error when no suitable state is found or when the candidate is not
// strictly below the current state.
// NOTE(review): the first branch of the search and the assignments to `popto` are not
// visible in this listing.
2051 template<
class EventHandler>
2052 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2055 using state_type =
typename EventHandler::state;
2056 auto &stack = m_evt_handler->m_stack;
// the search walks raw pointers, so the stack storage must be contiguous
2057 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2058 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2059 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2060 state_type
const* popto =
nullptr;
2063 _print_state_stack(flagbuf_);
2065 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2067 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2072 else if(s->indref == ind)
2074 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
2075 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at the first non-space content of the line: a block token starting with '-' at
// this indentation is traced as the parent being an indentless sequence
2082 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2083 const size_t first = rem.first_not_of(
' ');
2084 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2085 rem = rem.right_of(first,
true);
2086 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2087 if(rem.begins_with(
'-') && _is_blck_token(rem))
2089 _c4dbgp(
"parent was indentless seq");
// reject a missing target or one that is not strictly below the current state
2095 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2097 _c4err(
"parse error: incorrect indentation?");
2099 _handle_indentation_pop(popto);
// Validates the line that continues a quoted scalar after a newline.
// Two errors are visible: multiline quoted keys, and a continuation line with content
// that is indented less than the minimum.
// NOTE(review): the condition guarding the first error is not visible in this listing.
2104 template<
class EventHandler>
2105 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2109 _c4err(
"multiline quoted keys are invalid");
// minimum indentation is indref, plus one when inside a block map or block sequence
// (the boolean expression contributes 0 or 1)
2113 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2114 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2115 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2117 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2118 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2119 m_evt_handler->m_curr->pos);
// an under-indented line is an error only when something follows the indentation
2120 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2121 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem,
true));
2122 if(C4_UNLIKELY(!!trimmed.len))
2124 _c4err(
"bad indentation");
// Scans a single-quoted scalar starting at the current offset, which must hold the
// opening quote (asserted). Returns a ScannedScalar with the text between the quotes and
// a flag telling whether the text needs filtering.
// The scan proceeds line by line until the closing quote is found; reaching the end of
// the file first is an error.
// NOTE(review): the declaration of `pos`, the quote/escape tests inside the loop and the
// line-advance calls are not visible in this listing.
2132 template<
class EventHandler>
2133 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2138 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; the quote itself is consumed here
2141 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2142 _line_progressed(1);
2144 bool needs_filter =
false;
2146 while( ! _finished_file())
2148 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2149 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
// a document token at the start of a line is rejected inside the scalar
2150 if(C4_UNLIKELY(_is_doc_token(line)))
2151 _c4err(
"token can not appear at line begin");
2152 for(
size_t i = 0; i < line.len; ++i)
2154 const char curr = line.str[i];
// lookahead character; '~' stands in when curr is the last character of the line
2157 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// consume through index i and convert the in-line index into an index relative to s
2160 _line_progressed(i + 1);
2161 pos = i + (size_t)(line.str - s.str);
2166 needs_filter =
true;
2172 needs_filter =
true;
2173 _line_progressed(line.len);
2176 _check_valid_newline_in_quoted_scalar();
2179 _c4err(
"reached end of file while looking for closing quote");
2183 _c4dbgpf(
"found closing quote at: {}", pos);
2184 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2185 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2186 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
// shrink s to its first pos characters, i.e. the text before the closing quote
2188 _set_first_strict(s, pos);
2190 _c4prscalar(
"scanned squoted scalar", s,
true);
2192 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current offset, which must hold the
// opening quote (asserted). Returns a ScannedScalar with the text between the quotes and
// a flag telling whether the text needs filtering.
// The scan proceeds line by line until an unescaped closing quote is found; reaching the
// end of the file first is an error.
// NOTE(review): the declaration of `pos`, the escape test inside the loop and the
// line-advance calls are not visible in this listing.
2197 template<
class EventHandler>
2198 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2203 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; the quote itself is consumed here
2206 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2207 _line_progressed(1);
2209 bool needs_filter =
false;
2211 while( ! _finished_file())
// applied only when compiling with GCC 13 (see the preprocessor condition)
2213 #if defined(__GNUC__) && (__GNUC__ == 13)
2214 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2216 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2217 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
// a document token at the start of a line is rejected inside the scalar
2218 if(C4_UNLIKELY(_is_doc_token(rem)))
2219 _c4err(
"token can not appear at line begin");
2220 for(
size_t i = 0; i < rem.len; ++i)
2222 const char curr = rem.str[i];
// lookahead character; '~' stands in when curr is the last character of the line
2226 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2227 needs_filter =
true;
2228 if(next ==
'"' || next ==
'\\')
// a quote reached here closes the scalar: consume through it and convert the in-line
// index into an index relative to s
2231 else if(curr ==
'"')
2233 _line_progressed(i + 1);
2234 pos = i + (size_t)(rem.str - s.str);
2240 needs_filter =
true;
2241 _line_progressed(rem.len);
2244 _check_valid_newline_in_quoted_scalar();
2247 _c4err(
"reached end of file while looking for closing quote");
2251 _c4dbgpf(
"found closing quote at: {}", pos);
2252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2253 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2254 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
// shrink s to its first pos characters, i.e. the text before the closing quote
2256 _set_first_strict(s, pos);
2258 _c4prscalar(
"scanned dquoted scalar", s,
true);
2260 return ScannedScalar{s, needs_filter};
// Scans a block scalar (introduced by '|' or '>') and writes the raw text and the
// resolved indentation to *sb.
// Phase 1 parses the header on the current line: an optional chomping indicator ('-' or
// '+') and an optional single-digit indentation indicator; anything else after the
// header must be whitespace followed by a '#' comment.
// Phase 2 consumes the following lines into `raw_block` until a line ends the block.
// While no explicit indentation is known, a provisional indentation is tracked from
// empty/whitespace-only lines and is used as the final indentation if no content line sets one.
// `indref` is the reference indentation supplied by the caller (must not be npos).
// NOTE(review): many conditions, breaks and the line-advance calls are not visible in
// this listing; the comments below describe only the visible statements.
2265 template<
class EventHandler>
2266 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2268 _c4dbgpf(
"blck: indref={}", indref);
2269 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
2272 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2273 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2275 _c4dbgpf(
"blck: specs={}", _prs(s));
// defaults: clip chomping, indentation not yet known
2278 BlockChomp_e chomp = CHOMP_CLIP;
2279 size_t indentation =
npos;
2282 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
// t is the header text after the style character
2283 csubstr t = s.sub(1);
2284 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2285 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2286 size_t pos = t.first_of(
"-+");
2287 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2291 chomp = CHOMP_STRIP;
2292 else if(t[pos] ==
'+')
// leading digits, if any, form the explicit indentation indicator
2300 pos = t.first_not_of(
"0123456789");
2301 csubstr digits = t.first(pos);
2302 if( ! digits.empty())
2304 if(C4_UNLIKELY(digits.len > 1))
2305 _c4err(
"parse error: invalid indentation");
2306 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2307 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2308 _c4err(
"parse error: could not read indentation as decimal");
2309 if(C4_UNLIKELY( ! indentation))
2310 _c4err(
"parse error: null indentation");
// NOTE(review): the trace prints indentation+indref (the parameter) while the statement
// below adds m_evt_handler->m_curr->indref; confirm the two are always equal here.
2311 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2312 indentation += m_evt_handler->m_curr->indref;
// whatever remains in the header must be whitespace followed by a comment
2316 if(C4_UNLIKELY(t.len && (!t.begins_with_any(
" \t") || !t.sub(pos).triml(
" \t").begins_with(
'#'))))
2317 _c4err(
"parse error: invalid token");
2321 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// the header line is fully consumed
2324 _line_progressed(s.len);
// raw_block starts empty at the current offset and grows by whole lines below
2329 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2330 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2338 size_t num_lines = 0;
2339 size_t first = m_evt_handler->m_curr->pos.line;
2340 size_t provisional_indentation =
npos;
2342 while(( ! _finished_file()))
// lc is a look-ahead copy of the next line; it is committed to the state only when the
// line is accepted as part of the block
2345 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2346 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2347 C4_DONT_OPTIMIZE(lc.rem);
2349 _c4dbgpf(
"blck: peeking at {}", _prs(lc.rem.trimr(
"\r\n"),
true));
// case 1: the block indentation is already known
2351 if(indentation !=
npos)
2353 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line indented less than the block indentation is not part of the block
2355 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2359 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2363 _c4err(
"indentation decreased without any scalar");
// with zero indentation only a document token can end the block
2367 else if(indentation == 0)
2369 _c4dbgpf(
"blck: noindent. lc.rem={}", _prs(lc.rem));
2370 if(_is_doc_token(lc.rem))
2372 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation is not known yet
2379 const size_t fns = lc.rem.first_not_of(
' ');
2380 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2383 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2384 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2386 if(provisional_indentation ==
npos)
2388 if(lc.indentation < indref)
2390 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2391 if(raw_block.len == 0)
2393 _c4dbgp(
"blck: was empty, undo next line");
2398 else if(lc.indentation == m_evt_handler->m_curr->indref)
2402 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// first content line: its indentation becomes the block indentation
2406 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2407 indentation = lc.indentation;
2411 if(lc.indentation >= provisional_indentation)
2413 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2415 indentation = lc.indentation;
// less indented than the provisional indentation but not below indref: reported as an error
2419 if(lc.indentation >= indref)
2420 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2421 _c4dbgp(
"blck: finished");
// empty or whitespace-only line: update the provisional indentation
2428 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2429 if(provisional_indentation !=
npos)
2431 if(lc.rem.len >= provisional_indentation)
2433 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2434 provisional_indentation = lc.rem.len;
// fallback value is has_any(RSEQ|RVAL) converted to size_t (0 or 1)
2439 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2440 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2441 if(provisional_indentation ==
npos)
2443 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2444 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never allowed below the caller's reference
2446 if(provisional_indentation < indref)
2448 provisional_indentation = indref;
2449 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line is accepted: commit it to the state and append the whole line
// (full.len) to the raw block
2455 m_evt_handler->m_curr->line_contents = lc;
2456 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2457 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2458 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2462 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2463 C4_UNUSED(num_lines);
// no content line fixed the indentation: fall back to the provisional value
2466 if(indentation ==
npos)
2468 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2469 indentation = provisional_indentation;
2475 _c4prscalar(
"scanned block", raw_block,
true);
2477 sb->scalar = raw_block;
2478 sb->indentation = indentation;
// Debug trace helper for the whitespace filters: the first definition prefixes the message
// with the processor's read and write positions (proc.rpos, proc.wpos); the second
// definition expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two definitions is not
// visible in this listing.
2490 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2492 #define _c4dbgfws(...)
// Handles a run of whitespace starting at the processor's current character, which must
// be a space or a tab (asserted).
// Looks for the first character after the run. When that character is a newline
// ('\n' or '\r') the run is trailing whitespace on its line and is skipped.
// NOTE(review): the return statements and the handling of the other two traced cases are
// not visible in this listing; callers test the result with `!`, so presumably false
// means the whitespace runs to the end of the source -- confirm.
2495 template<
class EventHandler>
2496 template<
class FilterProcessor>
2497 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2499 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2500 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
// at the very start of the source only spaces are skipped; elsewhere spaces and tabs
2502 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2503 if(first_pos !=
npos)
2505 const char first_char = proc.src[first_pos];
2506 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2507 if(first_char ==
'\n' || first_char ==
'\r')
2509 _c4dbgfws(
"whitespace is trailing on line",
"");
// drop the whole run up to (not including) the newline
2510 proc.skip(first_pos - proc.rpos);
2515 _c4dbgfws(
"legit whitespace. sofar={}", _prs(proc.sofar()));
2519 _c4dbgfws(
"whitespace is trailing on line",
"");
// Handles whitespace at the current position; when the helper reports that
// only trailing whitespace remains, copies all remaining characters through
// to the output.
2523 template<
class EventHandler>
2524 template<
class FilterProcessor>
2525 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2527 if(!_filter_ws_handle_to_first_non_space(proc))
2529 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
// copy everything from the read position to the end of the source
2530 proc.copy(proc.src.len - proc.rpos);
// Handles whitespace at the current position; when the helper reports that
// only trailing whitespace remains, skips (discards) all remaining characters.
2534 template<
class EventHandler>
2535 template<
class FilterProcessor>
2536 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2538 if(!_filter_ws_handle_to_first_non_space(proc))
2540 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
// discard everything from the read position to the end of the source
2541 proc.skip(proc.src.len - proc.rpos);
// Debug-print helper for the plain-scalar filter: prefixes every message with
// the processor's read and write positions.
2555 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
2557 #define _c4dbgfps(fmt, ...)
// Handles a newline found while filtering a plain scalar. Per the debug
// messages, a run of consecutive newlines is written out as newlines, while a
// single newline is converted to a space.
// @param proc         filter processor positioned on a '\n'
// @param indentation  forwarded to _count_following_newlines()
2560 template<
class EventHandler>
2561 template<
class FilterProcessor>
2562 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
// precondition: the current character is a newline
2564 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2566 _c4dbgfps(
"found newline. sofar={}", _prs(proc.sofar()));
// ii is passed by address so the counting helper can update it
2567 size_t ii = proc.rpos;
2568 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
// emit numnl_following newline characters
2571 proc.set(
'\n', numnl_following);
2572 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first non-blank character after this newline
2576 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2580 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2584 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar through the given processor, dispatching on each
// character: whitespace goes to _filter_ws_skip_trailing(), newlines go to
// _filter_nl_plain(), and carriage returns are ignored (per the debug text).
// @param proc         filter processor holding the source scalar
// @param indentation  must not be npos
// @return proc.result()
2591 template<
class EventHandler>
2592 template<
class FilterProcessor>
2593 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2595 _RYML_ASSERT_PARSE_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2596 _c4dbgfps(
"before={}", _prs(proc.src));
// main loop: one dispatch per remaining source character
2598 while(proc.has_more_chars())
2600 const char curr = proc.curr();
2601 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2606 _c4dbgfps(
"whitespace", curr);
2607 _filter_ws_skip_trailing(proc);
2610 _c4dbgfps(
"newline", curr);
2611 _filter_nl_plain(proc, indentation);
2614 _c4dbgfps(
"carriage return, ignore", curr);
2623 _c4dbgfps(
"after={}", _prs(proc.sofar()));
2625 return proc.result();
// Plain-scalar filtering from a source scalar into a separate destination
// buffer. NOTE(review): the signature line is not visible in this listing;
// presumably this is filter_scalar_plain(scalar, dst, indentation) - confirm.
2631 template<
class EventHandler>
// source/destination processor: reads from scalar, writes into dst
2634 FilterProcessorSrcDst proc(scalar, dst);
2635 return _filter_plain(proc, indentation);
// Plain-scalar filtering performed in place on dst, with cap passed to the
// in-place processor. NOTE(review): the signature line is not visible in this
// listing; presumably this is filter_scalar_plain_in_place(dst, cap,
// indentation), which is the name called by _filter_scalar_plain() - confirm.
2638 template<
class EventHandler>
2641 FilterProcessorInplaceEndExtending proc(dst, cap);
2642 return _filter_plain(proc, indentation);
// Debug-print helper for the single-quoted scalar filter: prefixes every
// message with the processor's read and write positions.
2653 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
2655 #define _c4dbgfsq(fmt, ...)
// Handles a newline found while filtering a single-quoted scalar. Per the
// debug messages, a run of consecutive newlines is written out as newlines,
// while a single newline is converted to a space.
// @param proc  filter processor positioned on a '\n'
2658 template<
class EventHandler>
2659 template<
class FilterProcessor>
2660 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
// precondition: the current character is a newline
2662 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2664 _c4dbgfsq(
"found newline. sofar={}", _prs(proc.sofar()));
// ii is passed by address so the counting helper can update it
2665 size_t ii = proc.rpos;
2666 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// emit numnl_following newline characters
2669 proc.set(
'\n', numnl_following);
2670 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first non-blank character after this newline
2674 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2678 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2683 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// Filters a single-quoted scalar through the given processor, dispatching on
// each character: whitespace goes to _filter_ws_copy_trailing(), newlines go
// to _filter_nl_squoted(), carriage returns are skipped (per the debug text),
// and a single quote followed by another single quote is detected as an
// escaped quote pair.
// @return proc.result()
2689 template<
class EventHandler>
2690 template<
class FilterProcessor>
2691 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2693 _c4dbgfsq(
"before={}", _prs(proc.src));
// main loop: one dispatch per remaining source character
2697 while(proc.has_more_chars())
2699 const char curr = proc.curr();
2700 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2705 _c4dbgfsq(
"whitespace", curr);
2706 _filter_ws_copy_trailing(proc);
2709 _c4dbgfsq(
"newline", curr);
2710 _filter_nl_squoted(proc);
2713 _c4dbgfsq(
"skip cr", curr);
2717 _c4dbgfsq(
"squote", curr);
// look ahead one character for a doubled single quote
2718 if(proc.next() ==
'\'')
2720 _c4dbgfsq(
"two consecutive squotes", curr);
2735 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2737 return proc.result();
// Single-quoted scalar filtering from a source scalar into a separate
// destination buffer. NOTE(review): the signature line is not visible in this
// listing; presumably this is filter_scalar_squoted(scalar, dst) - confirm.
2742 template<
class EventHandler>
// source/destination processor: reads from scalar, writes into dst
2745 FilterProcessorSrcDst proc(scalar, dst);
2746 return _filter_squoted(proc);
// Single-quoted scalar filtering performed in place on dst, with cap passed
// to the in-place processor. NOTE(review): the signature line is not visible
// in this listing; presumably this is filter_scalar_squoted_in_place(dst,
// cap), which is the name called by _filter_scalar_squot() - confirm.
2749 template<
class EventHandler>
2752 FilterProcessorInplaceEndExtending proc(dst, cap);
2753 return _filter_squoted(proc);
// Debug-print helper for the double-quoted scalar filter: prefixes every
// message with the processor's read and write positions.
2764 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
2766 #define _c4dbgfdq(...)
// Handles a newline found while filtering a double-quoted scalar. Per the
// debug messages, a run of consecutive newlines is written out as newlines,
// while a single newline is converted to a space. Additionally checks whether
// the text after the newline handling begins with a backslash followed by a
// space or tab.
// @param proc  filter processor positioned on a '\n'
2769 template<
class EventHandler>
2770 template<
class FilterProcessor>
2771 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
// precondition: the current character is a newline
2773 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2775 _c4dbgfdq(
"found newline. sofar={}", _prs(proc.sofar()));
// ii is passed by address so the counting helper can update it
2776 size_t ii = proc.rpos;
2777 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// emit numnl_following newline characters
2780 proc.set(
'\n', numnl_following);
2781 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first non-blank character after this newline
2785 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2789 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2794 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// a backslash at position ii: inspect the character that follows it
2796 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2798 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for "no next character" when the backslash is last
2799 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2800 if(next ==
' ' || next ==
'\t')
2802 _c4dbgfdq(
"extend skip to backslash",
"");
// Decodes a hexadecimal code point escape in a double-quoted scalar and
// writes its encoded bytes through the processor.
// @param proc  filter processor positioned on the backslash of the escape
// @param sz    number of hex digits expected after the two-character escape
//              introducer (called with 2, 4 and 8 for \x, \u and \U)
// Reports a parse error when too few characters remain, when the digits do
// not parse as hex, or when the code point cannot be decoded.
2810 template<
class EventHandler>
2811 template<
class FilterProcessor>
2812 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc,
size_t sz)
// escape letter plus sz digits: number of source characters after the backslash
2814 const size_t szp1 = sz + 1u;
2815 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2816 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
// the hex digits start two characters past the backslash
2818 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2819 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2820 uint32_t codepoint_val = {};
2821 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2822 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
// encode the code point into readbuf; a zero byte count signals failure
2823 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message label says "code point" but the value printed is
// the read position (proc.rpos), not codepoint_val - confirm intent.
2824 if(C4_UNLIKELY(numbytes == 0))
2825 _c4err(
"failed to decode code point={}", proc.rpos);
2826 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
// replace the escape (szp1 chars after the backslash) with the decoded bytes
2827 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2828 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// Handles a backslash escape inside a double-quoted scalar. Looks at the
// character following the backslash and translates the escape accordingly;
// an unrecognized character is reported as a parse error.
// @param proc  filter processor positioned on the backslash
2831 template<
class EventHandler>
2832 template<
class FilterProcessor>
2833 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2835 char next = proc.next();
2836 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a carriage return followed by a newline is treated as a newline
2839 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2843 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan over the spaces/tabs that follow, then skip up to position ii
2849 size_t ii = proc.rpos + 2;
2850 for( ; ii < proc.src.len; ++ii)
2853 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2858 proc.skip(ii - proc.rpos);
// characters that translate to themselves
2860 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2863 proc.translate_esc(next);
2864 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2866 else if(next ==
'\r')
// single-character control escapes
2870 else if(next ==
'n')
2872 proc.translate_esc(
'\n');
2874 else if(next ==
'r')
2876 proc.translate_esc(
'\r');
2878 else if(next ==
't')
2880 proc.translate_esc(
'\t');
2882 else if(next ==
'\\')
2884 proc.translate_esc(
'\\');
// hexadecimal code point escapes with 2, 4 and 8 digits
2886 else if(next ==
'x')
2888 _filter_dquoted_backslash_decode(proc, 2u);
2890 else if(next ==
'u')
2892 _filter_dquoted_backslash_decode(proc, 4u);
2894 else if(next ==
'U')
2896 _filter_dquoted_backslash_decode(proc, 8u);
2899 else if(next ==
'0')
2901 proc.translate_esc(
'\0');
2903 else if(next ==
'b')
2905 proc.translate_esc(
'\b');
2907 else if(next ==
'f')
2909 proc.translate_esc(
'\f');
2911 else if(next ==
'a')
2913 proc.translate_esc(
'\a');
2915 else if(next ==
'v')
2917 proc.translate_esc(
'\v');
2919 else if(next ==
'e')
2921 proc.translate_esc(
'\x1b');
// two-byte payload 0xC2 0xA0 (the UTF-8 encoding of U+00A0)
2923 else if(next ==
'_')
2926 const char payload[] = {
2927 _RYML_CHCONST(-0x3e, 0xc2),
2928 _RYML_CHCONST(-0x60, 0xa0),
2930 proc.translate_esc_bulk(payload, 2, 1);
// two-byte payload 0xC2 0x85 (the UTF-8 encoding of U+0085)
2932 else if(next ==
'N')
2935 const char payload[] = {
2936 _RYML_CHCONST(-0x3e, 0xc2),
2937 _RYML_CHCONST(-0x7b, 0x85),
2939 proc.translate_esc_bulk(payload, 2, 1);
// three-byte payload 0xE2 0x80 0xA8 (the UTF-8 encoding of U+2028); the
// output is longer than the two-character escape, hence the extending call
2941 else if(next ==
'L')
2944 const char payload[] = {
2945 _RYML_CHCONST(-0x1e, 0xe2),
2946 _RYML_CHCONST(-0x80, 0x80),
2947 _RYML_CHCONST(-0x58, 0xa8),
2949 proc.translate_esc_extending(payload, 3, 1);
// three-byte payload 0xE2 0x80 0xA9 (the UTF-8 encoding of U+2029)
2951 else if(next ==
'P')
2954 const char payload[] = {
2955 _RYML_CHCONST(-0x1e, 0xe2),
2956 _RYML_CHCONST(-0x80, 0x80),
2957 _RYML_CHCONST(-0x57, 0xa9),
2959 proc.translate_esc_extending(payload, 3, 1);
2961 else if(next ==
'\0')
// anything else is not a recognized escape
2967 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2969 _c4dbgfdq(
"backslash...sofar={}", _prs(proc.sofar()));
// Filters a double-quoted scalar through the given processor, dispatching on
// each character: whitespace goes to _filter_ws_copy_trailing(), newlines go
// to _filter_nl_dquoted(), carriage returns are ignored (per the debug text),
// and backslash escapes go to _filter_dquoted_backslash().
// @return proc.result()
2973 template<
class EventHandler>
2974 template<
class FilterProcessor>
2975 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2977 _c4dbgfdq(
"before={}", _prs(proc.src));
// main loop: one dispatch per remaining source character
2980 while(proc.has_more_chars())
2982 const char curr = proc.curr();
2983 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2989 _c4dbgfdq(
"whitespace", curr);
2990 _filter_ws_copy_trailing(proc);
2995 _c4dbgfdq(
"newline", curr);
2996 _filter_nl_dquoted(proc);
3001 _c4dbgfdq(
"carriage return, ignore", curr);
3007 _filter_dquoted_backslash(proc);
3017 _c4dbgfdq(
"after={}", _prs(proc.sofar()));
3018 return proc.result();
// Double-quoted scalar filtering from a source scalar into a separate
// destination buffer. NOTE(review): the signature line is not visible in this
// listing; presumably this is filter_scalar_dquoted(scalar, dst), which is
// the name called by _filter_scalar_dquot() - confirm.
3024 template<
class EventHandler>
// source/destination processor: reads from scalar, writes into dst
3027 FilterProcessorSrcDst proc(scalar, dst);
3028 return _filter_dquoted(proc);
// Double-quoted scalar filtering performed in place on dst, with cap passed
// to the processor. Unlike the other in-place filters in this file, this one
// uses the "MidExtending" processor type. NOTE(review): the signature line is
// not visible in this listing; presumably this is
// filter_scalar_dquoted_in_place(dst, cap) - confirm.
3031 template<
class EventHandler>
3034 FilterProcessorInplaceMidExtending proc(dst, cap);
3035 return _filter_dquoted(proc);
// Searches s backwards for a newline whose following line starts with more
// than `indentation` spaces (a line consisting only of spaces counts with its
// full length).
// @param s            string to search
// @param indentation  reference indentation, in spaces
// NOTE(review): the return statements are not visible in this listing;
// presumably the newline's index is returned, or npos when none qualifies.
3044 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
// s is too short to hold a newline followed by more than `indentation` chars
3046 if(indentation + 1 > s.len)
// walk backwards from the last position that can still qualify; the loop
// ends when i wraps around past zero
3048 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
3050 if(s.str[i] ==
'\n')
3052 csubstr rem = s.sub(i + 1);
3053 size_t first = rem.first_not_of(
' ');
// all-space remainder: count every space
3054 first = (first !=
npos) ? first : rem.len;
3055 if(first > indentation)
// Applies block-scalar chomping to the trailing part of the scalar. The
// remaining input must consist only of spaces, newlines and carriage returns.
// @param proc         filter processor positioned at the start of the trailing
//                     whitespace
// @param chomp        one of CHOMP_CLIP, CHOMP_KEEP or CHOMP_STRIP
// @param indentation  block indentation, in spaces
// Per the debug messages: CLIP copies a newline and adds one when missing,
// KEEP copies all remaining newlines, and STRIP drops the remaining
// characters.
3062 template<
class EventHandler>
3063 template<
class FilterProcessor>
3064 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
3066 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3067 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
// local debug-print helper, with a no-op alternate definition
3071 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3073 #define _c4dbgchomp(...)
// look for a trailing line indented deeper than the block indentation
3078 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3081 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// convert to an absolute source position just past the indentation
3082 last = proc.rpos + last + size_t(1) + indentation;
3083 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
// process the characters up to that position
3085 while((proc.rpos < last) && proc.has_more_chars())
3087 const char curr = proc.curr();
3088 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3093 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3096 csubstr at_next_line = proc.rem();
3097 if(at_next_line.begins_with(
' '))
3099 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3101 size_t first_non_space = at_next_line.first_not_of(
' ');
3102 _c4dbgchomp(
"first_non_space={}", first_non_space);
// line consists only of spaces: count them all
3103 if(first_non_space ==
npos)
3105 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3106 first_non_space = at_next_line.len;
// spaces within the indentation are dropped
3108 if(first_non_space <= indentation)
3110 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3111 proc.skip(first_non_space);
// otherwise drop the indentation and keep the spaces beyond it
3115 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3116 proc.skip(indentation);
3118 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3119 proc.copy(first_non_space - indentation);
// CLIP handling
3137 bool had_one =
false;
3138 while(proc.has_more_chars())
3140 const char curr = proc.curr();
3141 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3146 _c4dbgchomp(
"copy newline!", curr);
3154 _c4dbgchomp(
"skip!", curr);
3161 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
// KEEP handling
3168 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3169 while(proc.has_more_chars())
3171 const char curr = proc.curr();
3172 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3176 _c4dbgchomp(
"copy newline!", curr);
3181 _c4dbgchomp(
"skip!", curr);
// STRIP handling
3190 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Debug-print helper shared by the block-scalar filters: prefixes every
// message with the processor's read and write positions.
3202 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
3204 #define _c4dbgfb(...)
// Skips the leading indentation of the line at the processor's current read
// position: at most `indentation` spaces are skipped, fewer when the line has
// fewer leading spaces (per the debug messages).
// @param proc         filter processor positioned at the start of a line
// @param indentation  block indentation, in spaces
3207 template<
class EventHandler>
3208 template<
class FilterProcessor>
3209 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3211 csubstr rem = proc.rem();
// number of leading spaces on this line
3214 size_t first = rem.first_not_of(
' ');
3217 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3218 if(first < indentation)
3220 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3225 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3226 proc.skip(indentation);
// the code below is compiled only when this macro is defined
3229 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3232 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3236 if(first < indentation)
3238 _c4dbgfb(
"skip everything", first);
3239 proc.skip(proc.src.len - proc.rpos);
3243 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3244 proc.skip(indentation);
// Measures the length of the block scalar's contents, i.e. the source with
// trailing spaces, newlines and carriage returns trimmed. Also contains the
// handling for a source that is entirely whitespace when chomp is
// CHOMP_KEEP.
// @return length of the contents without the trailing whitespace
3252 template<
class EventHandler>
3253 template<
class FilterProcessor>
3254 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
// trim from the right only; leading whitespace is kept
3256 csubstr contents = proc.src.trimr(
" \n\r");
3257 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3260 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
// with KEEP chomping a non-empty all-whitespace source is processed
// character by character
3261 if(chomp == CHOMP_KEEP && proc.src.len)
3263 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3264 while(proc.has_more_chars())
3266 const char curr = proc.curr();
3278 return contents.len;
// Extends the contents length up to the first newline found at or after
// contents_len, or to the end of the source when there is no such newline.
// @param proc          filter processor holding the source scalar
// @param contents_len  length of the contents; must be greater than zero
// @return the extended contents length
3281 template<
class EventHandler>
3282 template<
class FilterProcessor>
3283 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3285 _c4dbgfb(
"contents_len={}", contents_len);
3287 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
// search for the first newline starting at contents_len
3291 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3292 if(firstnewl !=
npos)
3294 contents_len = firstnewl;
3295 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
// no newline found: the contents run to the end of the source
3299 contents_len = proc.src.len;
3300 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3303 return contents_len;
// Debug-print helper for the literal block filter: prefixes every message
// with the processor's read and write positions.
3315 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
3317 #define _c4dbgfbl(...)
// Filters a literal block scalar: measures the contents, strips the block
// indentation at the start and after each newline found in the contents, and
// finally applies chomping to the trailing part.
// @param proc         filter processor holding the source scalar
// @param indentation  block indentation, in spaces
// @param chomp        chomping mode
// @return proc.result()
3320 template<
class EventHandler>
3321 template<
class FilterProcessor>
3322 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3324 _c4dbgfbl(
"indentation={} before={}", indentation, _prs(proc.src));
3326 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early return (its condition is not visible in this listing)
3328 return proc.result();
3330 contents_len = _extend_to_chomp(proc, contents_len);
3332 _c4dbgfbl(
"to filter={}", _prs(proc.src.first(contents_len)));
// indentation of the first line
3334 _filter_block_indentation(proc, indentation);
// main loop over the contents only
3337 while(proc.has_more_chars(contents_len))
3339 const char curr = proc.curr();
3340 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3345 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3347 _filter_block_indentation(proc, indentation);
3359 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
// whatever remains after the contents is subject to chomping
3361 _filter_chomp(proc, chomp, indentation);
3363 _c4dbgfbl(
"final={}", _prs(proc.sofar()));
3365 return proc.result();
// Literal block scalar filtering from a source scalar into a separate
// destination buffer. NOTE(review): the signature line is not visible in this
// listing; presumably this is filter_scalar_block_literal(scalar, dst,
// indentation, chomp) - confirm.
3370 template<
class EventHandler>
// source/destination processor: reads from scalar, writes into dst
3373 FilterProcessorSrcDst proc(scalar, dst);
3374 return _filter_block_literal(proc, indentation, chomp);
// Literal block scalar filtering performed in place on scalar, with cap
// passed to the in-place processor. NOTE(review): the signature line is not
// visible in this listing; presumably this is
// filter_scalar_block_literal_in_place(scalar, cap, indentation, chomp),
// which is the name called by _filter_scalar_literal() - confirm.
3377 template<
class EventHandler>
3380 FilterProcessorInplaceEndExtending proc(scalar, cap);
3381 return _filter_block_literal(proc, indentation, chomp);
// Debug-print helper for the folded block filter: prefixes every message with
// the processor's read and write positions.
3391 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Alternate definition expanding to nothing (selecting condition not visible
// in this listing).
3393 #define _c4dbgfbf(...)
// Processes the leading part of a folded block scalar: strips the block
// indentation, then handles leading newlines and more-indented lines until a
// character that is neither a space nor a newline is reached (per the debug
// messages).
// @param proc         filter processor at the start of the scalar
// @param indentation  block indentation, in spaces
// @param len          length of the contents to process
3397 template<
class EventHandler>
3398 template<
class FilterProcessor>
3399 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
// indentation of the first line
3401 _filter_block_indentation(proc, indentation);
3402 while(proc.has_more_chars(len))
3404 const char curr = proc.curr();
3405 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3409 _c4dbgfbf(
"newline.", curr);
// indentation of the line after the newline
3411 _filter_block_indentation(proc, indentation);
// spaces remaining after the indentation was stripped: a more-indented line
3419 size_t first = proc.rem().first_not_of(
" \t");
3420 _c4dbgfbf(
"space. first={}", first);
3422 first = proc.rem().len;
3423 _c4dbgfbf(
"... indentation increased to {}", first);
3424 _filter_block_folded_indented_block(proc, indentation, len, first);
3428 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Applies the folding rule to one newline of a run of consecutive newlines.
// Per the debug messages: the first newline becomes a space and its write
// position is remembered; on the second newline that space is turned back
// into a newline; subsequent newlines are copied.
// @param proc                filter processor
// @param num_newl            1-based index of this newline within the run
// @param wpos_at_first_newl  write position recorded for the first newline
// @return the (possibly updated) write position of the first newline
3434 template<
class EventHandler>
3435 template<
class FilterProcessor>
3436 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3441 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space replacing the first newline is written
3442 wpos_at_first_newl = proc.wpos;
3447 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// the space must have been the last character written
3448 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3449 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3450 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
// overwrite that space with a newline
3452 proc.set_at(wpos_at_first_newl,
'\n');
3453 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3456 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3460 return wpos_at_first_newl;
// Handles a run of newlines inside a folded block scalar, folding them via
// _filter_block_folded_newlines_compress() and delegating more-indented lines
// to _filter_block_folded_indented_block().
// @param proc         filter processor positioned on a '\n'
// @param indentation  block indentation, in spaces
// @param len          length of the contents to process
3463 template<
class EventHandler>
3464 template<
class FilterProcessor>
3465 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
// precondition: the current character is a newline
3467 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
// newlines seen so far in this run, and where the first one was written
3468 size_t num_newl = 0;
3469 size_t wpos_at_first_newl =
npos;
3470 while(proc.has_more_chars(len))
3472 const char curr = proc.curr();
3473 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3478 _c4dbgfbf(
"newline. sofar={}", num_newl);
// fold this newline, then strip the next line's indentation
3514 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3515 _filter_block_indentation(proc, indentation);
// spaces remaining after the indentation was stripped: a more-indented line
3521 size_t first = proc.rem().first_not_of(
" \t");
3522 _c4dbgfbf(
"space. first={}", first);
3524 first = proc.rem().len;
3525 _c4dbgfbf(
"... indentation increased to {}", first);
3528 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
// restore the newline that was folded into a space
3529 proc.set_at(wpos_at_first_newl,
'\n');
3533 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3536 _filter_block_folded_indented_block(proc, indentation, len, first);
// start a fresh run after the indented block
3538 wpos_at_first_newl =
npos;
3545 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Processes a more-indented section inside a folded block scalar: copies the
// extra indentation of the current line, then continues line by line,
// stripping the block indentation after each newline, until the section ends
// (per the debug messages).
// @param proc              filter processor at the start of the extra indentation
// @param indentation       block indentation, in spaces
// @param len               length of the contents to process
// @param curr_indentation  number of whitespace characters to copy first
3552 template<
class EventHandler>
3553 template<
class FilterProcessor>
3554 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
// precondition: curr_indentation matches the leading whitespace of the
// remaining input, unless the remaining input is all whitespace
3556 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
// the extra indentation is kept in the output
3557 if(curr_indentation)
3558 proc.copy(curr_indentation);
3559 while(proc.has_more_chars(len))
3561 const char curr = proc.curr();
3562 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
// strip the block indentation of the next line, then count its extra spaces
3568 _filter_block_indentation(proc, indentation);
3569 csubstr rem = proc.rem();
3570 const size_t first = rem.first_not_of(
' ');
3571 _c4dbgfbf(
"newline. firstns={}", first);
3574 const char c = rem[first];
3575 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3576 if(c ==
'\n' || c ==
'\r')
3582 _c4dbgfbf(
"done with indented block", first);
3586 else if(first !=
npos)
3589 _c4dbgfbf(
"copy all {} spaces", first);
// Filters a folded block scalar: measures the contents, processes the leading
// newlines/indentation, folds each run of newlines found in the contents, and
// finally applies chomping to the trailing part.
// @param proc         filter processor holding the source scalar
// @param indentation  block indentation, in spaces
// @param chomp        chomping mode
// @return proc.result()
3607 template<
class EventHandler>
3608 template<
class FilterProcessor>
3609 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3611 _c4dbgfbf(
"indentation={} before={}", indentation, _prs(proc.src));
3613 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early return (its condition is not visible in this listing)
3615 return proc.result();
3617 contents_len = _extend_to_chomp(proc, contents_len);
3619 _c4dbgfbf(
"to filter={}", _prs(proc.src.first(contents_len)));
3621 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// main loop over the contents only
3624 while(proc.has_more_chars(contents_len))
3626 const char curr = proc.curr();
3627 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3632 _c4dbgfbf(
"found newline", curr);
3633 _filter_block_folded_newlines(proc, indentation, contents_len);
3645 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
// whatever remains after the contents is subject to chomping
3647 _filter_chomp(proc, chomp, indentation);
// one placeholder, one argument: print the filtered contents (a stray length
// argument used to take the placeholder, so the contents were never shown)
3649 _c4dbgfbf(
"final={}", _prs(proc.sofar()));
3651 return proc.result();
// Folded block scalar filtering from a source scalar into a separate
// destination buffer. NOTE(review): the signature line is not visible in this
// listing; presumably this is filter_scalar_block_folded(scalar, dst,
// indentation, chomp) - confirm.
3656 template<
class EventHandler>
// source/destination processor: reads from scalar, writes into dst
3659 FilterProcessorSrcDst proc(scalar, dst);
3660 return _filter_block_folded(proc, indentation, chomp);
// Folded block scalar filtering performed in place on scalar, with cap passed
// to the in-place processor. NOTE(review): the signature line is not visible
// in this listing; presumably this is
// filter_scalar_block_folded_in_place(scalar, cap, indentation, chomp), which
// is the name called by _filter_scalar_folded() - confirm.
3663 template<
class EventHandler>
3666 FilterProcessorInplaceEndExtending proc(scalar, cap);
3667 return _filter_block_folded(proc, indentation, chomp);
// Filters a plain scalar in place, using the scalar's own length as the
// capacity, and asserts that the result is valid.
// @param s            scalar to filter (modified in place)
// @param indentation  forwarded to the in-place filter
3675 template<
class EventHandler>
3676 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3678 _c4dbgpf(
"filtering plain scalar: s={}", _prs(s));
3679 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3680 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3681 _c4dbgpf(
"filtering plain scalar: success! s={}", _prs(r.get()));
// Filters a single-quoted scalar in place, using the scalar's own length as
// the capacity, and asserts that the result is valid.
// @param s  scalar to filter (modified in place)
3687 template<
class EventHandler>
3688 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3690 _c4dbgpf(
"filtering squo scalar: s={}", _prs(s));
3691 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3692 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3693 _c4dbgpf(
"filtering squo scalar: success! s={}", _prs(r.get()));
// Filters a double-quoted scalar. First tries in place; when the in-place
// result is not valid because the filtered scalar needs more room than s
// provides, allocates the required length from the arena and filters from s
// into that allocation.
// @param s  scalar to filter
3700 template<
class EventHandler>
3701 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3703 _c4dbgpf(
"filtering dquo scalar: s={}", _prs(s));
3704 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
// expected case: the filtered scalar fits in place
3705 if(C4_LIKELY(r.valid()))
3707 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(r.get()));
// fallback: the result tells how many characters are needed
3712 const size_t len = r.required_len();
3713 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// s is passed by address to the arena allocator
// NOTE(review): presumably so s can be relocated if the arena moves - confirm
3714 substr dst = _alloc_arena(len, &s);
3715 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3718 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
// second pass: filter from s into the arena allocation
3719 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3720 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3721 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
3722 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3723 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(rsd.get()));
// Produces a copy of the scalar with one newline appended. When s lies inside
// the parse buffer, its characters are moved one position to the left to make
// room for the newline; otherwise a new string of s.len + 1 characters is
// allocated from the arena and s is copied into it.
// @param s  scalar needing one extra trailing newline
3733 template<
class EventHandler>
3734 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3736 if(s.is_sub(_buf()))
// there must be at least one character before s in the buffer
3738 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3739 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
// overlapping ranges, hence memmove rather than memcpy
3741 memmove(s.str - 1, s.str, s.len);
3743 s.str[s.len] =
'\n';
// s is outside the buffer: allocate room for the scalar plus the newline
3749 substr dst = _alloc_arena(s.len + 1, &s);
3751 memcpy(dst.str, s.str, s.len);
// Filters a literal block scalar in place. When the in-place result is not
// valid, the filtered scalar needs exactly one more character than s has
// (asserted below), and the extra room is obtained through
// _move_scalar_left_and_add_newline().
// @param s            scalar to filter (modified in place)
// @param indentation  block indentation, in spaces
// @param chomp        chomping mode
3757 template<
class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3760 _c4dbgpf(
"filtering block literal scalar: s={}", _prs(s));
3761 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
// expected case: the filtered scalar fits in place
3763 if(C4_LIKELY(r.valid()))
3769 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3770 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3773 result = _move_scalar_left_and_add_newline(s);
3775 _c4dbgpf(
"filtering block literal scalar: success! s={}", _prs(result));
// Filters a folded block scalar in place. When the in-place result is not
// valid, the filtered scalar needs exactly one more character than s has
// (asserted below), and the extra room is obtained through
// _move_scalar_left_and_add_newline().
// @param s            scalar to filter (modified in place)
// @param indentation  block indentation, in spaces
// @param chomp        chomping mode
3781 template<
class EventHandler>
3782 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3784 _c4dbgpf(
"filtering block folded scalar: s={}", _prs(s));
3785 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
// expected case: the filtered scalar fits in place
3787 if(C4_LIKELY(r.valid()))
3793 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3794 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3797 result = _move_scalar_left_and_add_newline(s);
3799 _c4dbgpf(
"filtering block folded scalar: success! s={}", _prs(result));
// Filters a plain key scalar when the parser options enable scalar filtering;
// otherwise leaves it as scanned and marks the key scalar as unfiltered on
// the event handler. A further branch (its condition is not visible in this
// listing) covers scalars that need no filtering.
// @param sc           scanned scalar
// @param indentation  forwarded to _filter_scalar_plain()
3806 template<
class EventHandler>
3807 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3811 if(m_options.scalar_filtering())
3813 return _filter_scalar_plain(sc.scalar, indentation);
3817 _c4dbgp(
"plain scalar left unfiltered");
3818 m_evt_handler->mark_key_scalar_unfiltered();
3823 _c4dbgp(
"plain scalar doesn't need filtering");
// Filters a plain value scalar when the parser options enable scalar
// filtering; otherwise leaves it as scanned and marks the value scalar as
// unfiltered on the event handler. A further branch (its condition is not
// visible in this listing) covers scalars that need no filtering.
// @param sc           scanned scalar
// @param indentation  forwarded to _filter_scalar_plain()
3828 template<
class EventHandler>
3829 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3833 if(m_options.scalar_filtering())
3835 return _filter_scalar_plain(sc.scalar, indentation);
3839 _c4dbgp(
"plain scalar left unfiltered");
3840 m_evt_handler->mark_val_scalar_unfiltered();
3845 _c4dbgp(
"plain scalar doesn't need filtering");
// Filters a single-quoted key scalar when the parser options enable scalar
// filtering; otherwise leaves it as scanned and marks the key scalar as
// unfiltered on the event handler. A further branch (its condition is not
// visible in this listing) covers scalars that need no filtering.
// @param sc  scanned scalar
3853 template<
class EventHandler>
3854 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3858 if(m_options.scalar_filtering())
3860 return _filter_scalar_squot(sc.scalar);
3864 _c4dbgp(
"squo key scalar left unfiltered");
3865 m_evt_handler->mark_key_scalar_unfiltered();
3870 _c4dbgp(
"squo key scalar doesn't need filtering");
// Filters a single-quoted value scalar when the parser options enable scalar
// filtering; otherwise leaves it as scanned and marks the value scalar as
// unfiltered on the event handler. A further branch (its condition is not
// visible in this listing) covers scalars that need no filtering.
// @param sc  scanned scalar
3875 template<
class EventHandler>
3876 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3880 if(m_options.scalar_filtering())
3882 return _filter_scalar_squot(sc.scalar);
3886 _c4dbgp(
"squo val scalar left unfiltered");
3887 m_evt_handler->mark_val_scalar_unfiltered();
3892 _c4dbgp(
"squo val scalar doesn't need filtering");
// Filters a double-quoted key scalar when the parser options enable scalar
// filtering; otherwise leaves it as scanned and marks the key scalar as
// unfiltered on the event handler. A further branch (its condition is not
// visible in this listing) covers scalars that need no filtering.
// @param sc  scanned scalar
3900 template<
class EventHandler>
3901 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3905 if(m_options.scalar_filtering())
3907 return _filter_scalar_dquot(sc.scalar);
3911 _c4dbgp(
"dquo scalar left unfiltered");
3912 m_evt_handler->mark_key_scalar_unfiltered();
3917 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filters a double-quoted value scalar when the parser options enable scalar
// filtering; otherwise leaves it as scanned and marks the value scalar as
// unfiltered on the event handler. A further branch (its condition is not
// visible in this listing) covers scalars that need no filtering.
// @param sc  scanned scalar
3922 template<
class EventHandler>
3923 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3927 if(m_options.scalar_filtering())
3929 return _filter_scalar_dquot(sc.scalar);
3933 _c4dbgp(
"dquo scalar left unfiltered");
3934 m_evt_handler->mark_val_scalar_unfiltered();
3939 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filters a literal block key scalar when the parser options enable scalar
// filtering, using the indentation and chomp mode recorded in the scanned
// block; otherwise leaves it as scanned and marks the key scalar as
// unfiltered on the event handler.
// @param sb  scanned block (scalar, indentation, chomp)
3947 template<
class EventHandler>
3948 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3950 if(m_options.scalar_filtering())
3952 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3956 _c4dbgp(
"literal scalar left unfiltered");
3957 m_evt_handler->mark_key_scalar_unfiltered();
// Filters a literal block value scalar when the parser options enable scalar
// filtering, using the indentation and chomp mode recorded in the scanned
// block; otherwise leaves it as scanned and marks the value scalar as
// unfiltered on the event handler.
// @param sb  scanned block (scalar, indentation, chomp)
3962 template<
class EventHandler>
3963 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3965 if(m_options.scalar_filtering())
3967 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3971 _c4dbgp(
"literal scalar left unfiltered");
3972 m_evt_handler->mark_val_scalar_unfiltered();
// Filters a folded block key scalar when the parser options enable scalar
// filtering, using the indentation and chomp mode recorded in the scanned
// block; otherwise leaves it as scanned and marks the key scalar as
// unfiltered on the event handler.
// @param sb  scanned block (scalar, indentation, chomp)
3980 template<
class EventHandler>
3981 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3983 if(m_options.scalar_filtering())
3985 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3989 _c4dbgp(
"folded scalar left unfiltered");
3990 m_evt_handler->mark_key_scalar_unfiltered();
// Filters a folded block value scalar when the parser options enable scalar
// filtering, using the indentation and chomp mode recorded in the scanned
// block; otherwise leaves it as scanned and marks the value scalar as
// unfiltered on the event handler.
// @param sb  scanned block (scalar, indentation, chomp)
3995 template<
class EventHandler>
3996 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3998 if(m_options.scalar_filtering())
4000 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4004 _c4dbgp(
"folded scalar left unfiltered");
4005 m_evt_handler->mark_val_scalar_unfiltered();
// Adds parser flags to the current state. The lines visible in this listing
// build the debug message showing the flags being added and the state's flags
// before and after; the statement that updates the flags is not visible here.
// @param on  flags to add
4017 template<
class EventHandler>
4018 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
4020 ParserState *s = m_evt_handler->m_curr;
// scratch buffers for the textual form of each flag set
4021 char buf1_[64], buf2_[64], buf3_[64];
4022 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4023 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4024 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4025 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Adds one set of parser flags and removes another on the current state.
// NOTE(review): the signature line is not visible in this listing; the body
// uses parameters named `on` and `off` - presumably addrem_flags(on, off),
// confirm. The visible lines build the debug message; the resulting flags are
// computed as (~off) & (flags | on), i.e. removal takes precedence.
4029 template<
class EventHandler>
4032 ParserState *s = m_evt_handler->m_curr;
// scratch buffers for the textual form of each flag set
4033 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4034 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4035 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4036 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4037 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4038 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Removes the flags in `off` from the current parser state.
// The statements visible here only build the debug trace: the removed flags,
// the state's flags before, and the flags after (s->flags & ~off). The
// statement that actually updates s->flags is not visible in this excerpt.
4044 template<
class EventHandler>
4045 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
4047 ParserState *s = m_evt_handler->m_curr;
4048 char buf1_[64], buf2_[64], buf3_[64];
4049 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4050 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4051 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4052 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Renders the set bits of `flags` as text into `buf` and returns the written
// prefix of `buf` (buf.first(pos)).
// The local _prflag(fl) macro handles one flag: when all bits of `fl` are set
// in `flags`, it appends the stringized flag name (#fl), and it copies only
// when the text fits (pos + fltxt.len <= buf.len). The `pos + 1 < buf.len`
// test presumably guards a one-character separator between names; that
// statement and the list of _prflag() invocations are not visible here.
// The final check enforces that the write position never exceeded the buffer.
4056 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
4059 bool gotone =
false;
4061 #define _prflag(fl) \
4062 if((flags & fl) == (fl)) \
4066 if(pos + 1 < buf.len) \
4070 csubstr fltxt = #fl; \
4071 if(pos + fltxt.len <= buf.len) \
4072 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4102 _RYML_CHECK_BASIC(pos <= buf.len);
4104 return buf.first(pos);
// Returns the part of the parsed source buffer that starts at loc.offset
// (the function signature is not visible in this excerpt).
// Asserts that the offset lies strictly inside the buffer.
4114 template<
class EventHandler>
4117 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4118 return _buf().sub(loc.offset);
// Computes the source location (name, offset, line, column) of the character
// pointed to by `val` (the function signature is not visible in this excerpt).
// Requires the parser to have been created with location tracking enabled
// (m_options.locations()) and the newline-offset table to be populated.
// The line is found by locating an entry of m_newline_offsets relative to the
// offset of `val`; the column is the distance from the previous newline.
4121 template<
class EventHandler>
// a null pointer yields an all-zero location carrying only the source name
4124 if(C4_UNLIKELY(val ==
nullptr))
4125 return {m_evt_handler->m_curr->pos.
name, 0, 0, 0};
4126 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4129 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4130 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4131 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4132 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4134 csubstr src = _buf();
4135 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
// val must point into the source buffer (one-past-the-end is accepted)
4136 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4138 using lineptr_type =
size_t const* C4_RESTRICT;
4139 lineptr_type lineptr =
nullptr;
4140 size_t offset = (size_t)(val - src.begin());
// forward scan over the newline table (its body and the condition selecting
// between this scan and the halving search below are not visible here)
4144 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
// halving search over the newline table: `step` is half of the remaining count
4159 size_t count = m_newline_offsets_size;
4160 lineptr = m_newline_offsets;
4163 size_t step = count >> 1;
4164 lineptr_type it = lineptr + step;
4176 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4177 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
// NOTE(review): the previous assertion admits lineptr == one-past-the-end, while
// this one dereferences lineptr -- confirm the search cannot return the end pointer
4178 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4180 loc.name = m_evt_handler->m_curr->pos.name;
4181 loc.offset = offset;
// the index of the found entry in the newline table is the line number
4182 loc.line = (size_t)(lineptr - m_newline_offsets);
// on lines after the first, the column counts from the character following the
// previous newline (the first-line case is not visible in this excerpt)
4183 if(lineptr > m_newline_offsets)
4184 loc.col = (offset - *(lineptr-1) - 1u);
// Builds the newline-offset table used for location queries.
// The table receives the buffer offset of every '\n' in the source, followed
// by one final entry equal to src.len, so it holds (number of '\n' + 1)
// entries; this is asserted at the end.
4190 template<
class EventHandler>
4191 void ParseEngine<EventHandler>::_prepare_locations()
4193 csubstr src = _buf();
// one entry per newline plus the trailing end-of-buffer entry
4194 size_t numnewlines = 1u + src.count(
'\n');
4195 _resize_locations(numnewlines);
4196 m_newline_offsets_size = 0;
4197 for(
size_t i = 0; i < src.len; i++)
4198 if(src.str[i] ==
'\n')
4199 m_newline_offsets[m_newline_offsets_size++] = i;
// final entry: the end of the buffer
4200 m_newline_offsets[m_newline_offsets_size++] = src.len;
4201 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensures the newline-offset table can hold `numnewlines` entries.
// The requested size is raised to a minimum of 16. The table is reallocated
// only when the request exceeds the current capacity: the old buffer (if any)
// is freed and a new one is allocated through the handler's callbacks. No copy
// of the old contents is visible here, so callers must refill the table.
4204 template<
class EventHandler>
4205 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4207 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4208 if(numnewlines > m_newline_offsets_capacity)
4210 if(m_newline_offsets)
4211 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// NOTE(review): the allocation hint is the pointer that may just have been
// freed -- presumably only its address is used as a hint; confirm
4212 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4213 m_newline_offsets_capacity = numnewlines;
// Returns true when the newline-offset table is empty
// (m_newline_offsets_size == 0), i.e. it has to be (re)built before
// location queries can be answered.
4217 template<
class EventHandler>
4218 bool ParseEngine<EventHandler>::_locations_dirty()
const
4220 return !m_newline_offsets_size;
// Skips leading whitespace and a trailing comment in the remainder of the
// current line while parsing flow-style content.
// If the remainder starts with a space or tab, the whitespace is handled (the
// skipping statement itself is not visible in this excerpt). If what follows
// begins with '#', the rest of the line is consumed as a comment.
4228 template<
class EventHandler>
4229 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4232 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4234 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4236 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4240 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4242 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
// a comment extends to the end of the line: consume everything that is left
4243 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Validates indentation at the beginning of a line inside a flow container.
// Must be called with the state at the line beginning (asserted). When the
// state reports indentation_lt() and the line holds anything other than
// spaces/tabs after its indentation, the position is advanced past the
// indentation and a "bad indentation" error is raised.
4249 template<
class EventHandler>
4250 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4252 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4253 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4254 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4256 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4257 _c4dbgpf(
"flow: after indentation={}", _prs(trimmed));
// lines that are empty or whitespace-only after the indentation are tolerated
4258 if(trimmed.len && trimmed.triml(
" \t").len)
// advance first so that the reported error position is at the offending content
4260 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4261 _c4err(
"bad indentation");
// Skips leading spaces and tabs in the remainder of the current line while
// parsing block-style content.
// If a non-whitespace character exists, the position advances up to it;
// otherwise the whole remainder is consumed. `mark` holds the buffer offset
// from before the skip; the return statement is not visible in this excerpt.
4266 template<
class EventHandler>
4267 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4269 const size_t mark = m_evt_handler->m_curr->pos.offset;
4270 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4271 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4272 if(firstpos !=
npos)
4274 _c4dbgp(
"block: non empty line");
4275 _line_progressed(firstpos);
4280 _c4dbgp(
"block: rest of line is whitespace");
4281 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Rejects tab characters in the leading whitespace of a block line.
// start_mark / end_mark are buffer offsets delimiting the skipped leading
// range (end_mark >= start_mark is asserted). If the range is non-empty and
// contains a '\t', an error is raised.
4286 template<
class EventHandler>
4287 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4289 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4290 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4291 if(end_mark != start_mark)
4293 csubstr leading = _buf().range(start_mark, end_mark);
4294 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading,
true));
4295 if(leading.find(
'\t') !=
npos)
4296 _c4err(
"invalid tab character to the left");
// Registers a colon found on the current line and raises an error when a
// colon was already registered on this same line.
// m_prev_colon stores the line of the last registered colon (npos when none).
4304 template<
class EventHandler>
4305 void ParseEngine<EventHandler>::_handle_colon()
4307 size_t curr = m_evt_handler->m_curr->pos.line;
4308 if(C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon))
4310 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4311 _c4err(
"two colons on same line");
4313 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
// remember this line for the next check
4314 m_prev_colon = curr;
// Stores a pending annotation (tag or anchor) in the next free slot of `dst`,
// with indentation, line and original text value-initialized.
// Asserts that a free slot exists. The statement advancing dst->num_entries
// is not visible in this excerpt.
4317 template<
class EventHandler>
4318 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4320 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, _prs(str));
4321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4322 dst->annotations[dst->num_entries].str = str;
4323 dst->annotations[dst->num_entries].indentation = {};
4324 dst->annotations[dst->num_entries].line = {};
4325 dst->annotations[dst->num_entries].orig = {};
// Stores a pending annotation (tag or anchor) in the next free slot of `dst`,
// together with the indentation and line where it was found; the original
// text is value-initialized.
// Asserts that a free slot exists. The case of an annotation already stored
// for the same line is detected, but its handling (and the statement
// advancing dst->num_entries) is not visible in this excerpt.
4329 template<
class EventHandler>
4330 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4332 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4333 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// an entry exists and the first one is on this same line
4334 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4338 dst->annotations[dst->num_entries].str = str;
4339 dst->annotations[dst->num_entries].indentation = indentation;
4340 dst->annotations[dst->num_entries].line = line;
4341 dst->annotations[dst->num_entries].orig = {};
// Stores a pending annotation (tag or anchor) in the next free slot of `dst`,
// together with its indentation, line and the original text `orig` from
// which `str` was derived.
// Asserts that a free slot exists. The case of an annotation already stored
// for the same line is detected, but its handling (and the statement
// advancing dst->num_entries) is not visible in this excerpt.
4345 template<
class EventHandler>
4346 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line, csubstr orig)
4348 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4349 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// an entry exists and the first one is on this same line
4350 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4354 dst->annotations[dst->num_entries].str = str;
4355 dst->annotations[dst->num_entries].indentation = indentation;
4356 dst->annotations[dst->num_entries].line = line;
4357 dst->annotations[dst->num_entries].orig = orig;
// Returns true when more than one tag or more than one anchor is pending.
4361 template<
class EventHandler>
4362 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4364 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
// Consumes pending annotations when a flow token shows up where a key was
// expected.
// With no pending tag or anchor, the first test is taken (its statement is
// not visible in this excerpt). Otherwise a single pending tag and/or anchor
// is attached to the key and cleared, more than one anchor is an error, and
// an empty plain key and an empty plain value are emitted.
4367 template<
class EventHandler>
4368 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4370 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4372 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4373 if(m_pending_tags.num_entries)
4375 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4376 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4378 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4379 _clear_annotations(&m_pending_tags);
4386 if(m_pending_anchors.num_entries)
// report the number of pending anchors (this trace used to print the tag count)
4388 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_anchors.num_entries);
4389 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4391 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4392 _clear_annotations(&m_pending_anchors);
4396 _c4err(
"too many anchors");
// the annotations decorate an empty key with an empty value
4399 m_evt_handler->set_key_scalar_plain_empty();
4400 m_evt_handler->set_val_scalar_plain_empty();
// Attaches pending annotations to a block key scalar that is about to be set.
// A single pending tag / anchor is set on the key and cleared. More than one
// pending anchor is an error; the handling of more than one pending tag is
// not visible in this excerpt.
4404 template<
class EventHandler>
4405 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4407 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4408 if(m_pending_tags.num_entries)
4410 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4411 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4413 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4414 _clear_annotations(&m_pending_tags);
4421 if(m_pending_anchors.num_entries)
4423 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4424 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4426 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4427 _clear_annotations(&m_pending_anchors);
4431 _c4err(
"too many anchors");
// Attaches pending annotations to a block value scalar that is about to be set.
// A single pending tag / anchor is set on the value and cleared. More than
// one pending anchor is an error; the handling of more than one pending tag
// is not visible in this excerpt.
4436 template<
class EventHandler>
4437 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4439 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4440 if(m_pending_tags.num_entries)
4442 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4443 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4445 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4446 _clear_annotations(&m_pending_tags);
4453 if(m_pending_anchors.num_entries)
4455 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4456 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4458 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4459 _clear_annotations(&m_pending_anchors);
4463 _c4err(
"too many anchors");
// Decides which pending annotations belong to a block map that is about to
// be started on line `current_line`, and sets them as value annotations.
// With two pending tags (anchors), entry 0 is applied to the map and is not
// cleared here. With a single pending tag (anchor), it is applied to the map
// and cleared only when it was found on a line before `current_line`.
4468 template<
class EventHandler>
4469 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4471 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4472 if(m_pending_tags.num_entries == 2)
4474 _c4dbgp(
"2 tags, setting entry 0");
4475 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4477 else if(m_pending_tags.num_entries == 1)
4479 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
// a tag on an earlier line annotates the map itself
4480 if(m_pending_tags.annotations[0].line < current_line)
4482 _c4dbgp(
"...tag is for the map. setting it.");
4483 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4484 _clear_annotations(&m_pending_tags);
4488 if(m_pending_anchors.num_entries == 2)
4490 _c4dbgp(
"2 anchors, setting entry 0");
4491 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4493 else if(m_pending_anchors.num_entries == 1)
4495 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
// an anchor on an earlier line annotates the map itself
4496 if(m_pending_anchors.annotations[0].line < current_line)
4498 _c4dbgp(
"...anchor is for the map. setting it.");
4499 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4500 _clear_annotations(&m_pending_anchors);
// Applies pending annotations to a block map that is being started in key
// position; the annotations are set as key annotations.
// The switch case labels are not visible in this excerpt; judging by the
// debug traces, the first branch handles one pending entry and the second
// handles two. A single tag (anchor) is applied and cleared only when it was
// found on a line other than the current one; with two, entry 0 is applied.
4505 template<
class EventHandler>
4506 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4508 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4509 switch(m_pending_tags.num_entries)
// the trace now has a placeholder for the current line, which is passed as third argument
4512 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline={}", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4513 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4515 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4516 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4517 _clear_annotations(&m_pending_tags);
4521 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4522 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4525 switch(m_pending_anchors.num_entries)
// the trace now has a placeholder for the current line, which is passed as third argument
4528 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline={}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4529 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4531 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4532 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4533 _clear_annotations(&m_pending_anchors);
4537 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4538 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
// After a block map was started: applies the pending annotations to the
// first key and sets the map's indentation.
// At most two tags and two anchors may be pending (asserted). When any is
// pending, the key indentation is re-derived from the annotations. The
// switch case labels are not visible in this excerpt; judging by the debug
// traces, the first branch handles one pending entry (entry 0 goes to the
// key) and the second handles two (entry 1 goes to the key). Applied
// annotations are cleared.
4543 template<
class EventHandler>
4544 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4546 _c4dbgp(
"annotations_after_start_mapblck");
4547 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4548 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4549 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4551 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4552 switch(m_pending_tags.num_entries)
4555 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4556 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4557 _clear_annotations(&m_pending_tags);
4560 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4561 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4562 _clear_annotations(&m_pending_tags);
4565 switch(m_pending_anchors.num_entries)
// single anchor: one placeholder for the one argument (mirrors the "1 tag" trace)
4568 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchor: {}", m_pending_anchors.annotations[0].str);
4569 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4570 _clear_annotations(&m_pending_anchors);
4573 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4574 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4575 _clear_annotations(&m_pending_anchors);
4579 _set_indentation(key_indentation);
// Picks the indentation to use for a node from its pending annotations.
// Requires at least one pending tag or anchor (asserted). Starting from the
// first anchor (or the first tag when there is no anchor), every pending
// anchor and tag is compared against the current candidate by line and by
// indentation (the statements updating the candidate are not visible in this
// excerpt). Returns val_indentation when the selected annotation is on a
// line before val_line, otherwise the annotation's own indentation.
4582 template<
class EventHandler>
4583 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4585 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4587 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4588 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4590 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4591 if(ann.line > curr->line)
4593 else if(ann.indentation < curr->indentation)
4596 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4598 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4599 if(ann.line > curr->line)
4601 else if(ann.indentation < curr->indentation)
4604 return curr->line < val_line ? val_indentation : curr->indentation;
// Sets `alias` as a key reference on the event handler.
// Raises an error when any tag or anchor is pending.
4607 template<
class EventHandler>
4608 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4610 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4611 m_evt_handler->set_key_ref(alias);
4613 _c4err(
"aliases cannot have anchors or tags");
// Sets `alias` as a value reference on the event handler.
// Raises an error when any tag or anchor is pending.
4616 template<
class EventHandler>
4617 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4619 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4620 m_evt_handler->set_val_ref(alias);
4622 _c4err(
"aliases cannot have anchors or tags");
// Resolves `tag` against the tag directives of the current document and
// returns the resolved tag.
// The handler's tag cache is consulted first; a cached result is returned
// directly. Otherwise the tag directives resolve the tag into the remaining
// arena space. If that space was too small (bufsz > buf.len, signalled by a
// null result string), the arena is grown through _alloc_arena() and the
// resolution is repeated. For handlers that require strings to live in
// buffers, a result outside both the arena and the source buffer is copied
// into the arena. The result is finally added to the tag cache.
// Several statements (declarations, branches, returns) are not visible in
// this excerpt.
4625 template<
class EventHandler>
4626 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4628 _c4dbgpf(
"resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4630 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4633 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4634 return ret.resolved;
4636 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
// first attempt: resolve into whatever space remains in the arena
4638 substr buf = m_evt_handler->arena_rem();
4639 TagDirectives
const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4640 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4641 m_evt_handler->m_curr->pos,
4642 m_evt_handler->m_stack.m_callbacks);
4643 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
// a null result string is the signal that the buffer was too small
4644 _c4assert((bufsz > buf.len) == (!ttag.str));
4645 _c4assert(!!bufsz == (ttag.len == bufsz));
// arena() is reported as the arena length and arena_rem() as the slack, matching
// the "slack=" trace further below (the two arguments used to be swapped)
4649 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena().len, m_evt_handler->arena_rem().len, ttag.len);
// `tag` is passed by address to _alloc_arena(), presumably so it can be
// relocated if the arena moves -- TODO confirm
4651 buf = _alloc_arena(bufsz, &tag);
4654 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4655 m_evt_handler->m_curr->pos,
4656 m_evt_handler->m_stack.m_callbacks);
4659 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4663 _c4dbgp(
"tag required arena. update size");
4666 (void)_alloc_arena(bufsz);
4668 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4669 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4671 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
// a result living outside both buffers has to be copied into the arena
4673 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4675 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4676 buf = _alloc_arena(ttag.len, &tag);
4678 memcpy(buf.str, ttag.str, ttag.len);
4680 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4683 C4_SUPPRESS_WARNING_MSVC_POP
4684 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
// remember the result at the cache position found by the initial lookup
4687 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
// Validates a "%YAML <major>.<minor>" directive.
// `directive` must begin with "%YAML" (asserted). After optional spaces/tabs,
// a run of digits followed by '.' and a second run of digits is looked for.
// On a match, *directive is trimmed to end after the minor digits and
// *version receives the "<major>.<minor>" range. The return statements are
// not visible in this excerpt.
// NOTE(review): first_not_of() can return version_start itself, so an empty
// major part (e.g. "%YAML .2") seems to pass the visible checks -- confirm.
4691 template<
class EventHandler>
4692 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT
version)
const
4694 _c4assert(directive->begins_with(
"%YAML"));
// 5 == length of "%YAML"
4695 size_t version_start = directive->first_not_of(
" \t", 5);
4696 if(version_start !=
npos)
4698 csubstr digits =
"0123456789";
4699 size_t major_end = directive->first_not_of(digits, version_start);
4700 if(major_end !=
npos && directive->str[major_end] ==
'.')
4702 size_t minor_end = directive->first_not_of(digits, major_end + 1);
// digits up to the end of the directive
4703 if(minor_end ==
npos)
4704 minor_end = directive->len;
4705 _set_first_strict(*directive, minor_end);
4706 *
version = directive->range(version_start, minor_end);
4707 _c4dbgpf(
"%YAML: version={} full={}", *
version, _prs(*directive,
true));
// Validates a "%TAG <handle> <prefix>" directive.
// `directive` must begin with "%TAG" (asserted). The handle is the first
// whitespace-delimited token after it and must start with '!'; the prefix is
// the next whitespace-delimited token. On a match, *directive is trimmed to
// end after the prefix, and *handle and *prefix receive the respective
// ranges. The return statements are not visible in this excerpt.
4714 template<
class EventHandler>
4715 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix)
const
4717 _c4assert(directive->begins_with(
"%TAG"));
4718 csubstr whitespace =
" \t";
// 4 == length of "%TAG"
4719 size_t handle_start = directive->first_not_of(whitespace, 4);
4720 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
4722 size_t handle_end = directive->first_of(whitespace, handle_start);
4723 if(handle_end !=
npos)
4725 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4726 if(prefix_start !=
npos)
4728 size_t prefix_end = directive->first_of(whitespace, prefix_start);
// prefix runs up to the end of the directive
4729 if(prefix_end ==
npos)
4730 prefix_end = directive->len;
4731 _set_first_strict(*directive, prefix_end);
4732 *handle = directive->range(handle_start, handle_end);
4733 *prefix = directive->range(prefix_start, prefix_end);
4734 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive,
true));
// Handles a directive line ("%TAG ..." or "%YAML ...").
// `directive` must start at the current line remainder, which must begin
// with '%' (both asserted). A %TAG directive is validated and forwarded to
// the handler together with its handle and prefix. A %YAML directive is
// validated, must not have been seen before, and is forwarded with its
// version. Afterwards the line is advanced past the directive and trailing
// whitespace; anything left that is not a '#' comment is an error. Errors
// are collected in `err` and reported at the directive_error label (the
// label and the reporting statement are not visible in this excerpt).
4743 template<
class EventHandler>
4744 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4746 _c4dbgpf(
"handle_directive: rem={}", _prs(directive,
true));
4747 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4748 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4749 const char *err =
nullptr;
// a directive name matches only when followed by a space, a tab, or nothing
4752 auto isdirective = [](csubstr str, csubstr dir) {
4753 if(str.begins_with(dir))
4755 csubstr rest = str.sub(dir.len);
4756 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4760 if(isdirective(directive,
"%TAG"))
4764 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4766 err =
"invalid %TAG directive";
4767 goto directive_error;
4769 m_evt_handler->add_directive_tag(handle, prefix);
4771 else if(isdirective(directive,
"%YAML"))
4774 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version)))
4776 err =
"invalid %YAML directive";
4777 goto directive_error;
// only a single %YAML directive is accepted
4779 if(C4_UNLIKELY(m_has_directives_yaml))
4781 err =
"multiple %YAML directives";
4782 goto directive_error;
4784 m_has_directives_yaml =
true;
4785 m_evt_handler->add_directive_yaml(
version);
4787 m_has_directives =
true;
// advance past the (trimmed) directive and any whitespace following it
4788 rem = m_evt_handler->m_curr->line_contents.rem;
4789 pos = rem.first_not_of(
" \t", directive.len);
4790 pos = pos !=
npos ? pos : rem.len;
4791 _line_progressed(pos);
4793 _c4dbgpf(
"handle_directive: rest={}", _prs(rem));
// only a comment may follow a directive on the same line
4794 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
4796 err =
"invalid tokens after directive";
4797 goto directive_error;
4800 if(C4_UNLIKELY(err !=
nullptr))
// Detects a byte order mark (or an implied encoding) at the start of the
// current line remainder and consumes it.
// Encodings are tested in this order: UTF32BE, UTF32LE, UTF16BE, UTF16LE,
// UTF8. For the UTF16/UTF32 variants, either the explicit BOM bytes are
// matched, or the encoding is inferred from NUL bytes adjacent to one ASCII
// character. On a match the BOM width (4, 2 or 3 bytes) is consumed. The
// statements recording the encoding and the return values are not visible in
// this excerpt.
4804 template<
class EventHandler>
4805 bool ParseEngine<EventHandler>::_handle_bom()
4807 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// `rest` is the remainder without its first byte, used by the little-endian heuristics
4810 const csubstr rest = rem.sub(1);
// true for non-NUL 7-bit characters
4812 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// UTF32BE: BOM 00 00 FE FF, or three NULs followed by an ASCII character
4813 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4815 _c4dbgp(
"byte order mark: UTF32BE");
4817 _line_progressed(4);
// UTF32LE: BOM FF FE 00 00, or an ASCII character followed by three NULs
4821 else if(rem.begins_with(csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4823 _c4dbgp(
"byte order mark: UTF32LE");
4825 _line_progressed(4);
// UTF16BE: BOM FE FF, or a NUL followed by an ASCII character
4829 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4831 _c4dbgp(
"byte order mark: UTF16BE");
4833 _line_progressed(2);
// UTF16LE: BOM FF FE, or an ASCII character followed by a NUL
4837 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4839 _c4dbgp(
"byte order mark: UTF16LE");
4841 _line_progressed(2);
// UTF8: BOM EF BB BF
4845 else if(rem.begins_with(
"\xef\xbb\xbf"))
4847 _c4dbgp(
"byte order mark: UTF8");
4849 _line_progressed(3);
// Validates a detected encoding `enc` against the encoding registered so far.
// When no BOM was seen yet (m_encoding == NOBOM), the encoding is accepted if
// it is UTF8 or if the current line remainder starts at the very beginning
// of the source buffer; otherwise an error is raised. When an encoding is
// already registered, a different one is an error. The statement storing
// the encoding is not visible in this excerpt.
4858 template<
class EventHandler>
4859 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4861 if(m_encoding ==
NOBOM)
// non-UTF8 marks are accepted only at the start of the buffer
4863 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4866 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4868 else if(enc != m_encoding)
4870 _c4err(
"byte order mark can only be set once");
// Parses the contents of a JSON (flow) sequence.
// Preconditions (asserted): the state has RSEQ and RFLOW, does not have
// RKEY, and has exactly one of RVAL or RNXT.
// With RVAL, the next token is a value: a double-quoted scalar, a child
// sequence, a child map, the end of this sequence, or a plain scalar. With
// RNXT, the next token is either a separator introducing the next value
// (add_sibling) or the end of the sequence. The loop continues across lines
// and fails with "missing terminating ]" when the file ends first.
// The dispatching conditions, flag updates and labels are not visible in
// this excerpt.
4877 template<
class EventHandler>
4878 void ParseEngine<EventHandler>::_handle_seq_json()
4881 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4883 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4884 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4885 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4886 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
4887 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4889 _handle_flow_skip_whitespace();
4890 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RVAL: a value is expected ---
4896 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4897 const char first = rem.str[0];
// trace label names this handler (seqjson), consistent with the other traces here
4898 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4903 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4904 ScannedScalar sc = _scan_scalar_dquot();
4905 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4906 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4912 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4914 m_evt_handler->begin_seq_val_flow();
4916 _line_progressed(1);
4921 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4923 m_evt_handler->begin_map_val_flow();
4925 _line_progressed(1);
4926 goto seqjson_finish;
4930 _c4dbgp(
"seqjson[RVAL]: end!");
4933 _line_progressed(1);
4935 goto seqjson_finish;
4941 if(_scan_scalar_seq_json(&sc))
4943 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4944 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4945 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- RNXT: a separator or the end of the sequence is expected ---
4957 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4958 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4959 const char first = rem.str[0];
// trace label names this handler (seqjson), consistent with the other traces here
4960 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4965 _c4dbgp(
"seqjson[RNXT]: expect next val");
4967 m_evt_handler->add_sibling();
4968 _line_progressed(1);
4973 _c4dbgp(
"seqjson[RNXT]: end!");
4975 _line_progressed(1);
4976 goto seqjson_finish;
// --- continue with the next token, moving to the next line when needed ---
4984 _c4dbgt(
"seqjson: go again", 0);
4985 if(_finished_line())
4987 if(C4_LIKELY(!_finished_file()))
4995 _c4err(
"missing terminating ]");
5001 _c4dbgp(
"seqjson: finish");
// Parses the contents of a JSON (flow) map.
// Preconditions (asserted): the state has RMAP and RFLOW, does not have
// QMRK, and has exactly one of RKEY, RKCL, RVAL or RNXT.
//  - RKEY: expects a double-quoted key scalar, or the end of the map.
//  - RKCL: expects the colon separating key and value.
//  - RVAL: expects a value: double-quoted scalar, child sequence, child map,
//    or plain scalar.
//  - RNXT: expects ',' (next key/value pair) or '}' (end of the map).
// The loop continues across lines and fails with "missing terminating }"
// when the file ends first. The dispatching conditions, flag updates and
// labels are not visible in this excerpt.
5007 template<
class EventHandler>
5008 void ParseEngine<EventHandler>::_handle_map_json()
5011 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5013 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5014 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5015 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5016 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of the four sub-states is active
5017 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5019 _handle_flow_skip_whitespace();
5020 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RKEY: a key is expected ---
5026 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5027 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5028 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5029 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5030 const char first = rem.str[0];
5031 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5036 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5037 ScannedScalar sc = _scan_scalar_dquot();
5038 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5039 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5045 _c4dbgp(
"mapjson[RKEY]: end!");
5047 _line_progressed(1);
5048 goto mapjson_finish;
// --- RVAL: a value is expected ---
5054 else if(has_any(
RVAL))
5056 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5057 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5058 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5059 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5060 const char first = rem.str[0];
5061 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5066 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5067 ScannedScalar sc = _scan_scalar_dquot();
5068 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5069 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5075 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5077 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent state
5078 _set_indentation(m_evt_handler->m_parent->indref);
5080 _line_progressed(1);
5081 goto mapjson_finish;
5085 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5087 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation reference of its parent state
5088 _set_indentation(m_evt_handler->m_parent->indref);
5090 _line_progressed(1);
5097 if(_scan_scalar_map_json(&sc))
5099 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5100 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5101 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- RKCL: the colon after the key is expected ---
5112 else if(has_any(
RKCL))
5114 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5115 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5116 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5117 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5118 const char first = rem.str[0];
5119 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5122 _c4dbgp(
"mapjson[RKCL]: found the colon");
5124 _line_progressed(1);
// --- RNXT: ',' or '}' is expected ---
5131 else if(has_any(
RNXT))
5133 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5134 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5135 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5136 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5137 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5138 if(rem.begins_with(
','))
5140 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5141 m_evt_handler->add_sibling();
5143 _line_progressed(1);
5145 else if(rem.begins_with(
'}'))
5147 _c4dbgp(
"mapjson[RNXT]: end!");
5149 _line_progressed(1);
5150 goto mapjson_finish;
// --- continue with the next token, moving to the next line when needed ---
5159 _c4dbgt(
"mapjson: go again", 0);
5160 if(_finished_line())
5162 if(C4_LIKELY(!_finished_file()))
5170 _c4err(
"missing terminating }");
5176 _c4dbgp(
"mapjson: finish");
// Handler for the RSEQIMAP parser state (the debug messages call it "seqimap").
//
// Preconditions asserted on entry: RSEQIMAP is set, RKEY is not set, exactly
// one of RVAL / RNXT / QMRK / RKCL is set, and the handler stack holds at
// least three entries. Each of those four states then dispatches on the
// first character of the remaining line contents.
//
// NOTE(review): this listing appears to omit some lines (braces, a few
// conditions, the declaration of `sc`, the seqimap_finish label), so the
// comments below describe only what the visible statements do.
5182 template<
class EventHandler>
5183 void ParseEngine<EventHandler>::_handle_seq_imap()
5186 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry invariants of the state machine
5188 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5189 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5190 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
5191 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5192 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
// `rem` is a copy of line_contents.rem taken after the flow-whitespace
// helper has run; every state below inspects rem.str[0]
5194 _handle_flow_skip_whitespace();
5195 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state (asserted): a value is being read.
// Scalars (quoted or plain), nested flow containers and the empty value
// all jump to seqimap_finish; no goto is visible in the ref, anchor and
// tag branches.
5201 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5202 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5203 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5204 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5205 const char first = rem.str[0];
5206 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted value: scan, filter, handle annotations, then set it
5210 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5211 sc = _scan_scalar_squot();
5212 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5213 _handle_annotations_before_blck_val_scalar();
5214 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5216 goto seqimap_finish;
// double-quoted value: same sequence of steps as the single-quoted case
5218 else if(first ==
'"')
5220 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5221 sc = _scan_scalar_dquot();
5222 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5223 _handle_annotations_before_blck_val_scalar();
5224 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5226 goto seqimap_finish;
// plain value: the scan function itself is the branch condition
5229 else if(_scan_scalar_plain_map_flow(&sc))
5231 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5232 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5233 _handle_annotations_before_blck_val_scalar();
5234 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5236 goto seqimap_finish;
// '[': the value is a nested flow sequence; the indentation is then set
// from m_parent->indref and the bracket is consumed
5238 else if(first ==
'[')
5240 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5242 _handle_annotations_before_blck_val_scalar();
5243 m_evt_handler->begin_seq_val_flow();
5245 _set_indentation(m_evt_handler->m_parent->indref);
5246 _line_progressed(1);
5247 goto seqimap_finish;
// '{': the value is a nested flow map; same steps as for '['
5249 else if(first ==
'{')
5251 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5253 _handle_annotations_before_blck_val_scalar();
5254 m_evt_handler->begin_map_val_flow();
5256 _set_indentation(m_evt_handler->m_parent->indref);
5257 _line_progressed(1);
5258 goto seqimap_finish;
// ',' or ']': nothing follows the colon, so the value is set to an empty
// plain scalar (no _line_progressed call is visible in this branch)
5260 else if(first ==
',' || first ==
']')
5262 _c4dbgp(
"seqimap[RVAL]: finish without val.");
5263 _handle_annotations_before_blck_val_scalar();
5264 m_evt_handler->set_val_scalar_plain_empty();
5266 goto seqimap_finish;
// '*': the scanned reference is handed to _handle_valref
5268 else if(first ==
'*')
5270 csubstr ref = _scan_ref_seq();
5271 _c4dbgpf(
"seqimap[RVAL]: ref! {}", _prs(ref));
5272 _handle_valref(ref);
// '&': the scanned anchor is passed to _add_annotation with m_pending_anchors
5275 else if(first ==
'&')
5277 csubstr anchor = _scan_anchor();
5278 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", _prs(anchor));
5279 _add_annotation(&m_pending_anchors, anchor);
// '!': the scanned tag is passed to _add_annotation with m_pending_tags
5281 else if(first ==
'!')
5283 csubstr tag = _scan_tag();
5284 _c4dbgpf(
"seqimap[RVAL]: tag! {}", _prs(tag));
5285 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: on ',' or ']' the debug message reports the seqimap
// as done and control jumps to seqimap_finish
5292 else if(has_any(
RNXT))
5294 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5295 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5296 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5297 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5298 const char first = rem.str[0];
5299 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5300 if(first ==
',' || first ==
']')
5304 _c4dbgp(
"seqimap: done");
5306 goto seqimap_finish;
// ---- QMRK state: the key is being read. Scalars are stored through the
// set_key_scalar_* calls; unlike the RVAL branches above, no annotation
// helper is called before setting them.
5313 else if(has_any(
QMRK))
5315 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5316 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5317 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5319 const char first = rem.str[0];
5320 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
5324 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5325 sc = _scan_scalar_squot();
5326 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5327 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5331 else if(first ==
'"')
5333 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5334 sc = _scan_scalar_dquot();
5335 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5336 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5341 else if(_scan_scalar_plain_map_flow(&sc))
5343 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5344 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5345 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '[' / '{': the key itself is a flow container; begin it and leave
5349 else if(first ==
'[')
5351 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5353 m_evt_handler->begin_seq_key_flow();
5355 _set_indentation(m_evt_handler->m_parent->indref);
5356 _line_progressed(1);
5357 goto seqimap_finish;
5359 else if(first ==
'{')
5361 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5363 m_evt_handler->begin_map_key_flow();
5365 _set_indentation(m_evt_handler->m_parent->indref);
5366 _line_progressed(1);
5367 goto seqimap_finish;
// ',' or ']' right away: both key and value are set to empty plain scalars
5369 else if(first ==
',' || first ==
']')
5371 _c4dbgp(
"seqimap[QMRK]: finish without key.");
5372 m_evt_handler->set_key_scalar_plain_empty();
5373 m_evt_handler->set_val_scalar_plain_empty();
5375 goto seqimap_finish;
// '&': here the anchor goes straight to set_key_anchor, whereas the RVAL
// branch above passes it to _add_annotation
5377 else if(first ==
'&')
5379 csubstr anchor = _scan_anchor();
5380 _c4dbgp(
"seqimap[QMRK]: anchor!");
5381 m_evt_handler->set_key_anchor(anchor);
5383 else if(first ==
'*')
5385 csubstr ref = _scan_ref_seq();
5386 _c4dbgp(
"seqimap[QMRK]: ref!");
5387 _handle_keyref(ref);
// ---- RKCL state: per the debug messages a ':' is consumed here (one
// character of progress); ',' or ']' instead sets an empty value and
// finishes.
5395 else if(has_any(
RKCL))
5397 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5398 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5399 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5401 const char first = rem.str[0];
5402 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
5405 _c4dbgp(
"seqimap[RKCL]: found ':'");
5407 _line_progressed(1);
5410 else if(first ==
',' || first ==
']')
5412 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5413 m_evt_handler->set_val_scalar_plain_empty();
5415 goto seqimap_finish;
// no branch jumped to seqimap_finish: check for end of line and then end
// of file before going again.
// NOTE(review): the bodies of these two checks are not visible here.
5424 _c4dbgt(
"seqimap: go again", 0);
5425 if(_finished_line())
5427 if(C4_LIKELY(!_finished_file()))
5441 _c4dbgp(
"seqimap: finish");
// Handler for the flow-sequence parser state (debug messages: "seqflow").
//
// Preconditions asserted on entry: RSEQ and RFLOW are set, RKEY is not,
// exactly one of RVAL / RNXT is set, and the current indref is not npos.
// In RVAL a value (or a token that starts one) is consumed; in RNXT the
// separator ',' or the terminator ']' is handled. Running out of input
// raises the "missing terminating ]" error near the end.
//
// NOTE(review): this listing appears to omit some lines (braces, a few
// conditions, the declaration of `sc`, the seqflow_finish label), so the
// comments below describe only what the visible statements do.
5447 template<
class EventHandler>
5448 void ParseEngine<EventHandler>::_handle_seq_flow()
5451 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry invariants of the state machine
5453 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5454 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5455 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5456 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
5457 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5458 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
// line-beginning handling runs only when positioned at the start of a
// line; whitespace skipping runs next, and an empty remainder is tested
// before any character is read
5460 if(m_evt_handler->m_curr->at_line_beginning())
5462 _handle_flow_line_beginning();
5465 _handle_flow_skip_whitespace();
5466 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL state (RNXT asserted absent): dispatch on the first character
5471 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5472 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// scalar values: scan, filter, handle annotations, set the value, then
// call _mark_seqflow_val_end()
5476 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5477 sc = _scan_scalar_squot();
5478 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5479 _handle_annotations_before_blck_val_scalar();
5480 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5482 _mark_seqflow_val_end();
5484 else if(first ==
'"')
5486 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5487 sc = _scan_scalar_dquot();
5488 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5489 _handle_annotations_before_blck_val_scalar();
5490 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5492 _mark_seqflow_val_end();
5495 else if(_scan_scalar_plain_seq_flow(&sc))
5497 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5498 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5499 _handle_annotations_before_blck_val_scalar();
5500 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5502 _mark_seqflow_val_end();
// '[': nested flow sequence; no goto is visible in this branch
5504 else if(first ==
'[')
5506 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5508 _handle_annotations_before_blck_val_scalar();
5509 m_evt_handler->begin_seq_val_flow();
5510 _set_indentation(m_evt_handler->m_parent->indref);
5512 _line_progressed(1);
// '{': nested flow map; this branch does jump to seqflow_finish
5514 else if(first ==
'{')
5516 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5518 _handle_annotations_before_blck_val_scalar();
5519 m_evt_handler->begin_map_val_flow();
5520 _set_indentation(m_evt_handler->m_parent->indref);
5522 _line_progressed(1);
5523 goto seqflow_finish;
// ']' while a value was expected: an empty value is set when anchors or
// tags are pending, then the bracket is consumed.
// NOTE(review): this test uses bitwise `|` while the ',' branch below
// uses logical `||`; presumably equivalent for integer entry counts --
// confirm and consider unifying.
5525 else if(first ==
']')
5527 _c4dbgp(
"seqflow[RVAL]: end!");
5528 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5530 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5531 _handle_annotations_before_blck_val_scalar();
5532 m_evt_handler->set_val_scalar_plain_empty();
5534 _line_progressed(1);
5536 goto seqflow_finish;
// '*': the scanned reference is handed to _handle_valref
5538 else if(first ==
'*')
5540 csubstr ref = _scan_ref_seq();
5541 _c4dbgpf(
"seqflow[RVAL]: ref! {}", _prs(ref));
5542 _handle_valref(ref);
// '&' / '!': anchors and tags are passed to _add_annotation with the
// corresponding pending list
5545 else if(first ==
'&')
5547 csubstr anchor = _scan_anchor();
5548 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", _prs(anchor));
5549 _add_annotation(&m_pending_anchors, anchor);
5551 else if(first ==
'!')
5553 csubstr tag = _scan_tag();
5554 _c4dbgpf(
"seqflow[RVAL]: tag! {}", _prs(tag));
5555 _add_annotation(&m_pending_tags, tag);
// ':' where a value was expected: a flow map is begun as the value and
// its key is set to an empty plain scalar
5557 else if(first ==
':')
5559 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5561 m_evt_handler->begin_map_val_flow();
5562 _set_indentation(m_evt_handler->m_parent->indref);
5563 _handle_annotations_before_blck_key_scalar();
5564 m_evt_handler->set_key_scalar_plain_empty();
5566 _line_progressed(1);
5567 goto seqflow_finish;
// '?': explicit key; a flow map is begun, the '?' is consumed and
// _maybe_skip_whitespace_tokens() is called
5569 else if(first ==
'?')
5571 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5573 m_evt_handler->begin_map_val_flow();
5574 _set_indentation(m_evt_handler->m_parent->indref);
5576 _line_progressed(1);
5577 _maybe_skip_whitespace_tokens();
5578 goto seqflow_finish;
// ',' while a value was expected: with pending anchors or tags an empty
// value is set to carry them
5580 else if(first ==
',')
5582 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5584 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5585 _handle_annotations_before_blck_val_scalar();
5586 m_evt_handler->set_val_scalar_plain_empty();
5588 _mark_seqflow_val_end();
// ---- RNXT state (RVAL asserted absent): a separator or the terminator
// is handled here
5602 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5603 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5604 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// per the debug message the next value is expected: a sibling is added
// and one character consumed; a '#' immediately afterwards is an error
5607 _c4dbgp(
"seqflow[RNXT]: expect next val");
5609 m_evt_handler->add_sibling();
5610 _line_progressed(1);
5611 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5613 _c4err(
"parse error: invalid comment after comma");
5615 _mark_seqflow_val_end();
5617 else if(first ==
']')
5619 _c4dbgp(
"seqflow[RNXT]: end!");
5620 _line_progressed(1);
5622 goto seqflow_finish;
// ':' after a value: when m_prev_val_end is set and equals the current
// line, the value just read is turned into the first key of a new flow
// map
5624 else if(first ==
':')
5626 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5627 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5629 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5630 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5631 _set_indentation(m_evt_handler->m_parent->indref);
5632 _line_progressed(1);
5634 goto seqflow_finish;
// no branch jumped to seqflow_finish: check for end of line and then end
// of file; the error below reports a sequence left without its ']'.
// NOTE(review): the bodies of these checks are not fully visible here.
5648 _c4dbgt(
"seqflow: go again", 0);
5649 if(_finished_line())
5651 if(C4_LIKELY(!_finished_file()))
5659 _c4err(
"missing terminating ]");
5665 _c4dbgp(
"seqflow: finish");
// Handler for the flow-map parser state (debug messages: "mapflow").
//
// Preconditions asserted on entry: RMAP and RFLOW are set and exactly one
// of RKEY / RKCL / RVAL / RNXT / QMRK is set. Each of those five states
// dispatches on the first character of the remaining line contents.
// Running out of input raises the "missing terminating }" error near the
// end.
//
// NOTE(review): this listing appears to omit some lines (braces, a few
// conditions, the declaration of `sc`, the mapflow_finish label), so the
// comments below describe only what the visible statements do.
5671 template<
class EventHandler>
5672 void ParseEngine<EventHandler>::_handle_map_flow()
5675 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry invariants of the state machine
5677 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5678 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5679 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
5680 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// line-beginning handling runs only when positioned at the start of a
// line; whitespace skipping runs next, and an empty remainder is tested
// before any character is read
5682 if(m_evt_handler->m_curr->at_line_beginning())
5684 _handle_flow_line_beginning();
5687 _handle_flow_skip_whitespace();
5688 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY state (all other states asserted absent): a key is being read
5693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5694 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5695 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5696 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5697 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5698 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// scalar keys: scan, filter, handle annotations, then set the key
5702 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5703 sc = _scan_scalar_squot();
5704 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5705 _handle_annotations_before_blck_key_scalar();
5706 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5709 else if(first ==
'"')
5711 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5712 sc = _scan_scalar_dquot();
5713 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5714 _handle_annotations_before_blck_key_scalar();
5715 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5719 else if(_scan_scalar_plain_map_flow(&sc))
5721 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5722 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5723 _handle_annotations_before_blck_key_scalar();
5724 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit key marker; it is consumed and
// _maybe_skip_whitespace_tokens() is called
5727 else if(first ==
'?')
5729 _c4dbgp(
"mapflow[RKEY]: explicit key");
5730 _handle_annotations_before_blck_key_scalar();
5732 _line_progressed(1);
5733 _maybe_skip_whitespace_tokens();
// ':' with no key text: the key is set to an empty plain scalar
5735 else if(first ==
':')
5737 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5738 _handle_annotations_before_blck_key_scalar();
5739 m_evt_handler->set_key_scalar_plain_empty();
5741 _line_progressed(1);
5742 _maybe_skip_whitespace_tokens();
// ',' where a key was expected is an error unless the
// unexpected-flow-token helper returns true
5744 else if(first ==
',')
5746 _c4dbgp(
"mapflow[RKEY]: comma!");
5747 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5748 _c4err(
"unexpected comma");
// '}': the same helper is called with its result deliberately discarded,
// then the brace is consumed
5752 else if(first ==
'}')
5754 _c4dbgp(
"mapflow[RKEY]: end!");
5755 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5756 _line_progressed(1);
5758 goto mapflow_finish;
// '&' / '!': anchors and tags are passed to _add_annotation with the
// corresponding pending list
5760 else if(first ==
'&')
5762 csubstr anchor = _scan_anchor();
5763 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", _prs(anchor));
5764 _add_annotation(&m_pending_anchors, anchor);
5766 else if(first ==
'!')
5768 csubstr tag = _scan_tag();
5769 _c4dbgpf(
"mapflow[RKEY]: tag! {}", _prs(tag));
5770 _add_annotation(&m_pending_tags, tag);
// '*': the scanned reference is handed to _handle_keyref
5772 else if(first ==
'*')
5774 csubstr ref = _scan_ref_map();
5775 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", _prs(ref));
5776 _handle_keyref(ref);
// '[' / '{': the key itself is a flow container. The '[' branch jumps to
// mapflow_finish; no goto is visible in the '{' branch.
5779 else if(first ==
'[')
5784 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5785 _handle_annotations_before_blck_key_scalar();
5787 m_evt_handler->begin_seq_key_flow();
5789 _set_indentation(m_evt_handler->m_parent->indref);
5790 _line_progressed(1);
5791 goto mapflow_finish;
5793 else if(first ==
'{')
5798 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5799 _handle_annotations_before_blck_key_scalar();
5801 m_evt_handler->begin_map_key_flow();
5803 _set_indentation(m_evt_handler->m_parent->indref);
5804 _line_progressed(1);
// ---- RKCL state: per the debug messages the colon after a key is
// consumed here; '}' or ',' instead mean the value is missing and it is
// set to an empty plain scalar
5812 else if(has_any(
RKCL))
5814 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5815 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5816 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5817 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5818 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5819 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5822 _c4dbgp(
"mapflow[RKCL]: found the colon");
5824 _line_progressed(1);
5826 else if(first ==
'}')
5828 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5830 m_evt_handler->set_val_scalar_plain_empty();
5831 _line_progressed(1);
5833 goto mapflow_finish;
// ',': empty value, then a sibling is added; a '#' immediately after the
// comma is an error
5835 else if(first ==
',')
5837 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5838 m_evt_handler->set_val_scalar_plain_empty();
5839 m_evt_handler->add_sibling();
5841 _line_progressed(1);
5842 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5844 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL state: a value is being read
5852 else if(has_any(
RVAL))
5854 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5855 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5856 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5857 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5858 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5859 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// scalar values: scan, filter, handle annotations, then set the value
5863 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5864 sc = _scan_scalar_squot();
5865 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5866 _handle_annotations_before_blck_val_scalar();
5867 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5870 else if(first ==
'"')
5872 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5873 sc = _scan_scalar_dquot();
5874 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5875 _handle_annotations_before_blck_val_scalar();
5876 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5880 else if(_scan_scalar_plain_map_flow(&sc))
5882 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5883 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5884 _handle_annotations_before_blck_val_scalar();
5885 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// '[' / '{': the value is a nested flow container. The '[' branch jumps
// to mapflow_finish; no goto is visible in the '{' branch.
5888 else if(first ==
'[')
5890 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5892 _handle_annotations_before_blck_val_scalar();
5893 m_evt_handler->begin_seq_val_flow();
5894 _set_indentation(m_evt_handler->m_parent->indref);
5896 _line_progressed(1);
5897 goto mapflow_finish;
5899 else if(first ==
'{')
5901 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5903 _handle_annotations_before_blck_val_scalar();
5904 m_evt_handler->begin_map_val_flow();
5905 _set_indentation(m_evt_handler->m_parent->indref);
5907 _line_progressed(1);
// '}' or ',' where a value was expected: the value is set to an empty
// plain scalar; only the '}' branch consumes a character and finishes
5910 else if(first ==
'}')
5912 _c4dbgp(
"mapflow[RVAL]: end!");
5913 _handle_annotations_before_blck_val_scalar();
5914 m_evt_handler->set_val_scalar_plain_empty();
5915 _line_progressed(1);
5917 goto mapflow_finish;
5919 else if(first ==
',')
5921 _c4dbgp(
"mapflow[RVAL]: empty val!");
5922 _handle_annotations_before_blck_val_scalar();
5923 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the next two debug messages say "key ref" / "key anchor"
// although this is the RVAL branch and the reference is passed to
// _handle_valref -- the wording looks copied from the RKEY branch;
// confirm before relying on these logs.
5927 else if(first ==
'*')
5929 csubstr ref = _scan_ref_map();
5930 _c4dbgpf(
"mapflow[RVAL]: key ref! {}", _prs(ref));
5931 _handle_valref(ref);
5934 else if(first ==
'&')
5936 csubstr anchor = _scan_anchor();
5937 _c4dbgpf(
"mapflow[RVAL]: key anchor! {}", _prs(anchor));
5938 _add_annotation(&m_pending_anchors, anchor);
5940 else if(first ==
'!')
5942 csubstr tag = _scan_tag();
5943 _c4dbgpf(
"mapflow[RVAL]: tag! {}", _prs(tag));
5944 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: ',' adds a sibling for the next key/value pair (a '#'
// immediately afterwards is an error); '}' is consumed and finishes
5951 else if(has_any(
RNXT))
5953 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5954 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5955 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5956 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5957 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5958 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5960 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5961 m_evt_handler->add_sibling();
5963 _line_progressed(1);
5964 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5966 _c4err(
"parse error: invalid comment after comma");
5969 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5971 _c4dbgp(
"mapflow[RNXT]: end!");
5972 _line_progressed(1);
5974 goto mapflow_finish;
// ---- QMRK state: a key is being read. The branches mirror RKEY, except
// that '}' and ',' set both key and value to empty plain scalars.
5981 else if(has_any(
QMRK))
5983 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5984 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5985 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5986 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5987 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5988 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
5992 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5993 sc = _scan_scalar_squot();
5994 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5995 _handle_annotations_before_blck_key_scalar();
5996 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5999 else if(first ==
'"')
6001 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6002 sc = _scan_scalar_dquot();
6003 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6004 _handle_annotations_before_blck_key_scalar();
6005 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6009 else if(_scan_scalar_plain_map_flow(&sc))
6011 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6012 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6013 _handle_annotations_before_blck_key_scalar();
6014 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' with no key text: the key is set to an empty plain scalar
6017 else if(first ==
':')
6019 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6020 _handle_annotations_before_blck_key_scalar();
6021 m_evt_handler->set_key_scalar_plain_empty();
6023 _line_progressed(1);
6024 _maybe_skip_whitespace_tokens();
// '}' ends the map with an empty key and an empty value
6026 else if(first ==
'}')
6028 _c4dbgp(
"mapflow[QMRK]: end!");
6029 _handle_annotations_before_blck_key_scalar();
6030 m_evt_handler->set_key_scalar_plain_empty();
6031 m_evt_handler->set_val_scalar_plain_empty();
6033 _line_progressed(1);
6034 goto mapflow_finish;
// ',' likewise sets an empty key and value; no _line_progressed call is
// visible in this branch
6036 else if(first ==
',')
6038 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6039 _handle_annotations_before_blck_key_scalar();
6040 m_evt_handler->set_key_scalar_plain_empty();
6041 m_evt_handler->set_val_scalar_plain_empty();
6044 else if(first ==
'&')
6046 csubstr anchor = _scan_anchor();
6047 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", _prs(anchor));
6048 _add_annotation(&m_pending_anchors, anchor);
6050 else if(first ==
'*')
6052 csubstr ref = _scan_ref_map();
6053 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", _prs(ref));
6054 _handle_keyref(ref);
// '[' / '{': the key itself is a flow container. The '[' branch jumps to
// mapflow_finish; no goto is visible in the '{' branch.
6057 else if(first ==
'[')
6062 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6064 _handle_annotations_before_blck_key_scalar();
6065 m_evt_handler->begin_seq_key_flow();
6067 _set_indentation(m_evt_handler->m_parent->indref);
6068 _line_progressed(1);
6069 goto mapflow_finish;
6071 else if(first ==
'{')
6076 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6078 _handle_annotations_before_blck_key_scalar();
6079 m_evt_handler->begin_map_key_flow();
6080 _set_indentation(m_evt_handler->m_parent->indref);
6082 _line_progressed(1);
6085 else if(first ==
'!')
6087 csubstr tag = _scan_tag();
6088 _c4dbgpf(
"mapflow[QMRK]: tag! {}", _prs(tag));
6089 _add_annotation(&m_pending_tags, tag);
// no branch jumped to mapflow_finish: check for end of line and then end
// of file; the error below reports a map left without its '}'.
// NOTE(review): the bodies of these checks are not fully visible here.
6098 _c4dbgt(
"mapflow: go again", 0);
6099 if(_finished_line())
6101 if(C4_LIKELY(!_finished_file()))
6109 _c4err(
"missing terminating }");
6115 _c4dbgp(
"mapflow: finish");
/** Handle the parser state while inside a block-style sequence.
 *
 * The current state must have RSEQ and RBLCK set, and exactly one of
 * RVAL (a value is expected for the current entry) or RNXT (the next
 * entry is expected). Depending on which one is set, the function
 * consumes indentation, then looks at the first character of the
 * remaining line and emits the corresponding events through
 * m_evt_handler (scalars, nested containers, anchors, tags, refs,
 * document start/end).
 *
 * NOTE(review): in this view the braces, labels and some branch
 * conditions are not visible; the section comments below are derived
 * from the visible calls and debug messages. */
6121 template<
class EventHandler>
6122 void ParseEngine<EventHandler>::_handle_seq_block()
6125 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions: block sequence, and exactly one of RVAL/RNXT
6127 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6128 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6129 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
6130 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
// comment handling is delegated to the helper; afterwards check whether
// anything is left on the current line
6132 _maybe_skip_comment_strict();
6133 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---------------------------------------------------------------
// RVAL: a value is expected for the current sequence entry
6138 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6139 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// at the beginning of a line, first deal with the line's indentation
6140 if(m_evt_handler->m_curr->at_line_beginning())
6142 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
// enough indentation for a value: consume the indentation
6143 if(m_evt_handler->m_curr->indentation_ge_extra())
6145 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6146 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6147 if(!m_evt_handler->m_curr->line_contents.rem.len)
// not enough indentation for a value
6150 else if(m_evt_handler->m_curr->indentation_lt_extra())
6152 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// indentation_eq(): the pending entry receives an empty plain value
6153 if(m_evt_handler->m_curr->indentation_eq())
6155 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6156 _handle_annotations_before_blck_val_scalar();
6157 m_evt_handler->set_val_scalar_plain_empty();
// otherwise the indentation must be smaller: pop out of this sequence
6163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6164 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6165 _handle_indentation_pop_from_block_seq();
6166 goto seqblck_finish;
// an indentation of npos is treated as an empty line: consume it whole
6169 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6171 _c4dbgp(
"seqblck[RVAL]: empty line!");
6172 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// from here on the line is required to have remaining content
6176 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6177 const size_t startmark = _handle_block_skip_leading_whitespace();
6178 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6179 if(startmark ==
npos)
6181 _c4dbgp(
"seqblck[RVAL]: whitespace only");
// remember where the token starts; these are used below when the token
// turns out to open a nested container
6184 const size_t tabmark = _handle_block_get_whitespace_mark();
6185 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6186 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6187 const size_t startline = m_evt_handler->m_curr->pos.line;
6188 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// single-quoted scalar (NOTE(review): the check on `first` for this
// branch is not visible in this view)
6192 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6193 sc = _scan_scalar_squot();
// no ':' follows: the scalar is the entry's value
6194 if(!_maybe_scan_following_colon())
6196 _c4dbgp(
"seqblck[RVAL]: set as val");
6197 _handle_annotations_before_blck_val_scalar();
6198 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6199 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// a ':' follows: open a block map as the entry's value and use the
// scalar as its first key
6204 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6205 _handle_block_check_leading_tabs(startmark);
6207 _handle_annotations_before_start_mapblck(startline);
6209 m_evt_handler->begin_map_val_block();
6210 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6211 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6212 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6214 _maybe_skip_whitespace_tokens();
6215 goto seqblck_finish;
// double-quoted scalar: same value-or-key decision as for single quotes
6218 else if(first ==
'"')
6220 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6221 sc = _scan_scalar_dquot();
6222 if(!_maybe_scan_following_colon())
6224 _c4dbgp(
"seqblck[RVAL]: set as val");
6225 _handle_annotations_before_blck_val_scalar();
6226 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6227 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6232 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6234 _handle_block_check_leading_tabs(startmark);
6235 _handle_annotations_before_start_mapblck(startline);
6237 m_evt_handler->begin_map_val_block();
6238 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6239 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6240 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6242 _maybe_skip_whitespace_tokens();
6243 goto seqblck_finish;
// '|' block-literal scalar, scanned with indref + 1 as indentation argument
6249 else if(first ==
'|')
6251 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6253 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6254 _handle_annotations_before_blck_val_scalar();
6255 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6256 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>' block-folded scalar, scanned with indref + 1 as indentation argument
6259 else if(first ==
'>')
6261 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6263 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6264 _handle_annotations_before_blck_val_scalar();
6265 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6266 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar: again either the entry's value, or the key of a new
// block map when a ':' follows
6269 else if(_scan_scalar_plain_seq_blck(&sc))
6271 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6272 if(!_maybe_scan_following_colon())
6274 _c4dbgp(
"seqblck[RVAL]: set as val");
6275 _handle_annotations_before_blck_val_scalar();
6276 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6277 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// the nested map must start at a column beyond the sequence's indref
6282 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6283 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6284 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6285 _handle_block_check_leading_tabs(startmark, tabmark);
6287 _handle_annotations_before_start_mapblck(startline);
6289 m_evt_handler->begin_map_val_block();
6290 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6291 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6292 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6294 _maybe_skip_whitespace_tokens();
6295 goto seqblck_finish;
// '[' opens a flow sequence as the entry's value
6298 else if(first ==
'[')
6300 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6302 _handle_annotations_before_blck_val_scalar();
6303 m_evt_handler->begin_seq_val_flow();
6305 _line_progressed(1);
6306 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6307 goto seqblck_finish;
// '{' opens a flow map as the entry's value
6309 else if(first ==
'{')
6311 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6313 _handle_annotations_before_blck_val_scalar();
6314 m_evt_handler->begin_map_val_flow();
6316 _line_progressed(1);
6317 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6318 goto seqblck_finish;
// '-' opens a nested block sequence as the entry's value; it must be
// more indented than the current sequence
6320 else if(first ==
'-')
6322 _c4dbgp(
"seqblck[RVAL]: dash");
6323 _handle_block_check_leading_tabs(startmark);
6324 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6325 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6326 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
6327 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6329 _handle_annotations_before_blck_val_scalar();
6330 m_evt_handler->begin_seq_val_block();
6332 _set_indentation(startindent);
6334 _line_progressed(1);
// ':' with no preceding scalar: nested block map whose first key is empty
6336 else if(first ==
':')
6338 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6340 _handle_annotations_before_start_mapblck(startline);
6342 m_evt_handler->begin_map_val_block();
6343 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6344 m_evt_handler->set_key_scalar_plain_empty();
6346 _line_progressed(1);
6347 _maybe_skip_whitespace_tokens();
6348 goto seqblck_finish;
// '&' anchor: stored as pending, together with where it was found
6350 else if(first ==
'&')
6352 const csubstr anchor = _scan_anchor();
6353 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", _prs(anchor));
6356 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*' reference: either the entry's value, or (when a ':' follows) the
// key of a new nested block map
6358 else if(first ==
'*')
6360 csubstr ref = _scan_ref_seq();
6361 _c4dbgpf(
"seqblck[RVAL]: ref! {}", _prs(ref));
6362 if(!_maybe_scan_following_colon())
6364 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6365 _handle_valref(ref);
6370 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6372 _handle_annotations_before_start_mapblck(startline);
6373 m_evt_handler->begin_map_val_block();
6374 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6375 _handle_keyref(ref);
6377 _set_indentation(startindent);
6378 _maybe_skip_whitespace_tokens();
6379 goto seqblck_finish;
// '!' tag: stored as pending, together with where it was found
6382 else if(first ==
'!')
6384 csubstr tag = _scan_tag();
6385 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", _prs(tag));
6388 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' explicit key: open a nested block map; if a sequence token comes
// right after the '?', the key itself is a block sequence
6390 else if(first ==
'?')
6392 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6394 m_evt_handler->begin_map_val_block();
6396 _set_indentation(startindent);
6397 _line_progressed(1);
6398 _maybe_skipchars(
' ');
6399 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6401 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6403 m_evt_handler->begin_seq_key_block();
6405 _save_indentation();
6406 _line_progressed(1);
6407 _maybe_skipchars(
' ');
6409 goto seqblck_finish;
// ---------------------------------------------------------------
// RNXT: the next sequence entry (or the end of the sequence) is expected
6418 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6419 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6423 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6424 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6426 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation is sufficient: skip indref characters
6427 if(m_evt_handler->m_curr->indentation_ge())
6429 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6430 _line_progressed(m_evt_handler->m_curr->indref);
6431 if(!m_evt_handler->m_curr->line_contents.rem.len)
// smaller indentation: pop, then continue or leave depending on the
// state reached after the pop (NOTE(review): that check is not visible)
6434 else if(m_evt_handler->m_curr->indentation_lt())
6436 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6437 _handle_indentation_pop_from_block_seq();
6440 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6441 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6442 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6443 if(!m_evt_handler->m_curr->line_contents.rem.len)
6448 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6449 goto seqblck_finish;
// indentation of npos: blank line, consume it whole
6452 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6454 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6455 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6456 if(!m_evt_handler->m_curr->line_contents.rem.len)
// not at the start of the line
6462 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6463 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6470 if(!m_evt_handler->m_curr->line_contents.rem.len)
6472 _c4dbgp(
"seqblck[RNXT]: again");
// dispatch on the first remaining character
6480 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6481 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6482 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// NOTE(review): the enclosing check on `first` is not visible here;
// presumably this is the '-' case. Unless the line is an unindented
// doc-begin token at indref 0, a sequence token starts the next entry.
6485 if(m_evt_handler->m_curr->indref > 0
6486 || m_evt_handler->m_curr->line_contents.indentation > 0
6487 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6489 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6491 _c4dbgp(
"seqblck[RNXT]: expect next val");
6493 m_evt_handler->add_sibling();
6494 _line_progressed(1);
// otherwise a new document starts; the 3 characters of the token are consumed
6503 _c4dbgp(
"seqblck[RNXT]: start doc");
6504 _start_doc_suddenly();
6505 _line_progressed(3);
6506 _maybe_skip_whitespace_tokens();
6507 goto seqblck_finish;
// ':' while the parent is a map: this sequence was the '?' key of that
// map, so the sequence ends here
6510 else if(first ==
':')
6516 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP)))
6518 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6519 m_evt_handler->end_seq_block();
6520 goto seqblck_finish;
// '.' may begin a document-end token
6527 else if(first ==
'.')
6529 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6530 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6532 _c4dbgp(
"seqblck[RNXT]: end doc");
6533 _end_doc_suddenly();
6534 _line_progressed(3);
6535 _maybe_skip_whitespace_tokens();
6536 _check_doc_end_tokens();
6537 goto seqblck_finish;
// a sequence whose indref equals that of its parent block map
// ("indentless seq"): return to the parent map and expect its next sibling
6548 _print_state_stack();
6550 if(m_evt_handler->m_parent
6551 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6552 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6554 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6555 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6556 _handle_indentation_pop(m_evt_handler->m_parent);
6557 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6558 m_evt_handler->add_sibling();
6560 goto seqblck_finish;
// leading tab: find the first non-tab character; the rest of the line
// is consumed (NOTE(review): the use of `pos` is not visible in this view)
6562 else if(first ==
'\t')
6564 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6567 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// loop control: advance to the next line when this one is finished, and
// leave when the file is finished
6576 _c4dbgt(
"seqblck: go again", 0);
6577 if(_finished_line())
6582 if(_finished_file())
6584 _c4dbgp(
"seqblck: finish!");
6586 goto seqblck_finish;
6593 _c4dbgp(
"seqblck: finish");
6599 template<
class EventHandler>
6600 void ParseEngine<EventHandler>::_handle_map_block()
6603 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6607 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6608 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6609 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6610 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
6612 _maybe_skip_comment();
6613 if(!m_evt_handler->m_curr->line_contents.rem.len)
6618 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6619 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6620 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6621 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6625 if(m_evt_handler->m_curr->at_line_beginning())
6627 if(m_evt_handler->m_curr->indentation_eq())
6629 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6630 _line_progressed(m_evt_handler->m_curr->indref);
6631 if(!m_evt_handler->m_curr->line_contents.rem.len)
6634 else if(m_evt_handler->m_curr->indentation_lt())
6636 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6637 _handle_indentation_pop_from_block_map();
6638 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6641 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6642 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6643 if(!m_evt_handler->m_curr->line_contents.rem.len)
6648 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6649 goto mapblck_finish;
6654 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6655 _c4err(
"invalid indentation");
6661 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6662 const size_t startline = m_evt_handler->m_curr->pos.line;
6663 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6664 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
6668 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6669 sc = _scan_scalar_squot();
6670 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6671 _handle_annotations_before_blck_key_scalar();
6672 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6674 if(!_maybe_scan_following_colon())
6675 _c4err(
"could not find ':' colon after key");
6677 _maybe_skip_whitespace_tokens();
6679 else if(first ==
'"')
6681 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6682 sc = _scan_scalar_dquot();
6683 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6684 _handle_annotations_before_blck_key_scalar();
6685 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6687 if(!_maybe_scan_following_colon())
6688 _c4err(
"could not find ':' colon after key");
6690 _maybe_skip_whitespace_tokens();
6694 else if(C4_UNLIKELY(first ==
'|'))
6696 _c4err(
"block map: literal keys must be enclosed in '?'");
6698 else if(C4_UNLIKELY(first ==
'>'))
6700 _c4err(
"block map: folded keys must be enclosed in '?'");
6702 else if(_scan_scalar_plain_map_blck(&sc))
6704 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6705 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6706 _handle_annotations_before_blck_key_scalar();
6707 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6709 if(!_maybe_scan_following_colon())
6710 _c4err(
"could not find ':' colon after key");
6712 _maybe_skip_whitespace_tokens();
6714 else if(first ==
'?')
6716 _c4dbgp(
"mapblck[RKEY]: key token!");
6718 _line_progressed(1);
6719 _maybe_skipchars(
' ');
6720 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6722 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6724 m_evt_handler->begin_seq_key_block();
6726 _save_indentation();
6727 _line_progressed(1);
6728 _maybe_skipchars(
' ');
6729 goto mapblck_finish;
6733 else if(first ==
':')
6735 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6736 _handle_annotations_before_blck_key_scalar();
6737 m_evt_handler->set_key_scalar_plain_empty();
6739 _line_progressed(1);
6741 _maybe_skip_whitespace_tokens();
6743 else if(first ==
'*')
6745 csubstr ref = _scan_ref_map();
6746 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", _prs(ref));
6747 _handle_keyref(ref);
6749 if(!_maybe_scan_following_colon())
6750 _c4err(
"could not find ':' colon after key");
6752 _maybe_skip_whitespace_tokens();
6754 else if(first ==
'&')
6756 csubstr anchor = _scan_anchor();
6757 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", _prs(anchor));
6758 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6760 else if(first ==
'!')
6762 csubstr tag = _scan_tag();
6763 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", _prs(tag));
6764 _add_annotation(&m_pending_tags, tag, startindent, startline);
6766 else if(first ==
'[')
6771 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6772 _handle_annotations_before_blck_key_scalar();
6773 m_evt_handler->begin_seq_key_flow();
6775 _line_progressed(1);
6776 _set_indentation(startindent);
6777 goto mapblck_finish;
6779 else if(first ==
'{')
6784 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6785 _handle_annotations_before_blck_key_scalar();
6786 m_evt_handler->begin_map_key_flow();
6788 _line_progressed(1);
6789 _set_indentation(startindent);
6790 goto mapblck_finish;
6792 else if(first ==
'-')
6794 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6795 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6797 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6798 _start_doc_suddenly();
6799 _line_progressed(3);
6800 _maybe_skip_whitespace_tokens();
6801 goto mapblck_finish;
6808 else if(first ==
'.')
6810 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6811 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6813 _c4dbgp(
"mapblck[RKEY]: end doc");
6814 _end_doc_suddenly();
6815 _line_progressed(3);
6816 _maybe_skip_whitespace_tokens();
6817 _check_doc_end_tokens();
6818 goto mapblck_finish;
6830 else if(has_any(
RVAL))
6832 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6833 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6834 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6835 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6839 if(m_evt_handler->m_curr->at_line_beginning())
6841 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6842 m_evt_handler->m_curr->more_indented =
false;
6843 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6844 if(m_evt_handler->m_curr->indentation_eq_extra())
6846 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6847 _line_progressed(m_evt_handler->m_curr->indref + 1);
6848 if(!m_evt_handler->m_curr->line_contents.rem.len)
6851 else if(m_evt_handler->m_curr->indentation_gt_extra())
6853 _c4dbgp(
"mapblck[RVAL]: more indented!");
6854 m_evt_handler->m_curr->more_indented =
true;
6855 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6856 if(!m_evt_handler->m_curr->line_contents.rem.len)
6859 else if(m_evt_handler->m_curr->indentation_lt_extra())
6861 if(m_evt_handler->m_curr->indentation_eq())
6863 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6865 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6867 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6868 _handle_annotations_before_blck_val_scalar();
6869 m_evt_handler->set_val_scalar_plain_empty();
6876 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6877 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6878 _handle_indentation_pop_from_block_map();
6881 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6882 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6885 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6886 m_evt_handler->add_sibling();
6893 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6894 goto mapblck_finish;
6899 const size_t startcol = _handle_block_skip_leading_whitespace();
6900 if(startcol ==
npos)
6902 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6905 const size_t tabmark = _handle_block_get_whitespace_mark();
6909 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6910 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6911 const size_t startline = m_evt_handler->m_curr->pos.line;
6912 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6913 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
6917 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6918 sc = _scan_scalar_squot();
6919 if(!_maybe_scan_following_colon())
6921 _c4dbgp(
"mapblck[RVAL]: set as val");
6922 _handle_annotations_before_blck_val_scalar();
6923 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6924 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6930 _c4assert(startindent > m_evt_handler->m_curr->indref);
6931 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6932 _handle_block_check_leading_tabs(startcol);
6933 _handle_annotations_before_start_mapblck(startline);
6936 m_evt_handler->begin_map_val_block();
6937 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6938 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6939 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6940 _maybe_skip_whitespace_tokens();
6945 else if(first ==
'"')
6947 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6948 sc = _scan_scalar_dquot();
6949 if(!_maybe_scan_following_colon())
6951 _c4dbgp(
"mapblck[RVAL]: set as val");
6952 _handle_annotations_before_blck_val_scalar();
6953 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6954 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6960 _c4assert(startindent > m_evt_handler->m_curr->indref);
6961 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6962 _handle_block_check_leading_tabs(startcol);
6963 _handle_annotations_before_start_mapblck(startline);
6966 m_evt_handler->begin_map_val_block();
6967 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6968 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6969 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6970 _maybe_skip_whitespace_tokens();
6977 else if(first ==
'|')
6979 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6981 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6982 _handle_annotations_before_blck_val_scalar();
6983 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6984 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6987 else if(first ==
'>')
6989 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6991 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6992 _handle_annotations_before_blck_val_scalar();
6993 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6994 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6997 else if(_scan_scalar_plain_map_blck(&sc))
6999 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7000 if(!_maybe_scan_following_colon())
7002 _c4dbgp(
"mapblck[RVAL]: set as val");
7003 _handle_annotations_before_blck_val_scalar();
7004 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7005 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7011 _c4assert(startindent > m_evt_handler->m_curr->indref);
7012 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7013 _handle_block_check_leading_tabs(startcol, tabmark);
7015 _handle_annotations_before_start_mapblck(startline);
7017 m_evt_handler->begin_map_val_block();
7018 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7019 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7020 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7021 _maybe_skip_whitespace_tokens();
7026 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7028 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7030 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7031 _handle_block_check_leading_tabs(startcol);
7033 _handle_annotations_before_blck_val_scalar();
7034 m_evt_handler->begin_seq_val_block();
7036 _set_indentation(startindent);
7037 _line_progressed(1);
7038 _maybe_skip_whitespace_tokens();
7039 goto mapblck_finish;
7041 else if(first ==
'[')
7043 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7045 _handle_annotations_before_blck_val_scalar();
7046 m_evt_handler->begin_seq_val_flow();
7048 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7049 _line_progressed(1);
7050 goto mapblck_finish;
7052 else if(first ==
'{')
7054 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7056 _handle_annotations_before_blck_val_scalar();
7057 m_evt_handler->begin_map_val_flow();
7059 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7060 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7061 _line_progressed(1);
7062 goto mapblck_finish;
7064 else if(first ==
'*')
7066 csubstr ref = _scan_ref_map();
7067 _c4dbgpf(
"mapblck[RVAL]: ref! {}", _prs(ref));
7068 if(_maybe_scan_following_colon())
7070 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7072 _handle_annotations_before_blck_val_scalar();
7073 m_evt_handler->begin_map_val_block();
7074 _handle_keyref(ref);
7075 _set_indentation(startindent);
7081 _c4dbgp(
"mapblck[RVAL]: was val ref");
7082 _handle_valref(ref);
7085 _maybe_skip_whitespace_tokens();
7087 else if(first ==
'&')
7089 csubstr anchor = _scan_anchor();
7090 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", _prs(anchor));
7093 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7095 else if(first ==
'!')
7097 csubstr tag = _scan_tag();
7098 _c4dbgpf(
"mapblck[RVAL]: tag! {}", _prs(tag));
7101 _add_annotation(&m_pending_tags, tag, startindent, startline);
7103 else if(first ==
'?')
7105 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7107 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7109 _handle_annotations_before_blck_val_scalar();
7110 m_evt_handler->begin_map_val_block();
7112 _set_indentation(startindent);
7113 _line_progressed(1);
7114 _maybe_skipchars(
' ');
7115 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7117 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7119 m_evt_handler->begin_seq_key_block();
7121 _save_indentation();
7122 _line_progressed(1);
7123 _maybe_skipchars(
' ');
7124 goto mapblck_finish;
7128 else if(first ==
':')
7130 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7132 _handle_annotations_before_start_mapblck(startline);
7134 m_evt_handler->begin_map_val_block();
7135 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7136 m_evt_handler->set_key_scalar_plain_empty();
7139 _line_progressed(1);
7140 _maybe_skip_whitespace_tokens();
7148 else if(has_any(
RNXT))
7150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7157 if(m_evt_handler->m_curr->at_line_beginning())
7159 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7160 if(m_evt_handler->m_curr->indentation_eq())
7162 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7163 _line_progressed(m_evt_handler->m_curr->indref);
7164 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7165 m_evt_handler->add_sibling();
7169 else if(m_evt_handler->m_curr->indentation_lt())
7171 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7172 _handle_indentation_pop_from_block_map();
7175 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7178 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7179 m_evt_handler->add_sibling();
7186 goto mapblck_finish;
7192 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7193 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
7200 if(!m_evt_handler->m_curr->line_contents.rem.len)
7202 _c4dbgp(
"seqblck[RNXT]: again");
7210 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7211 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7212 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7215 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7216 _maybe_skip_whitespace_tokens();
7223 else if(has_any(
QMRK))
7225 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7226 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7229 if(_handle_map_block_qmrk())
7232 goto mapblck_finish;
7234 else if(has_any(
RKCL))
7236 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7237 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7238 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7239 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7240 if(_handle_map_block_rkcl())
7243 goto mapblck_finish;
7247 _c4dbgt(
"mapblck: again", 0);
7248 if(_finished_line())
7252 if(_finished_file())
7254 _c4dbgp(
"mapblck: file finished!");
7256 goto mapblck_finish;
7263 _c4dbgp(
"mapblck: finish");
/** Advance block-map parsing by one step while an explicit key (the one
 * introduced by `?`) is being read. The block-map loop dispatches here
 * when has_any(QMRK) holds.
 *
 * On a fresh line the indentation is first settled against indref; then
 * the first remaining character selects how the key is read: quoted
 * scalar, literal (`|`) or folded (`>`) block scalar, plain scalar, empty
 * key (`:`), alias (`*`), anchor (`&`), tag (`!`), block sequence or
 * document start (`-`), flow sequence (`[`), flow map (`{`), or a nested
 * explicit key (`?`).
 *
 * @return a bool consumed by the block-map loop.
 *         NOTE(review): the meaning of true/false is not evident from the
 *         statements of this function alone -- confirm against the caller. */
7270 template<
class EventHandler>
7271 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// Fresh line: settle its indentation relative to indref before reading tokens.
7276 if(m_evt_handler->m_curr->at_line_beginning())
7278 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7279 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
// indentation_eq_extra(): consume indref + 1 columns of this line.
7280 if(m_evt_handler->m_curr->indentation_eq_extra())
7282 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7283 _line_progressed(m_evt_handler->m_curr->indref + 1);
7284 if(!m_evt_handler->m_curr->line_contents.rem.len)
// indentation_gt_extra(): consume the whole indentation of the line.
7288 else if(m_evt_handler->m_curr->indentation_gt_extra())
7290 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7291 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7292 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Remaining case: less indentation (asserted through indentation_lt_extra()),
// and the line must still have content.
7297 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7298 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7299 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
// Same indentation as the map and the next char is not `-`: the explicit
// key ends without a scalar, so an empty plain key is emitted.
7300 if(m_evt_handler->m_curr->indentation_eq()
7302 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7304 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7305 _handle_annotations_before_blck_key_scalar();
7306 m_evt_handler->set_key_scalar_plain_empty();
// Less indentation than the map: pop out of this block map, then consume
// the indentation of the line.
7310 else if(m_evt_handler->m_curr->indentation_lt())
7312 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7313 _handle_indentation_pop_from_block_map();
7314 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7317 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7322 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// Token dispatch: record where the token starts (line and column), then
// branch on its first character.
7331 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7332 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7333 const size_t startline = m_evt_handler->m_curr->pos.line;
7334 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7335 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
// Single-quoted key scalar.
7339 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7340 sc = _scan_scalar_squot();
7341 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
// No colon follows: the scalar itself is the key.
7343 if(!_maybe_scan_following_colon())
7345 _c4dbgp(
"mapblck[QMRK]: set as key");
7346 _handle_annotations_before_blck_key_scalar();
7347 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// A colon follows: the key is itself a block map whose first key is this scalar.
7351 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7352 _handle_annotations_before_start_mapblck_as_key();
7353 m_evt_handler->begin_map_key_block();
7354 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7355 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7356 _maybe_skip_whitespace_tokens();
7357 _set_indentation(startindent);
// Double-quoted key scalar: same two outcomes as the single-quoted case.
7362 else if(first ==
'"')
7364 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7365 sc = _scan_scalar_dquot();
7366 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7368 if(!_maybe_scan_following_colon())
7370 _c4dbgp(
"mapblck[QMRK]: set as key");
7371 _handle_annotations_before_blck_key_scalar();
7372 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7376 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7377 _handle_annotations_before_start_mapblck_as_key();
7378 m_evt_handler->begin_map_key_block();
7379 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7380 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7381 _maybe_skip_whitespace_tokens();
7382 _set_indentation(startindent);
// Literal block scalar used as key; scanned with indref + 1 as indentation.
7387 else if(first ==
'|')
7389 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7391 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7392 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7393 _handle_annotations_before_blck_key_scalar();
7394 m_evt_handler->set_key_scalar_literal(maybe_filtered);
// Folded block scalar used as key; the debug message names the folded
// style so that it agrees with the folded filter and setter called here.
7397 else if(first ==
'>')
7399 _c4dbgp(
"mapblck[QMRK]: scanning block-folded scalar");
7401 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7402 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7403 _handle_annotations_before_blck_key_scalar();
7404 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// Plain key scalar; filtered with indref as the indentation.
7407 else if(_scan_scalar_plain_map_blck(&sc))
7409 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7410 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7412 if(!_maybe_scan_following_colon())
7414 _c4dbgp(
"mapblck[QMRK]: set as key");
7415 _handle_annotations_before_blck_key_scalar();
7416 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7420 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7421 _handle_annotations_before_start_mapblck_as_key();
7422 m_evt_handler->begin_map_key_block();
7423 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7424 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7425 _maybe_skip_whitespace_tokens();
7426 _set_indentation(startindent);
// A bare colon: the key is a block map whose first key is empty; the colon
// (one char) is consumed afterwards.
7431 else if(first ==
':')
7433 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7435 _handle_annotations_before_start_mapblck_as_key();
7436 m_evt_handler->begin_map_key_block();
7437 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7438 m_evt_handler->set_key_scalar_plain_empty();
7439 _line_progressed(1);
7440 _maybe_skip_whitespace_tokens();
7441 _set_indentation(startindent);
// Alias used as key: either the key itself, or the first key of a nested
// block map when a colon follows.
7445 else if(first ==
'*')
7447 csubstr ref = _scan_ref_map();
7448 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", _prs(ref));
7450 if(!_maybe_scan_following_colon())
7452 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7453 _handle_keyref(ref);
7457 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7458 _handle_annotations_before_start_mapblck_as_key();
7459 m_evt_handler->begin_map_key_block();
7460 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7461 _handle_keyref(ref);
7462 _set_indentation(startindent);
7466 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation together with its column and line.
7468 else if(first ==
'&')
7470 csubstr anchor = _scan_anchor();
7471 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", _prs(anchor));
7472 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// Tag: stored as a pending annotation together with its column and line.
7474 else if(first ==
'!')
7476 csubstr tag = _scan_tag();
7477 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", _prs(tag));
7478 _add_annotation(&m_pending_tags, tag, startindent, startline);
// Dash: either a block sequence used as key, or a document-start token
// (asserted), in which case three chars are consumed.
7480 else if(first ==
'-')
7482 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7483 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7485 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7487 _handle_annotations_before_blck_key_scalar();
7488 m_evt_handler->begin_seq_key_block();
7490 _set_indentation(startindent);
7491 _line_progressed(1);
7495 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7496 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7497 _start_doc_suddenly();
7498 _line_progressed(3);
7500 _maybe_skip_whitespace_tokens();
// Flow sequence used as key; indentation is set to the parent indref + 1.
7503 else if(first ==
'[')
7505 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7507 _handle_annotations_before_blck_key_scalar();
7508 m_evt_handler->begin_seq_key_flow();
7510 _set_indentation(m_evt_handler->m_parent->indref + 1);
7511 _line_progressed(1);
// Flow map used as key; indentation is set to the parent indref + 1.
7514 else if(first ==
'{')
7516 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7518 _handle_annotations_before_blck_key_scalar();
7519 m_evt_handler->begin_map_key_flow();
7521 _set_indentation(m_evt_handler->m_parent->indref + 1);
7522 _line_progressed(1);
// Another `?`: it must sit deeper than indref (asserted) and opens a block
// map that is itself the key.
7525 else if(first ==
'?')
7527 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7528 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7529 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7531 _handle_annotations_before_blck_key_scalar();
7532 m_evt_handler->begin_map_key_block();
7534 _set_indentation(startindent);
7536 _line_progressed(1);
7537 _maybe_skipchars(
' ');
// A block sequence may start right after the `?`; it becomes the key.
// The message carries the QMRK tag like every other message of this handler.
7538 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7540 _c4dbgp(
"mapblck[QMRK]: seqblck starts after ?");
7542 m_evt_handler->begin_seq_key_block();
7544 _save_indentation();
7545 _line_progressed(1);
7546 _maybe_skipchars(
' ');
/** Advance block-map parsing by one step in the RKCL state; the block-map
 * loop dispatches here when has_any(RKCL) holds. Going by the debug
 * messages, this is where the colon that follows a key is expected.
 *
 * Handles, in order: indentation of a fresh line, the colon itself
 * (optionally followed by a block sequence as value), a `?` (the value was
 * empty and a new explicit key starts), a `-` or `.` that begins/ends a
 * document, and finally the case where the colon is missing.
 *
 * @return a bool consumed by the block-map loop.
 *         NOTE(review): the meaning of true/false is not evident from the
 *         statements of this function alone -- confirm against the caller. */
7561 template<
class EventHandler>
7562 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// Fresh line: with equal indentation consume indref columns; less
// indentation is reported as an error.
7567 if(m_evt_handler->m_curr->at_line_beginning())
7569 if(m_evt_handler->m_curr->indentation_eq())
7571 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7572 _line_progressed(m_evt_handler->m_curr->indref);
7573 if(!m_evt_handler->m_curr->line_contents.rem.len)
7576 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7578 _c4err(
"invalid indentation");
// Dispatch on the first remaining character.
7581 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7582 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
// Colon found: consume it and any following spaces.
7585 _c4dbgp(
"mapblck[RKCL]: found the colon");
7586 _line_progressed(1);
7587 _maybe_skipchars(
' ');
// Only for GCC >= 12 on the listed targets: C4_DONT_OPTIMIZE is applied to
// the remainder of the line.
// NOTE(review): presumably a workaround for a miscompilation -- confirm.
7588 #if defined(__GNUC__) && (__GNUC__ >= 12) \
7589 && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7590 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7593 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
// The value is a block sequence that starts on the same line as the colon.
7600 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7602 m_evt_handler->begin_seq_val_block();
7604 _save_indentation();
7605 _line_progressed(1);
7606 _maybe_skipchars(
' ');
// A `?` instead of a colon: the pending key gets an empty value, a sibling
// is added, and the `?` is consumed.
7610 else if(first ==
'?')
7612 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7613 m_evt_handler->set_val_scalar_plain_empty();
7614 m_evt_handler->add_sibling();
7616 _line_progressed(1);
7617 _maybe_skipchars(
' ');
// A block sequence right after the `?` becomes the key.
7618 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7620 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7622 m_evt_handler->begin_seq_key_block();
7624 _save_indentation();
7625 _line_progressed(1);
7626 _maybe_skipchars(
' ');
// Dash: when indref or the line indentation is zero, or the remainder is a
// document-start token, the token is checked to be a document start and a
// new document is started (three chars consumed).
7630 else if(first ==
'-')
7632 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7634 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7635 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7636 _start_doc_suddenly();
7637 _line_progressed(3);
7638 _maybe_skip_whitespace_tokens();
// Dot: three dots alone, or three dots followed by a space, end the document.
// NOTE(review): the message below says end+start although only
// _end_doc_suddenly() is called here -- confirm the wording.
7646 else if(first ==
'.')
7648 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7649 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7650 if(rs ==
".." || rs.begins_with(
".. "))
7652 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7653 _end_doc_suddenly();
7654 _line_progressed(3);
7655 _maybe_skip_whitespace_tokens();
7656 _check_doc_end_tokens();
// No colon at all: the pending key gets an empty value and a sibling is added.
7666 _c4dbgp(
"mapblck[RKCL]: missing :");
7667 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7669 m_evt_handler->set_val_scalar_plain_empty();
7670 m_evt_handler->add_sibling();
/** JSON flavour of the unknown-state handler: decides what the next
 * top-level token is when no container is open yet (RNXT, RSEQ and RMAP
 * are asserted unset, RTOP asserted set).
 *
 * Skips comments and leading spaces/tabs, then recognises a flow sequence
 * (`[`), a flow map (`{`), a byte order mark, a double-quoted scalar or a
 * plain scalar. Every recognised token marks the document as non-empty. */
7679 template<
class EventHandler>
7680 void ParseEngine<EventHandler>::_handle_unk_json()
7682 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7684 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7685 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7687 _maybe_skip_comment();
7688 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces and tabs; when the line holds nothing else, the whole
// remainder is consumed.
7692 size_t pos = rem.first_not_of(
" \t");
7695 pos = pos !=
npos ? pos : rem.len;
7696 _c4dbgpf(
"skipping indentation of {}", pos);
7697 _line_progressed(pos);
7698 rem = m_evt_handler->m_curr->line_contents.rem;
7701 _c4dbgpf(
"rem is now {}", _prs(rem));
// Flow sequence: open it, use the column of the bracket as indentation and
// consume the bracket.
7704 if(rem.begins_with(
'['))
7706 _c4dbgp(
"it's a seq");
7707 _check_trailing_doc_token();
7709 m_evt_handler->begin_seq_val_flow();
7711 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7712 m_doc_empty =
false;
7713 _line_progressed(1);
// Flow map: same steps as for the flow sequence.
7715 else if(rem.begins_with(
'{'))
7717 _c4dbgp(
"it's a map");
7718 _check_trailing_doc_token();
7720 m_evt_handler->begin_map_val_flow();
7722 m_doc_empty =
false;
7723 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7724 _line_progressed(1);
7726 else if(_handle_bom())
7728 _c4dbgp(
"byte order mark");
// Anything else is treated as a scalar; dispatch on its first character.
7732 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7733 _maybe_skip_whitespace_tokens();
7734 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7737 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7738 const char first = s.str[0];
// Double-quoted scalar: when no colon follows it becomes the value.
7742 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7743 _check_trailing_doc_token();
7746 m_doc_empty =
false;
7747 sc = _scan_scalar_dquot();
7748 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7749 if(!_maybe_scan_following_colon())
7751 _c4dbgp(
"runk_json: set as val");
7752 _handle_annotations_before_blck_val_scalar();
7753 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain scalar: when no colon follows it becomes the value, filtered with
// the indentation of the line.
7760 else if(_scan_scalar_plain_unk(&sc))
7762 _c4dbgp(
"runk_json: got a plain scalar");
7763 _check_trailing_doc_token();
7766 m_doc_empty =
false;
7767 if(!_maybe_scan_following_colon())
7769 _c4dbgp(
"runk_json: set as val");
7770 _handle_annotations_before_blck_val_scalar();
7771 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7772 m_evt_handler->set_val_scalar_plain(maybe_filtered);
/** Unknown-state handler: decides what the next top-level token is when no
 * container is open yet (RNXT, RSEQ and RMAP are asserted unset, RTOP
 * asserted set).
 *
 * Steps visible here:
 *  - skip spaces and comments;
 *  - at zero indentation on a fresh line (or on the line that carried the
 *    byte order mark) look for a BOM, a document-start token, a
 *    document-end token or a `%` directive;
 *  - otherwise dispatch on the first character: flow sequence, flow map,
 *    block sequence (`-`), explicit key (`?`), empty key (`:`), anchor
 *    (`&`), alias (`*`), tag (`!`), block scalar (literal or `>` folded),
 *    quoted scalar or plain scalar.
 *
 * A scalar or alias that is followed by a colon opens a new block map and
 * becomes its first key; otherwise it is set as the value. */
7789 template<
class EventHandler>
7790 void ParseEngine<EventHandler>::_handle_unk()
7792 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7794 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7795 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7797 _maybe_skipchars(
' ');
7798 _maybe_skip_comment();
7800 if(!m_evt_handler->m_curr->line_contents.rem.len)
7803 _c4dbgpf(
"runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
// Top-level tokens are only looked for at zero indentation, either at the
// very start of the line or on the line where a BOM was recorded.
7805 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7807 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7808 _c4dbgp(
"runk: check BOM");
// Remember on which line the BOM was found.
7811 m_bom_line = m_evt_handler->m_curr->pos.line;
7812 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7815 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7816 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
// Possible document start: reset indentation and consume the three-char token.
7819 _c4dbgp(
"runk: rtop: suspecting doc");
7820 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7822 _c4dbgp(
"runk: rtop: begin doc");
7825 _set_indentation(0);
7827 _line_progressed(3u);
7828 _maybe_skip_whitespace_tokens();
// Possible document end: consume the three-char token and validate what follows.
7832 else if(first ==
'.')
7834 _c4dbgp(
"runk: rtop: suspecting doc end");
7835 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7837 _c4dbgp(
"runk: rtop: end doc");
7844 _c4dbgp(
"runk: rtop: ignore end doc");
7847 _line_progressed(3u);
7848 _maybe_skip_whitespace_tokens();
7849 _check_doc_end_tokens();
// Directive: rejected when RDOC is set, or when the document already has
// content and NDOC is not set.
7853 else if(first ==
'%')
7855 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
7856 if(C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC))))
7857 _c4err(
"need document footer before directives");
7858 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// General case. startindent is the indentation of the line; remindent is the
// column of the remaining text, reduced by the BOM length on the BOM line.
7865 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7866 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7869 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
7870 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7872 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7873 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7874 remindent -= m_bom_len;
7882 size_t startcol = _handle_block_skip_leading_whitespace();
7883 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Flow sequence: normally it is the value; when the pending annotations
// require a key container, a block map is opened and the flow sequence
// becomes its key.
7887 _c4dbgp(
"runk: flow seq?");
7888 _handle_unk_begin_doc();
7889 if(C4_LIKELY( ! _annotations_require_key_container()))
7891 _c4dbgp(
"runk: it's a seq, flow");
7892 _handle_annotations_before_blck_val_scalar();
7893 m_evt_handler->begin_seq_val_flow();
7895 _set_indentation(0);
7899 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7900 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7901 m_evt_handler->begin_map_val_block();
7903 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7904 m_evt_handler->begin_seq_key_flow();
7906 _set_indentation(0);
7908 _line_progressed(1);
// Flow map: same two outcomes as for the flow sequence.
7910 else if(first ==
'{')
7912 _c4dbgp(
"runk: flow map?");
7913 _handle_unk_begin_doc();
7914 if(C4_LIKELY( ! _annotations_require_key_container()))
7916 _c4dbgp(
"runk: it's a map, flow");
7917 _handle_annotations_before_blck_val_scalar();
7918 m_evt_handler->begin_map_val_flow();
7920 _set_indentation(0);
7924 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7925 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7926 m_evt_handler->begin_map_val_block();
7928 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7929 m_evt_handler->begin_map_key_flow();
7931 _set_indentation(0);
7933 _line_progressed(1);
// Block sequence. When this is not the first token of the line, the
// indentation is recomputed from the tokens to its left.
7935 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7937 _c4dbgp(
"runk: it's a seq, block");
7938 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7939 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7940 _handle_unk_begin_doc();
7941 _handle_annotations_before_blck_val_scalar();
7942 m_evt_handler->begin_seq_val_block();
7944 _set_indentation(startindent);
7945 _line_progressed(1);
7946 _maybe_skipchars(
' ');
// Explicit key marker: opens a block map; a block sequence may follow
// directly and then becomes the key.
7948 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7950 _c4dbgp(
"runk: it's a map + this key is complex");
7951 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7952 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7953 _handle_block_check_leading_tabs(startcol);
7954 _handle_unk_begin_doc();
7955 _handle_annotations_before_blck_val_scalar();
7956 m_evt_handler->begin_map_val_block();
7958 _set_indentation(startindent);
7959 _line_progressed(1);
7960 _maybe_skipchars(
' ');
7961 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7963 _c4dbgp(
"runk: seqblck key starts after ?");
7965 m_evt_handler->begin_seq_key_block();
7967 _save_indentation();
7968 _line_progressed(1);
7969 _maybe_skipchars(
' ');
// Leading colon: accepted as a map with an empty key only while the document
// is still empty or annotations are pending; otherwise it is an error.
7972 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7974 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7976 _c4dbgp(
"runk: it's a map with an empty key");
7977 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7978 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7979 _handle_block_check_leading_tabs(startcol);
7980 const size_t startline = m_evt_handler->m_curr->pos.line;
7981 _handle_unk_begin_doc();
7982 _handle_annotations_before_start_mapblck(startline);
7984 m_evt_handler->begin_map_val_block();
7985 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7986 m_evt_handler->set_key_scalar_plain_empty();
7987 _set_indentation(startindent);
7991 _c4err(
"block colon cannot occur on a new line unless ? is used");
7994 _line_progressed(1);
7995 _maybe_skip_whitespace_tokens();
// Anchor: stored as pending, with the column of the remaining text and the line.
7997 else if(first ==
'&')
7999 csubstr anchor = _scan_anchor();
8000 _c4dbgpf(
"anchor! {}", _prs(anchor));
8001 const size_t line = m_evt_handler->m_curr->pos.line;
8002 _handle_unk_begin_doc();
8003 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8004 _set_indentation(0);
// Alias: the value when no colon follows, otherwise the first key of a new
// block map.
8006 else if(first ==
'*')
8008 csubstr ref = _scan_ref_map();
8009 _c4dbgpf(
"runk: ref! {}", _prs(ref));
8010 _handle_unk_begin_doc();
8011 if(!_maybe_scan_following_colon())
8013 _c4dbgp(
"runk: set val ref");
8014 _handle_valref(ref);
8018 _c4dbgp(
"runk: start new block map, set ref as key");
8019 _handle_block_check_leading_tabs(startcol);
8020 const size_t startline = m_evt_handler->m_curr->pos.line;
8021 _handle_annotations_before_start_mapblck(startline);
8022 m_evt_handler->begin_map_val_block();
8023 _handle_keyref(ref);
8024 _maybe_skip_whitespace_tokens();
8025 _set_indentation(0);
// Tag: stored as pending together with its original text, the current column
// of the remaining text and the line.
8029 else if(first ==
'!')
8032 csubstr tag = _scan_tag(&tag_orig);
8033 _c4dbgpf(
"runk: val tag! {}", _prs(tag));
8036 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8037 const size_t line = m_evt_handler->m_curr->pos.line;
8038 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// Scalars. The two helpers below wrap the steps shared by every branch in
// which a scalar turns out to be the first key of a new block map.
8042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8043 const size_t startscalar = _handle_block_get_whitespace_mark();
8044 const size_t startline = m_evt_handler->m_curr->pos.line;
// beginmap: rejects a key whose scan moved past the starting line, checks
// leading tabs, applies pending annotations and opens the block map.
8045 auto beginmap = [&](
size_t startindent_){
8046 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8047 _c4err(
"multiline scalars cannot be used as implicit keys");
8048 _handle_block_check_leading_tabs(startcol, startscalar);
8049 _handle_annotations_before_start_mapblck(startline);
8051 m_evt_handler->begin_map_val_block();
8052 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// after_beginmap: skips whitespace tokens and fixes the indentation of the map.
8054 auto after_beginmap = [&](
size_t startindent_){
8055 _maybe_skip_whitespace_tokens();
8056 _set_indentation(startindent_);
// Literal block scalar, always a value here.
8061 _c4dbgp(
"runk: block-literal scalar");
8062 _handle_unk_begin_doc();
8064 _scan_block(&sb, startindent);
8065 _handle_annotations_before_blck_val_scalar();
8066 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8067 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Folded block scalar, always a value here.
8069 else if(first ==
'>')
8071 _c4dbgp(
"runk: block-folded scalar");
8072 _handle_unk_begin_doc();
8074 _scan_block(&sb, startindent);
8075 _handle_annotations_before_blck_val_scalar();
8076 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8077 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Single-quoted scalar. The token position is captured before scanning
// because scanning advances the current position.
// NOTE(review): the key path below filters with the val filter before
// calling the key setter -- confirm this is intended.
8079 else if(first ==
'\'')
8081 _c4dbgp(
"runk: single-quoted scalar");
8082 _handle_unk_begin_doc();
8083 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8084 size_t col = m_evt_handler->m_curr->pos.col;
8085 ScannedScalar sc = _scan_scalar_squot();
8086 if(!_maybe_scan_following_colon())
8088 _c4dbgp(
"runk: set as val");
8089 _handle_annotations_before_blck_val_scalar();
8090 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8091 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8095 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8097 startindent = _handle_unk_check_left_tokens(startindent, col);
8098 beginmap(startindent);
8099 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8100 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8101 after_beginmap(startindent);
// Double-quoted scalar: same structure as the single-quoted branch.
8104 else if(first ==
'"')
8106 _c4dbgp(
"runk: double-quoted scalar");
8107 _handle_unk_begin_doc();
8108 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8109 size_t col = m_evt_handler->m_curr->pos.col;
8110 ScannedScalar sc = _scan_scalar_dquot();
8111 if(!_maybe_scan_following_colon())
8113 _c4dbgp(
"runk: set as val");
8114 _handle_annotations_before_blck_val_scalar();
8115 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8116 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8120 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8122 startindent = _handle_unk_check_left_tokens(startindent, col);
8123 beginmap(startindent);
8124 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8125 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126 after_beginmap(startindent);
// Fallback: try a plain scalar, with the same value-or-key decision.
8131 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8132 size_t col = m_evt_handler->m_curr->pos.col;
8134 if(_scan_scalar_plain_unk(&sc))
8136 _c4dbgp(
"runk: plain scalar");
8137 _handle_unk_begin_doc();
8138 if(!_maybe_scan_following_colon())
8140 _c4dbgp(
"runk: set as val");
8141 _handle_annotations_before_blck_val_scalar();
8142 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8143 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8147 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8149 startindent = _handle_unk_check_left_tokens(startindent, col);
8150 beginmap(startindent);
8151 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8152 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8153 after_beginmap(startindent);
/** Bookkeeping done by the unknown-state handler whenever it meets a token
 * that carries document content: runs _check_trailing_doc_token() and
 * records that the current document is no longer empty. */
8164 template<
class EventHandler>
8165 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8167 _c4dbgp(
"runk: begin doc");
8168 _check_trailing_doc_token();
// From here on the document counts as having content.
8171 m_doc_empty =
false;
/** Inspect the part of the current line that lies to the left of a token
 * (from the end of the BOM up to column col) and derive the indentation to
 * use for that token.
 *
 * @param realindent indentation assumed so far; may be replaced below
 * @param col column at which the token of interest starts
 * @param skip_annotations when set, pending annotations found on the left
 *        are skipped first and may update realindent; when unset,
 *        realindent is taken from the first non-space position.
 *        NOTE(review): callers also invoke this with two arguments, so a
 *        default value is presumably declared elsewhere -- confirm.
 * @return a size_t that callers assign to their start indentation.
 *         NOTE(review): the return statement is not among the statements
 *         shown here; presumably realindent -- confirm. */
8174 template<
class EventHandler>
8175 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
// Text between the BOM and the token.
8180 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8182 _c4dbgpf(
"runk: check left tokens: s={}", _prs(s,
true));
// Optionally step over annotations that were already recorded as pending.
8183 if(skip_annotations)
8185 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8186 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// First non-space character at or after pos.
8188 size_t firstns = s.first_not_of(
' ', pos);
8191 _c4dbgpf(
"runk: check left tokens:\n"
8192 " tokens={} skipped={}\n"
8193 " bomlen={} first={} col={}\n"
8194 " (bomlen+first)={} vs {}=col\n"
8195 " startindent={} lineindent={}"
8196 , _prs(s,
true), _prs(s.sub(firstns),
true)
8197 , m_bom_len, firstns, col
8198 , m_bom_len+firstns, col,
8199 realindent, m_evt_handler->m_curr->line_contents.indentation);
// A mismatch means something other than spaces sits between the BOM and col.
8200 if(m_bom_len + firstns != col)
8202 if(!skip_annotations)
8203 realindent = firstns;
8204 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
/** Walk over leading whitespace and up to two pending annotations at the
 * start of s, reporting where the annotations start and where the first
 * text after them starts.
 *
 * @param s text to the left of the token being examined
 * @param indent [out] receives the position of the first character that is
 *        neither space nor tab
 * @param first_non_token_pos [out] receives the position reached after the
 *        whitespace and the annotations have been skipped */
8210 template<
class EventHandler>
8211 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s,
size_t *indent,
size_t *first_non_token_pos)
// Ask which pending annotations sit on the current line (at most two are
// returned, ordered by position).
8213 csubstr first, second;
8214 uint32_t total = _get_annotations_same_line(s, &first, &second);
8215 _c4dbgpf(
"runk: before skip: {}", _prs(s,
true));
// Skip leading spaces and tabs; both outputs start at that position.
8216 size_t pos = s.first_not_of(
" \t");
8221 *indent = *first_non_token_pos = pos;
8224 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8225 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos),
true));
// The text at pos is expected to be the first annotation.
8226 _c4dbgpf(
"runk: first annotation: {}", first);
8230 _c4assert(s.sub(pos).begins_with(first));
8233 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos),
true));
// Second annotation: skip the whitespace that separates it from the first,
// then expect it at pos.
8236 _c4dbgpf(
"runk: second annotation: {}", second);
8241 csubstr spos = s.sub(pos);
8242 size_t more = spos.first_not_of(
" \t");
8244 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8246 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos),
true));
8247 _c4assert(s.sub(pos).begins_with(second));
8249 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos),
true));
// Final position after everything that was skipped.
8251 *first_non_token_pos = pos;
/** Find the pending anchors/tags that were recorded on the current line and
 * hand back the text of up to two of them.
 *
 * Must not be called at the first token of a line (asserted).
 *
 * @param token_soup text of the line to the left of the current token
 * @param first_ [out] text of the annotation that comes first, or null
 * @param second_ [out] text of the annotation that comes second, or null
 * @return a count of annotations.
 *         NOTE(review): total is initialised with the number of all pending
 *         entries and later incremented per same-line entry; whether it is
 *         reset in between is not among the statements shown here --
 *         confirm what exactly is returned. */
8255 template<
class EventHandler>
8256 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_)
const
8258 _c4assert(!m_evt_handler->m_curr->at_first_token());
8260 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8261 EntryPtr first =
nullptr;
8262 EntryPtr second =
nullptr;
8263 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8266 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// Yields the entry when it was recorded on the current line, else null.
8267 auto valid_if_same_line = [
this](EntryPtr entry){
8268 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8269 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// Count the entries of both pending lists that sit on the current line.
8273 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8274 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8275 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8276 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8277 _c4dbgpf(
"{} annotations on same line", total);
// Returns the first same-line entry (anchors are searched before tags) that
// is not the given one; null when there is none.
8282 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8283 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8284 if(&m_pending_anchors.annotations[i] != not_this_one
8285 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8286 return &m_pending_anchors.annotations[i];
8287 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8288 if(&m_pending_tags.annotations[i] != not_this_one
8289 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8290 return &m_pending_tags.annotations[i];
8291 return (EntryPtr)
nullptr;
8295 first = get_first_on_same_line(
nullptr);
8297 _c4dbgpf(
"first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
// The second one is looked up the same way, excluding the first.
8302 second = get_first_on_same_line(first);
8304 _c4dbgpf(
"second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
// Maps an entry to its text: entries without a string, or whose string
// starts with `!` or `<`, are handled as tags through their orig text;
// the others are handled as anchors.
8306 auto extract_string = [&](EntryPtr e){
8308 if(!e->str.str || e->str.begins_with_any(
"!<"))
8310 csubstr tag = e->orig;
8314 _c4dbgpf(
"tag: {} -> {}", _maybe_null_str(e->str), tag);
8317 csubstr anchor = e->str;
8322 _c4assert(anchor.str - token_soup.str > 0);
8327 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8330 *first_ = first ? extract_string(first) : nullptr;
8331 *second_ = second ? extract_string(second) : nullptr;
// Order the two results by their address in the source text.
8332 if(total > 1 && (first_->str > second_->str))
8334 csubstr tmp = *first_;
8337 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
// Handle one parsing step while the parser is in "unknown style" (USTY) mode.
// What is visible here: the function asserts that neither RBLCK nor RFLOW is
// set, skips a comment and leading spaces/tabs, and then dispatches on the
// first remaining character of the current line. The debug labels show three
// groups of branches: usty[RSEQ], usty[RMAP] and usty[UNK].
// NOTE(review): this listing omits some source lines (braces and several
// `if` headers), so the exact nesting of the branches below must be
// confirmed against the complete file.
8346 template<
class EventHandler>
8347 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// NOTE(review): the format string has a single `{}` but two arguments follow
// it -- confirm whether node_id is meant to be printed as well.
8349 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Precondition: neither the RBLCK nor the RFLOW flag may be set on entry.
8351 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
8353 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8356 _c4dbgp(
"usty[RNXT]: finishing!");
8361 _maybe_skip_comment();
8362 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces and tabs; when nothing but whitespace remains
// (first_not_of() yields npos) the whole remainder is skipped.
8366 size_t pos = rem.first_not_of(
" \t");
8369 pos = pos !=
npos ? pos : rem.len;
8370 _c4dbgpf(
"skipping indentation of {}", pos);
8371 _line_progressed(pos);
// Re-read the remainder now that the line position has advanced.
8372 rem = m_evt_handler->m_curr->line_contents.rem;
8375 _c4dbgpf(
"rem is now {}", _prs(rem));
// From here on there must be at least one character left on the line.
8378 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
// Remember the line indentation and the first remaining character; every
// branch below dispatches on `first`.
8379 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8380 char first = rem.str[0];
// ---- usty[RSEQ] branches: RMAP must not be set at the same time. ----
8383 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8384 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow seq (the condition line is not visible in this listing): call the
// handler's _push(), adopt the line indentation and consume the
// one-character token plus following whitespace tokens.
8387 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8389 m_evt_handler->_push();
8391 _set_indentation(startindent);
8392 _line_progressed(1);
8393 _maybe_skip_whitespace_tokens();
// Block seq: '-' counts only when _is_blck_token() accepts the remainder.
8395 else if(first ==
'-' && _is_blck_token(rem))
8397 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8399 m_evt_handler->_push();
8401 _set_indentation(startindent);
8402 _line_progressed(1);
8403 _maybe_skip_whitespace_tokens();
// Anything else cannot be merged into a seq.
8407 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] branches: RSEQ must not be set at the same time. ----
8410 else if(has_any(
RMAP))
8412 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8413 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map (condition line not visible in this listing): pending
// annotations are handled before the handler's _push().
8416 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8418 _handle_annotations_before_blck_val_scalar();
8419 m_evt_handler->_push();
8421 _set_indentation(startindent);
8422 _line_progressed(1);
8423 _maybe_skip_whitespace_tokens();
// '?' as a block token: block map whose key is complex. Note that this
// branch calls _save_indentation() instead of _set_indentation().
8425 else if(first ==
'?' && _is_blck_token(rem))
8427 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8429 _handle_annotations_before_blck_val_scalar();
8430 m_evt_handler->_push();
8432 _save_indentation();
8433 _line_progressed(1);
8434 _maybe_skip_whitespace_tokens();
// ':' as a block token: the key is empty, so an empty plain key is set.
8436 else if(first ==
':' && _is_blck_token(rem))
8438 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8440 _handle_annotations_before_blck_val_scalar();
8441 m_evt_handler->_push();
8442 m_evt_handler->set_key_scalar_plain_empty();
8444 _save_indentation();
8445 _line_progressed(1);
8446 _maybe_skip_whitespace_tokens();
// '&': scan the anchor and record it as a pending annotation together with
// its column and line; the indentation is then set to that column.
8448 else if(rem.begins_with(
'&'))
8450 csubstr anchor = _scan_anchor();
8451 _c4dbgpf(
"usty[RMAP]: anchor! {}", _prs(anchor));
8452 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8453 const size_t line = m_evt_handler->m_curr->pos.line;
8454 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8455 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// '*': a reference. In a map it is only accepted as a key, i.e. when a
// colon follows; otherwise it is an error.
8457 else if(first ==
'*')
8459 csubstr ref = _scan_ref_map();
8460 _c4dbgpf(
"usty[RMAP]: ref! {}", _prs(ref));
8461 if(!_maybe_scan_following_colon())
8463 _c4err(
"cannot read a VAL to a map");
8467 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8468 const size_t startline = m_evt_handler->m_curr->pos.line;
8470 _handle_annotations_before_start_mapblck(startline);
8471 m_evt_handler->_push();
8472 _handle_keyref(ref);
8473 _maybe_skip_whitespace_tokens();
8474 _set_indentation(startindent);
// '!': scan the tag and record it as a pending annotation.
8478 else if(first ==
'!')
8480 csubstr tag = _scan_tag();
8481 _c4dbgpf(
"usty[RMAP]: val tag! {}", _prs(tag));
8484 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8485 const size_t line = m_evt_handler->m_curr->pos.line;
8486 _add_annotation(&m_pending_tags, tag, indentation, line);
// A seq opener ('[' or a block '-') is rejected while reading a map.
8488 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8490 _c4err(
"cannot parse a seq into an existing map");
// Otherwise try to read a scalar to use as a key. SSCL must not be set;
// the start indentation and start line are captured for annotation handling.
8494 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8495 startindent = m_evt_handler->m_curr->line_contents.indentation;
8496 const size_t startline = m_evt_handler->m_curr->pos.line;
8498 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar: must be followed by a colon, then becomes the key
// (after filtering) of the map.
8501 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8502 sc = _scan_scalar_squot();
8503 if(!_maybe_scan_following_colon())
8505 _c4err(
"cannot read a VAL to a map");
8509 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8511 _handle_annotations_before_start_mapblck(startline);
8512 m_evt_handler->_push();
8513 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8514 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8515 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8516 _set_indentation(startindent);
8518 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same sequence of steps as the single-quoted case.
8521 else if(first ==
'"')
8523 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8524 sc = _scan_scalar_dquot();
8525 if(!_maybe_scan_following_colon())
8527 _c4err(
"cannot read a VAL to a map");
8531 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8533 _handle_annotations_before_start_mapblck(startline);
8534 m_evt_handler->_push();
8535 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8536 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8537 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8538 _set_indentation(startindent);
8540 _maybe_skip_whitespace_tokens();
// Block scalars are rejected as keys here.
8543 else if(first ==
'|')
8545 _c4err(
"block literal keys must be enclosed in '?'");
// NOTE(review): the usty[UNK] branch below labels '>' as "block-folded",
// yet this message says "block literal" -- confirm the wording is intended.
8547 else if(first ==
'>')
8549 _c4err(
"block literal keys must be enclosed in '?'");
// Plain scalar: same key handling as the quoted cases, using the plain
// key filter (which also takes the start indentation).
8551 else if(_scan_scalar_plain_unk(&sc))
8553 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8554 if(!_maybe_scan_following_colon())
8556 _c4err(
"cannot read a VAL to a map");
8560 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8562 _handle_annotations_before_start_mapblck(startline);
8563 m_evt_handler->_push();
8564 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8565 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8566 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8567 _set_indentation(startindent);
8569 _maybe_skip_whitespace_tokens();
// ---- usty[UNK] branches: RSEQ must not be set. Unlike the branches ----
// ---- above, containers are begun via begin_*_val_*() rather than    ----
// ---- _push(), and a scalar without a following colon is a value.    ----
8580 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8581 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow seq (condition line not visible in this listing).
8584 _c4dbgp(
"usty[UNK]: it's a flow seq");
8586 _handle_annotations_before_blck_val_scalar();
8587 m_evt_handler->begin_seq_val_flow();
8589 _set_indentation(startindent);
8590 _line_progressed(1);
8591 _maybe_skip_whitespace_tokens();
// Block seq.
8593 else if(first ==
'-' && _is_blck_token(rem))
8595 _c4dbgp(
"usty[UNK]: it's a block seq");
8597 _handle_annotations_before_blck_val_scalar();
8598 m_evt_handler->begin_seq_val_block();
8600 _set_indentation(startindent);
8601 _line_progressed(1);
8602 _maybe_skip_whitespace_tokens();
// Flow map.
8604 else if(first ==
'{')
8606 _c4dbgp(
"usty[UNK]: it's a flow map");
8608 _handle_annotations_before_blck_val_scalar();
8609 m_evt_handler->begin_map_val_flow();
8611 _set_indentation(startindent);
8612 _line_progressed(1);
8613 _maybe_skip_whitespace_tokens();
// Block map whose first key is complex ('?' block token).
8615 else if(first ==
'?' && _is_blck_token(rem))
8617 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8619 _handle_annotations_before_blck_val_scalar();
8620 m_evt_handler->begin_map_val_block();
8622 _save_indentation();
8623 _line_progressed(1);
8624 _maybe_skip_whitespace_tokens();
// Block map with an empty first key (':' block token).
8626 else if(first ==
':' && _is_blck_token(rem))
8628 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8630 _handle_annotations_before_blck_val_scalar();
8631 m_evt_handler->begin_map_val_block();
8632 m_evt_handler->set_key_scalar_plain_empty();
8634 _save_indentation();
8635 _line_progressed(1);
8636 _maybe_skip_whitespace_tokens();
// Anchor: recorded as a pending annotation with its column and line.
8638 else if(first ==
'&')
8640 csubstr anchor = _scan_anchor();
8641 _c4dbgpf(
"usty[UNK]: anchor! {}", _prs(anchor));
8642 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8643 const size_t line = m_evt_handler->m_curr->pos.line;
8644 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8645 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: without a following colon it is a value reference; with a
// colon it becomes the first key of a new block map.
8647 else if(first ==
'*')
8649 csubstr ref = _scan_ref_map();
8650 _c4dbgpf(
"usty[UNK]: ref! {}", _prs(ref));
8651 if(!_maybe_scan_following_colon())
8653 _c4dbgp(
"usty[UNK]: set val ref");
8654 _handle_valref(ref);
8658 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8659 const size_t startline = m_evt_handler->m_curr->pos.line;
8661 _handle_annotations_before_start_mapblck(startline);
8662 m_evt_handler->begin_map_val_block();
8663 _handle_keyref(ref);
8664 _maybe_skip_whitespace_tokens();
8665 _set_indentation(startindent);
// Tag: recorded as a pending annotation with its column and line.
8669 else if(first ==
'!')
8671 csubstr tag = _scan_tag();
8672 _c4dbgpf(
"usty[UNK]: val tag! {}", _prs(tag));
8675 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8676 const size_t line = m_evt_handler->m_curr->pos.line;
8677 _add_annotation(&m_pending_tags, tag, indentation, line);
// Otherwise try to read a scalar. SSCL must not be set; the start
// indentation and start line are captured for annotation handling.
8681 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8682 startindent = m_evt_handler->m_curr->line_contents.indentation;
8683 const size_t startline = m_evt_handler->m_curr->pos.line;
8686 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar: a value when no colon follows, otherwise the first
// key of a new block map.
8689 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8690 sc = _scan_scalar_squot();
8691 if(!_maybe_scan_following_colon())
8693 _c4dbgp(
"usty[UNK]: set as val");
8694 _handle_annotations_before_blck_val_scalar();
8695 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8696 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8701 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8703 _handle_annotations_before_start_mapblck(startline);
8704 m_evt_handler->begin_map_val_block();
8705 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8706 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8707 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8708 _set_indentation(startindent);
8710 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value-or-key decision as above.
8713 else if(first ==
'"')
8715 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8716 sc = _scan_scalar_dquot();
8717 if(!_maybe_scan_following_colon())
8719 _c4dbgp(
"usty[UNK]: set as val");
8720 _handle_annotations_before_blck_val_scalar();
8721 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8722 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8727 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8729 _handle_annotations_before_start_mapblck(startline);
8730 m_evt_handler->begin_map_val_block();
8731 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8732 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8733 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8734 _set_indentation(startindent);
8736 _maybe_skip_whitespace_tokens();
// Block-literal scalar ('|'): always set as a value.
8739 else if(first ==
'|')
8741 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8743 _scan_block(&sb, startindent);
8744 _c4dbgp(
"usty[UNK]: set as val");
8745 _handle_annotations_before_blck_val_scalar();
8746 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8747 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar ('>'): always set as a value.
8750 else if(first ==
'>')
8752 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8754 _scan_block(&sb, startindent);
8755 _c4dbgp(
"usty[UNK]: set as val");
8756 _handle_annotations_before_blck_val_scalar();
8757 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8758 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: a value when no colon follows, otherwise the first key of
// a new block map.
8761 else if(_scan_scalar_plain_unk(&sc))
8763 _c4dbgp(
"usty[UNK]: got a plain scalar");
8764 if(!_maybe_scan_following_colon())
8766 _c4dbgp(
"usty[UNK]: set as val");
8767 _handle_annotations_before_blck_val_scalar();
8768 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8769 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8774 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8776 _handle_annotations_before_start_mapblck(startline);
8777 m_evt_handler->begin_map_val_block();
8778 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8779 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8780 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8781 _set_indentation(startindent);
8783 _maybe_skip_whitespace_tokens();
// Event-driven parse loop: starts the parse, opens the stream, consumes the
// source until the file is finished, then finishes the parse.
// NOTE(review): the signature line is missing from this listing; presumably
// this is ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename,
// substr src) -- confirm against the full file.
8797 template<
class EventHandler>
// The handler stack must already hold at least one entry.
8800 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8802 m_evt_handler->start_parse(filename.str, src);
8803 m_evt_handler->begin_stream();
// Outer loop runs until the file is finished; inner loop until the current
// line is finished.
8805 while( ! _finished_file())
8808 while( ! _finished_line())
// Inside the inner loop the remainder of the line is never empty.
8811 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// Dispatch on the parser state flags (the first `if` and the handler calls
// are not visible in this listing).
8816 else if(has_any(
RMAP))
8820 else if(has_any(
RUNK))
// No known state flag matched.
8826 _c4err(
"internal error");
8829 if(_finished_file())
8834 m_evt_handler->finish_parse();
// Event-driven parse loop: starts the parse, opens the stream, consumes the
// source until the file is finished, then finishes the parse.
// NOTE(review): the signature line is missing from this listing; presumably
// this is ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename,
// substr src) -- confirm against the full file.
8840 template<
class EventHandler>
// The handler stack must already hold at least one entry.
8843 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8845 m_evt_handler->start_parse(filename.str, src);
8846 m_evt_handler->begin_stream();
// Outer loop runs until the file is finished; inner loop until the current
// line is finished.
8848 while( ! _finished_file())
8851 while( ! _finished_line())
// Inside the inner loop the remainder of the line is never empty.
8854 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// Dispatch on the parser state flags. Several `if` headers and handler calls
// are not visible in this listing; the asserts show that the fallback of a
// seq/map decision requires RMAP to be fully set.
8865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// Block mode: handled by the seq or map block handler.
8874 else if(has_any(
RBLCK))
8878 _handle_seq_block();
8882 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8883 _handle_map_block();
8886 else if(has_any(
RUNK))
8890 else if(has_any(
USTY))
// No known state flag matched.
8896 _c4err(
"internal error");
8899 if(_finished_file())
8904 m_evt_handler->finish_parse();
// _c4dbgnextline is defined near the top of this file; undefine it here.
8913 #undef _c4dbgnextline
// Restore the compiler diagnostic state that was pushed near the top of this
// file (MSVC warning push / clang diagnostic push / GCC diagnostic push).
8917 #if defined(_MSC_VER)
8918 # pragma warning(pop)
8919 #elif defined(__clang__)
8920 # pragma clang diagnostic pop
8921 #elif defined(__GNUC__)
8922 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
bool is_valid_tag_handle(csubstr handle)
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _ryml_relocate(s)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark