1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
4 #ifndef _C4_YML_PARSE_ENGINE_HPP_
7 #ifndef _C4_CHARCONV_HPP_
13 #ifndef _C4_YML_FILTER_PROCESSOR_HPP_
16 #ifndef _C4_YML_TAG_HPP_
19 #ifndef _C4_YML_NODE_TYPE_HPP_
23 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24 #include "c4/yml/detail/dbgprint.hpp"
29 #include <c4/dump.hpp>
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
// Parser assertion helper: forwards the asserted condition to
// _RYML_ASSERT_PARSE_ together with the event handler's callbacks and the
// current parse position.
37 #define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
// Helper macros for code that depends on whether tabs are accepted as tokens.
// With RYML_WITH_TAB_TOKENS defined: _RYML_WITH_TAB_TOKENS(...) expands to its
// arguments, _RYML_WITHOUT_TAB_TOKENS(...) expands to nothing, and
// _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) selects `with`.
41 #if defined(RYML_WITH_TAB_TOKENS)
42 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43 #define _RYML_WITHOUT_TAB_TOKENS(...)
44 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
// Opposite set of expansions (presumably for the case where
// RYML_WITH_TAB_TOKENS is not defined): the "without" arguments are kept.
46 #define _RYML_WITH_TAB_TOKENS(...)
47 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Hooks for saving test inputs. When RYML_SAVE_TEST_YAML is not defined,
// both macros expand to nothing.
52 #ifndef RYML_SAVE_TEST_YAML
53 #define _RYML_SAVE_TEST_YAML(filename, src)
54 #define _RYML_SAVE_TEST_JSON(filename, src)
// Alternate definitions (presumably used when RYML_SAVE_TEST_YAML is defined):
// forward to the c4::yml::ryml_save_test_* functions declared below.
56 #define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57 #define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
60 void ryml_save_test_yaml(csubstr filename, csubstr src);
61 void ryml_save_test_json(csubstr filename, csubstr src);
// Debug helper: prints a separator and then the line number and byte offset
// of the current parser position.
68 #define _c4dbgnextline() \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
78 # pragma warning(push)
79 # pragma warning(disable: 4296)
80 # pragma warning(disable: 4702)
81 #elif defined(__clang__)
82 # pragma clang diagnostic push
83 # pragma clang diagnostic ignored "-Wtype-limits"
84 # pragma clang diagnostic ignored "-Wformat-nonliteral"
85 # pragma clang diagnostic ignored "-Wold-style-cast"
86 #elif defined(__GNUC__)
87 # pragma GCC diagnostic push
88 # pragma GCC diagnostic ignored "-Wtype-limits"
89 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
90 # pragma GCC diagnostic ignored "-Wold-style-cast"
92 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Truncates `subject` (mutable substr overload) to its first `pos`
// characters by overwriting its length. When `pos` is npos the length is
// left unchanged.
103 C4_HOT C4_ALWAYS_INLINE
void _set_first(substr &C4_RESTRICT subject,
size_t pos) noexcept
106 subject.len = pos !=
npos ? pos : subject.len;
// Truncates `subject` (const csubstr overload) to its first `pos`
// characters by overwriting its length. When `pos` is npos the length is
// left unchanged.
108 C4_HOT C4_ALWAYS_INLINE
void _set_first(csubstr &C4_RESTRICT subject,
size_t pos) noexcept
111 subject.len = pos !=
npos ? pos : subject.len;
// Strict variant of _set_first for substr: `pos` must not be npos, which is
// enforced with _RYML_ASSERT_BASIC.
// NOTE(review): presumably it then truncates `subject` to `pos` — confirm.
113 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(substr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
116 _RYML_ASSERT_BASIC(pos !=
npos);
// Strict variant of _set_first for csubstr: `pos` must not be npos, which is
// enforced with _RYML_ASSERT_BASIC.
// NOTE(review): presumably it then truncates `subject` to `pos` — confirm.
119 C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(csubstr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
122 _RYML_ASSERT_BASIC(pos !=
npos);
// Tests whether `s` starts with a block token.
// Preconditions (asserted): `s` is non-empty and its first character is one
// of '-', ':' or '?'.
126 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s)
RYML_NOEXCEPT
128 _RYML_ASSERT_BASIC(s.len > 0);
129 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Returns true when `s` could start a block sequence entry: it begins with
// '-' and either consists of that single character or the '-' is followed
// by a space (or by a tab, when tab tokens are enabled).
133 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_seq_token_maybe(csubstr
const& C4_RESTRICT s) noexcept
135 return ((s.len >= 1) && (s.str[0] ==
'-') && ((s.len == 1) || ((s.str[1] ==
' ')
_RYML_WITH_TAB_TOKENS( || (s.str[1] ==
'\t')))));
// Document-begin token check (presumably _is_doc_begin_token — confirm).
// Preconditions (asserted): `s` begins with '-' and does not end with a
// newline or carriage return.
// The return expression shown requires at least three characters with the
// second and third also being '-'.
// NOTE(review): further conditions may follow the visible part.
140 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
141 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
142 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
143 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Document-end token check (presumably _is_doc_end_token — confirm).
// Preconditions (asserted): `s` begins with '.' and does not end with a
// newline or carriage return.
// The return expression shown requires at least three characters with the
// second and third also being '.'.
// NOTE(review): further conditions may follow the visible part.
149 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
150 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
151 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
152 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests whether `s` starts with a document marker. The two return
// expressions check that the second and third characters are both '-' or
// both '.'.
// NOTE(review): presumably selected by the first character — confirm.
156 inline bool _is_doc_token(csubstr s) noexcept
164 return (s.str[1] ==
'-' && s.str[2] ==
'-')
168 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Returns the length of a JSON literal at the start of `s`, or 0 if the
// tested literal is not there: 5 for "false", 4 for "true", 4 for "null".
// Precondition (asserted): `s` is non-empty.
// NOTE(review): presumably the literal tested is chosen from the first
// character — confirm.
175 inline size_t _begins_with_special_json_scalar(csubstr s)
RYML_NOEXCEPT
177 _RYML_ASSERT_BASIC(s.len);
181 return s.begins_with(
"false") ? 5u : 0u;
183 return s.begins_with(
"true") ? 4u : 0u;
185 return s.begins_with(
"null") ? 4u : 0u;
// Tells whether `nl` and `following` together form a two-character line
// break: "\n\r" or "\r\n". The boolean result is returned as size_t,
// i.e. 1 when they do and 0 otherwise.
193 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
195 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances `rem` past its first line break. It locates the first '\r' or
// '\n', keeps that character in `nl`, moves `rem` to what follows it, and
// then tests whether the next character completes a two-character line
// break (see _extend_from_combined_newline).
199 inline substr _from_next_line(substr rem)
201 size_t nlpos = rem.first_of(
"\r\n");
204 const char nl = rem[nlpos];
205 rem = rem.right_of(nlpos);
208 if(_extend_from_combined_newline(nl, rem.front()))
// Counts newlines starting at position `*i` of `r`.
// Precondition (asserted): r[*i] is '\n'.
// `*i` is an in/out cursor: the loop advances it in place while scanning.
// The scan distinguishes '\n' from the whitespace characters ' ', '\t' and
// '\r'. Returns the accumulated newline count.
216 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
218 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
219 size_t numnl_following = 0;
221 for( ; *i < r.len; ++(*i))
223 if(r.str[*i] ==
'\n')
226 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
231 return numnl_following;
// Indentation-aware overload: counts newlines starting at position `*i` of
// `r`, taking an `indentation` width into account.
// Precondition (asserted): r[*i] is '\n'.
// `*i` is an in/out cursor advanced in place by the loops below.
236 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
238 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
239 size_t numnl_following = 0;
243 for( ; *i < r.len; ++(*i))
245 const char c = r.str[*i];
249 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
255 for( ; *i < r.len; ++(*i))
// `stop` marks the cursor position `indentation` characters further on.
262 size_t stop = *i + indentation;
263 for( ; *i < r.len; ++(*i))
// a character other than space or '\r' must be found before `stop`
266 if(c !=
' ' && c !=
'\r')
268 _RYML_ASSERT_BASIC(*i < stop);
273 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
279 return numnl_following;
289 template<
class EventHandler>
// Constructor taking an event handler (signature not visible here).
// Stores the handler, starts with empty pending anchors, cleared directive
// flags, m_prev_val_end at npos, and no newline-offset storage.
296 template<
class EventHandler>
299 , m_evt_handler(evt_handler)
300 , m_pending_anchors()
302 , m_has_directives_yaml(false)
303 , m_has_directives(false)
306 , m_prev_val_end(
npos)
308 , m_newline_offsets()
309 , m_newline_offsets_size(0)
310 , m_newline_offsets_capacity(0)
// the handler is mandatory
312 _RYML_CHECK_BASIC(evt_handler);
// Constructs from another engine `that` (presumably the move constructor —
// signature not visible). Options, handler, pending anchors/tags and the
// directive flags are taken from `that`; m_prev_val_end is reset to npos
// instead of being taken over. The newline-offset pointer, size and
// capacity are taken from `that` as-is, with no new allocation.
315 template<
class EventHandler>
317 : m_options(that.m_options)
318 , m_evt_handler(that.m_evt_handler)
319 , m_pending_anchors(that.m_pending_anchors)
320 , m_pending_tags(that.m_pending_tags)
321 , m_has_directives_yaml(that.m_has_directives_yaml)
322 , m_has_directives(that.m_has_directives)
323 , m_doc_empty(that.m_doc_empty)
325 , m_prev_val_end(
npos)
327 , m_newline_offsets(that.m_newline_offsets)
328 , m_newline_offsets_size(that.m_newline_offsets_size)
329 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
// Constructs from another engine `that` (presumably the copy constructor —
// signature not visible). Scalar state is copied from `that` and
// m_prev_val_end is reset to npos. The newline-offset storage starts empty
// and is duplicated below rather than shared.
334 template<
class EventHandler>
336 : m_options(that.m_options)
337 , m_evt_handler(that.m_evt_handler)
338 , m_pending_anchors(that.m_pending_anchors)
339 , m_pending_tags(that.m_pending_tags)
340 , m_has_directives_yaml(that.m_has_directives_yaml)
341 , m_has_directives(that.m_has_directives)
342 , m_doc_empty(that.m_doc_empty)
344 , m_prev_val_end(
npos)
346 , m_newline_offsets()
347 , m_newline_offsets_size()
348 , m_newline_offsets_capacity()
// only allocate when the source has storage of its own
350 if(that.m_newline_offsets_capacity)
352 _resize_locations(that.m_newline_offsets_capacity);
353 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
// copy only the entries in use (size), not the whole capacity
354 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
355 m_newline_offsets_size = that.m_newline_offsets_size;
// Assignment from another engine `that` (presumably move assignment —
// signature not visible). Every member is assigned from `that`, including
// the newline-offset pointer, size and capacity, which are taken as-is.
359 template<
class EventHandler>
363 m_options = (that.m_options);
364 m_evt_handler = that.m_evt_handler;
365 m_pending_anchors = that.m_pending_anchors;
366 m_pending_tags = that.m_pending_tags;
367 m_has_directives_yaml = that.m_has_directives_yaml;
368 m_has_directives = that.m_has_directives;
369 m_doc_empty = that.m_doc_empty;
370 m_prev_colon = that.m_prev_colon;
371 m_prev_val_end = that.m_prev_val_end;
372 m_encoding = that.m_encoding;
373 m_newline_offsets = (that.m_newline_offsets);
374 m_newline_offsets_size = (that.m_newline_offsets_size);
375 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// Assignment from another engine `that` (presumably copy assignment —
// signature not visible). Scalar state is assigned member by member; the
// newline offsets are copied into this engine's own storage.
380 template<
class EventHandler>
386 m_options = (that.m_options);
387 m_evt_handler = that.m_evt_handler;
388 m_pending_anchors = that.m_pending_anchors;
389 m_pending_tags = that.m_pending_tags;
390 m_has_directives_yaml = that.m_has_directives_yaml;
391 m_has_directives = that.m_has_directives;
392 m_doc_empty = that.m_doc_empty;
393 m_prev_colon = that.m_prev_colon;
394 m_prev_val_end = that.m_prev_val_end;
395 m_encoding = that.m_encoding;
// grow only when the existing storage is too small for the source
396 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
397 _resize_locations(that.m_newline_offsets_capacity);
398 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
399 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// copy only the entries in use
400 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
401 m_newline_offsets_size = that.m_newline_offsets_size;
// Clears engine state (signature not visible here): empties the pending
// anchors, clears the directive flags, resets m_prev_val_end to npos and
// value-initializes the newline-offset pointer, size and capacity.
// No deallocation appears among the visible statements.
406 template<
class EventHandler>
411 m_pending_anchors = {};
413 m_has_directives_yaml =
false;
414 m_has_directives =
false;
417 m_prev_val_end =
npos;
419 m_newline_offsets = {};
420 m_newline_offsets_size = {};
421 m_newline_offsets_capacity = {};
// Releases the newline-offset storage, if any, through the event handler's
// callbacks, then nulls the pointer and zeroes the size and capacity.
424 template<
class EventHandler>
425 void ParseEngine<EventHandler>::_free()
427 if(m_newline_offsets)
429 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
430 m_newline_offsets =
nullptr;
431 m_newline_offsets_size = 0u;
432 m_newline_offsets_capacity = 0u;
// Resets per-parse state: empties the pending anchors, clears the directive
// flags and sets m_prev_val_end to npos. When the options request location
// tracking, the location data is prepared as well.
439 template<
class EventHandler>
440 void ParseEngine<EventHandler>::_reset()
442 m_pending_anchors = {};
444 m_has_directives_yaml =
false;
445 m_has_directives =
false;
448 m_prev_val_end =
npos;
452 if(m_options.locations())
454 _prepare_locations();
// Re-points strings that live in the previous arena so that they refer to
// the same offsets inside the new arena.
//   prev_arena: the arena before relocation
//   next_arena: the arena after relocation
//   other:      an additional string supplied by the caller
461 template<
class EventHandler>
462 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
464 _c4dbgp(
"relocate to new arena");
465 const char *pb = prev_arena.str;
466 const char *pe = prev_arena.str + prev_arena.len;
// Local helper: when (s).str lies within [pb, pe] (the end pointer is
// included), rebase it onto next_arena keeping the same offset.
467 #define _ryml_relocate(s) \
468 if((s).str >= pb && (s).str <= pe) \
470 (s).str = next_arena.str + ((s).str - pb); \
472 for(ParserState &st : m_evt_handler->m_stack)
478 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
483 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
489 TagDirectives &tds = m_evt_handler->tag_directives();
490 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
497 TagCache &tch = m_evt_handler->tag_cache();
498 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
508 #undef _ryml_relocate
// Allocates `len` characters from the event handler's arena. The arena is
// sampled before and after the allocation; if its base pointer changed,
// existing strings are relocated via _relocate_arena (passing `other`).
512 template<
class EventHandler>
513 substr ParseEngine<EventHandler>::_alloc_arena(
size_t len, substr *other)
515 csubstr prev = m_evt_handler->arena();
516 substr out = m_evt_handler->alloc_arena(len);
517 substr curr = m_evt_handler->arena();
518 if(curr.str != prev.str)
519 _relocate_arena(prev, curr, other);
// Writes a description of the current parse position through `dumpfn`:
// a "line:col: " prefix (preceded by "file:" on one path), the current
// line's contents (truncated to 80 characters plus "..."), a '^'/'~' marker
// line with the column range of the unparsed remainder, and the flags of
// the top parser state.
528 template<
class EventHandler>
529 template<
class DumpFn>
530 C4_NO_INLINE
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
532 ParserState
const *
const C4_RESTRICT st = m_evt_handler->m_curr;
533 LineContents
const& C4_RESTRICT lc = st->line_contents;
534 csubstr contents = lc.full.first(lc.num_cols);
// `offs` accumulates the width of the printed prefix; it is used below to
// pad the marker line. to_chars on an empty substr is used here to obtain
// the printed widths of line and col.
538 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
539 csubstr m_file = m_evt_handler->m_curr->pos.name;
542 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
543 offs += m_file.len + 1;
545 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// show at most 80 characters of the line, followed by an ellipsis
546 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
547 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
548 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
// column range of the remainder within the full line
550 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
551 size_t lastcol = firstcol + lc.rem.len;
// pad, then mark the start with '^' and the rest with '~' (capped at 80)
554 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
555 std::forward<DumpFn>(dumpfn)(
" ");
556 std::forward<DumpFn>(dumpfn)(
"^");
557 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
558 std::forward<DumpFn>(dumpfn)(
"~");
559 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
563 std::forward<DumpFn>(dumpfn)(
"\n");
568 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Prints one line per entry of the handler's state stack (level, indref,
// node id and flags). `buf` is the scratch buffer used to render the flags
// as text.
572 template<
class EventHandler>
573 void ParseEngine<EventHandler>::_print_state_stack(substr buf)
const
577 for(ParserState
const& s : m_evt_handler->m_stack)
578 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
// Convenience overload: forwards to _print_state_stack(substr) with a
// buffer `buf` whose declaration is not visible here.
582 template<
class EventHandler>
583 void ParseEngine<EventHandler>::_print_state_stack()
const
586 _print_state_stack(buf);
// Reports a parse error at an explicit YAML location and does not return
// (C4_NORETURN).
//   cpploc: location in the C++ source raising the error
//   ymlloc: location in the YAML input the error refers to
//   fmt, args: message format and its arguments
// The handler is told to cancel the parse before err_parse is invoked with
// the handler's callbacks.
593 template<
class EventHandler>
594 template<
class ...Args>
595 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc, Location
const& ymlloc,
const char* fmt, Args
const& ...args)
const
597 m_evt_handler->cancel_parse();
598 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
// Reports a parse error at the current parse position and does not return
// (C4_NORETURN).
//   cpploc: location in the C++ source raising the error
//   fmt, args: message format and its arguments
// The handler is told to cancel the parse before err_parse is invoked; the
// YAML location is taken from the current parser state.
601 template<
class EventHandler>
602 template<
class ...Args>
603 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const
605 m_evt_handler->cancel_parse();
606 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
// Debug output: prints the formatted message and then the description of
// the current parse position produced by _fmt_msg.
612 template<
class EventHandler>
613 template<
class ...Args>
614 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& ...args)
const
618 _dbg_printf(fmt, args...);
620 _fmt_msg(_dbg_dumper);
// Tells whether the whole buffer was consumed: `ret` is true when the
// current offset has reached or passed the buffer length.
627 template<
class EventHandler>
628 bool ParseEngine<EventHandler>::_finished_file()
const
630 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
633 _c4dbgp(
"finished file!!!");
// Returns true when nothing remains to be parsed on the current line.
638 template<
class EventHandler>
639 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
641 return m_evt_handler->m_curr->line_contents.rem.empty();
// Skips leading whitespace of the remaining line, if there is any. Only a
// space counts as whitespace, plus a tab when tab tokens are enabled.
647 template<
class EventHandler>
648 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
650 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
// fallback: skip the entire remainder
// (presumably when no non-whitespace character was found — confirm)
654 pos = m_evt_handler->m_curr->line_contents.rem.len;
655 _c4dbgpf(
"skip {} whitespace characters", pos);
656 _line_progressed(pos);
// If the remaining line starts with character `c`, skips the leading run of
// `c` characters.
660 template<
class EventHandler>
661 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
663 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
665 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
// fallback: skip the entire remainder
// (presumably when the line consists only of `c` — confirm)
667 pos = m_evt_handler->m_curr->line_contents.rem.len;
668 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
669 _line_progressed(pos);
// Skips the leading run of characters belonging to the set `chars`.
// Precondition (asserted): the remaining line begins with one of `chars`.
673 template<
class EventHandler>
675 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
677 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
678 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
// fallback: skip the entire remainder
// (presumably when every remaining character is in the set — confirm)
680 pos = m_evt_handler->m_curr->line_contents.rem.len;
681 _c4dbgpf(
"skip {} characters", pos);
682 _line_progressed(pos);
// Consumes a comment that runs to the end of the current line.
// Preconditions (asserted): the remainder begins with '#', the remainder is
// a sub-range of the full line, and the 1-based column is consistent with
// the remainder's offset within the line.
// Raises a parse error when the comment is not at the start of the line and
// the preceding character is neither a space nor a tab.
685 template<
class EventHandler>
686 void ParseEngine<EventHandler>::_skip_comment()
688 LineContents
const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
// 0-based column of the '#'
689 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
691 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
692 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
// a comment that does not start the line must follow whitespace
695 if(lc.rem.str != lc.full.str)
697 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
698 const char prev = lc.full.str[col - 1u];
699 if(C4_UNLIKELY(prev !=
' ' && prev !=
'\t'))
700 _c4err(
"comment not preceded by whitespace");
702 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
// the comment takes up the rest of the line
703 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces/tabs in the remaining line; when the first
// other character is '#', advances the line up to that position.
706 template<
class EventHandler>
707 void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
709 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
712 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
714 _line_progressed(pos);
// Looks past leading spaces/tabs in the remaining line; when the first
// other character is '#', advances the line up to that position. On another
// path the whole remainder of the line is consumed.
720 template<
class EventHandler>
721 void ParseEngine<EventHandler>::_maybe_skip_comment()
723 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
726 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
728 _line_progressed(pos);
734 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces/tabs in the remaining line for a ':' and, when
// one is found, inspects the character following it. Depending on the path,
// the line is advanced either to the scanned position or to its end.
738 template<
class EventHandler>
739 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
741 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
744 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
// step over the ':' and look at what follows, if anything
748 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
750 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
756 _line_progressed(pos);
762 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans an anchor at the start of the remaining line.
// Precondition (asserted): the remainder begins with '&'.
// The anchor name is the text after the '&' up to the first of space, ',',
// ']', '}' or tab. The line is advanced past the '&' and the name, and any
// spaces that follow are skipped.
770 template<
class EventHandler>
771 csubstr ParseEngine<EventHandler>::_scan_anchor()
773 csubstr s = m_evt_handler->m_curr->line_contents.rem;
774 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
775 csubstr anchor = s.range(1, s.first_of(
" ,]}\t"));
776 _line_progressed(1u + anchor.len);
777 _maybe_skipchars(
' ');
// Scans a reference (alias) in a sequence context.
// Precondition (asserted): the remainder begins with '*'.
// The token ends at the first of space, ',', ']' or tab, or at the end of
// the line when none is found; the line is advanced past it.
781 template<
class EventHandler>
782 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
784 csubstr s = m_evt_handler->m_curr->line_contents.rem;
785 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
786 _set_first(s, s.first_of(
" ,]\t"));
787 _line_progressed(s.len);
// Scans a reference (alias) in a map context.
// Precondition (asserted): the remainder begins with '*'.
// The token ends at the first of space, ',', '}' or tab, or at the end of
// the line when none is found; the line is advanced past it.
791 template<
class EventHandler>
792 csubstr ParseEngine<EventHandler>::_scan_ref_map()
794 csubstr s = m_evt_handler->m_curr->line_contents.rem;
795 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
796 _set_first(s, s.first_of(
" ,}\t"));
797 _line_progressed(s.len);
// Scans a tag at the start of the remaining line.
// Precondition (asserted): the remainder begins with '!'.
// Two forms are handled:
//  - plain "!..." tags end at the first of space, ',', ']', '}' or tab;
//    a '[' or '{' inside the tag is singled out as an unlikely case;
//  - verbatim "!<...>" tags end at the closing '>'; a missing '>' is
//    singled out as an unlikely case.
// After the tag, following whitespace tokens are skipped.
801 template<
class EventHandler>
802 csubstr ParseEngine<EventHandler>::_scan_tag()
804 csubstr t = m_evt_handler->m_curr->line_contents.rem;
805 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
806 if(!t.begins_with(
"!<"))
808 _c4dbgp(
"begins with '!'");
809 _set_first(t, t.first_of(
" ,]}\t"));
810 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
812 _line_progressed(t.len);
// tag resolution applies to all tags, or only to custom tags, per options
813 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
818 _c4dbgp(
"begins with '!<'");
819 size_t pos = t.find(
'>');
820 if(C4_UNLIKELY(pos ==
npos))
// keep the closing '>' as part of the tag
822 _set_first_strict(t, pos+1);
823 _line_progressed(t.len);
826 _maybe_skip_whitespace_tokens();
// Overload of _scan_tag that takes an extra `orig` out-parameter (its use
// is not visible here).
// Precondition (asserted): the remainder begins with '!'.
// Two forms are handled:
//  - plain "!..." tags end at the first of space, ',' or tab (here ']' and
//    '}' are not terminators); a '[' or '{' inside the tag is singled out
//    as an unlikely case;
//  - verbatim "!<...>" tags end at the closing '>'; a missing '>' is
//    singled out as an unlikely case.
// After the tag, following whitespace tokens are skipped.
830 template<
class EventHandler>
831 csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
833 csubstr t = m_evt_handler->m_curr->line_contents.rem;
834 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
835 if(!t.begins_with(
"!<"))
837 _c4dbgp(
"begins with '!'");
838 _set_first(t, t.first_of(
" ,\t"));
839 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
841 _line_progressed(t.len);
// tag resolution applies to all tags, or only to custom tags, per options
843 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
848 _c4dbgp(
"begins with '!<'");
849 size_t pos = t.find(
'>');
850 if(C4_UNLIKELY(pos ==
npos))
// keep the closing '>' as part of the tag
852 _set_first_strict(t, pos+1);
853 _line_progressed(t.len);
857 _maybe_skip_whitespace_tokens();
// Decides whether text starting with ':' or '-' can begin a plain scalar in
// flow context.
// Preconditions (asserted): `s` is non-empty, begins with ':' or '-', and
// contains no '\n' or '\r'.
// One path reports "not a scalar"; another raises a parse error for an
// invalid ":x" token.
864 template<
class EventHandler>
865 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
868 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
869 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
870 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
882 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
892 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
// Decides whether text starting with '?' can begin a plain scalar in flow
// context.
// Preconditions (asserted): `s` is non-empty, its first character is '?',
// and it contains no '\n' or '\r'.
// One path reports "not a scalar"; another raises a parse error for an
// invalid "?x" token.
907 template<
class EventHandler>
908 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
912 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
913 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
920 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
926 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Decides whether `s` can begin a plain scalar in flow context.
// Precondition (asserted): `s` is not empty.
// Some first characters are rejected directly as non-scalar tokens. The
// "suspicious" ones are delegated to the block-token check, and '?' to the
// question-mark check.
940 template<
class EventHandler>
941 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
943 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
959 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
964 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
965 return _is_valid_start_scalar_plain_flow_check_block_token(s);
967 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
968 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
// Handles a '\n' found at offset `offs` while scanning a plain scalar in
// `s`. It inspects the next line to decide whether the scalar continues.
// The next line's indentation (count of leading spaces) is compared with
// the current state's reference indentation (indref):
//  - equal or larger: the indentation is stripped and the line examined;
//  - smaller, with non-space content: treated as an error path.
// A next line that (after trimming spaces/tabs) begins with one of ',',
// ']', '#' or ':' is reported as a terminating character.
976 template<
class EventHandler>
977 bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s,
size_t offs)
979 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs),
true));
982 _c4dbgp(
"newl[PLAIN]: buffer continues");
// everything after the newline
983 csubstr next_line = s.sub(offs + 1);
984 size_t next_line_indentation = next_line.first_not_of(
' ');
985 if(next_line_indentation !=
npos)
987 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
// restrict next_line to a single line
988 next_line = next_line.first(next_line.first_of(
"\n\r"));
989 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
990 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
991 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
993 _c4dbgp(
"newl[PLAIN]: larger indentation");
994 next_line = next_line.sub(next_line_indentation);
996 else if(C4_UNLIKELY(next_line.len && next_line.triml(
' ').len))
998 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
999 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1002 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
1003 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1006 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1009 next_line = next_line.triml(
" \t");
1010 if(next_line.begins_with_any(
",]#:"))
1012 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[0]);
1018 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans a plain (unquoted) scalar while parsing a flow sequence.
//   sc: receives the scan result (at least its needs_filter flag and scalar)
// Required parser state (asserted): a flow sequence (RSEQ or RSEQIMAP, with
// RFLOW) expecting a value (RVAL), and neither RMAP nor RBLCK.
// The remainder must not begin with a space or newline.
// The scan walks the buffer from the current offset one character at a
// time, tracking the offset within the scalar (`offs`) and the column
// progress on the current line (`col`).
1024 template<
class EventHandler>
1025 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1027 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1028 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1029 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1030 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1031 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1033 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1034 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
// nothing to scan when the line is exhausted or cannot start a plain scalar
1036 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// scan over the whole remaining buffer, not just the current line
1039 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1042 _c4dbgp(
"scanning seqflow scalar...");
1044 bool needs_filter =
false;
1047 for( ; offs < s.len; ++offs, ++col)
1049 const char c = s.str[offs];
1054 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1055 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
// a newline inside the scalar: the helper decides whether it continues
1058 _c4dbgpf(
"found '\\n' at col={}", col);
1059 if(!_scan_scalar_plain_handle_newline(s, offs))
1062 needs_filter =
true;
1066 needs_filter =
true;
// a ':' ends the scalar only when followed by one of the characters below
1069 _c4dbgp(
"found suspicious ':'");
1070 if(s.len > offs + 1)
1072 char next = s.str[offs + 1];
1073 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1076 csubstr after = s.sub(offs + 1).triml(
'\r');
1079 next = after.str[0];
1080 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1084 if(next ==
' ' _RYML_WITH_TAB_TOKENS(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1086 _c4dbgp(
"map starting!");
1091 _c4dbgp(
"':' nothing to see here");
// ':' is the last character of the buffer: reported as missing termination
1096 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1097 _line_progressed(col);
1098 _c4err(
"missing termination: '{}'", c);
// for '#', the preceding character is examined
1103 _c4dbgp(
"found suspicious '#'");
1104 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1105 char prev = s.str[offs - 1];
1108 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1116 _line_progressed(col);
1117 _c4err(
"invalid character: '{}'", c);
// a document marker is three identical characters at the line beginning
1120 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1121 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1123 _c4dbgp(
"at line beginning");
1124 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
// commit: advance the line and cut the scalar at the stopping offset
1136 _line_progressed(col);
1137 _set_first(s, offs);
1139 sc->needs_filter = needs_filter;
1141 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain (unquoted) scalar while parsing a flow map.
//   sc: receives the scan result (at least its needs_filter flag and scalar)
// Returns true when the scanned scalar is non-empty.
// Required parser state (asserted): a flow map (RMAP or RSEQIMAP, with
// RFLOW), not block mode, and one of RKEY, RVAL or QMRK.
// The remainder must not begin with a space or newline.
1146 template<
class EventHandler>
1147 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1149 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1156 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
// nothing to scan when the line is exhausted or cannot start a plain scalar
1158 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// scan over the whole remaining buffer, not just the current line
1161 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1164 _c4dbgp(
"scanning mapflow scalar...");
1166 bool needs_filter =
false;
1169 for( ; offs < s.len; ++offs, ++col)
1171 const char c = s.str[offs];
1176 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
// a newline inside the scalar: the helper decides whether it continues
1180 _c4dbgpf(
"found '\\n' at col={}", col);
1181 if(!_scan_scalar_plain_handle_newline(s, offs))
1184 needs_filter =
true;
1188 needs_filter =
true;
// NOTE(review): the format string below has no "{}" placeholder, yet `c`
// is passed as an argument.
1191 _c4dbgpf(
"found ':'", c);
1195 const char next = s.str[offs+1];
// NOTE(review): the message says next= but the argument is `c`, not
// `next`; this looks like a typo in the debug output — confirm.
1196 _c4dbgpf(
"next='{}'", c);
// a ':' ends the scalar only when followed by one of these characters
1197 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' _RYML_WITH_TAB_TOKENS(|| next ==
'\t'))
1199 _c4dbgpf(
"found terminating character: '{}'", c);
1206 _line_progressed(col);
1207 _c4err(
"invalid character: '{}'", c);
1210 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
// commit the progress made on the line
1219 _line_progressed(col);
1222 sc->needs_filter = needs_filter;
1224 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1226 return sc->scalar.len > 0u;
// Scans a JSON-style scalar while parsing a flow sequence.
//   sc: receives the scalar and needs_filter (always set to false here)
// Required parser state (asserted): RSEQ and RFLOW, neither RMAP nor RBLCK.
// The remainder must be non-empty and must not begin with a space.
// Two visible paths produce a scalar:
//  1. a special literal recognized by _begins_with_special_json_scalar,
//     accepted when followed by ',', ']', whitespace, or end of input;
//  2. a character-by-character scan up to a terminating character.
1229 template<
class EventHandler>
1230 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1232 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1233 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1234 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1235 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1237 substr s = m_evt_handler->m_curr->line_contents.rem;
1238 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1239 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1241 _c4dbgp(
"seq_json: scanning scalar...");
1248 _c4dbgp(
"seq_json: not a scalar.");
1253 const size_t len = _begins_with_special_json_scalar(s);
// character right after the literal; the end of `s` is treated as ','
1256 char c = s.len > len ? s.str[len] :
',';
1257 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1259 sc->scalar = s.first(len);
1260 sc->needs_filter =
false;
1261 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1262 _line_progressed(len);
// general case: scan until a terminating character
1274 for( ; i < s.len; ++i)
1276 const char c = s.str[i];
1283 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
// only a non-empty scan yields a scalar
1292 if(C4_LIKELY(i > 0))
1294 _line_progressed(i);
1295 sc->scalar = s.first(i);
1296 sc->needs_filter =
false;
1297 _c4dbgpf(
"seq_json: scalar was {}", _prs(sc->scalar,
true));
// Scans a JSON-style scalar while parsing a flow map.
//   sc: receives the scalar and needs_filter (always set to false here)
// Required parser state (asserted): RMAP and RFLOW, RKEY or RVAL, and
// neither RSEQ nor RBLCK.
// The remainder must be non-empty and must not begin with a space.
// Two visible paths produce a scalar:
//  1. a special literal recognized by _begins_with_special_json_scalar,
//     accepted when followed by ',', '}', whitespace, or end of input;
//  2. a character-by-character scan up to a terminating character.
1303 template<
class EventHandler>
1304 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1307 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1308 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1309 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1312 substr s = m_evt_handler->m_curr->line_contents.rem;
1313 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1314 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1316 _c4dbgp(
"scanning scalar...");
1319 const size_t len = _begins_with_special_json_scalar(s);
// character right after the literal; the end of `s` is treated as ','
1322 char c = s.len > len ? s.str[len] :
',';
1323 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1324 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1326 sc->scalar = s.first(len);
1327 sc->needs_filter =
false;
1328 _c4dbgpf(
"special json scalar: '{}'", _prs(sc->scalar));
1329 _line_progressed(len);
// general case: scan until a terminating character
1341 for( ; i < s.len; ++i)
1343 const char c = s.str[i];
1350 _c4dbgpf(
"found terminating character: '{}'", c);
// only a non-empty scan yields a scalar
1359 if(C4_LIKELY(i > 0))
1361 _line_progressed(i);
1362 sc->scalar = s.first(i);
1363 sc->needs_filter =
false;
1364 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// Returns true when `s` is a document-begin marker in the current context:
// the line has zero indentation, the parser is at the beginning of the
// line, and `s` passes _is_doc_begin_token.
// Precondition (asserted): the first character of `s` is '-'.
1371 template<
class EventHandler>
1372 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1374 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1375 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1378 template<
class EventHandler>
1379 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1381 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1382 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scan a plain (unquoted) scalar in block context, possibly spanning several
// lines. `indentation` is the minimum indentation a following line must have
// to be treated as a continuation of the scalar. On the visible success path
// the result is written to sc->scalar / sc->needs_filter.
// NOTE(review): several branches, the enclosing loop and the return
// statements are not visible in this excerpt.
1385 template<
class EventHandler>
1386 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
// Preconditions: not in flow context nor in an implicit map inside a seq;
// must be in block context or in the unknown/unknown-style states.
1388 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1389 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1390 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
// The remainder of the current line must be non-empty and must not start
// with a space.
1392 substr s = m_evt_handler->m_curr->line_contents.rem;
1393 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1394 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
// First look at whether the line starts with a block token or a document
// token. NOTE(review): what each of these branches does is not visible here.
1399 if(_is_blck_token(s))
1403 else if(_is_doc_begin(s))
1405 _c4dbgp(
"token is doc start");
1411 if(_is_blck_token(s))
1426 _c4dbgp(
"token is doc end");
1432 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Remember where the scalar starts: the offset delimits the final range,
// and the line is used to detect multiline scalars.
1434 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1435 const size_t start_line = m_evt_handler->m_curr->pos.line;
// Set to true further down once the scan moves on to another line.
1437 bool needs_filter =
false;
1440 _c4dbgpf(
"plain scalar line: {}", _prs(s));
// Walk the current line looking for characters that may end the scalar
// (':' and '#', judging by the trace messages below).
1441 for(
size_t i = 0; i < s.len; ++i)
1443 const char curr = s.str[i];
1448 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1452 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
// Consume everything before the colon.
1453 _line_progressed(i);
// The scalar is accepted here only while still on the line where it
// started; otherwise a multiline implicit key error is raised.
1455 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1457 _c4dbgp(
"start line. scalar ends here");
1462 _c4err(
"multiline scalars cannot be used as implicit keys");
// Advance j over a run of consecutive ':' characters, then resume the
// scan from there.
1468 while(j + 1 < s.len && s.str[j+1] ==
':')
1470 _c4dbgp(
"skip colon");
1473 i = j > i ? j-1 : i;
1474 _c4dbgp(
"nothing to see here");
1478 _c4dbgp(
"got suspicious '#'");
// '#' only starts a comment at the very start of the line or right after
// a space or tab.
1479 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1481 _c4dbgp(
"comment! scalar ends here");
1482 _line_progressed(i);
1487 _c4dbgp(
"nothing to see here");
// The whole line belongs to the scalar: consume it, then peek at the
// following line (without its line ending) to decide whether the scalar
// continues there.
1492 _line_progressed(s.len);
1493 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1494 next_peeked = next_peeked.trimr(
"\n\r");
1495 const size_t next_indentation = next_peeked.first_not_of(
' ');
1496 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
// A line indented less than required ends the scalar.
1497 if(next_indentation < indentation)
1499 _c4dbgp(
"smaller indentation! scalar ended");
// An unindented, non-empty next line is checked for document tokens,
// which also end the scalar.
1502 else if(next_indentation == 0 && next_peeked.len > 0)
1504 const char first = next_peeked.str[0];
1508 _c4dbgpf(
"doc begin? peeked={}", _prs(next_peeked,
size_t(3)));
1509 if(_is_doc_begin_token(next_peeked))
1511 _c4dbgp(
"doc begin! scalar ended");
1516 _c4dbgpf(
"doc end? peeked={}", _prs(next_peeked,
size_t(3)));
1517 if(_is_doc_end_token(next_peeked))
1519 _c4dbgp(
"doc end! scalar ended");
1526 _c4dbgp(
"next line!");
1527 if(!_finished_file())
1529 _c4dbgp(
"next line!");
1535 _c4dbgp(
"file finished!");
// Continue scanning on the new current line; the scalar is then flagged as
// needing filtering.
1538 s = m_evt_handler->m_curr->line_contents.rem;
1539 needs_filter =
true;
// The scalar is the buffer range from the starting offset to the current
// offset, with trailing whitespace and line endings removed.
1544 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1545 sc->needs_filter = needs_filter;
1547 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
1552 template<
class EventHandler>
1553 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1555 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1556 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1557 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1558 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1559 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1560 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1561 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1564 template<
class EventHandler>
1565 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1572 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1575 template<
class EventHandler>
1576 C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1578 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1579 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Look at the line that follows buffer offset `pos` without changing the
// parser state (the function is const). Passing npos selects the parser's
// current offset.
// NOTE(review): the declarations of `rem` and `nlpos`, the early-exit bodies
// and the return statements are not visible in this excerpt.
1585 template<
class EventHandler>
1586 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
// npos means "use the current parse offset".
1590 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
// Offsets at or past the end of the buffer are handled separately.
1591 if(pos >= _buf().len)
// Move to the start of the next line, as computed by _from_next_line().
1595 rem = _from_next_line(_buf().sub(pos));
// Find the end of that line; the adjustment accounts for two-character
// line endings, as reported by _extend_from_combined_newline().
1600 nlpos = rem.first_of(
"\r\n");
1602 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
// Keep the line up to and including the line-ending position.
1603 rem = rem.left_of(nlpos,
true);
1605 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1609 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Load the line at the current parse offset into the current state's
// line_contents.
1615 template<
class EventHandler>
1616 void ParseEngine<EventHandler>::_scan_line()
// Usual case: the offset is still inside the buffer.
1618 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1619 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
// At or past the end of the buffer: reset with an empty range taken from the
// end of the buffer.
// NOTE(review): the line between the two statements is not visible in this
// excerpt; presumably it is an `else` - confirm against the full file.
1621 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1624 template<
class EventHandler>
1625 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1627 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1628 m_evt_handler->m_curr->pos.line,
1629 m_evt_handler->m_curr->line_contents.full.len,
1630 ahead, m_evt_handler->m_curr->pos.col,
1631 m_evt_handler->m_curr->pos.col+ahead,
1632 m_evt_handler->m_curr->pos.offset,
1633 m_evt_handler->m_curr->pos.offset+ahead);
1634 m_evt_handler->m_curr->pos.offset += ahead;
1635 m_evt_handler->m_curr->pos.col += ahead;
1636 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1637 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1640 template<
class EventHandler>
1641 void ParseEngine<EventHandler>::_line_ended()
1643 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1644 m_evt_handler->m_curr->pos.line,
1645 m_evt_handler->m_curr->line_contents.full.len,
1646 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1647 m_evt_handler->m_curr->pos.col, 1);
1648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1649 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1650 ++m_evt_handler->m_curr->pos.line;
1651 m_evt_handler->m_curr->pos.col = 1;
// Revert the position changes made by _line_ended(): go back from the first
// column of a line to the end of the previous one.
1654 template<
class EventHandler>
1655 void ParseEngine<EventHandler>::_line_ended_undo()
// Must be at column 1 of a line other than line 0, and the offset must be
// large enough to be decreased by delta without wrapping around.
1657 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1658 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1659 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
// delta is the same amount that _line_ended() adds to the offset.
1660 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1661 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1662 m_evt_handler->m_curr->pos.offset -= delta;
1663 --m_evt_handler->m_curr->pos.line;
// Back at one past the last column of the line.
1664 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// The line remainder becomes an empty range located at the restored offset.
1667 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1672 template<
class EventHandler>
1673 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation) noexcept
1675 m_evt_handler->m_curr->indref = indentation;
1676 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1679 template<
class EventHandler>
1680 void ParseEngine<EventHandler>::_save_indentation()
1682 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1683 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1684 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1687 template<
class EventHandler>
1688 void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1690 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1691 m_prev_val_end = m_evt_handler->m_curr->pos.line;
// Called when a flow container that just ended turns out to be followed by a
// colon: the event handler is told that the container is the first key of a
// new block map.
// NOTE(review): one line between the handler call and _set_indentation() is
// not visible in this excerpt.
1697 template<
class EventHandler>
1698 void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1700 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1701 m_evt_handler->actually_val_is_first_key_of_new_map_block();
// The current state takes the indentation the flow container had, then
// whitespace after the colon is skipped.
1703 _set_indentation(orig_indent);
1704 _maybe_skip_whitespace_tokens();
// Shared epilogue after a flow map or flow seq has been closed. orig_indent
// is the reference indentation the container had, and multiline tells
// whether it spanned more than one line.
// NOTE(review): the condition guarding the first branch is not visible in
// this excerpt.
1707 template<
class EventHandler>
1708 void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
// First case: the container is a key of a regular block map. It must fit on
// one line and must be followed by a colon.
1714 _c4dbgp(
"flow container: end as vanilla block map key!");
1715 if(C4_UNLIKELY(multiline))
1716 _c4err(
"multiline key is invalid");
1717 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1718 _c4err(
"could not find ':' colon after key");
1719 _maybe_skip_whitespace_tokens();
// Second case: the parser is no longer inside any flow container.
1722 else if(has_none(
RFLOW))
1724 _c4dbgp(
"end_flow_container: now not in flow!");
// In these states a following colon turns the container into the first key
// of a new block map; such a key must fit on one line.
1725 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1727 if(C4_UNLIKELY(multiline))
1728 _c4err(
"multiline key is invalid");
1729 _flow_container_was_a_key(orig_indent);
1733 _c4dbgp(
"end_flow_container: end map as key!");
// Third case: still inside an enclosing flow seq; record where this value
// ended.
1736 else if(has_any(
RSEQ))
1738 _c4dbgp(
"end_flow_container: now in a flow seq");
1739 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1740 _mark_seqflow_val_end();
1744 template<
class EventHandler>
1745 void ParseEngine<EventHandler>::_end_map_flow()
1747 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1748 size_t orig_indent = m_evt_handler->m_curr->indref;
1749 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1750 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1751 _end_flow_container(orig_indent, multiline);
1754 template<
class EventHandler>
1755 void ParseEngine<EventHandler>::_end_seq_flow()
1757 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758 size_t orig_indent = m_evt_handler->m_curr->indref;
1759 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1760 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1761 _end_flow_container(orig_indent, multiline);
// Close the current block map, first supplying empty scalars for a key
// and/or value that is still pending.
// NOTE(review): the condition guarding the first branch is not visible in
// this excerpt.
1764 template<
class EventHandler>
1765 void ParseEngine<EventHandler>::_end_map_blck()
1767 _c4dbgp(
"mapblck: end");
// First case: only the value is missing; emit an empty plain value.
1770 _c4dbgp(
"mapblck: set missing val");
1771 _handle_annotations_before_blck_val_scalar();
1772 m_evt_handler->set_val_scalar_plain_empty();
// Second case (QMRK state): both key and value are missing; emit an empty
// plain key followed by an empty plain value.
1774 else if(has_any(
QMRK))
1776 _c4dbgp(
"mapblck: set missing keyval");
1777 _handle_annotations_before_blck_key_scalar();
1778 m_evt_handler->set_key_scalar_plain_empty();
1779 _handle_annotations_before_blck_val_scalar();
1780 m_evt_handler->set_val_scalar_plain_empty();
1782 m_evt_handler->end_map_block();
// Close the current block sequence, first supplying an empty plain scalar
// for a value that is still pending.
// NOTE(review): the condition guarding the "missing val" branch is not
// visible in this excerpt.
1785 template<
class EventHandler>
1786 void ParseEngine<EventHandler>::_end_seq_blck()
1790 _c4dbgp(
"seqblck: set missing val");
1791 _handle_annotations_before_blck_val_scalar();
1792 m_evt_handler->set_val_scalar_plain_empty();
1794 m_evt_handler->end_seq_block();
// End the current map state and pop it from the event handler.
// NOTE(review): the lines between the RMAP assertion and the RFLOW assertion
// are not visible in this excerpt.
1797 template<
class EventHandler>
1798 void ParseEngine<EventHandler>::_end2_map()
1800 _c4dbgp(
"map: end");
1801 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
// Before the pop, the state is asserted to be non-flow and to have USTY set.
1808 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1809 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1810 m_evt_handler->_pop();
// End the current sequence state and pop it from the event handler.
// NOTE(review): the lines between the RSEQ assertion and the RFLOW assertion
// are not visible in this excerpt.
1814 template<
class EventHandler>
1815 void ParseEngine<EventHandler>::_end2_seq()
1817 _c4dbgp(
"seq: end");
1818 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
// Before the pop, the state is asserted to be non-flow and to have USTY set.
1825 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1826 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1827 m_evt_handler->_pop();
// Begin an implicit document: clear the directive flags, notify the event
// handler, and reset the reference indentation of the resulting state to 0.
// NOTE(review): two lines after the flag resets are not visible in this
// excerpt.
1831 template<
class EventHandler>
1832 void ParseEngine<EventHandler>::_begin2_doc()
1834 _c4dbgp(
"begin_doc");
// Directives seen so far are consumed by this document.
1835 m_has_directives_yaml =
false;
1836 m_has_directives =
false;
1839 m_evt_handler->begin_doc();
1840 m_evt_handler->m_curr->indref = 0;
// Begin an explicit document: clear the directive flags, notify the event
// handler through begin_doc_expl(), and reset the reference indentation of
// the resulting state to 0.
// NOTE(review): two lines after the flag resets are not visible in this
// excerpt.
1843 template<
class EventHandler>
1844 void ParseEngine<EventHandler>::_begin2_doc_expl()
1846 _c4dbgp(
"begin_doc_expl");
// Directives seen so far are consumed by this document.
1847 m_has_directives_yaml =
false;
1848 m_has_directives =
false;
1851 m_evt_handler->begin_doc_expl();
1852 m_evt_handler->m_curr->indref = 0;
1855 template<
class EventHandler>
1856 void ParseEngine<EventHandler>::_end2_doc()
1858 _c4dbgp(
"doc: end");
1859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1860 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1862 _c4dbgp(
"doc was empty; add empty val");
1863 _handle_annotations_before_blck_val_scalar();
1864 m_evt_handler->set_val_scalar_plain_empty();
1866 m_evt_handler->end_doc();
1870 template<
class EventHandler>
1871 void ParseEngine<EventHandler>::_end2_doc_expl()
1873 _c4dbgp(
"doc: end");
1874 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1876 _c4dbgp(
"doc: no children; add empty val");
1877 _handle_annotations_before_blck_val_scalar();
1878 m_evt_handler->set_val_scalar_plain_empty();
1880 m_evt_handler->end_doc_expl();
// Start a document if one needs to be started.
// NOTE(review): the condition and the call that actually begins the document
// are not visible in this excerpt; only the trace message is.
1884 template<
class EventHandler>
1885 void ParseEngine<EventHandler>::_maybe_begin_doc()
1889 _c4dbgp(
"doc must be started");
// Finish the current document if there is one. If there is none but tags or
// anchors are pending on an empty document, a whole document containing a
// single empty plain scalar is emitted to carry them.
// NOTE(review): the first condition and the call that ends the document are
// not visible in this excerpt.
1893 template<
class EventHandler>
1894 void ParseEngine<EventHandler>::_maybe_end_doc()
1898 _c4dbgp(
"doc must be finished");
// No document is open, but annotations are waiting for a node: wrap an
// empty value in its own document.
1901 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1903 _c4dbgp(
"no doc to finish, but pending annotations");
1904 m_evt_handler->begin_doc();
1905 _handle_annotations_before_blck_val_scalar();
1906 m_evt_handler->set_val_scalar_plain_empty();
1907 m_evt_handler->end_doc();
1911 template<
class EventHandler>
1912 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1914 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1915 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1916 _c4dbgp(
"root is RDOC");
1917 if(m_evt_handler->m_curr->level != 0)
1918 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1919 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
// Check the current state when a document token is found after content.
// NOTE(review): the statement executed when the final condition holds is not
// visible in this excerpt.
1924 template<
class EventHandler>
1925 void ParseEngine<EventHandler>::_check_trailing_doc_token()
// is_root: the state stack holds a single entry.
1927 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
// isndoc: the NDOC flag is set on the current state.
1928 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
// suspicious: the handler reports MAP, SEQ or VAL for the current node.
1929 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1930 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
// Triggers for a root or DOC node that already has content, unless NDOC is
// set.
1931 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
// End the current document from wherever the parser is: nested states are
// first unwound with _end_doc_suddenly__pop().
// NOTE(review): the statements that follow the pop are not visible in this
// excerpt.
1935 template<
class EventHandler>
1936 void ParseEngine<EventHandler>::_end_doc_suddenly()
1938 _c4dbgp(
"end doc suddenly");
1939 _end_doc_suddenly__pop();
// Validate what is left on the line after a document-end marker.
// NOTE(review): the statement executed when the check fails is not visible
// in this excerpt.
1944 template<
class EventHandler>
1945 void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1947 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// The caller must already have consumed any leading '.', space or tab.
1948 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
// Anything left on the line other than a '#' comment takes the unlikely
// branch.
1949 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
// Start a new document from wherever the parser is: nested states of the
// current document are first unwound with _end_doc_suddenly__pop().
// NOTE(review): the statements that follow the pop are not visible in this
// excerpt.
1955 template<
class EventHandler>
1956 void ParseEngine<EventHandler>::_start_doc_suddenly()
1958 _c4dbgp(
"start doc suddenly");
1959 _end_doc_suddenly__pop();
// Finish parsing: report unterminated flow containers, unwind the state
// stack, emit a document for annotations that are still pending, and notify
// the event handler that the stream ended.
// NOTE(review): some conditions (including the one guarding the "missing
// terminating ]" error) are not visible in this excerpt.
1964 template<
class EventHandler>
1965 void ParseEngine<EventHandler>::_end_stream()
1967 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
// Reaching the end of the stream inside a flow container is an error.
1969 _c4err(
"missing terminating ]");
1970 else if(C4_UNLIKELY(has_all(
RMAP|
RFLOW)))
1971 _c4err(
"missing terminating }");
// Pop every nested state down to the bottom of the stack.
1972 if(m_evt_handler->m_stack.size() > 1)
1973 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// Pending anchors or tags get their own document with an empty plain value.
1980 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1984 m_evt_handler->begin_doc();
1985 _handle_annotations_before_blck_val_scalar();
1986 m_evt_handler->set_val_scalar_plain_empty();
1987 m_evt_handler->end_doc();
1991 m_evt_handler->end_stream();
// Directives that were never followed by a document are an error; note that
// the check runs after the handler was told the stream ended.
1992 if(C4_UNLIKELY(m_has_directives))
1993 _c4err(
"directives cannot be used without a document");
// Pop states until the current state is `popto`.
// NOTE(review): the calls made for each popped state and the first condition
// of the if/else chain are not visible in this excerpt.
1997 template<
class EventHandler>
1998 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
2000 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
// One iteration per state to pop; sequences and maps are traced separately.
2001 while(m_evt_handler->m_curr != popto)
2005 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2008 else if(has_any(
RMAP))
2010 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2018 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Handle a decrease of indentation while inside a block sequence: search the
// state stack downwards for a state whose reference indentation equals the
// indentation of the current line, and pop to it.
// NOTE(review): the statements that select `popto` inside the loop are not
// visible in this excerpt.
2021 template<
class EventHandler>
2022 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2025 using state_type =
typename EventHandler::state;
2026 state_type
const* popto =
nullptr;
2027 auto &stack = m_evt_handler->m_stack;
// The search below walks raw pointers, so the stack storage must be
// contiguous and the current state must lie inside it.
2028 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2029 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2030 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2032 _print_state_stack();
// Walk from the parent of the current state down to the bottom of the stack.
2034 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2036 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2037 if(s->indref == ind)
2039 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// No target, or a target that is not strictly below the current state:
// report an indentation error.
2044 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2046 _c4err(
"parse error: incorrect indentation?");
2048 _handle_indentation_pop(popto);
// Handle a decrease of indentation while inside a block map: search the
// state stack downwards for the state to return to, based on the indentation
// of the current line, and pop to it.
// NOTE(review): the first branch of the search and the statements that
// assign `popto` are not visible in this excerpt.
2051 template<
class EventHandler>
2052 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2055 using state_type =
typename EventHandler::state;
2056 auto &stack = m_evt_handler->m_stack;
// The search below walks raw pointers, so the stack storage must be
// contiguous and the current state must lie inside it.
2057 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2058 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2059 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2060 state_type
const* popto =
nullptr;
2063 _print_state_stack(flagbuf_);
// Walk from the parent of the current state downwards; the bottom entry of
// the stack is not visited (the loop condition is `s > stack.begin()`).
2065 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2067 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2072 else if(s->indref == ind)
2074 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
// Special handling when a candidate was already found and this state is a
// top-level state that is neither a map nor a seq.
2075 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// Look at what follows the indentation on the current line, to detect an
// indentless sequence entry.
2082 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2083 const size_t first = rem.first_not_of(
' ');
2084 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2085 rem = rem.right_of(first,
true);
2086 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2087 if(rem.begins_with(
'-') && _is_blck_token(rem))
2089 _c4dbgp(
"parent was indentless seq");
// No target, or a target that is not strictly below the current state:
// report an indentation error.
2095 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2097 _c4err(
"parse error: incorrect indentation?");
2099 _handle_indentation_pop(popto);
// Validate the line that continues a quoted scalar after a line break.
// NOTE(review): the condition guarding the "multiline quoted keys" error is
// not visible in this excerpt.
2104 template<
class EventHandler>
2105 void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2109 _c4err(
"multiline quoted keys are invalid");
// Minimum indentation for the continuation line: the reference indentation,
// plus one column when inside a block map or block seq (the boolean
// expression converts to 0 or 1).
2113 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2114 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
// An under-indented line is only tolerated when nothing follows its
// indentation.
2115 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2117 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2118 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2119 m_evt_handler->m_curr->pos);
2120 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2121 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem,
true));
2122 if(C4_UNLIKELY(!!trimmed.len))
2124 _c4err(
"bad indentation");
// Scan a single-quoted scalar starting at the current offset, which must
// point at the opening quote. Returns the range between the quotes together
// with a flag telling whether the scalar still needs filtering.
// NOTE(review): the declaration of `pos`, the per-character conditions and
// the line-advance call are not visible in this excerpt.
2132 template<
class EventHandler>
2133 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2138 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
// s starts right after the opening quote and initially extends to the end of
// the buffer; the opening quote is then consumed.
2141 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2142 _line_progressed(1);
2143 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2145 bool needs_filter =
false;
// Scan line by line until the closing quote or the end of the file.
2147 while( ! _finished_file())
2149 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2150 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
// A document token at the beginning of a line is an error inside the
// scalar.
2151 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2152 _c4err(
"token can not appear at line begin");
2153 for(
size_t i = 0; i < line.len; ++i)
2155 const char curr = line.str[i];
// Look one character ahead; '~' is a placeholder when the line ends here.
2158 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// Consume up to and including this character and record its position
// relative to the start of s.
2161 _line_progressed(i + 1);
2162 pos = i + (size_t)(line.str - s.str);
2167 needs_filter =
true;
// The scalar continues past this line: it needs filtering, and the rest
// of the line is consumed.
2173 needs_filter =
true;
2174 _line_progressed(line.len);
2177 _check_valid_newline_in_quoted_scalar();
2180 _c4err(
"reached end of file while looking for closing quote");
2184 _c4dbgpf(
"found closing quote at: {}", pos);
2185 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): if pos is an unsigned type (the size_t cast above suggests
// so), this check is always true - confirm against the declaration.
2186 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2188 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
// Reduce s using pos via _set_first_strict().
2189 _set_first_strict(s, pos);
2191 _c4prscalar(
"scanned squoted scalar", s,
true);
2193 return ScannedScalar { s, needs_filter };
// Scan a double-quoted scalar starting at the current offset, which must
// point at the opening quote. Returns the range between the quotes together
// with a flag telling whether the scalar still needs filtering.
// NOTE(review): the declaration of `pos`, some per-character conditions and
// the line-advance call are not visible in this excerpt.
2198 template<
class EventHandler>
2199 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2204 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
// s starts right after the opening quote and initially extends to the end of
// the buffer; the opening quote is then consumed.
2207 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2208 _line_progressed(1);
2209 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2211 bool needs_filter =
false;
// Scan line by line until the closing quote or the end of the file.
2213 while( ! _finished_file())
// Applied only when compiling with GCC 13, per the preprocessor condition.
2215 #if defined(__GNUC__) && (__GNUC__ == 13)
2216 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2218 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2219 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
// A document token at the beginning of a line is an error inside the
// scalar.
2220 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2221 _c4err(
"token can not appear at line begin");
2222 for(
size_t i = 0; i < rem.len; ++i)
2224 const char curr = rem.str[i];
// Look one character ahead; '~' is a placeholder when the line ends here.
// In this branch the scalar is flagged as needing filtering, and a
// following '"' or '\\' is tested for.
2228 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2229 needs_filter =
true;
2230 if(next ==
'"' || next ==
'\\')
// A double quote reached here closes the scalar: consume up to and
// including it and record its position relative to the start of s.
2233 else if(curr ==
'"')
2235 _line_progressed(i + 1);
2236 pos = i + (size_t)(rem.str - s.str);
// The scalar continues past this line: it needs filtering, and the rest
// of the line is consumed.
2242 needs_filter =
true;
2243 _line_progressed(rem.len);
2246 _check_valid_newline_in_quoted_scalar();
2249 _c4err(
"reached end of file while looking for closing quote");
2253 _c4dbgpf(
"found closing quote at: {}", pos);
2254 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
// NOTE(review): if pos is an unsigned type (the size_t cast above suggests
// so), this check is always true - confirm against the declaration.
2255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2256 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2257 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
// Reduce s using pos via _set_first_strict().
2258 _set_first_strict(s, pos);
2260 _c4prscalar(
"scanned dquoted scalar", s,
true);
2262 return ScannedScalar{s, needs_filter};
// Scan a block scalar (introduced by '|' or '>'). The header on the current
// line is parsed first (chomping indicator and optional explicit indentation
// digit), then the following lines are gathered into one raw range. The raw
// range and the indentation that was determined are written to sb.
// `indref` is the reference indentation against which the block is measured.
// NOTE(review): many conditions, loop exits and some assignments are not
// visible in this excerpt; comments below describe only the visible lines.
2267 template<
class EventHandler>
2268 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2270 _c4dbgpf(
"blck: indref={}", indref);
2271 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
// The header is what is left of the current line; it starts with the style
// indicator.
2274 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2275 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2277 _c4dbgpf(
"blck: specs={}", _prs(s));
// Defaults when the header gives nothing more: clip chomping, and an
// indentation that is yet to be detected (npos).
2280 BlockChomp_e chomp = CHOMP_CLIP;
2281 size_t indentation =
npos;
2284 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
// t is the header without the style indicator.
2285 csubstr t = s.sub(1);
2286 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2287 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
// Chomping indicator: '-' or '+'.
2288 size_t pos = t.first_of(
"-+");
2289 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2293 chomp = CHOMP_STRIP;
2294 else if(t[pos] ==
'+')
// Explicit indentation indicator: a single non-zero decimal digit.
2302 pos = t.first_not_of(
"0123456789");
2303 csubstr digits = t.first(pos);
2304 if( ! digits.empty())
2306 if(C4_UNLIKELY(digits.len > 1))
2307 _c4err(
"parse error: invalid indentation");
2308 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2309 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2310 _c4err(
"parse error: could not read indentation as decimal");
2311 if(C4_UNLIKELY( ! indentation))
2312 _c4err(
"parse error: null indentation");
// The digit is relative: the current state's reference indentation is added
// to it. NOTE(review): the trace message prints indentation+indref while
// the addition uses m_curr->indref - confirm the two are always equal.
2313 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2314 indentation += m_evt_handler->m_curr->indref;
// A non-empty t is rejected unless it begins with a space or tab and, after
// skipping pos characters and leading blanks, continues with '#'.
// NOTE(review): t may be re-sliced on lines not visible here.
2318 if(C4_UNLIKELY(t.len && (!t.begins_with_any(
" \t") || !t.sub(pos).triml(
" \t").begins_with(
'#'))))
2319 _c4err(
"parse error: invalid token");
2323 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// The header line is fully consumed.
2326 _line_progressed(s.len);
// The raw block starts as an empty range at the current offset and grows as
// lines are accepted.
2331 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2332 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
// Bookkeeping for the consistency assertion after the loop, and the
// indentation guess used while no non-empty line has been seen.
2340 size_t num_lines = 0;
2341 size_t first = m_evt_handler->m_curr->pos.line;
2342 size_t provisional_indentation =
npos;
// Examine the following lines one at a time through a local copy (lc), so
// that a line is only adopted once it is known to belong to the block.
2344 while(( ! _finished_file()))
2347 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2348 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2349 C4_DONT_OPTIMIZE(lc.rem);
2351 _c4dbgpf(
"blck: peeking at {}", _prs(lc.rem.trimr(
"\r\n"),
true));
// Case 1: the block indentation is already known.
2353 if(indentation !=
npos)
2355 _c4dbgpf(
"blck: indentation={}", indentation);
// A non-blank line with less indentation than the block is not part of it.
2357 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2361 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2365 _c4err(
"indentation decreased without any scalar");
// With zero indentation, a document token terminates the block.
2369 else if(indentation == 0)
2371 _c4dbgpf(
"blck: noindent. lc.rem={}", _prs(lc.rem));
2372 if(_is_doc_token(lc.rem))
2374 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// Case 2: the block indentation is not known yet; it is taken from the
// first non-empty line.
2381 const size_t fns = lc.rem.first_not_of(
' ');
2382 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2385 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2386 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
// No provisional indentation: decide from this line alone.
2388 if(provisional_indentation ==
npos)
2390 if(lc.indentation < indref)
2392 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2393 if(raw_block.len == 0)
2395 _c4dbgp(
"blck: was empty, undo next line");
2400 else if(lc.indentation == m_evt_handler->m_curr->indref)
2404 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2408 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2409 indentation = lc.indentation;
// A provisional indentation exists (from earlier blank lines): this line
// must be at least that deep to define the block indentation.
2413 if(lc.indentation >= provisional_indentation)
2415 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2417 indentation = lc.indentation;
2421 if(lc.indentation >= indref)
2422 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2423 _c4dbgp(
"blck: finished");
// The line is empty or only spaces: it can only update the provisional
// indentation.
2430 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2431 if(provisional_indentation !=
npos)
2433 if(lc.rem.len >= provisional_indentation)
2435 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2436 provisional_indentation = lc.rem.len;
// First guess: the line indentation, or else the 0/1 result of has_any().
2441 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2442 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2443 if(provisional_indentation ==
npos)
2445 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2446 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The provisional indentation is never left below indref.
2448 if(provisional_indentation < indref)
2450 provisional_indentation = indref;
2451 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The line belongs to the block: adopt it as the current line, extend the
// raw range by its full length, and consume its contents.
2457 m_evt_handler->m_curr->line_contents = lc;
2458 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2459 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2460 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2464 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2465 C4_UNUSED(num_lines);
// No non-empty line fixed the indentation: fall back to the provisional one.
2468 if(indentation ==
npos)
2470 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2471 indentation = provisional_indentation;
2477 _c4prscalar(
"scanned block", raw_block,
true);
// Hand the results back to the caller.
2479 sb->scalar = raw_block;
2480 sb->indentation = indentation;
// Debug trace helper for the whitespace filter functions: the first
// definition prefixes the message with the processor's read and write
// positions; the second expands to nothing.
// NOTE(review): the preprocessor conditional that selects between the two
// definitions is not visible in this excerpt.
2492 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2494 #define _c4dbgfws(...)
// Handle a run of whitespace found by a scalar filter: locate the first
// non-whitespace character after the processor's read position and decide
// whether the whitespace is trailing on its line.
// NOTE(review): the return statements and the handling of the non-trailing
// and end-of-input cases are not visible in this excerpt.
2497 template<
class EventHandler>
2498 template<
class FilterProcessor>
2499 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2501 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2502 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
// When reading from the very start of the source, only spaces are skipped;
// otherwise both spaces and tabs are.
2504 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2505 if(first_pos !=
npos)
2507 const char first_char = proc.src[first_pos];
2508 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// Whitespace directly followed by a line ending is trailing and is skipped.
2509 if(first_char ==
'\n' || first_char ==
'\r')
2511 _c4dbgfws(
"whitespace is trailing on line",
"");
2512 proc.skip(first_pos - proc.rpos);
2517 _c4dbgfws(
"legit whitespace. sofar={}", _prs(proc.sofar()));
2521 _c4dbgfws(
"whitespace is trailing on line",
"");
/** Whitespace handler for filters that keep trailing whitespace.
 *
 * Delegates to _filter_ws_handle_to_first_non_space(); when that returns
 * false, every remaining character of the source (src.len - rpos) is
 * copied to the output. */
2525 template<
class EventHandler>
2526 template<
class FilterProcessor>
2527 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2529 if(!_filter_ws_handle_to_first_non_space(proc))
2531 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2532 proc.copy(proc.src.len - proc.rpos);
/** Whitespace handler for filters that drop trailing whitespace.
 *
 * Delegates to _filter_ws_handle_to_first_non_space(); when that returns
 * false, every remaining character of the source (src.len - rpos) is
 * skipped instead of being copied. */
2536 template<
class EventHandler>
2537 template<
class FilterProcessor>
2538 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2540 if(!_filter_ws_handle_to_first_non_space(proc))
2542 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2543 proc.skip(proc.src.len - proc.rpos);
// Trace helper for the plain-scalar filter: forwards to _c4dbgpf() with the
// processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
2557 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2559 #define _c4dbgfps(fmt, ...)
/** Handle a newline found while filtering a plain scalar.
 *
 * The current character must be '\n' (asserted). The newlines that follow
 * are counted with _count_following_newlines(), which receives the scalar
 * indentation and may update the local cursor `ii`. Going by the trace
 * messages, a run of consecutive empty lines is written as
 * `numnl_following` newline characters, a single newline is converted to a
 * space, and a last newline followed only by whitespace is handled
 * separately.
 * NOTE(review): the branch conditions are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the scalar, forwarded to the newline counter */
2562 template<
class EventHandler>
2563 template<
class FilterProcessor>
2564 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2566 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2568 _c4dbgfps(
"found newline. sofar={}", _prs(proc.sofar()));
2569 size_t ii = proc.rpos;
2570 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2573 proc.set(
'\n', numnl_following);
2574 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first character after this newline that is neither a space nor a tab
2578 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2582 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2586 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
/** Filter a plain scalar using the given processor.
 *
 * Walks the source while the processor has characters left and dispatches
 * on the current character: whitespace goes to _filter_ws_skip_trailing(),
 * a newline goes to _filter_nl_plain(), and a carriage return is ignored
 * (per the trace message).
 * NOTE(review): the dispatch conditions and the default branch are not
 * visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the scalar; must not be npos (asserted)
 * @return whatever proc.result() yields */
2593 template<
class EventHandler>
2594 template<
class FilterProcessor>
2595 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2597 _RYML_ASSERT_PARSE_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2598 _c4dbgfps(
"before={}", _prs(proc.src));
2600 while(proc.has_more_chars())
2602 const char curr = proc.curr();
2603 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2608 _c4dbgfps(
"whitespace", curr);
2609 _filter_ws_skip_trailing(proc);
2612 _c4dbgfps(
"newline", curr);
2613 _filter_nl_plain(proc, indentation);
2616 _c4dbgfps(
"carriage return, ignore", curr);
2625 _c4dbgfps(
"after={}", _prs(proc.sofar()));
2627 return proc.result();
// Plain-scalar filter writing into a separate destination: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_plain() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
2633 template<
class EventHandler>
2636 FilterProcessorSrcDst proc(scalar, dst);
2637 return _filter_plain(proc, indentation);
// Plain-scalar filter working in place: builds a
// FilterProcessorInplaceEndExtending over (dst, cap) and runs _filter_plain() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
2640 template<
class EventHandler>
2643 FilterProcessorInplaceEndExtending proc(dst, cap);
2644 return _filter_plain(proc, indentation);
// Trace helper for the single-quoted filter: forwards to _c4dbgpf() with the
// processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
2655 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2657 #define _c4dbgfsq(fmt, ...)
/** Handle a newline found while filtering a single-quoted scalar.
 *
 * The current character must be '\n' (asserted). Following newlines are
 * counted with _count_following_newlines() (no indentation argument is
 * passed here). Going by the trace messages, consecutive empty lines are
 * written as `numnl_following` newline characters and a single newline is
 * converted to a space.
 * NOTE(review): the branch conditions are not visible in this excerpt.
 *
 * @param proc the filter processor being driven */
2660 template<
class EventHandler>
2661 template<
class FilterProcessor>
2662 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2664 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2666 _c4dbgfsq(
"found newline. sofar={}", _prs(proc.sofar()));
2667 size_t ii = proc.rpos;
2668 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2671 proc.set(
'\n', numnl_following);
2672 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first character after this newline that is neither a space nor a tab
2676 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2680 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2685 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
/** Filter a single-quoted scalar using the given processor.
 *
 * Walks the source while the processor has characters left and dispatches
 * on the current character: whitespace goes to _filter_ws_copy_trailing(),
 * a newline goes to _filter_nl_squoted(), a carriage return is skipped, and
 * a single quote is checked against the next character to detect two
 * consecutive single quotes.
 * NOTE(review): the dispatch conditions and what is written for each case
 * are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @return whatever proc.result() yields */
2691 template<
class EventHandler>
2692 template<
class FilterProcessor>
2693 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2695 _c4dbgfsq(
"before={}", _prs(proc.src));
2699 while(proc.has_more_chars())
2701 const char curr = proc.curr();
2702 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2707 _c4dbgfsq(
"whitespace", curr);
2708 _filter_ws_copy_trailing(proc);
2711 _c4dbgfsq(
"newline", curr);
2712 _filter_nl_squoted(proc);
2715 _c4dbgfsq(
"skip cr", curr);
2719 _c4dbgfsq(
"squote", curr);
2720 if(proc.next() ==
'\'')
2722 _c4dbgfsq(
"two consecutive squotes", curr);
// the trace reports how many characters the filter removed
2737 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2739 return proc.result();
// Single-quoted filter writing into a separate destination: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_squoted() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
2744 template<
class EventHandler>
2747 FilterProcessorSrcDst proc(scalar, dst);
2748 return _filter_squoted(proc);
// Single-quoted filter working in place: builds a
// FilterProcessorInplaceEndExtending over (dst, cap) and runs _filter_squoted() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
2751 template<
class EventHandler>
2754 FilterProcessorInplaceEndExtending proc(dst, cap);
2755 return _filter_squoted(proc);
// Trace helper for the double-quoted filter: forwards to _c4dbgpf() with the
// processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
2766 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2768 #define _c4dbgfdq(...)
/** Handle a newline found while filtering a double-quoted scalar.
 *
 * The current character must be '\n' (asserted). Following newlines are
 * counted with _count_following_newlines(). Going by the trace messages,
 * consecutive empty lines are written as `numnl_following` newline
 * characters and a single newline is converted to a space. Afterwards, if
 * the character at `ii` is a backslash followed by a space or a tab, the
 * trace reports that the skip is extended up to that backslash.
 * NOTE(review): the branch conditions and the statements performing the
 * skip are not visible in this excerpt.
 *
 * @param proc the filter processor being driven */
2771 template<
class EventHandler>
2772 template<
class FilterProcessor>
2773 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2775 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2777 _c4dbgfdq(
"found newline. sofar={}", _prs(proc.sofar()));
2778 size_t ii = proc.rpos;
2779 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2782 proc.set(
'\n', numnl_following);
2783 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// first character after this newline that is neither a space nor a tab
2787 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2791 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2796 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// an escaped space/tab right after the folded line break gets special treatment
2798 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2800 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for "no character" when the backslash is the last one
2801 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2802 if(next ==
' ' || next ==
'\t')
2804 _c4dbgfdq(
"extend skip to backslash",
"");
/** Decode a hexadecimal code point escape in a double-quoted scalar.
 *
 * The `sz` hex digits are read starting two characters after the current
 * read position, parsed with read_hex(), converted with
 * decode_code_point() into `readbuf`, and handed to
 * proc.translate_esc_bulk() together with the number of decoded bytes and
 * `sz + 1`.
 * Errors are raised through _c4err() when the source is too short, when
 * the digits do not parse, or when decoding yields zero bytes.
 * NOTE(review): the declaration of `readbuf` is not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param sz number of hex digits expected after the escape letter */
2812 template<
class EventHandler>
2813 template<
class FilterProcessor>
2814 void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc,
size_t sz)
2816 const size_t szp1 = sz + 1u;
2817 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2818 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2820 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2821 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2822 uint32_t codepoint_val = {};
2823 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2824 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
2825 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the value printed below is the scalar position (proc.rpos),
// not the code point that failed to decode.
2826 if(C4_UNLIKELY(numbytes == 0))
2827 _c4err(
"failed to decode code point={}", proc.rpos);
2828 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2829 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2830 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
/** Translate the escape sequence starting at the current backslash of a
 * double-quoted scalar.
 *
 * Dispatches on the character following the backslash:
 *  - a line break: whitespace at the start of the continuation is scanned
 *    and the whole run is skipped;
 *  - double quote, slash, space, tab: the character itself is emitted;
 *  - n r t backslash 0 b f a v e: the matching control character is emitted;
 *  - x, u, U: a 2-, 4- or 8-digit hex code point is decoded through
 *    _filter_dquoted_backslash_decode();
 *  - underscore, N, L, P: a fixed multi-byte payload is emitted;
 *  - anything else: an "unknown character" error is raised.
 * NOTE(review): several branch bodies and conditions are not visible in
 * this excerpt, including the handling of a carriage return and of '\0'.
 *
 * @param proc the filter processor being driven */
2833 template<
class EventHandler>
2834 template<
class FilterProcessor>
2835 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2837 char next = proc.next();
2838 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a CR directly followed by LF is treated as the LF (see the trace below)
2841 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2845 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan the characters after the escaped line break, looking at spaces/tabs
2851 size_t ii = proc.rpos + 2;
2852 for( ; ii < proc.src.len; ++ii)
2855 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2860 proc.skip(ii - proc.rpos);
// escapes that stand for the escaped character itself
2862 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2865 proc.translate_esc(next);
2866 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2868 else if(next ==
'\r')
2872 else if(next ==
'n')
2874 proc.translate_esc(
'\n');
2876 else if(next ==
'r')
2878 proc.translate_esc(
'\r');
2880 else if(next ==
't')
2882 proc.translate_esc(
'\t');
2884 else if(next ==
'\\')
2886 proc.translate_esc(
'\\');
// hex code point escapes with 2, 4 and 8 digits
2888 else if(next ==
'x')
2890 _filter_dquoted_backslash_decode(proc, 2u);
2892 else if(next ==
'u')
2894 _filter_dquoted_backslash_decode(proc, 4u);
2896 else if(next ==
'U')
2898 _filter_dquoted_backslash_decode(proc, 8u);
2901 else if(next ==
'0')
2903 proc.translate_esc(
'\0');
2905 else if(next ==
'b')
2907 proc.translate_esc(
'\b');
2909 else if(next ==
'f')
2911 proc.translate_esc(
'\f');
2913 else if(next ==
'a')
2915 proc.translate_esc(
'\a');
2917 else if(next ==
'v')
2919 proc.translate_esc(
'\v');
2921 else if(next ==
'e')
2923 proc.translate_esc(
'\x1b');
// bytes 0xc2 0xa0: UTF-8 encoding of U+00A0 (non-breaking space)
2925 else if(next ==
'_')
2928 const char payload[] = {
2929 _RYML_CHCONST(-0x3e, 0xc2),
2930 _RYML_CHCONST(-0x60, 0xa0),
2932 proc.translate_esc_bulk(payload, 2, 1);
// bytes 0xc2 0x85: UTF-8 encoding of U+0085 (next line)
2934 else if(next ==
'N')
2937 const char payload[] = {
2938 _RYML_CHCONST(-0x3e, 0xc2),
2939 _RYML_CHCONST(-0x7b, 0x85),
2941 proc.translate_esc_bulk(payload, 2, 1);
// bytes 0xe2 0x80 0xa8: UTF-8 encoding of U+2028 (line separator); the
// 3-byte payload is longer than the 2-character escape, hence the
// "extending" translation call
2943 else if(next ==
'L')
2946 const char payload[] = {
2947 _RYML_CHCONST(-0x1e, 0xe2),
2948 _RYML_CHCONST(-0x80, 0x80),
2949 _RYML_CHCONST(-0x58, 0xa8),
2951 proc.translate_esc_extending(payload, 3, 1);
// bytes 0xe2 0x80 0xa9: UTF-8 encoding of U+2029 (paragraph separator)
2953 else if(next ==
'P')
2956 const char payload[] = {
2957 _RYML_CHCONST(-0x1e, 0xe2),
2958 _RYML_CHCONST(-0x80, 0x80),
2959 _RYML_CHCONST(-0x57, 0xa9),
2961 proc.translate_esc_extending(payload, 3, 1);
2963 else if(next ==
'\0')
2969 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2971 _c4dbgfdq(
"backslash...sofar={}", _prs(proc.sofar()));
/** Filter a double-quoted scalar using the given processor.
 *
 * Walks the source while the processor has characters left and dispatches
 * on the current character: whitespace goes to _filter_ws_copy_trailing(),
 * a newline goes to _filter_nl_dquoted(), a carriage return is ignored
 * (per the trace message), and escapes go to _filter_dquoted_backslash().
 * NOTE(review): the dispatch conditions and the default branch are not
 * visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @return whatever proc.result() yields */
2975 template<
class EventHandler>
2976 template<
class FilterProcessor>
2977 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2979 _c4dbgfdq(
"before={}", _prs(proc.src));
2982 while(proc.has_more_chars())
2984 const char curr = proc.curr();
2985 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
2991 _c4dbgfdq(
"whitespace", curr);
2992 _filter_ws_copy_trailing(proc);
2997 _c4dbgfdq(
"newline", curr);
2998 _filter_nl_dquoted(proc);
3003 _c4dbgfdq(
"carriage return, ignore", curr);
3009 _filter_dquoted_backslash(proc);
3019 _c4dbgfdq(
"after={}", _prs(proc.sofar()));
3020 return proc.result();
// Double-quoted filter writing into a separate destination: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_dquoted() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3026 template<
class EventHandler>
3029 FilterProcessorSrcDst proc(scalar, dst);
3030 return _filter_dquoted(proc);
// Double-quoted filter working in place: builds a
// FilterProcessorInplaceMidExtending over (dst, cap) and runs _filter_dquoted()
// on it. This is the only in-place filter in this excerpt that uses the
// "MidExtending" processor rather than the "EndExtending" one.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3033 template<
class EventHandler>
3036 FilterProcessorInplaceMidExtending proc(dst, cap);
3037 return _filter_dquoted(proc);
/** Search @p s backwards for a newline whose following line has more
 * leading spaces than @p indentation.
 *
 * The scan starts at s.len - indentation - 1 and walks down to index 0;
 * the loop ends when the index wraps around to size_t(-1). For each
 * newline found, the number of leading spaces of the text after it is
 * measured (a remainder made only of spaces counts with its full length)
 * and compared against @p indentation.
 * NOTE(review): the return statements are not visible in this excerpt.
 *
 * @param s the text to inspect
 * @param indentation the reference indentation */
3046 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
// too short to hold a newline followed by more than `indentation` characters
3048 if(indentation + 1 > s.len)
3050 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
3052 if(s.str[i] ==
'\n')
3054 csubstr rem = s.sub(i + 1);
3055 size_t first = rem.first_not_of(
' ');
3056 first = (first !=
npos) ? first : rem.len;
3057 if(first > indentation)
/** Apply the chomping mode to the trailing part of a block scalar.
 *
 * On entry the remaining source must consist only of spaces, newlines and
 * carriage returns (asserted), and @p chomp must be one of CHOMP_CLIP,
 * CHOMP_KEEP or CHOMP_STRIP (asserted).
 *
 * First, _find_last_newline_and_larger_indentation() is used to locate
 * trailing lines indented beyond @p indentation; up to that point the
 * characters are processed one by one, skipping the indentation of each
 * line and copying the spaces in excess of it. Then the remaining
 * characters are handled per mode, as the trace messages describe: CLIP
 * copies a newline and adds a missing one, KEEP copies all remaining
 * newlines, STRIP strips the remaining characters.
 * NOTE(review): the mode dispatch and several statements are not visible
 * in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param chomp the chomping mode
 * @param indentation indentation of the block scalar */
3064 template<
class EventHandler>
3065 template<
class FilterProcessor>
3066 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
3068 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3069 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
3073 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3075 #define _c4dbgchomp(...)
3080 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3083 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// convert the offset within rem() into a position in src, just past the
// newline and the indentation that follows it
3084 last = proc.rpos + last + size_t(1) + indentation;
3085 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3087 while((proc.rpos < last) && proc.has_more_chars())
3089 const char curr = proc.curr();
3090 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3095 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3098 csubstr at_next_line = proc.rem();
3099 if(at_next_line.begins_with(
' '))
3101 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3103 size_t first_non_space = at_next_line.first_not_of(
' ');
3104 _c4dbgchomp(
"first_non_space={}", first_non_space);
3105 if(first_non_space ==
npos)
3107 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3108 first_non_space = at_next_line.len;
3110 if(first_non_space <= indentation)
3112 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3113 proc.skip(first_non_space);
3117 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3118 proc.skip(indentation);
// spaces beyond the indentation belong to the content and are kept
3120 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3121 proc.copy(first_non_space - indentation);
3139 bool had_one =
false;
3140 while(proc.has_more_chars())
3142 const char curr = proc.curr();
3143 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3148 _c4dbgchomp(
"copy newline!", curr);
3156 _c4dbgchomp(
"skip!", curr);
3163 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3170 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3171 while(proc.has_more_chars())
3173 const char curr = proc.curr();
3174 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3178 _c4dbgchomp(
"copy newline!", curr);
3183 _c4dbgchomp(
"skip!", curr);
3192 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Trace helper shared by the block-scalar filters: forwards to _c4dbgpf()
// with the processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
3204 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3206 #define _c4dbgfb(...)
/** Skip the indentation at the start of a block-scalar line.
 *
 * Counts the leading spaces of the remaining source. When there are fewer
 * than @p indentation the trace reports that only those are skipped;
 * otherwise exactly @p indentation characters are skipped. The section
 * guarded by RYML_NO_COVERAGE__TO_BE_DELETED covers the case where only
 * spaces remain up to the end.
 * NOTE(review): some conditions and skip calls are not visible in this
 * excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar */
3209 template<
class EventHandler>
3210 template<
class FilterProcessor>
3211 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3213 csubstr rem = proc.rem();
3216 size_t first = rem.first_not_of(
' ');
3219 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3220 if(first < indentation)
3222 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3227 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3228 proc.skip(indentation);
3231 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3234 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3238 if(first < indentation)
3240 _c4dbgfb(
"skip everything", first);
3241 proc.skip(proc.src.len - proc.rpos);
3245 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3246 proc.skip(indentation);
/** Measure the non-whitespace contents of a block scalar and deal with
 * the case where the scalar is entirely whitespace.
 *
 * The contents are the source with trailing spaces, newlines and carriage
 * returns trimmed. In the all-whitespace case (per the trace message),
 * CHOMP_KEEP with a non-empty source iterates over every character so the
 * newlines can be kept.
 * NOTE(review): the condition detecting the all-whitespace case and the
 * loop body are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param chomp the chomping mode
 * @return the length of the trimmed contents */
3254 template<
class EventHandler>
3255 template<
class FilterProcessor>
3256 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3258 csubstr contents = proc.src.trimr(
" \n\r");
3259 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3262 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3263 if(chomp == CHOMP_KEEP && proc.src.len)
3265 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3266 while(proc.has_more_chars())
3268 const char curr = proc.curr();
3280 return contents.len;
/** Extend the contents length up to where chomping starts.
 *
 * Looks for the first newline at or after @p contents_len. When one is
 * found the contents end at that newline; otherwise they extend to the end
 * of the source.
 *
 * @param proc the filter processor being driven
 * @param contents_len length of the trimmed contents; must be > 0 (asserted)
 * @return the extended contents length */
3283 template<
class EventHandler>
3284 template<
class FilterProcessor>
3285 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3287 _c4dbgfb(
"contents_len={}", contents_len);
3289 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3293 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3294 if(firstnewl !=
npos)
3296 contents_len = firstnewl;
3297 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3301 contents_len = proc.src.len;
3302 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3305 return contents_len;
// Trace helper for the literal block filter: forwards to _c4dbgpf() with the
// processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
3317 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3319 #define _c4dbgfbl(...)
/** Filter a literal block scalar using the given processor.
 *
 * Steps visible here:
 *  1. _handle_all_whitespace() measures the contents (with an early return
 *     of proc.result(); its condition is not visible in this excerpt);
 *  2. _extend_to_chomp() extends the contents up to the chomp region;
 *  3. the indentation of the first line is skipped, then the contents are
 *     walked and the indentation is skipped again after every newline;
 *  4. _filter_chomp() processes what remains.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar
 * @param chomp the chomping mode
 * @return whatever proc.result() yields */
3322 template<
class EventHandler>
3323 template<
class FilterProcessor>
3324 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3326 _c4dbgfbl(
"indentation={} before={}", indentation, _prs(proc.src));
3328 size_t contents_len = _handle_all_whitespace(proc, chomp);
3330 return proc.result();
3332 contents_len = _extend_to_chomp(proc, contents_len);
3334 _c4dbgfbl(
"to filter={}", _prs(proc.src.first(contents_len)));
3336 _filter_block_indentation(proc, indentation);
3339 while(proc.has_more_chars(contents_len))
3341 const char curr = proc.curr();
3342 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3347 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3349 _filter_block_indentation(proc, indentation);
3361 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3363 _filter_chomp(proc, chomp, indentation);
3365 _c4dbgfbl(
"final={}", _prs(proc.sofar()));
3367 return proc.result();
// Literal block filter writing into a separate destination: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_block_literal() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3372 template<
class EventHandler>
3375 FilterProcessorSrcDst proc(scalar, dst);
3376 return _filter_block_literal(proc, indentation, chomp);
// Literal block filter working in place: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_literal() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3379 template<
class EventHandler>
3382 FilterProcessorInplaceEndExtending proc(scalar, cap);
3383 return _filter_block_literal(proc, indentation, chomp);
// Trace helper for the folded block filter: forwards to _c4dbgpf() with the
// processor's read/write positions prepended to the message.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible in this excerpt; the second one expands to nothing.
3393 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3395 #define _c4dbgfbf(...)
/** Process the leading lines of a folded block scalar.
 *
 * Skips the indentation of the first line, then walks the contents: after
 * a newline the indentation of the next line is skipped; on a space, the
 * number of leading blanks is measured and the more-indented text is
 * handed to _filter_block_folded_indented_block(); on any other character
 * the loop stops (per the trace message).
 * NOTE(review): the dispatch conditions are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar
 * @param len length of the contents to process */
3399 template<
class EventHandler>
3400 template<
class FilterProcessor>
3401 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3403 _filter_block_indentation(proc, indentation);
3404 while(proc.has_more_chars(len))
3406 const char curr = proc.curr();
3407 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3411 _c4dbgfbf(
"newline.", curr);
3413 _filter_block_indentation(proc, indentation);
3421 size_t first = proc.rem().first_not_of(
" \t");
3422 _c4dbgfbf(
"space. first={}", first);
3424 first = proc.rem().len;
3425 _c4dbgfbf(
"... indentation increased to {}", first);
3426 _filter_block_folded_indented_block(proc, indentation, len, first);
3430 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
/** Fold one newline of a run of newlines in a folded block scalar.
 *
 * Per the trace messages: the first newline of a run is turned into a
 * space and the write position is remembered; on the second newline the
 * remembered position (asserted to hold a space and to be the last written
 * character) is overwritten with a newline; subsequent newlines are copied.
 * NOTE(review): the conditions on @p num_newl and the write calls of the
 * first and subsequent cases are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param num_newl ordinal of the newline within the current run
 * @param wpos_at_first_newl write position recorded at the first newline
 * @return the (possibly updated) write position of the first newline */
3436 template<
class EventHandler>
3437 template<
class FilterProcessor>
3438 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3443 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3444 wpos_at_first_newl = proc.wpos;
3449 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3450 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3451 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3452 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
// undo the fold: the space written for the first newline becomes a newline
3454 proc.set_at(wpos_at_first_newl,
'\n');
3455 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3458 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3462 return wpos_at_first_newl;
/** Process a run of newlines inside a folded block scalar.
 *
 * The current character must be '\n' (asserted). While contents remain,
 * each newline is folded through _filter_block_folded_newlines_compress()
 * and the indentation of the next line is skipped. When a more-indented
 * line follows, the space written for the first newline is turned back
 * into a newline (or a missing newline is added, per the trace), the
 * indented text is handed to _filter_block_folded_indented_block(), and
 * the remembered write position is reset. Any other character ends the run.
 * NOTE(review): the dispatch conditions are not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar
 * @param len length of the contents to process */
3465 template<
class EventHandler>
3466 template<
class FilterProcessor>
3467 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3469 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
3470 size_t num_newl = 0;
3471 size_t wpos_at_first_newl =
npos;
3472 while(proc.has_more_chars(len))
3474 const char curr = proc.curr();
3475 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3480 _c4dbgfbf(
"newline. sofar={}", num_newl);
3516 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3517 _filter_block_indentation(proc, indentation);
3523 size_t first = proc.rem().first_not_of(
" \t");
3524 _c4dbgfbf(
"space. first={}", first);
3526 first = proc.rem().len;
3527 _c4dbgfbf(
"... indentation increased to {}", first);
3530 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3531 proc.set_at(wpos_at_first_newl,
'\n');
3535 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3538 _filter_block_folded_indented_block(proc, indentation, len, first);
// the run of newlines is over: forget the recorded position
3540 wpos_at_first_newl =
npos;
3547 _c4dbgfbf(
"not space, not newline. stop.", 0);
/** Process a more-indented section of a folded block scalar.
 *
 * On entry the remaining source must start with exactly
 * @p curr_indentation blanks, or consist only of blanks (asserted). Those
 * leading blanks are copied, then the contents are walked; after each
 * newline the block indentation is skipped and the leading spaces of the
 * next line are inspected to decide whether the indented section
 * continues or is done (per the trace messages).
 * NOTE(review): the dispatch conditions and copy calls inside the loop are
 * not visible in this excerpt.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar
 * @param len length of the contents to process
 * @param curr_indentation number of extra leading blanks on the current line */
3554 template<
class EventHandler>
3555 template<
class FilterProcessor>
3556 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3558 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
3559 if(curr_indentation)
3560 proc.copy(curr_indentation);
3561 while(proc.has_more_chars(len))
3563 const char curr = proc.curr();
3564 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3570 _filter_block_indentation(proc, indentation);
3571 csubstr rem = proc.rem();
3572 const size_t first = rem.first_not_of(
' ');
3573 _c4dbgfbf(
"newline. firstns={}", first);
3576 const char c = rem[first];
3577 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3578 if(c ==
'\n' || c ==
'\r')
3584 _c4dbgfbf(
"done with indented block", first);
3588 else if(first !=
npos)
3591 _c4dbgfbf(
"copy all {} spaces", first);
/** Filter a folded block scalar using the given processor.
 *
 * Steps visible here:
 *  1. _handle_all_whitespace() measures the contents (with an early return
 *     of proc.result(); its condition is not visible in this excerpt);
 *  2. _extend_to_chomp() extends the contents up to the chomp region;
 *  3. _filter_block_folded_newlines_leading() handles the leading lines;
 *  4. the contents are walked, handing each newline to
 *     _filter_block_folded_newlines();
 *  5. _filter_chomp() processes what remains.
 *
 * @param proc the filter processor being driven
 * @param indentation indentation of the block scalar
 * @param chomp the chomping mode
 * @return whatever proc.result() yields */
3609 template<
class EventHandler>
3610 template<
class FilterProcessor>
3611 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3613 _c4dbgfbf(
"indentation={} before={}", indentation, _prs(proc.src));
3615 size_t contents_len = _handle_all_whitespace(proc, chomp);
3617 return proc.result();
3619 contents_len = _extend_to_chomp(proc, contents_len);
3621 _c4dbgfbf(
"to filter={}", _prs(proc.src.first(contents_len)));
3623 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3626 while(proc.has_more_chars(contents_len))
3628 const char curr = proc.curr();
3629 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3634 _c4dbgfbf(
"found newline", curr);
3635 _filter_block_folded_newlines(proc, indentation, contents_len);
3647 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3649 _filter_chomp(proc, chomp, indentation);
// NOTE(review): the format string below has a single {} placeholder but two
// arguments are passed (the length and the formatted scalar) -- confirm
// which one is meant to be printed.
3651 _c4dbgfbf(
"final={}", proc.sofar().len, _prs(proc.sofar()));
3653 return proc.result();
// Folded block filter writing into a separate destination: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_block_folded() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3658 template<
class EventHandler>
3661 FilterProcessorSrcDst proc(scalar, dst);
3662 return _filter_block_folded(proc, indentation, chomp);
// Folded block filter working in place: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_folded() on it.
// NOTE(review): the signature line of this function is not visible in this excerpt.
3665 template<
class EventHandler>
3668 FilterProcessorInplaceEndExtending proc(scalar, cap);
3669 return _filter_block_folded(proc, indentation, chomp);
/** Filter a plain scalar in place, using the scalar's own length as the
 * capacity. The result is asserted to be valid.
 * NOTE(review): the return statement is not visible in this excerpt.
 *
 * @param s the scalar to filter
 * @param indentation indentation of the scalar */
3677 template<
class EventHandler>
3678 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3680 _c4dbgpf(
"filtering plain scalar: s={}", _prs(s));
3681 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3682 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3683 _c4dbgpf(
"filtering plain scalar: success! s={}", _prs(r.get()));
/** Filter a single-quoted scalar in place, using the scalar's own length
 * as the capacity. The result is asserted to be valid.
 * NOTE(review): the return statement is not visible in this excerpt.
 *
 * @param s the scalar to filter */
3689 template<
class EventHandler>
3690 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3692 _c4dbgpf(
"filtering squo scalar: s={}", _prs(s));
3693 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3694 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3695 _c4dbgpf(
"filtering squo scalar: success! s={}", _prs(r.get()));
/** Filter a double-quoted scalar, falling back to the arena when the
 * result does not fit in place.
 *
 * The in-place filter is tried first with the scalar's own length as the
 * capacity. If its result is not valid, the required length is taken from
 * the result, a buffer of that size is obtained from _alloc_arena(), and
 * the scalar is filtered again from `s` into that buffer. The second
 * result is checked to be valid and to need no more than the length
 * computed by the first pass.
 * NOTE(review): the return statements are not visible in this excerpt;
 * `&s` is passed to _alloc_arena(), presumably so that `s` can be updated
 * if the allocation relocates it -- confirm against _alloc_arena().
 *
 * @param s the scalar to filter */
3702 template<
class EventHandler>
3703 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3705 _c4dbgpf(
"filtering dquo scalar: s={}", _prs(s));
3706 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3707 if(C4_LIKELY(r.valid()))
3709 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(r.get()));
3714 const size_t len = r.required_len();
3715 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3716 substr dst = _alloc_arena(len, &s);
3717 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3720 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3721 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3722 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3723 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
3724 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3725 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(rsd.get()));
/** Make room for one extra trailing newline on a scalar.
 *
 * When the scalar lies inside the parse buffer, it must not start at the
 * very beginning of that buffer (asserted); its bytes are moved one
 * position to the left and a newline is stored. Otherwise a buffer of
 * s.len + 1 is obtained from _alloc_arena() and the scalar is copied into it.
 * NOTE(review): the adjustment of `s` between the memmove and the newline
 * store, the newline store of the arena branch, and the return statements
 * are not visible in this excerpt.
 *
 * @param s the scalar needing one more character */
3735 template<
class EventHandler>
3736 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3738 if(s.is_sub(_buf()))
3740 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3741 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
// overlapping ranges: memmove, not memcpy
3743 memmove(s.str - 1, s.str, s.len);
3745 s.str[s.len] =
'\n';
3751 substr dst = _alloc_arena(s.len + 1, &s);
3753 memcpy(dst.str, s.str, s.len);
/** Filter a literal block scalar in place, growing it by one character
 * when needed.
 *
 * The in-place filter is tried with the scalar's own length as the
 * capacity. If the result is not valid, the required length is asserted to
 * be exactly s.len + 1 and _move_scalar_left_and_add_newline() supplies
 * the result.
 * NOTE(review): the declaration of `result`, the success branch and the
 * return statement are not visible in this excerpt.
 *
 * @param s the scalar to filter
 * @param indentation indentation of the block scalar
 * @param chomp the chomping mode */
3759 template<
class EventHandler>
3760 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3762 _c4dbgpf(
"filtering block literal scalar: s={}", _prs(s));
3763 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3765 if(C4_LIKELY(r.valid()))
3771 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3772 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3775 result = _move_scalar_left_and_add_newline(s);
3777 _c4dbgpf(
"filtering block literal scalar: success! s={}", _prs(result));
/** Filter a folded block scalar in place, growing it by one character
 * when needed.
 *
 * The in-place filter is tried with the scalar's own length as the
 * capacity. If the result is not valid, the required length is asserted to
 * be exactly s.len + 1 and _move_scalar_left_and_add_newline() supplies
 * the result.
 * NOTE(review): the declaration of `result`, the success branch and the
 * return statement are not visible in this excerpt.
 *
 * @param s the scalar to filter
 * @param indentation indentation of the block scalar
 * @param chomp the chomping mode */
3783 template<
class EventHandler>
3784 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3786 _c4dbgpf(
"filtering block folded scalar: s={}", _prs(s));
3787 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3789 if(C4_LIKELY(r.valid()))
3795 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3796 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3799 result = _move_scalar_left_and_add_newline(s);
3801 _c4dbgpf(
"filtering block folded scalar: success! s={}", _prs(result));
/** Filter a plain key scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_plain(); otherwise the event handler is told the
 * key scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar
 * @param indentation indentation of the scalar */
3808 template<
class EventHandler>
3809 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3813 if(m_options.scalar_filtering())
3815 return _filter_scalar_plain(sc.scalar, indentation);
3819 _c4dbgp(
"plain scalar left unfiltered");
3820 m_evt_handler->mark_key_scalar_unfiltered();
3825 _c4dbgp(
"plain scalar doesn't need filtering");
/** Filter a plain value scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_plain(); otherwise the event handler is told the
 * value scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar
 * @param indentation indentation of the scalar */
3830 template<
class EventHandler>
3831 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3835 if(m_options.scalar_filtering())
3837 return _filter_scalar_plain(sc.scalar, indentation);
3841 _c4dbgp(
"plain scalar left unfiltered");
3842 m_evt_handler->mark_val_scalar_unfiltered();
3847 _c4dbgp(
"plain scalar doesn't need filtering");
/** Filter a single-quoted key scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_squot(); otherwise the event handler is told the
 * key scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar */
3855 template<
class EventHandler>
3856 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3860 if(m_options.scalar_filtering())
3862 return _filter_scalar_squot(sc.scalar);
3866 _c4dbgp(
"squo key scalar left unfiltered");
3867 m_evt_handler->mark_key_scalar_unfiltered();
3872 _c4dbgp(
"squo key scalar doesn't need filtering");
/** Filter a single-quoted value scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_squot(); otherwise the event handler is told the
 * value scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar */
3877 template<
class EventHandler>
3878 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3882 if(m_options.scalar_filtering())
3884 return _filter_scalar_squot(sc.scalar);
3888 _c4dbgp(
"squo val scalar left unfiltered");
3889 m_evt_handler->mark_val_scalar_unfiltered();
3894 _c4dbgp(
"squo val scalar doesn't need filtering");
/** Filter a double-quoted key scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_dquot(); otherwise the event handler is told the
 * key scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar */
3902 template<
class EventHandler>
3903 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3907 if(m_options.scalar_filtering())
3909 return _filter_scalar_dquot(sc.scalar);
3913 _c4dbgp(
"dquo scalar left unfiltered");
3914 m_evt_handler->mark_key_scalar_unfiltered();
3919 _c4dbgp(
"dquo scalar doesn't need filtering");
/** Filter a double-quoted value scalar if filtering is needed and enabled.
 *
 * When scalar filtering is enabled in the options the scalar is filtered
 * through _filter_scalar_dquot(); otherwise the event handler is told the
 * value scalar was left unfiltered. A third path (per the trace message)
 * covers scalars that do not need filtering.
 * NOTE(review): the test deciding whether filtering is needed and the
 * remaining return statements are not visible in this excerpt.
 *
 * @param sc the scanned scalar */
3924 template<
class EventHandler>
3925 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3929 if(m_options.scalar_filtering())
3931 return _filter_scalar_dquot(sc.scalar);
3935 _c4dbgp(
"dquo scalar left unfiltered");
3936 m_evt_handler->mark_val_scalar_unfiltered();
3941 _c4dbgp(
"dquo scalar doesn't need filtering");
/** Filter a literal block key scalar if filtering is enabled.
 *
 * When scalar filtering is enabled in the options the block is filtered
 * through _filter_scalar_literal() using the indentation and chomp mode
 * recorded in @p sb; otherwise the event handler is told the key scalar
 * was left unfiltered.
 * NOTE(review): the return statement of the unfiltered path is not visible
 * in this excerpt.
 *
 * @param sb the scanned block scalar */
3949 template<
class EventHandler>
3950 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3952 if(m_options.scalar_filtering())
3954 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3958 _c4dbgp(
"literal scalar left unfiltered");
3959 m_evt_handler->mark_key_scalar_unfiltered();
/** Filter a literal block value scalar if filtering is enabled.
 *
 * When scalar filtering is enabled in the options the block is filtered
 * through _filter_scalar_literal() using the indentation and chomp mode
 * recorded in @p sb; otherwise the event handler is told the value scalar
 * was left unfiltered.
 * NOTE(review): the return statement of the unfiltered path is not visible
 * in this excerpt.
 *
 * @param sb the scanned block scalar */
3964 template<
class EventHandler>
3965 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3967 if(m_options.scalar_filtering())
3969 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3973 _c4dbgp(
"literal scalar left unfiltered");
3974 m_evt_handler->mark_val_scalar_unfiltered();
/** Filter a folded block key scalar if filtering is enabled.
 *
 * When scalar filtering is enabled in the options the block is filtered
 * through _filter_scalar_folded() using the indentation and chomp mode
 * recorded in @p sb; otherwise the event handler is told the key scalar
 * was left unfiltered.
 * NOTE(review): the return statement of the unfiltered path is not visible
 * in this excerpt.
 *
 * @param sb the scanned block scalar */
3982 template<
class EventHandler>
3983 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3985 if(m_options.scalar_filtering())
3987 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3991 _c4dbgp(
"folded scalar left unfiltered");
3992 m_evt_handler->mark_key_scalar_unfiltered();
/** Filter a folded block value scalar if filtering is enabled.
 *
 * When scalar filtering is enabled in the options the block is filtered
 * through _filter_scalar_folded() using the indentation and chomp mode
 * recorded in @p sb; otherwise the event handler is told the value scalar
 * was left unfiltered.
 * NOTE(review): the return statement of the unfiltered path is not visible
 * in this excerpt.
 *
 * @param sb the scanned block scalar */
3997 template<
class EventHandler>
3998 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
4000 if(m_options.scalar_filtering())
4002 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4006 _c4dbgp(
"folded scalar left unfiltered");
4007 m_evt_handler->mark_val_scalar_unfiltered();
/** Add parser flags to the current state.
 *
 * The visible lines build textual forms of the flags being added, of the
 * current flags and of their union, and trace them together with the
 * state level.
 * NOTE(review): the statement that actually updates the flags, and any
 * conditional compilation around the tracing, are not visible in this excerpt.
 *
 * @param on the flags to add */
4019 template<
class EventHandler>
4020 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
4022 ParserState *s = m_evt_handler->m_curr;
// scratch buffers for the textual form of each flag set
4023 char buf1_[64], buf2_[64], buf3_[64];
4024 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4025 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4026 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4027 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Adds one set of parser flags (`on`) and removes another (`off`) on the
// current state. The visible lines build textual forms of both sets, of the
// current flags and of the resulting flags -- (flags | on) with the `off`
// bits cleared -- and trace them together with the state level.
// NOTE(review): the signature line and the statement that actually updates
// the flags are not visible in this excerpt.
4031 template<
class EventHandler>
4034 ParserState *s = m_evt_handler->m_curr;
4035 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4036 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4037 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4038 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4039 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4040 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
/** Debug-tracing flavour of removing parser flags from the current state.
 *
 * The trace renders three flag sets as text: the flags being removed
 * (`off`), the state's current flags ("before"), and `s->flags&(~off)`
 * ("after"), together with the state level.
 *
 * @param off the flags to remove */
4046 template<
class EventHandler>
4047 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
4049 ParserState *s = m_evt_handler->m_curr;
// one scratch buffer per flag set rendered in the trace
4050 char buf1_[64], buf2_[64], buf3_[64];
4051 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4052 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4053 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4054 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
/** Renders the parser flags set in `flags` as text inside `buf`.
 *
 * The local `_prflag(fl)` macro tests `(flags & fl) == (fl)`, i.e. all bits
 * of `fl` must be set, and then writes the stringized flag name (`#fl`).
 * The name is copied only when it fits (`pos + fltxt.len <= buf.len`), so
 * the buffer is never overrun by the copy itself. After all flags are
 * handled, `pos <= buf.len` is checked and the written prefix
 * `buf.first(pos)` is returned.
 *
 * @param buf destination buffer for the text
 * @param flags the flag set to render
 * @return the portion of `buf` that was written */
4058 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
4061 bool gotone =
false;
4063 #define _prflag(fl) \
4064 if((flags & fl) == (fl)) \
4068 if(pos + 1 < buf.len) \
4072 csubstr fltxt = #fl; \
4073 if(pos + fltxt.len <= buf.len) \
4074 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4104 _RYML_CHECK_BASIC(pos <= buf.len);
4106 return buf.first(pos);
/** Returns the part of the source buffer that starts at `loc.offset` and
 * runs to the end of the buffer. The offset is asserted to be strictly
 * inside the buffer (`loc.offset < _buf().len`). */
4116 template<
class EventHandler>
4119 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4120 return _buf().sub(loc.offset);
/** Translates a pointer into the source buffer (`val`) into a location
 * (name, byte offset, line, column).
 *
 * - A null `val` produces a location with only the source name set and
 *   zeros for the remaining fields.
 * - Location tracking must be enabled (`m_options.locations()`), and the
 *   newline offset table must be populated and non-empty.
 * - `val` must point inside `[src.begin(), src.end()]`.
 *
 * The line is the index of the selected entry in `m_newline_offsets`; the
 * column is the distance from the preceding newline, minus one, and is
 * only computed this way when the entry is not the first one.
 *
 * NOTE(review): both a linear scan and a halving search over the newline
 * table are present; presumably one is chosen based on the table size --
 * confirm against the full function. */
4123 template<
class EventHandler>
4126 if(C4_UNLIKELY(val ==
nullptr))
4127 return {m_evt_handler->m_curr->pos.
name, 0, 0, 0};
// hard check (not only an assertion): locations must have been requested
4128 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4131 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4132 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4133 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4134 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4136 csubstr src = _buf();
// val must lie within the source buffer (one-past-the-end is accepted)
4137 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4138 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4140 using lineptr_type =
size_t const* C4_RESTRICT;
4141 lineptr_type lineptr =
nullptr;
// byte offset of val from the start of the source
4142 size_t offset = (size_t)(val - src.begin());
// linear scan over the newline offsets
4146 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
// halving search over the newline offsets
4161 size_t count = m_newline_offsets_size;
4162 lineptr = m_newline_offsets;
4165 size_t step = count >> 1;
4166 lineptr_type it = lineptr + step;
// the selected entry must be inside the table and lie after the offset
4178 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4179 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4180 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4182 loc.name = m_evt_handler->m_curr->pos.name;
4183 loc.offset = offset;
// line number == index of the entry in the newline table
4184 loc.line = (size_t)(lineptr - m_newline_offsets);
// not the first line: column is measured from the previous newline
4185 if(lineptr > m_newline_offsets)
4186 loc.col = (offset - *(lineptr-1) - 1u);
/** Builds the table of newline offsets for the current source buffer.
 *
 * The table gets one entry per '\n' in the source (the byte offset of that
 * newline), followed by one final entry holding `src.len`. Hence the table
 * size is always `1 + (number of newlines)`, which is asserted at the end. */
4192 template<
class EventHandler>
4193 void ParseEngine<EventHandler>::_prepare_locations()
4195 csubstr src = _buf();
// +1 for the trailing entry that marks the end of the source
4196 size_t numnewlines = 1u + src.count(
'\n');
4197 _resize_locations(numnewlines);
4198 m_newline_offsets_size = 0;
4199 for(
size_t i = 0; i < src.len; i++)
4200 if(src.str[i] ==
'\n')
4201 m_newline_offsets[m_newline_offsets_size++] = i;
// final entry: end of the source buffer
4202 m_newline_offsets[m_newline_offsets_size++] = src.len;
4203 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
/** Ensures the newline offset table can hold `numnewlines` entries.
 *
 * The requested size is raised to a minimum of 16. When it exceeds the
 * current capacity, any existing array is released and a new one is
 * allocated through the handler's callbacks; the previous contents are not
 * copied over.
 *
 * @param numnewlines the number of entries required */
4206 template<
class EventHandler>
4207 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
// never allocate fewer than 16 entries
4209 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4210 if(numnewlines > m_newline_offsets_capacity)
4212 if(m_newline_offsets)
4213 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4214 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4215 m_newline_offsets_capacity = numnewlines;
/** Tells whether the newline offset table is unpopulated.
 * @return true when `m_newline_offsets_size` is zero */
4219 template<
class EventHandler>
4220 bool ParseEngine<EventHandler>::_locations_dirty()
const
4222 return !m_newline_offsets_size;
/** Deals with leading whitespace and trailing comments in flow context.
 *
 * Acts only when the remainder of the current line is not empty. A leading
 * space or tab is detected first; then, if the remainder begins with '#',
 * the rest of the line is a comment and is consumed in full. */
4230 template<
class EventHandler>
4231 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4234 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4236 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4238 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
// a comment runs to the end of the line: consume everything that remains
4242 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4244 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4245 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Validates indentation at the start of a line inside a flow container.
 *
 * Must be called at the beginning of a line (asserted). When the state
 * reports `indentation_lt()`, the text after the line's indentation is
 * inspected: if anything other than spaces/tabs is found there, the
 * indentation is consumed and a "bad indentation" parse error is raised. */
4251 template<
class EventHandler>
4252 void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4254 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4256 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
// what follows the indentation on this line
4258 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4259 _c4dbgpf(
"flow: after indentation={}", _prs(trimmed));
// whitespace-only lines are tolerated; real content is an error
4260 if(trimmed.len && trimmed.triml(
" \t").len)
4262 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4263 _c4err(
"bad indentation");
/** Skips leading spaces and tabs of the current line in block context.
 *
 * Records the source offset before skipping (`mark`). If the remainder of
 * the line contains a character other than space/tab, the line advances up
 * to that character; otherwise the remainder is whitespace only and is
 * consumed in full. */
4268 template<
class EventHandler>
4269 size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
// offset in the source before any whitespace is skipped
4271 const size_t mark = m_evt_handler->m_curr->pos.offset;
4272 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4273 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4274 if(firstpos !=
npos)
4276 _c4dbgp(
"block: non empty line");
4277 _line_progressed(firstpos);
4282 _c4dbgp(
"block: rest of line is whitespace");
4283 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Rejects tab characters in a span of leading whitespace.
 *
 * `start_mark` and `end_mark` are offsets into the source buffer, with
 * `end_mark >= start_mark` (asserted). When the span is not empty and
 * contains a '\t', a parse error is raised.
 *
 * @param start_mark source offset where the leading span begins
 * @param end_mark source offset where the leading span ends */
4288 template<
class EventHandler>
4289 void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4291 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4292 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4293 if(end_mark != start_mark)
4295 csubstr leading = _buf().range(start_mark, end_mark);
4296 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading,
true));
4297 if(leading.find(
'\t') !=
npos)
4298 _c4err(
"invalid tab character to the left");
/** Registers a colon on the current line.
 *
 * `m_prev_colon` holds the line of the last registered colon (or `npos`).
 * If it equals the current line, a "two colons on same line" parse error
 * is raised; otherwise the current line is stored in `m_prev_colon`. */
4306 template<
class EventHandler>
4307 void ParseEngine<EventHandler>::_handle_colon()
4309 size_t curr = m_evt_handler->m_curr->pos.line;
4310 if(C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon))
4312 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4313 _c4err(
"two colons on same line");
4315 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
4316 m_prev_colon = curr;
/** Stores a pending annotation that carries only its text.
 *
 * The entry at index `dst->num_entries` receives `str`; its indentation,
 * line and original text are value-initialized. The destination must have
 * room for the entry (asserted).
 *
 * @param dst the annotation set to write into
 * @param str the annotation text */
4319 template<
class EventHandler>
4320 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4322 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, _prs(str));
4323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4324 dst->annotations[dst->num_entries].str = str;
4325 dst->annotations[dst->num_entries].indentation = {};
4326 dst->annotations[dst->num_entries].line = {};
4327 dst->annotations[dst->num_entries].orig = {};
/** Stores a pending annotation together with its indentation and line.
 *
 * The entry at index `dst->num_entries` receives `str`, `indentation` and
 * `line`; its original text is value-initialized. The destination must have
 * room for the entry (asserted). The case where an entry already exists and
 * the first entry sits on the same line as the new one is detected
 * separately.
 *
 * @param dst the annotation set to write into
 * @param str the annotation text
 * @param indentation the indentation at which the annotation was found
 * @param line the line on which the annotation was found */
4331 template<
class EventHandler>
4332 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4334 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4335 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// a previous annotation of this kind already exists on the same line
4336 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4340 dst->annotations[dst->num_entries].str = str;
4341 dst->annotations[dst->num_entries].indentation = indentation;
4342 dst->annotations[dst->num_entries].line = line;
4343 dst->annotations[dst->num_entries].orig = {};
/** Stores a pending annotation together with its indentation, line and
 * original text.
 *
 * The entry at index `dst->num_entries` receives `str`, `indentation`,
 * `line` and `orig`. The destination must have room for the entry
 * (asserted). The case where an entry already exists and the first entry
 * sits on the same line as the new one is detected separately.
 *
 * @param dst the annotation set to write into
 * @param str the annotation text
 * @param indentation the indentation at which the annotation was found
 * @param line the line on which the annotation was found
 * @param orig the original text from which `str` was obtained */
4347 template<
class EventHandler>
4348 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line, csubstr orig)
4350 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4351 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// a previous annotation of this kind already exists on the same line
4352 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4356 dst->annotations[dst->num_entries].str = str;
4357 dst->annotations[dst->num_entries].indentation = indentation;
4358 dst->annotations[dst->num_entries].line = line;
4359 dst->annotations[dst->num_entries].orig = orig;
/** Tells whether more than one tag or more than one anchor is pending.
 * @return true when either pending set holds at least two entries */
4363 template<
class EventHandler>
4364 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4366 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
/** Consumes pending annotations when an unexpected flow token is met while
 * a key is being read.
 *
 * Nothing is emitted when there are neither pending tags nor pending
 * anchors. Otherwise a single pending tag and/or a single pending anchor
 * is attached to the key and cleared (more than one anchor is a parse
 * error), and then an empty plain key scalar and an empty plain val scalar
 * are emitted to the handler. */
4369 template<
class EventHandler>
4370 bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4372 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4374 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4375 if(m_pending_tags.num_entries)
4377 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4378 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4380 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4381 _clear_annotations(&m_pending_tags);
4388 if(m_pending_anchors.num_entries)
// report the number of pending anchors (not tags) in the anchors branch
4390 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_anchors.num_entries);
4391 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4393 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4394 _clear_annotations(&m_pending_anchors);
4398 _c4err(
"too many anchors");
// the annotated node has no content: emit an empty key and an empty val
4401 m_evt_handler->set_key_scalar_plain_empty();
4402 m_evt_handler->set_val_scalar_plain_empty();
/** Attaches pending annotations to a block key scalar about to be emitted.
 *
 * A single pending tag is set as the key tag and cleared. A single pending
 * anchor is set as the key anchor and cleared; more than one pending anchor
 * is a parse error ("too many anchors"). */
4406 template<
class EventHandler>
4407 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4409 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4410 if(m_pending_tags.num_entries)
4412 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4413 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4415 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4416 _clear_annotations(&m_pending_tags);
4423 if(m_pending_anchors.num_entries)
4425 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4426 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4428 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4429 _clear_annotations(&m_pending_anchors);
4433 _c4err(
"too many anchors");
/** Attaches pending annotations to a block val scalar about to be emitted.
 *
 * A single pending tag is set as the val tag and cleared. A single pending
 * anchor is set as the val anchor and cleared; more than one pending anchor
 * is a parse error ("too many anchors"). */
4438 template<
class EventHandler>
4439 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4441 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4442 if(m_pending_tags.num_entries)
4444 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4445 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4447 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4448 _clear_annotations(&m_pending_tags);
4455 if(m_pending_anchors.num_entries)
4457 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4458 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4460 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4461 _clear_annotations(&m_pending_anchors);
4465 _c4err(
"too many anchors");
/** Decides which pending annotations belong to a block map that is about
 * to be started, as opposed to its first key.
 *
 * Tags and anchors are treated alike:
 * - with two pending entries, entry 0 is applied to the map (as a val
 *   annotation);
 * - with one pending entry, it is applied to the map and cleared only when
 *   it was found on a line before `current_line`.
 *
 * @param current_line the line compared against the annotation's line */
4470 template<
class EventHandler>
4471 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4473 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4474 if(m_pending_tags.num_entries == 2)
4476 _c4dbgp(
"2 tags, setting entry 0");
4477 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4479 else if(m_pending_tags.num_entries == 1)
4481 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
// a tag from an earlier line annotates the map itself
4482 if(m_pending_tags.annotations[0].line < current_line)
4484 _c4dbgp(
"...tag is for the map. setting it.");
4485 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4486 _clear_annotations(&m_pending_tags);
4490 if(m_pending_anchors.num_entries == 2)
4492 _c4dbgp(
"2 anchors, setting entry 0");
4493 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4495 else if(m_pending_anchors.num_entries == 1)
4497 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
// an anchor from an earlier line annotates the map itself
4498 if(m_pending_anchors.annotations[0].line < current_line)
4500 _c4dbgp(
"...anchor is for the map. setting it.");
4501 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4502 _clear_annotations(&m_pending_anchors);
/** Decides which pending annotations belong to a block map that is about
 * to be started in key position.
 *
 * Tags and anchors are treated alike, by number of pending entries:
 * - one entry: it is applied to the map (as a key annotation) and cleared
 *   only when its line differs from the current line;
 * - two entries: entry 0 is applied to the map (as a key annotation). */
4507 template<
class EventHandler>
4508 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4510 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4511 switch(m_pending_tags.num_entries)
// three arguments follow, so the format needs three placeholders
4514 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline={}", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4515 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4517 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4518 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4519 _clear_annotations(&m_pending_tags);
4523 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4524 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4527 switch(m_pending_anchors.num_entries)
// three arguments follow, so the format needs three placeholders
4530 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline={}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4531 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4533 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4534 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4535 _clear_annotations(&m_pending_anchors);
4539 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4540 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** Applies the remaining pending annotations to the first key of a block
 * map that was just started, and sets the indentation of the map.
 *
 * At most two tags and two anchors may be pending (asserted). When any is
 * pending, the indentation is re-derived from the annotations through
 * `_select_indentation_from_annotations()`. Then, for tags and anchors
 * alike: with one pending entry, entry 0 goes to the key; with two, entry 1
 * goes to the key. The pending set is cleared in both cases. Finally the
 * resulting indentation is set on the state.
 *
 * @param key_indentation the indentation of the key
 * @param key_line the line of the key */
4545 template<
class EventHandler>
4546 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4548 _c4dbgp(
"annotations_after_start_mapblck");
4549 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4550 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4551 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// annotations may change which indentation the map is anchored to
4553 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4554 switch(m_pending_tags.num_entries)
4557 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4558 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4559 _clear_annotations(&m_pending_tags);
4562 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
// with two tags, the second one is the key's
4563 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4564 _clear_annotations(&m_pending_tags);
4567 switch(m_pending_anchors.num_entries)
// a single anchor is traced: one placeholder for one argument
4570 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchor: {}", m_pending_anchors.annotations[0].str);
4571 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4572 _clear_annotations(&m_pending_anchors);
4575 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
// with two anchors, the second one is the key's
4576 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4577 _clear_annotations(&m_pending_anchors);
4581 _set_indentation(key_indentation);
/** Picks the indentation to use for a node that has pending annotations.
 *
 * Requires at least one pending tag or anchor (asserted). A reference
 * annotation is chosen, starting from the first anchor (or the first tag
 * when no anchor is pending); all pending anchors and tags are then
 * compared against it by line (`ann.line > curr->line`) and, failing that,
 * by indentation (`ann.indentation < curr->indentation`).
 * NOTE(review): presumably the reference is replaced whenever one of these
 * comparisons holds -- confirm against the full loop bodies.
 *
 * @param val_indentation indentation of the annotated value
 * @param val_line line of the annotated value
 * @return `val_indentation` when the reference annotation lies on a line
 *         before `val_line`; otherwise the annotation's indentation */
4584 template<
class EventHandler>
4585 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4587 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
// start from the first anchor if there is one, else from the first tag
4589 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4590 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4592 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4593 if(ann.line > curr->line)
4595 else if(ann.indentation < curr->indentation)
4598 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4600 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4601 if(ann.line > curr->line)
4603 else if(ann.indentation < curr->indentation)
4606 return curr->line < val_line ? val_indentation : curr->indentation;
/** Emits an alias in key position.
 *
 * The alias is forwarded to the handler only when no anchors and no tags
 * are pending; otherwise a parse error is raised, since an alias cannot
 * carry annotations.
 *
 * @param alias the alias text */
4609 template<
class EventHandler>
4610 void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4612 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4613 m_evt_handler->set_key_ref(alias);
4615 _c4err(
"aliases cannot have anchors or tags");
/** Emits an alias in val position.
 *
 * The alias is forwarded to the handler only when no anchors and no tags
 * are pending; otherwise a parse error is raised, since an alias cannot
 * carry annotations.
 *
 * @param alias the alias text */
4618 template<
class EventHandler>
4619 void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4621 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4622 m_evt_handler->set_val_ref(alias);
4624 _c4err(
"aliases cannot have anchors or tags");
/** Resolves a tag against the tag directives of the current document,
 * using the handler's tag cache.
 *
 * Steps:
 * 1. Look the tag up in the cache for the current document; a hit returns
 *    the cached resolution.
 * 2. Otherwise resolve it through the handler's tag directives, writing
 *    into the unused part of the handler's arena. The resolver reports the
 *    required size in `bufsz`; a null result string means the buffer was
 *    too small (asserted equivalence).
 * 3. When the arena was too small, allocate `bufsz` in the arena and
 *    resolve again; when the resolution was written to the arena, the
 *    arena is advanced by `bufsz`.
 * 4. For handlers with `requires_strings_on_buffers`, a resolved tag that
 *    lives neither in the arena nor in the source buffer is copied to the
 *    arena.
 * 5. The result is added to the tag cache at the looked-up position.
 *
 * `_alloc_arena()` receives `&tag` in the calls where `tag` is still
 * needed afterwards.
 * NOTE(review): presumably so that `tag` is relocated if the arena moves --
 * confirm against `_alloc_arena()`.
 *
 * @param tag the tag as found in the source
 * @return the resolved tag */
4627 template<
class EventHandler>
4628 csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4630 _c4dbgpf(
"resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4632 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4635 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4636 return ret.resolved;
4638 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
// first attempt: resolve into whatever room is left in the arena
4640 substr buf = m_evt_handler->arena_rem();
4641 TagDirectives
const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4642 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4643 m_evt_handler->m_curr->pos,
4644 m_evt_handler->m_stack.m_callbacks);
4645 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
// a null string signals that the buffer was too small, and vice-versa
4646 _c4assert((bufsz > buf.len) == (!ttag.str));
4647 _c4assert(!!bufsz == (ttag.len == bufsz));
// arena() is the arena in use and arena_rem() the room left in it (slack)
4651 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena().len, m_evt_handler->arena_rem().len, ttag.len);
4653 buf = _alloc_arena(bufsz, &tag);
// second attempt, now with enough room
4656 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4657 m_evt_handler->m_curr->pos,
4658 m_evt_handler->m_stack.m_callbacks);
4661 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4665 _c4dbgp(
"tag required arena. update size");
// the resolution is already in place: only reserve the bytes it occupies
4668 (void)_alloc_arena(bufsz);
4670 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4671 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4673 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
// the resolved tag lives outside both buffers: copy it to the arena
4675 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4677 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4678 buf = _alloc_arena(ttag.len, &tag);
4680 memcpy(buf.str, ttag.str, ttag.len);
4682 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4685 C4_SUPPRESS_WARNING_MSVC_POP
4686 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
// remember the resolution for later lookups in this document
4689 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
/** Validates a `%YAML` directive and extracts its version.
 *
 * The directive must begin with "%YAML" (asserted). After the whitespace
 * that follows the name, the version is expected as digits, a '.', and
 * further digits (running to the first non-digit or to the end of the
 * directive). On success `*directive` is adjusted through
 * `_set_first_strict()` at the end of the version, and `*version` receives
 * the version text.
 *
 * @param directive [in,out] the directive text
 * @param version [out] the version text */
4693 template<
class EventHandler>
4694 bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT
version)
const
4696 _c4assert(directive->begins_with(
"%YAML"));
// 5 == length of "%YAML"
4697 size_t version_start = directive->first_not_of(
" \t", 5);
4698 if(version_start !=
npos)
4700 csubstr digits =
"0123456789";
4701 size_t major_end = directive->first_not_of(digits, version_start);
// the major number must be followed by a dot
4702 if(major_end !=
npos && directive->str[major_end] ==
'.')
4704 size_t minor_end = directive->first_not_of(digits, major_end + 1);
// the minor number may run to the end of the directive
4705 if(minor_end ==
npos)
4706 minor_end = directive->len;
4707 _set_first_strict(*directive, minor_end);
4708 *
version = directive->range(version_start, minor_end);
4709 _c4dbgpf(
"%YAML: version={} full={}", *
version, _prs(*directive,
true));
/** Validates a `%TAG` directive and extracts its handle and prefix.
 *
 * The directive must begin with "%TAG" (asserted). After the whitespace
 * that follows the name, a handle starting with '!' is expected; the handle
 * ends at the next space/tab, and the prefix is the following run of
 * non-whitespace (to the next space/tab or to the end of the directive).
 * On success `*directive` is adjusted through `_set_first_strict()` at the
 * end of the prefix.
 *
 * @param directive [in,out] the directive text
 * @param handle [out] the tag handle
 * @param prefix [out] the tag prefix */
4716 template<
class EventHandler>
4717 bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix)
const
4719 _c4assert(directive->begins_with(
"%TAG"));
4720 csubstr whitespace =
" \t";
// 4 == length of "%TAG"
4721 size_t handle_start = directive->first_not_of(whitespace, 4);
4722 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
4724 size_t handle_end = directive->first_of(whitespace, handle_start);
4725 if(handle_end !=
npos)
4727 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4728 if(prefix_start !=
npos)
4730 size_t prefix_end = directive->first_of(whitespace, prefix_start);
// the prefix may run to the end of the directive
4731 if(prefix_end ==
npos)
4732 prefix_end = directive->len;
4733 _set_first_strict(*directive, prefix_end);
4734 *handle = directive->range(handle_start, handle_end);
4735 *prefix = directive->range(prefix_start, prefix_end);
4736 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive,
true));
/** Handles a directive line (`%TAG ...` or `%YAML ...`).
 *
 * `directive` must start at the beginning of the remainder of the current
 * line, which must begin with '%' (both asserted).
 *
 * - `%TAG`: validated with `_validate_directive_tag()`, then passed to the
 *   handler as (handle, prefix).
 * - `%YAML`: validated with `_validate_directive_yaml()`; a second `%YAML`
 *   directive is rejected; then the version is passed to the handler.
 *
 * Afterwards the line advances past the directive and the whitespace that
 * follows; what is left must be empty or a comment starting with '#'.
 * Failures record a message in `err` and jump to `directive_error`.
 *
 * @param directive the directive text */
4745 template<
class EventHandler>
4746 void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4748 _c4dbgpf(
"handle_directive: rem={}", _prs(directive,
true));
4749 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4750 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4751 const char *err =
nullptr;
// a directive name matches only when followed by end-of-text, space or tab
4754 auto isdirective = [](csubstr str, csubstr dir) {
4755 if(str.begins_with(dir))
4757 csubstr rest = str.sub(dir.len);
4758 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4762 if(isdirective(directive,
"%TAG"))
4766 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4768 err =
"invalid %TAG directive";
4769 goto directive_error;
4771 m_evt_handler->add_directive_tag(handle, prefix);
4773 else if(isdirective(directive,
"%YAML"))
4776 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version)))
4778 err =
"invalid %YAML directive";
4779 goto directive_error;
// only one %YAML directive is accepted
4781 if(C4_UNLIKELY(m_has_directives_yaml))
4783 err =
"multiple %YAML directives";
4784 goto directive_error;
4786 m_has_directives_yaml =
true;
4787 m_evt_handler->add_directive_yaml(
version);
4789 m_has_directives =
true;
// consume the directive and any whitespace after it
4790 rem = m_evt_handler->m_curr->line_contents.rem;
4791 pos = rem.first_not_of(
" \t", directive.len);
4792 pos = pos !=
npos ? pos : rem.len;
4793 _line_progressed(pos);
4795 _c4dbgpf(
"handle_directive: rest={}", _prs(rem));
// only a comment may follow a directive on the same line
4796 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
4798 err =
"invalid tokens after directive";
4799 goto directive_error;
4802 if(C4_UNLIKELY(err !=
nullptr))
/** Detects a byte order mark (or an implicit encoding) at the current
 * position and consumes the bytes that identify it.
 *
 * The checks are made in this order, each with an explicit BOM and a
 * heuristic based on an ASCII character padded with zero bytes:
 * - UTF32BE: `00 00 FE FF`, or three zero bytes followed by an ASCII char;
 * - UTF32LE: `FF FE 00 00`, or an ASCII char followed by three zero bytes;
 * - UTF16BE: `FE FF`, or a zero byte followed by an ASCII char;
 * - UTF16LE: `FF FE`, or an ASCII char followed by a zero byte;
 * - UTF8:    `EF BB BF`.
 * The 32-bit forms must be tested before the 16-bit ones, because
 * `FF FE 00 00` also begins with `FF FE`.
 *
 * The line advances 4 bytes in the UTF32 cases, 2 bytes in the UTF16 cases
 * and 3 bytes for the UTF8 BOM. */
4806 template<
class EventHandler>
4807 bool ParseEngine<EventHandler>::_handle_bom()
4809 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// the remainder without its first character, for the little-endian checks
4812 const csubstr rest = rem.sub(1);
// true for characters in the range 0x01..0x7f
4814 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
4815 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4817 _c4dbgp(
"byte order mark: UTF32BE");
4819 _line_progressed(4);
4823 else if(rem.begins_with(csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4825 _c4dbgp(
"byte order mark: UTF32LE");
4827 _line_progressed(4);
4831 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4833 _c4dbgp(
"byte order mark: UTF16BE");
4835 _line_progressed(2);
4839 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4841 _c4dbgp(
"byte order mark: UTF16LE");
4843 _line_progressed(2);
4847 else if(rem.begins_with(
"\xef\xbb\xbf"))
4849 _c4dbgp(
"byte order mark: UTF8");
4851 _line_progressed(3);
/** Registers a detected encoding, enforcing the rules for byte order marks.
 *
 * - While no encoding has been registered (`m_encoding == NOBOM`), the
 *   encoding is acceptable if it is UTF8 or if the current line starts at
 *   the very beginning of the source buffer; any other case is a parse
 *   error.
 * - Once an encoding has been registered, a different one is a parse error.
 *
 * @param enc the detected encoding */
4860 template<
class EventHandler>
4861 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4863 if(m_encoding ==
NOBOM)
// non-UTF8 marks are accepted only at the start of the source buffer
4865 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4868 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4870 else if(enc != m_encoding)
4872 _c4err(
"byte order mark can only be set once");
/** Parses the contents of a JSON-style flow sequence.
 *
 * On entry the state must be a flow sequence (RSEQ and RFLOW set, RKEY
 * unset) with exactly one of RVAL or RNXT set (asserted).
 *
 * - RVAL (a value is expected): a double-quoted scalar is scanned,
 *   optionally filtered and emitted; a nested flow sequence or flow map is
 *   begun; the sequence may end; anything else is tried as a plain scalar
 *   through `_scan_scalar_seq_json()`.
 * - RNXT (a separator or the end is expected): a next value is announced
 *   with `add_sibling()`, or the sequence ends.
 *
 * Parsing carries on over the following lines; reaching the end of the
 * file without closing the sequence is a parse error. */
4879 template<
class EventHandler>
4880 void ParseEngine<EventHandler>::_handle_seq_json()
4883 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4885 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4886 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4887 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4888 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of RVAL, RNXT
4889 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4891 _handle_flow_skip_whitespace();
4892 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4898 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4899 const char first = rem.str[0];
// trace label names this handler (seqjson)
4900 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4905 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4906 ScannedScalar sc = _scan_scalar_dquot();
4907 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4908 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4914 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4916 m_evt_handler->begin_seq_val_flow();
4918 _line_progressed(1);
4923 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4925 m_evt_handler->begin_map_val_flow();
4927 _line_progressed(1);
4928 goto seqjson_finish;
4932 _c4dbgp(
"seqjson[RVAL]: end!");
4935 _line_progressed(1);
4937 goto seqjson_finish;
// not a quoted scalar nor a container: try a plain scalar
4943 if(_scan_scalar_seq_json(&sc))
4945 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4946 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4947 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4959 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4960 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4961 const char first = rem.str[0];
// trace label names this handler (seqjson)
4962 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4967 _c4dbgp(
"seqjson[RNXT]: expect next val");
4969 m_evt_handler->add_sibling();
4970 _line_progressed(1);
4975 _c4dbgp(
"seqjson[RNXT]: end!");
4977 _line_progressed(1);
4978 goto seqjson_finish;
4986 _c4dbgt(
"seqjson: go again", 0);
4987 if(_finished_line())
4989 if(C4_LIKELY(!_finished_file()))
// the source ended while the sequence was still open
4997 _c4err(
"missing terminating ]");
5003 _c4dbgp(
"seqjson: finish");
/** Parses the contents of a JSON-style flow map.
 *
 * On entry the state must be a flow map (RMAP and RFLOW set, QMRK unset)
 * with exactly one of RKEY, RKCL, RVAL or RNXT set (asserted).
 *
 * - RKEY (a key is expected): a double-quoted scalar is scanned, optionally
 *   filtered and emitted as key; or the map ends.
 * - RKCL (the colon after a key is expected): the colon is consumed.
 * - RVAL (a value is expected): a double-quoted scalar is scanned,
 *   optionally filtered and emitted; a nested flow sequence or flow map is
 *   begun, taking the indentation of the parent state; anything else is
 *   tried as a plain scalar through `_scan_scalar_map_json()`.
 * - RNXT (a separator or the end is expected): ',' announces the next
 *   key/value pair with `add_sibling()`; '}' ends the map.
 *
 * Parsing carries on over the following lines; reaching the end of the
 * file without closing the map is a parse error. */
5009 template<
class EventHandler>
5010 void ParseEngine<EventHandler>::_handle_map_json()
5013 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5015 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5016 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5017 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5018 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of RKEY, RKCL, RVAL, RNXT
5019 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5021 _handle_flow_skip_whitespace();
5022 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5028 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5029 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5030 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5031 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5032 const char first = rem.str[0];
5033 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5038 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5039 ScannedScalar sc = _scan_scalar_dquot();
5040 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5041 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5047 _c4dbgp(
"mapjson[RKEY]: end!");
5049 _line_progressed(1);
5050 goto mapjson_finish;
5056 else if(has_any(
RVAL))
5058 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5059 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5060 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5061 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5062 const char first = rem.str[0];
5063 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5068 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5069 ScannedScalar sc = _scan_scalar_dquot();
5070 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5071 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5077 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5079 m_evt_handler->begin_seq_val_flow();
// the nested container takes the indentation of its parent state
5080 _set_indentation(m_evt_handler->m_parent->indref);
5082 _line_progressed(1);
5083 goto mapjson_finish;
5087 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5089 m_evt_handler->begin_map_val_flow();
// the nested container takes the indentation of its parent state
5090 _set_indentation(m_evt_handler->m_parent->indref);
5092 _line_progressed(1);
// not a quoted scalar nor a container: try a plain scalar
5099 if(_scan_scalar_map_json(&sc))
5101 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5102 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5103 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5114 else if(has_any(
RKCL))
5116 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5117 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5118 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5119 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5120 const char first = rem.str[0];
5121 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5124 _c4dbgp(
"mapjson[RKCL]: found the colon");
5126 _line_progressed(1);
5133 else if(has_any(
RNXT))
5135 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5136 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5137 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5138 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5139 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5140 if(rem.begins_with(
','))
5142 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5143 m_evt_handler->add_sibling();
5145 _line_progressed(1);
5147 else if(rem.begins_with(
'}'))
5149 _c4dbgp(
"mapjson[RNXT]: end!");
5151 _line_progressed(1);
5152 goto mapjson_finish;
5161 _c4dbgt(
"mapjson: go again", 0);
5162 if(_finished_line())
5164 if(C4_LIKELY(!_finished_file()))
// the source ended while the map was still open
5172 _c4err(
"missing terminating }");
5178 _c4dbgp(
"mapjson: finish");
// _handle_seq_imap: one parsing step for the state flagged RSEQIMAP (asserted
// below; judging by the name and the "seqimap" debug labels this is an implicit
// map nested in a flow sequence -- TODO confirm against the flag definition).
// On entry exactly one of RVAL / RNXT / QMRK / RKCL must be set. The code
// branches on that state and then on the first character of the remaining
// line text, reporting scalars, anchors, tags, references and nested flow
// containers to m_evt_handler.
5184 template<
class EventHandler>
5185 void ParseEngine<EventHandler>::_handle_seq_imap()
// NOTE(review): the label below reads "handle2_seq_imap" while the function is
// named _handle_seq_imap -- presumably a leftover name; confirm before relying
// on it when grepping logs.
5188 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Entry invariants: RSEQIMAP set, RKEY clear, exactly one of the four
// sub-states set, and at least three entries on the handler stack.
5190 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5191 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5192 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
5193 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5194 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
// Skip flow whitespace, then take a view of what is left of the current line.
5196 _handle_flow_skip_whitespace();
5197 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state: a value is expected (RNXT, QMRK and RKCL must be clear) ----
5203 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5204 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5205 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5206 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
// All dispatching in this state keys off the first remaining character.
5207 const char first = rem.str[0];
5208 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// Single-quoted value (per the debug label): scan, filter, run the annotation
// hook, hand the scalar to the handler, then finish this step.
5212 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5213 sc = _scan_scalar_squot();
5214 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5215 _handle_annotations_before_blck_val_scalar();
5216 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5218 goto seqimap_finish;
// Double-quoted value: same sequence with the dquot scan/filter/setter.
5220 else if(first ==
'"')
5222 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5223 sc = _scan_scalar_dquot();
5224 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5225 _handle_annotations_before_blck_val_scalar();
5226 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5228 goto seqimap_finish;
// Plain value: the scan itself is the condition, so this branch is taken only
// when _scan_scalar_plain_map_flow() reports a scalar in sc.
5231 else if(_scan_scalar_plain_map_flow(&sc))
5233 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5234 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5235 _handle_annotations_before_blck_val_scalar();
5236 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5238 goto seqimap_finish;
// '[': the value is a nested flow sequence. The new level takes its
// indentation reference from the parent state's indref.
5240 else if(first ==
'[')
5242 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5244 _handle_annotations_before_blck_val_scalar();
5245 m_evt_handler->begin_seq_val_flow();
5247 _set_indentation(m_evt_handler->m_parent->indref);
5248 _line_progressed(1);
5249 goto seqimap_finish;
// '{': the value is a nested flow map; same steps as for '['.
5251 else if(first ==
'{')
5253 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5255 _handle_annotations_before_blck_val_scalar();
5256 m_evt_handler->begin_map_val_flow();
5258 _set_indentation(m_evt_handler->m_parent->indref);
5259 _line_progressed(1);
5260 goto seqimap_finish;
// ',' or ']' where a value was expected: the value is reported as an empty
// plain scalar. No _line_progressed() call is visible here, so the token
// itself appears to be left for the caller -- TODO confirm.
5262 else if(first ==
',' || first ==
']')
5264 _c4dbgp(
"seqimap[RVAL]: finish without val.");
5265 _handle_annotations_before_blck_val_scalar();
5266 m_evt_handler->set_val_scalar_plain_empty();
5268 goto seqimap_finish;
// '*': scan a reference and handle it as the value.
5270 else if(first ==
'*')
5272 csubstr ref = _scan_ref_seq();
5273 _c4dbgpf(
"seqimap[RVAL]: ref! {}", _prs(ref));
5274 _handle_valref(ref);
// '&': scan an anchor and queue it in m_pending_anchors.
5277 else if(first ==
'&')
5279 csubstr anchor = _scan_anchor();
5280 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", _prs(anchor));
5281 _add_annotation(&m_pending_anchors, anchor);
// '!': scan a tag and queue it in m_pending_tags.
5283 else if(first ==
'!')
5285 csubstr tag = _scan_tag();
5286 _c4dbgpf(
"seqimap[RVAL]: tag! {}", _prs(tag));
5287 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: the only visible case is ',' or ']', which ends this step ----
5294 else if(has_any(
RNXT))
5296 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5297 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5298 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5299 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5300 const char first = rem.str[0];
5301 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5302 if(first ==
',' || first ==
']')
5306 _c4dbgp(
"seqimap: done");
5308 goto seqimap_finish;
// ---- QMRK state: what follows is set as a KEY (set_key_* / begin_*_key_flow) ----
5315 else if(has_any(
QMRK))
5317 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5321 const char first = rem.str[0];
5322 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// Single-quoted key (per the debug label). Unlike the RVAL branches, no
// annotation hook call is visible before the key setters in this state.
5326 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5327 sc = _scan_scalar_squot();
5328 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5329 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// Double-quoted key.
5333 else if(first ==
'"')
5335 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5336 sc = _scan_scalar_dquot();
5337 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5338 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// Plain key: taken only when the scan reports a scalar.
5343 else if(_scan_scalar_plain_map_flow(&sc))
5345 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5346 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5347 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '[': the key itself is a flow sequence.
5351 else if(first ==
'[')
5353 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5355 m_evt_handler->begin_seq_key_flow();
5357 _set_indentation(m_evt_handler->m_parent->indref);
5358 _line_progressed(1);
5359 goto seqimap_finish;
// '{': the key itself is a flow map.
5361 else if(first ==
'{')
5363 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5365 m_evt_handler->begin_map_key_flow();
5367 _set_indentation(m_evt_handler->m_parent->indref);
5368 _line_progressed(1);
5369 goto seqimap_finish;
// ',' or ']' with no key read: both key and value are reported as empty
// plain scalars.
5371 else if(first ==
',' || first ==
']')
5373 _c4dbgp(
"seqimap[QMRK]: finish without key.");
5374 m_evt_handler->set_key_scalar_plain_empty();
5375 m_evt_handler->set_val_scalar_plain_empty();
5377 goto seqimap_finish;
// NOTE(review): the anchor goes straight to set_key_anchor() here, whereas the
// RVAL state queues anchors through _add_annotation(&m_pending_anchors, ...);
// no '!' (tag) case is visible in this state either -- confirm both are intended.
5379 else if(first ==
'&')
5381 csubstr anchor = _scan_anchor();
5382 _c4dbgp(
"seqimap[QMRK]: anchor!");
5383 m_evt_handler->set_key_anchor(anchor);
// '*': scan a reference and handle it as the key.
5385 else if(first ==
'*')
5387 csubstr ref = _scan_ref_seq();
5388 _c4dbgp(
"seqimap[QMRK]: ref!");
5389 _handle_keyref(ref);
// ---- RKCL state: per the labels below, the ':' after the key is expected ----
5397 else if(has_any(
RKCL))
5399 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5401 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5402 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5403 const char first = rem.str[0];
5404 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
// The ':' was found: advance one character on the line.
5407 _c4dbgp(
"seqimap[RKCL]: found ':'");
5409 _line_progressed(1);
// ',' or ']' instead of ':': the key has no value, so an empty plain scalar
// is reported as the value. (The label mentions only ',' although ']' also
// lands here.)
5412 else if(first ==
',' || first ==
']')
5414 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5415 m_evt_handler->set_val_scalar_plain_empty();
5417 goto seqimap_finish;
// No branch jumped to the finish label: check whether the current line and
// the file are exhausted before going around again.
5426 _c4dbgt(
"seqimap: go again", 0);
5427 if(_finished_line())
5429 if(C4_LIKELY(!_finished_file()))
5443 _c4dbgp(
"seqimap: finish");
// _handle_seq_flow: one parsing step for a flow sequence (RSEQ and RFLOW are
// asserted set, RKEY clear). Exactly one of RVAL (a value is expected) or RNXT
// (per the labels: ',' / ']' / ':' is expected) is active. Branches on the
// first character of the remaining line text and reports values, annotations
// and nested containers to m_evt_handler.
5449 template<
class EventHandler>
5450 void ParseEngine<EventHandler>::_handle_seq_flow()
5453 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Entry invariants: RKEY clear, RSEQ and RFLOW set, exactly one of RVAL/RNXT
// set (has_all(RVAL) != has_all(RNXT)), and a valid indref (not npos).
5455 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5456 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5457 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5458 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
5459 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5460 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
// At the start of a line, run the flow line-beginning handling first.
5462 if(m_evt_handler->m_curr->at_line_beginning())
5464 _handle_flow_line_beginning();
// Skip flow whitespace; the check below tests whether anything is left on the line.
5467 _handle_flow_skip_whitespace();
5468 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL state: a value is expected (RNXT must be clear) ----
5473 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5474 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Single-quoted value (per the debug label): scan, filter, run the annotation
// hook, report it, then call _mark_seqflow_val_end() (presumably what feeds the
// m_prev_val_end comparison in the RNXT ':' branch below -- TODO confirm).
5478 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5479 sc = _scan_scalar_squot();
5480 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5481 _handle_annotations_before_blck_val_scalar();
5482 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5484 _mark_seqflow_val_end();
// Double-quoted value.
5486 else if(first ==
'"')
5488 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5489 sc = _scan_scalar_dquot();
5490 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5491 _handle_annotations_before_blck_val_scalar();
5492 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5494 _mark_seqflow_val_end();
// Plain value: taken only when the seq-flow plain scan reports a scalar.
5497 else if(_scan_scalar_plain_seq_flow(&sc))
5499 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5500 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5501 _handle_annotations_before_blck_val_scalar();
5502 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5504 _mark_seqflow_val_end();
// '[': nested flow sequence as the value. No jump to the finish label is
// visible here, unlike the '{' branch that follows.
5506 else if(first ==
'[')
5508 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5510 _handle_annotations_before_blck_val_scalar();
5511 m_evt_handler->begin_seq_val_flow();
5512 _set_indentation(m_evt_handler->m_parent->indref);
5514 _line_progressed(1);
// '{': nested flow map as the value; this step ends after opening it.
5516 else if(first ==
'{')
5518 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5520 _handle_annotations_before_blck_val_scalar();
5521 m_evt_handler->begin_map_val_flow();
5522 _set_indentation(m_evt_handler->m_parent->indref);
5524 _line_progressed(1);
5525 goto seqflow_finish;
// ']' while a value was expected: when anchors or tags are pending, an empty
// plain scalar is reported to carry them; then the ']' is stepped over.
// NOTE(review): the condition combines the two counts with bitwise '|' while
// the ',' branch further down uses '||'. The truth value is the same for
// integer counts, but the spelling is inconsistent -- confirm and unify.
5527 else if(first ==
']')
5529 _c4dbgp(
"seqflow[RVAL]: end!");
5530 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5532 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5533 _handle_annotations_before_blck_val_scalar();
5534 m_evt_handler->set_val_scalar_plain_empty();
5536 _line_progressed(1);
5538 goto seqflow_finish;
// '*': scan a reference and handle it as the value.
5540 else if(first ==
'*')
5542 csubstr ref = _scan_ref_seq();
5543 _c4dbgpf(
"seqflow[RVAL]: ref! {}", _prs(ref));
5544 _handle_valref(ref);
// '&': scan an anchor and queue it in m_pending_anchors.
5547 else if(first ==
'&')
5549 csubstr anchor = _scan_anchor();
5550 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", _prs(anchor));
5551 _add_annotation(&m_pending_anchors, anchor);
// '!': scan a tag and queue it in m_pending_tags.
5553 else if(first ==
'!')
5555 csubstr tag = _scan_tag();
5556 _c4dbgpf(
"seqflow[RVAL]: tag! {}", _prs(tag));
5557 _add_annotation(&m_pending_tags, tag);
// ':' with no preceding scalar: a flow map is opened as the value and its
// key is reported as an empty plain scalar (the key annotation hook runs first).
5559 else if(first ==
':')
5561 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5563 m_evt_handler->begin_map_val_flow();
5564 _set_indentation(m_evt_handler->m_parent->indref);
5565 _handle_annotations_before_blck_key_scalar();
5566 m_evt_handler->set_key_scalar_plain_empty();
5568 _line_progressed(1);
5569 goto seqflow_finish;
// '?': explicit-key marker. A flow map is opened as the value, the '?' is
// stepped over and whitespace tokens after it are skipped.
5571 else if(first ==
'?')
5573 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5575 m_evt_handler->begin_map_val_flow();
5576 _set_indentation(m_evt_handler->m_parent->indref);
5578 _line_progressed(1);
5579 _maybe_skip_whitespace_tokens();
5580 goto seqflow_finish;
// ',' while a value was expected: only when anchors or tags are pending is
// an empty plain scalar reported to carry them.
5582 else if(first ==
',')
5584 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5586 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5587 _handle_annotations_before_blck_val_scalar();
5588 m_evt_handler->set_val_scalar_plain_empty();
5590 _mark_seqflow_val_end();
// ---- RNXT state: a value was read (RVAL must be clear) ----
5604 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5605 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5606 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Separator found (per the label: "expect next val"): add a sibling node and
// step over it. A '#' immediately after the separator is rejected as an
// invalid comment.
5609 _c4dbgp(
"seqflow[RNXT]: expect next val");
5611 m_evt_handler->add_sibling();
5612 _line_progressed(1);
5613 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5615 _c4err(
"parse error: invalid comment after comma");
5617 _mark_seqflow_val_end();
// ']': end of this flow sequence.
5619 else if(first ==
']')
5621 _c4dbgp(
"seqflow[RNXT]: end!");
5622 _line_progressed(1);
5624 goto seqflow_finish;
// ':' after a value: if the previous value ended on this same line
// (m_prev_val_end equals the current line number), that value is
// reinterpreted as the first key of a new flow map.
5626 else if(first ==
':')
5628 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5629 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5631 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5632 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5633 _set_indentation(m_evt_handler->m_parent->indref);
5634 _line_progressed(1);
5636 goto seqflow_finish;
// No branch jumped to the finish label: check whether the line and the file
// are exhausted. The error below reports a sequence that was never closed
// (presumably raised when the file has ended -- TODO confirm the branch).
5650 _c4dbgt(
"seqflow: go again", 0);
5651 if(_finished_line())
5653 if(C4_LIKELY(!_finished_file()))
5661 _c4err(
"missing terminating ]");
5667 _c4dbgp(
"seqflow: finish");
// _handle_map_flow: one parsing step for a flow map (RMAP and RFLOW are
// asserted set). Exactly one of RKEY / RKCL / RVAL / RNXT / QMRK is active.
// The code branches on that state and then on the first character of the
// remaining line text, reporting keys, values, annotations and nested
// containers to m_evt_handler.
5673 template<
class EventHandler>
5674 void ParseEngine<EventHandler>::_handle_map_flow()
5677 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Entry invariants: RMAP and RFLOW set, and exactly one of the five sub-states.
5679 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5680 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5681 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
5682 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// At the start of a line, run the flow line-beginning handling first.
5684 if(m_evt_handler->m_curr->at_line_beginning())
5686 _handle_flow_line_beginning();
// Skip flow whitespace; the check below tests whether anything is left on the line.
5689 _handle_flow_skip_whitespace();
5690 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY state: a key is expected (RKCL, RVAL, RNXT and QMRK must be clear) ----
5695 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5696 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5697 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5698 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5699 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5700 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// Single-quoted key (per the debug label): scan, filter, run the key
// annotation hook, report it.
5704 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5705 sc = _scan_scalar_squot();
5706 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5707 _handle_annotations_before_blck_key_scalar();
5708 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// Double-quoted key.
5711 else if(first ==
'"')
5713 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5714 sc = _scan_scalar_dquot();
5715 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5716 _handle_annotations_before_blck_key_scalar();
5717 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// Plain key: taken only when the map-flow plain scan reports a scalar.
5721 else if(_scan_scalar_plain_map_flow(&sc))
5723 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5724 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5725 _handle_annotations_before_blck_key_scalar();
5726 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit-key marker; step over it and skip whitespace tokens.
5729 else if(first ==
'?')
5731 _c4dbgp(
"mapflow[RKEY]: explicit key");
5732 _handle_annotations_before_blck_key_scalar();
5734 _line_progressed(1);
5735 _maybe_skip_whitespace_tokens();
// ':' with no key text: the key is reported as an empty plain scalar.
5737 else if(first ==
':')
5739 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5740 _handle_annotations_before_blck_key_scalar();
5741 m_evt_handler->set_key_scalar_plain_empty();
5743 _line_progressed(1);
5744 _maybe_skip_whitespace_tokens();
// ',' where a key was expected: an error unless the helper returns true
// (judging by its name it covers pending annotations -- TODO confirm).
5746 else if(first ==
',')
5748 _c4dbgp(
"mapflow[RKEY]: comma!");
5749 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5750 _c4err(
"unexpected comma");
// '}': end of the map. The helper's result is deliberately discarded here.
5754 else if(first ==
'}')
5756 _c4dbgp(
"mapflow[RKEY]: end!");
5757 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5758 _line_progressed(1);
5760 goto mapflow_finish;
// '&': scan an anchor and queue it in m_pending_anchors.
5762 else if(first ==
'&')
5764 csubstr anchor = _scan_anchor();
5765 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", _prs(anchor));
5766 _add_annotation(&m_pending_anchors, anchor);
// '!': scan a tag and queue it in m_pending_tags.
5768 else if(first ==
'!')
5770 csubstr tag = _scan_tag();
5771 _c4dbgpf(
"mapflow[RKEY]: tag! {}", _prs(tag));
5772 _add_annotation(&m_pending_tags, tag);
// '*': scan a reference and handle it as the key.
5774 else if(first ==
'*')
5776 csubstr ref = _scan_ref_map();
5777 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", _prs(ref));
5778 _handle_keyref(ref);
// '[': the key itself is a flow sequence; this step ends after opening it.
5781 else if(first ==
'[')
5786 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5787 _handle_annotations_before_blck_key_scalar();
5789 m_evt_handler->begin_seq_key_flow();
5791 _set_indentation(m_evt_handler->m_parent->indref);
5792 _line_progressed(1);
5793 goto mapflow_finish;
// '{': the key itself is a flow map. No jump to the finish label is visible
// here, unlike the '[' branch above.
5795 else if(first ==
'{')
5800 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5801 _handle_annotations_before_blck_key_scalar();
5803 m_evt_handler->begin_map_key_flow();
5805 _set_indentation(m_evt_handler->m_parent->indref);
5806 _line_progressed(1);
// ---- RKCL state: per the labels below, the ':' after the key is expected ----
5814 else if(has_any(
RKCL))
5816 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5817 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5818 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5819 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5820 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5821 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
// The colon was found: advance one character on the line.
5824 _c4dbgp(
"mapflow[RKCL]: found the colon");
5826 _line_progressed(1);
// '}' instead of ':': the key has no value; report an empty one and end the map.
5828 else if(first ==
'}')
5830 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5832 m_evt_handler->set_val_scalar_plain_empty();
5833 _line_progressed(1);
5835 goto mapflow_finish;
// ',' instead of ':': report an empty value and add a sibling. A '#'
// immediately after the comma is rejected as an invalid comment.
5837 else if(first ==
',')
5839 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5840 m_evt_handler->set_val_scalar_plain_empty();
5841 m_evt_handler->add_sibling();
5843 _line_progressed(1);
5844 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5846 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL state: a value is expected ----
5854 else if(has_any(
RVAL))
5856 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5857 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5858 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5860 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5861 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// Single-quoted value (per the debug label).
5865 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5866 sc = _scan_scalar_squot();
5867 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5868 _handle_annotations_before_blck_val_scalar();
5869 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Double-quoted value.
5872 else if(first ==
'"')
5874 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5875 sc = _scan_scalar_dquot();
5876 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5877 _handle_annotations_before_blck_val_scalar();
5878 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain value: taken only when the scan reports a scalar.
5882 else if(_scan_scalar_plain_map_flow(&sc))
5884 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5885 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5886 _handle_annotations_before_blck_val_scalar();
5887 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// '[': the value is a flow sequence; this step ends after opening it.
5890 else if(first ==
'[')
5892 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5894 _handle_annotations_before_blck_val_scalar();
5895 m_evt_handler->begin_seq_val_flow();
5896 _set_indentation(m_evt_handler->m_parent->indref);
5898 _line_progressed(1);
5899 goto mapflow_finish;
// '{': the value is a nested flow map. No jump to the finish label is
// visible here, unlike the '[' branch above.
5901 else if(first ==
'{')
5903 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5905 _handle_annotations_before_blck_val_scalar();
5906 m_evt_handler->begin_map_val_flow();
5907 _set_indentation(m_evt_handler->m_parent->indref);
5909 _line_progressed(1);
// '}' where a value was expected: report an empty value and end the map.
5912 else if(first ==
'}')
5914 _c4dbgp(
"mapflow[RVAL]: end!");
5915 _handle_annotations_before_blck_val_scalar();
5916 m_evt_handler->set_val_scalar_plain_empty();
5917 _line_progressed(1);
5919 goto mapflow_finish;
// ',' where a value was expected: report an empty value. No
// _line_progressed() call is visible here -- TODO confirm the comma is left
// for the RNXT state.
5921 else if(first ==
',')
5923 _c4dbgp(
"mapflow[RVAL]: empty val!");
5924 _handle_annotations_before_blck_val_scalar();
5925 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the next two debug labels say "key ref!" / "key anchor!"
// although this is the RVAL state and the reference is handled with
// _handle_valref() -- the labels look copied from the RKEY branches; confirm.
5929 else if(first ==
'*')
5931 csubstr ref = _scan_ref_map();
5932 _c4dbgpf(
"mapflow[RVAL]: key ref! {}", _prs(ref));
5933 _handle_valref(ref);
// '&': scan an anchor and queue it in m_pending_anchors.
5936 else if(first ==
'&')
5938 csubstr anchor = _scan_anchor();
5939 _c4dbgpf(
"mapflow[RVAL]: key anchor! {}", _prs(anchor));
5940 _add_annotation(&m_pending_anchors, anchor);
// '!': scan a tag and queue it in m_pending_tags.
5942 else if(first ==
'!')
5944 csubstr tag = _scan_tag();
5945 _c4dbgpf(
"mapflow[RVAL]: tag! {}", _prs(tag));
5946 _add_annotation(&m_pending_tags, tag);
// ---- RNXT state: a key/value pair was read; ',' or '}' are handled ----
5953 else if(has_any(
RNXT))
5955 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5956 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5957 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5958 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5959 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
// ',': add a sibling for the next pair. A '#' immediately after the comma is
// rejected as an invalid comment.
5960 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5962 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5963 m_evt_handler->add_sibling();
5965 _line_progressed(1);
5966 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5968 _c4err(
"parse error: invalid comment after comma");
// '}': end of this flow map.
5971 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5973 _c4dbgp(
"mapflow[RNXT]: end!");
5974 _line_progressed(1);
5976 goto mapflow_finish;
// ---- QMRK state: what follows is set as a KEY (set_key_* / begin_*_key_flow) ----
5983 else if(has_any(
QMRK))
5985 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5986 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5987 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5988 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5989 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5990 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// Single-quoted key (per the debug label).
5994 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5995 sc = _scan_scalar_squot();
5996 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5997 _handle_annotations_before_blck_key_scalar();
5998 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// Double-quoted key.
6001 else if(first ==
'"')
6003 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6004 sc = _scan_scalar_dquot();
6005 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6006 _handle_annotations_before_blck_key_scalar();
6007 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// Plain key: taken only when the scan reports a scalar.
6011 else if(_scan_scalar_plain_map_flow(&sc))
6013 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6014 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6015 _handle_annotations_before_blck_key_scalar();
6016 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' with no key text: the key is reported as an empty plain scalar.
6019 else if(first ==
':')
6021 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6022 _handle_annotations_before_blck_key_scalar();
6023 m_evt_handler->set_key_scalar_plain_empty();
6025 _line_progressed(1);
6026 _maybe_skip_whitespace_tokens();
// '}' with no key text: report both an empty key and an empty value, then
// end the map.
6028 else if(first ==
'}')
6030 _c4dbgp(
"mapflow[QMRK]: end!");
6031 _handle_annotations_before_blck_key_scalar();
6032 m_evt_handler->set_key_scalar_plain_empty();
6033 m_evt_handler->set_val_scalar_plain_empty();
6035 _line_progressed(1);
6036 goto mapflow_finish;
// ',' with no key text: report both an empty key and an empty value. No
// _line_progressed() call is visible here -- TODO confirm the comma is left
// for the RNXT state.
6038 else if(first ==
',')
6040 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6041 _handle_annotations_before_blck_key_scalar();
6042 m_evt_handler->set_key_scalar_plain_empty();
6043 m_evt_handler->set_val_scalar_plain_empty();
// '&': scan an anchor and queue it in m_pending_anchors.
6046 else if(first ==
'&')
6048 csubstr anchor = _scan_anchor();
6049 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", _prs(anchor));
6050 _add_annotation(&m_pending_anchors, anchor);
// '*': scan a reference and handle it as the key.
6052 else if(first ==
'*')
6054 csubstr ref = _scan_ref_map();
6055 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", _prs(ref));
6056 _handle_keyref(ref);
// '[': the key itself is a flow sequence; this step ends after opening it.
6059 else if(first ==
'[')
6064 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6066 _handle_annotations_before_blck_key_scalar();
6067 m_evt_handler->begin_seq_key_flow();
6069 _set_indentation(m_evt_handler->m_parent->indref);
6070 _line_progressed(1);
6071 goto mapflow_finish;
// '{': the key itself is a flow map. No jump to the finish label is visible here.
6073 else if(first ==
'{')
6078 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6080 _handle_annotations_before_blck_key_scalar();
6081 m_evt_handler->begin_map_key_flow();
6082 _set_indentation(m_evt_handler->m_parent->indref);
6084 _line_progressed(1);
// '!': scan a tag and queue it in m_pending_tags.
6087 else if(first ==
'!')
6089 csubstr tag = _scan_tag();
6090 _c4dbgpf(
"mapflow[QMRK]: tag! {}", _prs(tag));
6091 _add_annotation(&m_pending_tags, tag);
// No branch jumped to the finish label: check whether the line and the file
// are exhausted. The error below reports a map that was never closed
// (presumably raised when the file has ended -- TODO confirm the branch).
6100 _c4dbgt(
"mapflow: go again", 0);
6101 if(_finished_line())
6103 if(C4_LIKELY(!_finished_file()))
6111 _c4err(
"missing terminating }");
6117 _c4dbgp(
"mapflow: finish");
// Handles parsing while the current state is a block-style sequence.
//
// On entry the current state must have RSEQ and RBLCK set, and exactly one
// of RVAL (a value is expected) or RNXT (the next entry is expected); this
// is enforced by the assertions right below. The function works on the
// remainder of the current line (m_curr->line_contents.rem), dispatching on
// its first character, and reports what it finds to m_evt_handler.
6123 template<
class EventHandler>
6124 void ParseEngine<EventHandler>::_handle_seq_block()
6127 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions: block sequence, and exactly one of RVAL/RNXT
6129 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6130 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6131 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
6132 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
// skip a comment if there is one, then check whether anything is left on the line
6134 _maybe_skip_comment_strict();
6135 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---------------------------------------------------------------
// RVAL: a sequence value is expected (RNXT must not be set)
// ---------------------------------------------------------------
6140 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6141 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// at the start of a line: consume the indentation, or react to a smaller one
6142 if(m_evt_handler->m_curr->at_line_beginning())
6144 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6145 if(m_evt_handler->m_curr->indentation_ge_extra())
6147 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6148 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6149 if(!m_evt_handler->m_curr->line_contents.rem.len)
6152 else if(m_evt_handler->m_curr->indentation_lt_extra())
6154 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// same indentation as the sequence itself: the pending value is set as an
// empty plain scalar
6155 if(m_evt_handler->m_curr->indentation_eq())
6157 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6158 _handle_annotations_before_blck_val_scalar();
6159 m_evt_handler->set_val_scalar_plain_empty();
// indentation smaller than the sequence: pop out of this block sequence
6165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6166 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6167 _handle_indentation_pop_from_block_seq();
6168 goto seqblck_finish;
// indentation == npos is logged as an empty line; the whole remainder is consumed
6171 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6173 _c4dbgp(
"seqblck[RVAL]: empty line!");
6174 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// from here on the line must have remaining content
6178 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
// startmark == npos is logged as a whitespace-only remainder
6179 const size_t startmark = _handle_block_skip_leading_whitespace();
6180 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6181 if(startmark ==
npos)
6183 _c4dbgp(
"seqblck[RVAL]: whitespace only");
// capture the first character, and the line/column where this token starts;
// these are passed on to the annotation and indentation helpers below
6186 const size_t tabmark = _handle_block_get_whitespace_mark();
6187 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6188 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6189 const size_t startline = m_evt_handler->m_curr->pos.line;
// NOTE(review): m_bom_len is subtracted from the column here, whereas
// _handle_map_block takes current_col() as-is - confirm this is intended.
6190 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// single-quoted scalar. Going by the debug messages: when no ':' follows it
// is set as the value; otherwise a child block map is started and the
// scalar becomes its first key.
6194 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6195 sc = _scan_scalar_squot();
6196 if(!_maybe_scan_following_colon())
6198 _c4dbgp(
"seqblck[RVAL]: set as val");
6199 _handle_annotations_before_blck_val_scalar();
6200 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6201 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6206 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6207 _handle_block_check_leading_tabs(startmark);
6209 _handle_annotations_before_start_mapblck(startline);
6211 m_evt_handler->begin_map_val_block();
6212 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6213 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6214 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6216 _maybe_skip_whitespace_tokens();
6217 goto seqblck_finish;
// double-quoted scalar: same value-or-key handling as the single-quoted case
6220 else if(first ==
'"')
6222 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6223 sc = _scan_scalar_dquot();
6224 if(!_maybe_scan_following_colon())
6226 _c4dbgp(
"seqblck[RVAL]: set as val");
6227 _handle_annotations_before_blck_val_scalar();
6228 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6229 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6234 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6236 _handle_block_check_leading_tabs(startmark);
6237 _handle_annotations_before_start_mapblck(startline);
6239 m_evt_handler->begin_map_val_block();
6240 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6241 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6242 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6244 _maybe_skip_whitespace_tokens();
6245 goto seqblck_finish;
// block-literal scalar, scanned with indref + 1 as the indentation argument
6251 else if(first ==
'|')
6253 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6255 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6256 _handle_annotations_before_blck_val_scalar();
6257 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6258 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// block-folded scalar, scanned the same way
6261 else if(first ==
'>')
6263 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6265 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6266 _handle_annotations_before_blck_val_scalar();
6267 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6268 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar: tried before the remaining single-character tokens below
6271 else if(_scan_scalar_plain_seq_blck(&sc))
6273 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6274 if(!_maybe_scan_following_colon())
6276 _c4dbgp(
"seqblck[RVAL]: set as val");
6277 _handle_annotations_before_blck_val_scalar();
6278 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6279 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// the scalar is a key: the token must start to the right of the sequence indentation
6284 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6285 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6286 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6287 _handle_block_check_leading_tabs(startmark, tabmark);
6289 _handle_annotations_before_start_mapblck(startline);
6291 m_evt_handler->begin_map_val_block();
6292 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6293 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6294 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6296 _maybe_skip_whitespace_tokens();
6297 goto seqblck_finish;
// '[' : the value is a flow sequence
6300 else if(first ==
'[')
6302 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6304 _handle_annotations_before_blck_val_scalar();
6305 m_evt_handler->begin_seq_val_flow();
6307 _line_progressed(1);
6308 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6309 goto seqblck_finish;
// '{' : the value is a flow map
6311 else if(first ==
'{')
6313 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6315 _handle_annotations_before_blck_val_scalar();
6316 m_evt_handler->begin_map_val_flow();
6318 _line_progressed(1);
6319 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6320 goto seqblck_finish;
// '-' : the value is a nested block sequence, indented at this token's column
6322 else if(first ==
'-')
6324 _c4dbgp(
"seqblck[RVAL]: dash");
6325 _handle_block_check_leading_tabs(startmark);
6326 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6327 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6328 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
6329 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6331 _handle_annotations_before_blck_val_scalar();
6332 m_evt_handler->begin_seq_val_block();
6334 _set_indentation(startindent);
6336 _line_progressed(1);
// ':' : the value is a block map whose first key is empty
6338 else if(first ==
':')
6340 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6342 _handle_annotations_before_start_mapblck(startline);
6344 m_evt_handler->begin_map_val_block();
6345 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6346 m_evt_handler->set_key_scalar_plain_empty();
6348 _line_progressed(1);
6349 _maybe_skip_whitespace_tokens();
6350 goto seqblck_finish;
// '&' : anchor; stored as a pending annotation together with its column and line
6352 else if(first ==
'&')
6354 const csubstr anchor = _scan_anchor();
6355 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", _prs(anchor));
6358 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*' : alias/reference; either the value itself or, when a ':' follows,
// the first key of a child block map
6360 else if(first ==
'*')
6362 csubstr ref = _scan_ref_seq();
6363 _c4dbgpf(
"seqblck[RVAL]: ref! {}", _prs(ref));
6364 if(!_maybe_scan_following_colon())
6366 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6367 _handle_valref(ref);
6372 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6374 _handle_annotations_before_start_mapblck(startline);
6375 m_evt_handler->begin_map_val_block();
6376 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6377 _handle_keyref(ref);
6379 _set_indentation(startindent);
6380 _maybe_skip_whitespace_tokens();
6381 goto seqblck_finish;
// '!' : tag; stored as a pending annotation together with its column and line
6384 else if(first ==
'!')
6386 csubstr tag = _scan_tag();
6387 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", _prs(tag));
6390 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' : explicit key, so the value is a block map. If a sequence token
// follows the '?', a block sequence is begun as the key.
6392 else if(first ==
'?')
6394 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6396 m_evt_handler->begin_map_val_block();
6398 _set_indentation(startindent);
6399 _line_progressed(1);
6400 _maybe_skipchars(
' ');
6401 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6403 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6405 m_evt_handler->begin_seq_key_block();
6407 _save_indentation();
6408 _line_progressed(1);
6409 _maybe_skipchars(
' ');
6411 goto seqblck_finish;
// ---------------------------------------------------------------
// RNXT: the next sequence entry is expected (RVAL must not be set)
// ---------------------------------------------------------------
6420 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6421 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6425 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// at the start of a line: consume the indentation, or pop on a smaller one
6426 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6428 _c4dbgp(
"seqblck[RNXT]: at line begin");
6429 if(m_evt_handler->m_curr->indentation_ge())
6431 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6432 _line_progressed(m_evt_handler->m_curr->indref);
6433 if(!m_evt_handler->m_curr->line_contents.rem.len)
6436 else if(m_evt_handler->m_curr->indentation_lt())
6438 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6439 _handle_indentation_pop_from_block_seq();
// after the pop: either an enclosing block sequence continues, or the state
// is no longer a block sequence and this handler is done
6442 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6443 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6444 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6445 if(!m_evt_handler->m_curr->line_contents.rem.len)
6450 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6451 goto seqblck_finish;
// indentation == npos is logged as a blank line; the whole remainder is consumed
6454 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6456 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6457 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6458 if(!m_evt_handler->m_curr->line_contents.rem.len)
6464 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6465 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6472 if(!m_evt_handler->m_curr->line_contents.rem.len)
6474 _c4dbgp(
"seqblck[RNXT]: again");
// dispatch on the first remaining character
6482 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6483 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6484 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// this condition holds when the sequence or the line is indented, or when
// the remainder is not a document-begin token
6487 if(m_evt_handler->m_curr->indref > 0
6488 || m_evt_handler->m_curr->line_contents.indentation > 0
6489 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6491 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6493 _c4dbgp(
"seqblck[RNXT]: expect next val");
6495 m_evt_handler->add_sibling();
6496 _line_progressed(1);
// document-begin path: a new document is started and the 3-character token consumed
6505 _c4dbgp(
"seqblck[RNXT]: start doc");
6506 _start_doc_suddenly();
6507 _line_progressed(3);
6508 _maybe_skip_whitespace_tokens();
6509 goto seqblck_finish;
// ':' : when the parent state is a map, this sequence was the explicit
// ('?') key of that map, so the sequence is ended here
6512 else if(first ==
':')
6518 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP)))
6520 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6521 m_evt_handler->end_seq_block();
6522 goto seqblck_finish;
// '.' : possibly a document-end token
6529 else if(first ==
'.')
6531 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6532 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6534 _c4dbgp(
"seqblck[RNXT]: end doc");
6535 _end_doc_suddenly();
6536 _line_progressed(3);
6537 _maybe_skip_whitespace_tokens();
6538 _check_doc_end_tokens();
6539 goto seqblck_finish;
// a sequence with the same indentation as its parent block map (called an
// "indentless seq" in the debug message): pop back to the parent map and
// add a sibling there
6550 _print_state_stack();
6552 if(m_evt_handler->m_parent
6553 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6554 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6556 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6557 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6558 _handle_indentation_pop(m_evt_handler->m_parent);
6559 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6560 m_evt_handler->add_sibling();
6562 goto seqblck_finish;
// tab: find the first non-tab character; the whole remainder is consumed
6564 else if(first ==
'\t')
6566 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6569 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// end-of-line / end-of-file checks before going around again
6578 _c4dbgt(
"seqblck: go again", 0);
6579 if(_finished_line())
6584 if(_finished_file())
6586 _c4dbgp(
"seqblck: finish!");
6588 goto seqblck_finish;
6595 _c4dbgp(
"seqblck: finish");
// Handles parsing while the current state is a block-style map.
//
// On entry the current state must have RMAP and RBLCK set, and exactly one
// of RKEY, RKCL, RVAL, RNXT or QMRK; this is enforced by the assertions
// right below. Each of those sub-states has its own section in this
// function; QMRK and RKCL are delegated to _handle_map_block_qmrk() and
// _handle_map_block_rkcl(). The function works on the remainder of the
// current line (m_curr->line_contents.rem) and reports what it finds to
// m_evt_handler.
6601 template<
class EventHandler>
6602 void ParseEngine<EventHandler>::_handle_map_block()
6605 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions: block map, and exactly one sub-state flag
6609 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6610 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6611 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6612 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// skip a comment if there is one, then check whether anything is left on the line
6614 _maybe_skip_comment();
6615 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---------------------------------------------------------------
// RKEY section (debug label "mapblck[RKEY]"): none of RKCL, QMRK, RVAL,
// RNXT may be set here
// ---------------------------------------------------------------
6620 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6621 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6622 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6623 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// at the start of a line: equal indentation is consumed, smaller
// indentation pops out of this map, larger indentation is an error
6627 if(m_evt_handler->m_curr->at_line_beginning())
6629 if(m_evt_handler->m_curr->indentation_eq())
6631 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6632 _line_progressed(m_evt_handler->m_curr->indref);
6633 if(!m_evt_handler->m_curr->line_contents.rem.len)
6636 else if(m_evt_handler->m_curr->indentation_lt())
6638 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6639 _handle_indentation_pop_from_block_map();
6640 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6643 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6644 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6645 if(!m_evt_handler->m_curr->line_contents.rem.len)
6650 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6651 goto mapblck_finish;
6656 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6657 _c4err(
"invalid indentation");
// capture the first character, and the line/column where this token starts
6663 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6664 const size_t startline = m_evt_handler->m_curr->pos.line;
6665 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6666 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
// single-quoted key; a ':' must follow it.
// NOTE(review): this key path calls _maybe_filter_val_scalar_squot(), while
// the other places in this file that set a single-quoted key call
// _maybe_filter_key_scalar_squot() - confirm the val variant is intended.
6670 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6671 sc = _scan_scalar_squot();
6672 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6673 _handle_annotations_before_blck_key_scalar();
6674 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6676 if(!_maybe_scan_following_colon())
6677 _c4err(
"could not find ':' colon after key");
6679 _maybe_skip_whitespace_tokens();
// double-quoted key; a ':' must follow it.
// NOTE(review): same as above - the val filter helper is called for a key.
6681 else if(first ==
'"')
6683 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6684 sc = _scan_scalar_dquot();
6685 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6686 _handle_annotations_before_blck_key_scalar();
6687 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6689 if(!_maybe_scan_following_colon())
6690 _c4err(
"could not find ':' colon after key");
6692 _maybe_skip_whitespace_tokens();
// block-literal and block-folded scalars are rejected as implicit keys
6696 else if(C4_UNLIKELY(first ==
'|'))
6698 _c4err(
"block map: literal keys must be enclosed in '?'");
6700 else if(C4_UNLIKELY(first ==
'>'))
6702 _c4err(
"block map: folded keys must be enclosed in '?'");
// plain key; a ':' must follow it.
// NOTE(review): same as above - the val filter helper is called for a key.
6704 else if(_scan_scalar_plain_map_blck(&sc))
6706 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6707 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6708 _handle_annotations_before_blck_key_scalar();
6709 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6711 if(!_maybe_scan_following_colon())
6712 _c4err(
"could not find ':' colon after key");
6714 _maybe_skip_whitespace_tokens();
// '?' : explicit key token. If a sequence token follows, a block sequence
// is begun as the key.
6716 else if(first ==
'?')
6718 _c4dbgp(
"mapblck[RKEY]: key token!");
6720 _line_progressed(1);
6721 _maybe_skipchars(
' ');
6722 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6724 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6726 m_evt_handler->begin_seq_key_block();
6728 _save_indentation();
6729 _line_progressed(1);
6730 _maybe_skipchars(
' ');
6731 goto mapblck_finish;
// ':' with no key before it: the key is set as an empty plain scalar
6735 else if(first ==
':')
6737 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6738 _handle_annotations_before_blck_key_scalar();
6739 m_evt_handler->set_key_scalar_plain_empty();
6741 _line_progressed(1);
6743 _maybe_skip_whitespace_tokens();
// '*' : alias/reference used as key; a ':' must follow it
6745 else if(first ==
'*')
6747 csubstr ref = _scan_ref_map();
6748 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", _prs(ref));
6749 _handle_keyref(ref);
6751 if(!_maybe_scan_following_colon())
6752 _c4err(
"could not find ':' colon after key");
6754 _maybe_skip_whitespace_tokens();
// '&' and '!' : anchor / tag, stored as pending annotations with column and line
6756 else if(first ==
'&')
6758 csubstr anchor = _scan_anchor();
6759 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", _prs(anchor));
6760 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6762 else if(first ==
'!')
6764 csubstr tag = _scan_tag();
6765 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", _prs(tag));
6766 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '[' : the key is a flow sequence
6768 else if(first ==
'[')
6773 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6774 _handle_annotations_before_blck_key_scalar();
6775 m_evt_handler->begin_seq_key_flow();
6777 _line_progressed(1);
6778 _set_indentation(startindent);
6779 goto mapblck_finish;
// '{' : the key is a flow map
6781 else if(first ==
'{')
6786 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6787 _handle_annotations_before_blck_key_scalar();
6788 m_evt_handler->begin_map_key_flow();
6790 _line_progressed(1);
6791 _set_indentation(startindent);
6792 goto mapblck_finish;
// '-' : only handled here as a document-begin token at indentation 0
6794 else if(first ==
'-')
6796 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6797 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6799 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6800 _start_doc_suddenly();
6801 _line_progressed(3);
6802 _maybe_skip_whitespace_tokens();
6803 goto mapblck_finish;
// '.' : only handled here as a document-end token at indentation 0
6810 else if(first ==
'.')
6812 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6813 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6815 _c4dbgp(
"mapblck[RKEY]: end doc");
6816 _end_doc_suddenly();
6817 _line_progressed(3);
6818 _maybe_skip_whitespace_tokens();
6819 _check_doc_end_tokens();
6820 goto mapblck_finish;
// ---------------------------------------------------------------
// RVAL: a map value is expected
// ---------------------------------------------------------------
6832 else if(has_any(
RVAL))
6834 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6835 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6836 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6837 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// at the start of a line: compare the line indentation against the map's;
// more_indented records whether the line is indented beyond indref + 1
6841 if(m_evt_handler->m_curr->at_line_beginning())
6843 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6844 m_evt_handler->m_curr->more_indented =
false;
6845 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6846 if(m_evt_handler->m_curr->indentation_eq_extra())
6848 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6849 _line_progressed(m_evt_handler->m_curr->indref + 1);
6850 if(!m_evt_handler->m_curr->line_contents.rem.len)
6853 else if(m_evt_handler->m_curr->indentation_gt_extra())
6855 _c4dbgp(
"mapblck[RVAL]: more indented!");
6856 m_evt_handler->m_curr->more_indented =
true;
6857 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6858 if(!m_evt_handler->m_curr->line_contents.rem.len)
6861 else if(m_evt_handler->m_curr->indentation_lt_extra())
// same indentation as the map's keys: unless a block sequence token starts
// there, the pending value is set as an empty plain scalar
6863 if(m_evt_handler->m_curr->indentation_eq())
6865 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6867 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6869 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6870 _handle_annotations_before_blck_val_scalar();
6871 m_evt_handler->set_val_scalar_plain_empty();
// indentation smaller than the map's keys: pop out of this block map
6878 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6879 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6880 _handle_indentation_pop_from_block_map();
6883 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6884 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6887 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6888 m_evt_handler->add_sibling();
6895 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6896 goto mapblck_finish;
// startcol == npos is logged as a whitespace-only remainder
6901 const size_t startcol = _handle_block_skip_leading_whitespace();
6902 if(startcol ==
npos)
6904 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6907 const size_t tabmark = _handle_block_get_whitespace_mark();
// capture the first character, and the line/column where this token starts
6911 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6912 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6913 const size_t startline = m_evt_handler->m_curr->pos.line;
6914 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6915 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
// single-quoted scalar. Going by the debug messages: when no ':' follows it
// is set as the value; otherwise a nested block map is started and the
// scalar becomes its first key.
6919 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6920 sc = _scan_scalar_squot();
6921 if(!_maybe_scan_following_colon())
6923 _c4dbgp(
"mapblck[RVAL]: set as val");
6924 _handle_annotations_before_blck_val_scalar();
6925 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6926 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6932 _c4assert(startindent > m_evt_handler->m_curr->indref);
6933 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6934 _handle_block_check_leading_tabs(startcol);
6935 _handle_annotations_before_start_mapblck(startline);
6938 m_evt_handler->begin_map_val_block();
6939 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6940 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6941 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6942 _maybe_skip_whitespace_tokens();
// double-quoted scalar: same value-or-key handling as the single-quoted case
6947 else if(first ==
'"')
6949 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6950 sc = _scan_scalar_dquot();
6951 if(!_maybe_scan_following_colon())
6953 _c4dbgp(
"mapblck[RVAL]: set as val");
6954 _handle_annotations_before_blck_val_scalar();
6955 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6956 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6962 _c4assert(startindent > m_evt_handler->m_curr->indref);
6963 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6964 _handle_block_check_leading_tabs(startcol);
6965 _handle_annotations_before_start_mapblck(startline);
6968 m_evt_handler->begin_map_val_block();
6969 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6970 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6971 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6972 _maybe_skip_whitespace_tokens();
// block-literal scalar, scanned with indref + 1 as the indentation argument
6979 else if(first ==
'|')
6981 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6983 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6984 _handle_annotations_before_blck_val_scalar();
6985 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6986 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// block-folded scalar, scanned the same way
6989 else if(first ==
'>')
6991 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6993 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6994 _handle_annotations_before_blck_val_scalar();
6995 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6996 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar: tried before the remaining single-character tokens below
6999 else if(_scan_scalar_plain_map_blck(&sc))
7001 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7002 if(!_maybe_scan_following_colon())
7004 _c4dbgp(
"mapblck[RVAL]: set as val");
7005 _handle_annotations_before_blck_val_scalar();
7006 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7007 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7013 _c4assert(startindent > m_evt_handler->m_curr->indref);
7014 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7015 _handle_block_check_leading_tabs(startcol, tabmark);
7017 _handle_annotations_before_start_mapblck(startline);
7019 m_evt_handler->begin_map_val_block();
7020 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7021 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7022 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7023 _maybe_skip_whitespace_tokens();
// '-' recognized as a block sequence token: the value is a block sequence,
// indented at this token's column
7028 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7030 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7032 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7033 _handle_block_check_leading_tabs(startcol);
7035 _handle_annotations_before_blck_val_scalar();
7036 m_evt_handler->begin_seq_val_block();
7038 _set_indentation(startindent);
7039 _line_progressed(1);
7040 _maybe_skip_whitespace_tokens();
7041 goto mapblck_finish;
// '[' : the value is a flow sequence
7043 else if(first ==
'[')
7045 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7047 _handle_annotations_before_blck_val_scalar();
7048 m_evt_handler->begin_seq_val_flow();
7050 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7051 _line_progressed(1);
7052 goto mapblck_finish;
// '{' : the value is a flow map; scalar_col of the new state is set to the
// current line's indentation
7054 else if(first ==
'{')
7056 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7058 _handle_annotations_before_blck_val_scalar();
7059 m_evt_handler->begin_map_val_flow();
7061 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7062 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7063 _line_progressed(1);
7064 goto mapblck_finish;
// '*' : alias/reference; with a following ':' it is the first key of a
// child block map, otherwise it is the value.
// NOTE(review): this alias-as-key path uses
// _handle_annotations_before_blck_val_scalar() + _set_indentation(), while
// the "scalar as key" paths above use the
// _handle_annotations_before_start_mapblck() /
// _handle_annotations_and_indentation_after_start_mapblck() pair - confirm
// the difference is intended.
7066 else if(first ==
'*')
7068 csubstr ref = _scan_ref_map();
7069 _c4dbgpf(
"mapblck[RVAL]: ref! {}", _prs(ref));
7070 if(_maybe_scan_following_colon())
7072 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7074 _handle_annotations_before_blck_val_scalar();
7075 m_evt_handler->begin_map_val_block();
7076 _handle_keyref(ref);
7077 _set_indentation(startindent);
7083 _c4dbgp(
"mapblck[RVAL]: was val ref");
7084 _handle_valref(ref);
7087 _maybe_skip_whitespace_tokens();
// '&' and '!' : anchor / tag, stored as pending annotations with column and line
7089 else if(first ==
'&')
7091 csubstr anchor = _scan_anchor();
7092 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", _prs(anchor));
7095 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7097 else if(first ==
'!')
7099 csubstr tag = _scan_tag();
7100 _c4dbgpf(
"mapblck[RVAL]: tag! {}", _prs(tag));
7103 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' : explicit key, so the value is a nested block map. If a sequence
// token follows the '?', a block sequence is begun as the key.
7105 else if(first ==
'?')
7107 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7109 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7111 _handle_annotations_before_blck_val_scalar();
7112 m_evt_handler->begin_map_val_block();
7114 _set_indentation(startindent);
7115 _line_progressed(1);
7116 _maybe_skipchars(
' ');
7117 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7119 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7121 m_evt_handler->begin_seq_key_block();
7123 _save_indentation();
7124 _line_progressed(1);
7125 _maybe_skipchars(
' ');
7126 goto mapblck_finish;
// ':' : the value is a nested block map whose first key is empty
7130 else if(first ==
':')
7132 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7134 _handle_annotations_before_start_mapblck(startline);
7136 m_evt_handler->begin_map_val_block();
7137 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7138 m_evt_handler->set_key_scalar_plain_empty();
7141 _line_progressed(1);
7142 _maybe_skip_whitespace_tokens();
// ---------------------------------------------------------------
// RNXT: the next key/value pair is expected
// ---------------------------------------------------------------
7150 else if(has_any(
RNXT))
7152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7154 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// at the start of a line: equal indentation is consumed and a sibling is
// added; smaller indentation pops out of this map first
7159 if(m_evt_handler->m_curr->at_line_beginning())
7161 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7162 if(m_evt_handler->m_curr->indentation_eq())
7164 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7165 _line_progressed(m_evt_handler->m_curr->indref);
7166 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7167 m_evt_handler->add_sibling();
7171 else if(m_evt_handler->m_curr->indentation_lt())
7173 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7174 _handle_indentation_pop_from_block_map();
7177 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7180 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7181 m_evt_handler->add_sibling();
7188 goto mapblck_finish;
7194 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7195 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
// NOTE(review): the debug label below reads "seqblck[RNXT]" although this is
// the map handler - probably a copy/paste leftover; left unchanged here.
7202 if(!m_evt_handler->m_curr->line_contents.rem.len)
7204 _c4dbgp(
"seqblck[RNXT]: again");
7212 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7213 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7214 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7217 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7218 _maybe_skip_whitespace_tokens();
// ---------------------------------------------------------------
// QMRK: delegated to _handle_map_block_qmrk()
// ---------------------------------------------------------------
7225 else if(has_any(
QMRK))
7227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7229 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7230 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7231 if(_handle_map_block_qmrk())
7234 goto mapblck_finish;
// ---------------------------------------------------------------
// RKCL: delegated to _handle_map_block_rkcl()
// ---------------------------------------------------------------
7236 else if(has_any(
RKCL))
7238 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7239 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7240 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7241 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7242 if(_handle_map_block_rkcl())
7245 goto mapblck_finish;
// end-of-line / end-of-file checks before going around again
7249 _c4dbgt(
"mapblck: again", 0);
7250 if(_finished_line())
7254 if(_finished_file())
7256 _c4dbgp(
"mapblck: file finished!");
7258 goto mapblck_finish;
7265 _c4dbgp(
"mapblck: finish");
// Handle the current position inside a block map while the parser is in the
// QMRK state (the debug traces tag this state "mapblck[QMRK]").
//
// When at the beginning of a line the indentation is reconciled first; then
// the first remaining character selects how the key is scanned: quoted
// scalar, block scalar ('|' or '>'), plain scalar, empty key (':'),
// alias ('*'), anchor ('&'), tag ('!'), '-' (block sequence or document
// start), flow container ('[' or '{') or a nested '?'.
//
// @return a flag tested by the block-map handler
//         (`if(_handle_map_block_qmrk())`); presumably true asks the caller
//         to leave its loop -- TODO confirm at the return statements.
7272 template<
class EventHandler>
7273 bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// --- indentation handling, only when nothing of the line was consumed yet
7278 if(m_evt_handler->m_curr->at_line_beginning())
7280 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7281 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
// indentation matches the "extra" reference: consume indref + 1 characters
7282 if(m_evt_handler->m_curr->indentation_eq_extra())
7284 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7285 _line_progressed(m_evt_handler->m_curr->indref + 1);
7286 if(!m_evt_handler->m_curr->line_contents.rem.len)
// deeper indentation: consume the whole indentation of the line
7290 else if(m_evt_handler->m_curr->indentation_gt_extra())
7292 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7293 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7294 if(!m_evt_handler->m_curr->line_contents.rem.len)
// remaining case: shallower indentation (asserted just below)
7299 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7300 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7301 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
// same indentation as the map and the line does not start with '-':
// the explicit key ends here and is recorded as an empty plain scalar
7302 if(m_evt_handler->m_curr->indentation_eq()
7304 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7306 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7307 _handle_annotations_before_blck_key_scalar();
7308 m_evt_handler->set_key_scalar_plain_empty();
// indentation below the map's: pop out of the block map(s)
7312 else if(m_evt_handler->m_curr->indentation_lt())
7314 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7315 _handle_indentation_pop_from_block_map();
7316 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7319 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7324 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// --- token dispatch on the first remaining character
7333 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7334 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// line and column where the key starts; used for annotations and for the
// indentation of a block map opened as key
7335 const size_t startline = m_evt_handler->m_curr->pos.line;
7336 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7337 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
// single-quoted scalar: without a following colon it is the key itself;
// with one, a new block map is started as the key and the scalar becomes
// the first key of that nested map
7341 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7342 sc = _scan_scalar_squot();
7343 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7345 if(!_maybe_scan_following_colon())
7347 _c4dbgp(
"mapblck[QMRK]: set as key");
7348 _handle_annotations_before_blck_key_scalar();
7349 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7353 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7354 _handle_annotations_before_start_mapblck_as_key();
7355 m_evt_handler->begin_map_key_block();
7356 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7357 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7358 _maybe_skip_whitespace_tokens();
7359 _set_indentation(startindent);
// double-quoted scalar: same two outcomes as the single-quoted case
7364 else if(first ==
'"')
7366 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7367 sc = _scan_scalar_dquot();
7368 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7370 if(!_maybe_scan_following_colon())
7372 _c4dbgp(
"mapblck[QMRK]: set as key");
7373 _handle_annotations_before_blck_key_scalar();
7374 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7378 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7379 _handle_annotations_before_start_mapblck_as_key();
7380 m_evt_handler->begin_map_key_block();
7381 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7382 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7383 _maybe_skip_whitespace_tokens();
7384 _set_indentation(startindent);
// '|' : literal block scalar used as key, scanned relative to indref + 1
7389 else if(first ==
'|')
7391 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7393 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7394 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7395 _handle_annotations_before_blck_key_scalar();
7396 m_evt_handler->set_key_scalar_literal(maybe_filtered);
// '>' : folded block scalar used as key (filtered and stored as folded)
7399 else if(first ==
'>')
7401 _c4dbgp(
"mapblck[QMRK]: scanning block-folded scalar");
7403 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7404 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7405 _handle_annotations_before_blck_key_scalar();
7406 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// plain scalar: same two outcomes as the quoted cases
7409 else if(_scan_scalar_plain_map_blck(&sc))
7411 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7412 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7414 if(!_maybe_scan_following_colon())
7416 _c4dbgp(
"mapblck[QMRK]: set as key");
7417 _handle_annotations_before_blck_key_scalar();
7418 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7422 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7423 _handle_annotations_before_start_mapblck_as_key();
7424 m_evt_handler->begin_map_key_block();
7425 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7426 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7427 _maybe_skip_whitespace_tokens();
7428 _set_indentation(startindent);
// ':' right away: a block map is started as the key, with an empty first key
7433 else if(first ==
':')
7435 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7437 _handle_annotations_before_start_mapblck_as_key();
7438 m_evt_handler->begin_map_key_block();
7439 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7440 m_evt_handler->set_key_scalar_plain_empty();
7441 _line_progressed(1);
7442 _maybe_skip_whitespace_tokens();
7443 _set_indentation(startindent);
// '*' : alias used as key, or as first key of a block map opened as key
7447 else if(first ==
'*')
7449 csubstr ref = _scan_ref_map();
7450 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", _prs(ref));
7452 if(!_maybe_scan_following_colon())
7454 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7455 _handle_keyref(ref);
7459 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7460 _handle_annotations_before_start_mapblck_as_key();
7461 m_evt_handler->begin_map_key_block();
7462 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7463 _handle_keyref(ref);
7464 _set_indentation(startindent);
7468 _maybe_skip_whitespace_tokens();
// '&' : anchor, stored as pending together with its column and line
7470 else if(first ==
'&')
7472 csubstr anchor = _scan_anchor();
7473 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", _prs(anchor));
7474 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '!' : tag, stored as pending together with its column and line
7476 else if(first ==
'!')
7478 csubstr tag = _scan_tag();
7479 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", _prs(tag));
7480 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '-' : either a block sequence used as key, or a document-start token
7482 else if(first ==
'-')
7484 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7485 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7487 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7489 _handle_annotations_before_blck_key_scalar();
7490 m_evt_handler->begin_seq_key_block();
7492 _set_indentation(startindent);
7493 _line_progressed(1);
7497 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7498 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7499 _start_doc_suddenly();
// skip the 3 characters of the document-start token
7500 _line_progressed(3);
7502 _maybe_skip_whitespace_tokens();
// '[' : flow sequence used as key; indentation taken from the parent state
7505 else if(first ==
'[')
7507 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7509 _handle_annotations_before_blck_key_scalar();
7510 m_evt_handler->begin_seq_key_flow();
7512 _set_indentation(m_evt_handler->m_parent->indref + 1);
7513 _line_progressed(1);
// '{' : flow map used as key; indentation taken from the parent state
7516 else if(first ==
'{')
7518 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7520 _handle_annotations_before_blck_key_scalar();
7521 m_evt_handler->begin_map_key_flow();
7523 _set_indentation(m_evt_handler->m_parent->indref + 1);
7524 _line_progressed(1);
// nested '?' : must be indented deeper than indref (asserted); a block map
// is started as the key, optionally followed by a block sequence as its key
7527 else if(first ==
'?')
7529 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7530 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7531 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7533 _handle_annotations_before_blck_key_scalar();
7534 m_evt_handler->begin_map_key_block();
7536 _set_indentation(startindent);
7538 _line_progressed(1);
7539 _maybe_skipchars(
' ');
7540 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7542 _c4dbgp(
"mapblck[QMRK]: seqblck starts after ?");
7544 m_evt_handler->begin_seq_key_block();
7546 _save_indentation();
7547 _line_progressed(1);
7548 _maybe_skipchars(
' ');
// Handle the current position inside a block map while the parser is in the
// RKCL state (the debug traces tag this state "mapblck[RKCL]"): a key was
// read and the ':' that separates it from its value is expected.
//
// Besides the colon, the visible branches handle '?', '-', '.' and a
// fallback for a missing ':' in which the value is set to an empty plain
// scalar and a sibling is added.
//
// @return a flag tested by the block-map handler
//         (`if(_handle_map_block_rkcl())`); presumably true asks the caller
//         to leave its loop -- TODO confirm at the return statements.
7563 template<
class EventHandler>
7564 bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// --- indentation handling, only when nothing of the line was consumed yet
7569 if(m_evt_handler->m_curr->at_line_beginning())
7571 if(m_evt_handler->m_curr->indentation_eq())
7573 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7574 _line_progressed(m_evt_handler->m_curr->indref);
7575 if(!m_evt_handler->m_curr->line_contents.rem.len)
// a line indented less than the map is a parse error in this state
7578 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7580 _c4err(
"invalid indentation");
// --- token dispatch on the first remaining character
7583 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7584 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
// colon found: consume it and any spaces that follow
7587 _c4dbgp(
"mapblck[RKCL]: found the colon");
7588 _line_progressed(1);
7589 _maybe_skipchars(
' ');
// On GCC >= 12 for 32-bit, s390x and ppc64 targets the remainder is passed
// through C4_DONT_OPTIMIZE; NOTE(review): presumably a workaround for a
// miscompilation on those targets -- confirm.
7590 #if defined(__GNUC__) && (__GNUC__ >= 12) \
7591 && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))
7592 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
// a block-sequence token after the colon opens a sequence as the value
7595 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7602 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7604 m_evt_handler->begin_seq_val_block();
7606 _save_indentation();
7607 _line_progressed(1);
7608 _maybe_skipchars(
' ');
// '?' : the previous key gets an empty value and a new sibling is added for
// the explicit key that follows; a block sequence may start right after it
7612 else if(first ==
'?')
7614 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7615 m_evt_handler->set_val_scalar_plain_empty();
7616 m_evt_handler->add_sibling();
7618 _line_progressed(1);
7619 _maybe_skipchars(
' ');
7620 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7622 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7624 m_evt_handler->begin_seq_key_block();
7626 _save_indentation();
7627 _line_progressed(1);
7628 _maybe_skipchars(
' ');
// '-' : at zero indref / zero indentation, or on a document-start token,
// the remainder must be a document-start token (checked, not just asserted)
7632 else if(first ==
'-')
7634 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7636 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7637 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7638 _start_doc_suddenly();
// skip the 3 characters of the document-start token
7639 _line_progressed(3);
7640 _maybe_skip_whitespace_tokens();
// '.' : check whether the two following characters complete a "..." token
// (either ending the line or followed by a space)
7648 else if(first ==
'.')
7650 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7651 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7652 if(rs ==
".." || rs.begins_with(
".. "))
7654 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7655 _end_doc_suddenly();
7656 _line_progressed(3);
7657 _maybe_skip_whitespace_tokens();
7658 _check_doc_end_tokens();
// fallback: no colon was found for the key
7668 _c4dbgp(
"mapblck[RKCL]: missing :");
7669 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
// the key is given an empty plain value and a sibling is added
7671 m_evt_handler->set_val_scalar_plain_empty();
7672 m_evt_handler->add_sibling();
// Handle top-level content whose kind is not yet known, in the JSON variant
// of the handler (per the function name and the "runk_json" traces).
//
// Preconditions (asserted): none of RNXT|RSEQ|RMAP is set and RTOP is set.
// After skipping a comment and leading spaces/tabs, the remainder selects
// one of: flow sequence ('['), flow map ('{'), byte order mark, or a
// scalar (double-quoted or plain).
7681 template<
class EventHandler>
7682 void ParseEngine<EventHandler>::_handle_unk_json()
7684 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7686 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7687 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7689 _maybe_skip_comment();
7690 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// skip leading spaces/tabs; when the remainder is only whitespace, pos falls
// back to rem.len so that all of it is consumed
7694 size_t pos = rem.first_not_of(
" \t");
7697 pos = pos !=
npos ? pos : rem.len;
7698 _c4dbgpf(
"skipping indentation of {}", pos);
7699 _line_progressed(pos);
7700 rem = m_evt_handler->m_curr->line_contents.rem;
7703 _c4dbgpf(
"rem is now {}", _prs(rem));
// '[' : open a flow sequence as value; its indentation is the column of rem
7706 if(rem.begins_with(
'['))
7708 _c4dbgp(
"it's a seq");
7709 _check_trailing_doc_token();
7711 m_evt_handler->begin_seq_val_flow();
7713 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7714 m_doc_empty =
false;
7715 _line_progressed(1);
// '{' : open a flow map as value; its indentation is the column of rem
7717 else if(rem.begins_with(
'{'))
7719 _c4dbgp(
"it's a map");
7720 _check_trailing_doc_token();
7722 m_evt_handler->begin_map_val_flow();
7724 m_doc_empty =
false;
7725 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7726 _line_progressed(1);
// byte order mark handled by _handle_bom()
7728 else if(_handle_bom())
7730 _c4dbgp(
"byte order mark");
// otherwise a scalar is expected; SSCL must not be set (asserted)
7734 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7735 _maybe_skip_whitespace_tokens();
7736 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7739 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7740 const char first = s.str[0];
// double-quoted scalar: set as value unless a colon follows
7744 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7745 _check_trailing_doc_token();
7748 m_doc_empty =
false;
7749 sc = _scan_scalar_dquot();
7750 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7751 if(!_maybe_scan_following_colon())
7753 _c4dbgp(
"runk_json: set as val");
7754 _handle_annotations_before_blck_val_scalar();
7755 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar: set as value unless a colon follows
7762 else if(_scan_scalar_plain_unk(&sc))
7764 _c4dbgp(
"runk_json: got a plain scalar");
7765 _check_trailing_doc_token();
7768 m_doc_empty =
false;
7769 if(!_maybe_scan_following_colon())
7771 _c4dbgp(
"runk_json: set as val");
7772 _handle_annotations_before_blck_val_scalar();
7773 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7774 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handle top-level content whose kind is not yet known (traces: "runk").
//
// Preconditions (asserted): none of RNXT|RSEQ|RMAP is set and RTOP is set.
// The function first deals with tokens that are only meaningful at zero
// indentation (byte order mark, document start/end tokens, '%' directives)
// and then dispatches on the first character to open a container, record
// an anchor/tag/alias, or scan a scalar. A scalar followed by a colon
// starts a new block map with that scalar as its first key.
7791 template<
class EventHandler>
7792 void ParseEngine<EventHandler>::_handle_unk()
7794 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7796 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7797 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7799 _maybe_skipchars(
' ');
7800 _maybe_skip_comment();
7802 if(!m_evt_handler->m_curr->line_contents.rem.len)
7805 _c4dbgpf(
"runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
// --- zero indentation, and either at the line beginning or on the line
// where a byte order mark was found
7807 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7809 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7810 _c4dbgp(
"runk: check BOM");
// remember the line on which the byte order mark was seen
7813 m_bom_line = m_evt_handler->m_curr->pos.line;
7814 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7817 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7818 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
// possible document-start token: consume its 3 characters
7821 _c4dbgp(
"runk: rtop: suspecting doc");
7822 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7824 _c4dbgp(
"runk: rtop: begin doc");
7827 _set_indentation(0);
7829 _line_progressed(3u);
7830 _maybe_skip_whitespace_tokens();
// possible document-end token: consume its 3 characters
7834 else if(first ==
'.')
7836 _c4dbgp(
"runk: rtop: suspecting doc end");
7837 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7839 _c4dbgp(
"runk: rtop: end doc");
7846 _c4dbgp(
"runk: rtop: ignore end doc");
7849 _line_progressed(3u);
7850 _maybe_skip_whitespace_tokens();
7851 _check_doc_end_tokens();
// '%' directive: an error when RDOC is set, or when the document is not
// empty and NDOC is not set
7855 else if(first ==
'%')
7857 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
7858 if(C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC))))
7859 _c4err(
"need document footer before directives");
7860 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// --- general token handling
// startindent: indentation of the line; remindent: column of the remainder
7867 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7868 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7871 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
// on the BOM line the column is corrected by the length of the BOM
7872 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7874 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7875 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7876 remindent -= m_bom_len;
7884 size_t startcol = _handle_block_skip_leading_whitespace();
7885 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// flow sequence: a plain value, unless pending annotations require a key
// container, in which case a block map is started with the flow sequence
// as its key
7889 _c4dbgp(
"runk: flow seq?");
7890 _handle_unk_begin_doc();
7891 if(C4_LIKELY( ! _annotations_require_key_container()))
7893 _c4dbgp(
"runk: it's a seq, flow");
7894 _handle_annotations_before_blck_val_scalar();
7895 m_evt_handler->begin_seq_val_flow();
7897 _set_indentation(0);
7901 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7902 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7903 m_evt_handler->begin_map_val_block();
7905 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7906 m_evt_handler->begin_seq_key_flow();
7908 _set_indentation(0);
7910 _line_progressed(1);
// flow map: same two outcomes as the flow sequence
7912 else if(first ==
'{')
7914 _c4dbgp(
"runk: flow map?");
7915 _handle_unk_begin_doc();
7916 if(C4_LIKELY( ! _annotations_require_key_container()))
7918 _c4dbgp(
"runk: it's a map, flow");
7919 _handle_annotations_before_blck_val_scalar();
7920 m_evt_handler->begin_map_val_flow();
7922 _set_indentation(0);
7926 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7927 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7928 m_evt_handler->begin_map_val_block();
7930 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7931 m_evt_handler->begin_map_key_flow();
7933 _set_indentation(0);
7935 _line_progressed(1);
// '-' block token: open a block sequence; when other tokens precede it on
// the line, the indentation is re-derived from them first
7937 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7939 _c4dbgp(
"runk: it's a seq, block");
7940 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7941 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7942 _handle_unk_begin_doc();
7943 _handle_annotations_before_blck_val_scalar();
7944 m_evt_handler->begin_seq_val_block();
7946 _set_indentation(startindent);
7947 _line_progressed(1);
7948 _maybe_skipchars(
' ');
// '?' block token: open a block map whose first key is explicit; a block
// sequence may start right after the '?' and is then opened as the key
7950 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7952 _c4dbgp(
"runk: it's a map + this key is complex");
7953 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7954 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7955 _handle_block_check_leading_tabs(startcol);
7956 _handle_unk_begin_doc();
7957 _handle_annotations_before_blck_val_scalar();
7958 m_evt_handler->begin_map_val_block();
7960 _set_indentation(startindent);
7961 _line_progressed(1);
7962 _maybe_skipchars(
' ');
7963 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7965 _c4dbgp(
"runk: seqblck key starts after ?");
7967 m_evt_handler->begin_seq_key_block();
7969 _save_indentation();
7970 _line_progressed(1);
7971 _maybe_skipchars(
' ');
// ':' block token: accepted only when the document is still empty or there
// are pending anchors/tags; it then starts a block map with an empty key
7974 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7976 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7978 _c4dbgp(
"runk: it's a map with an empty key");
7979 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7980 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7981 _handle_block_check_leading_tabs(startcol);
7982 const size_t startline = m_evt_handler->m_curr->pos.line;
7983 _handle_unk_begin_doc();
7984 _handle_annotations_before_start_mapblck(startline);
7986 m_evt_handler->begin_map_val_block();
7987 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7988 m_evt_handler->set_key_scalar_plain_empty();
7989 _set_indentation(startindent);
7993 _c4err(
"block colon cannot occur on a new line unless ? is used");
7996 _line_progressed(1);
7997 _maybe_skip_whitespace_tokens();
// '&' : anchor, stored as pending with the column of the remainder
7999 else if(first ==
'&')
8001 csubstr anchor = _scan_anchor();
8002 _c4dbgpf(
"anchor! {}", _prs(anchor));
8003 const size_t line = m_evt_handler->m_curr->pos.line;
8004 _handle_unk_begin_doc();
8005 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8006 _set_indentation(0);
// '*' : alias; used as value, or as first key of a new block map when a
// colon follows
8008 else if(first ==
'*')
8010 csubstr ref = _scan_ref_map();
8011 _c4dbgpf(
"runk: ref! {}", _prs(ref));
8012 _handle_unk_begin_doc();
8013 if(!_maybe_scan_following_colon())
8015 _c4dbgp(
"runk: set val ref");
8016 _handle_valref(ref);
8020 _c4dbgp(
"runk: start new block map, set ref as key");
8021 _handle_block_check_leading_tabs(startcol);
8022 const size_t startline = m_evt_handler->m_curr->pos.line;
8023 _handle_annotations_before_start_mapblck(startline);
8024 m_evt_handler->begin_map_val_block();
8025 _handle_keyref(ref);
8026 _maybe_skip_whitespace_tokens();
8027 _set_indentation(0);
// '!' : tag, stored as pending together with its original text (tag_orig)
8031 else if(first ==
'!')
8034 csubstr tag = _scan_tag(&tag_orig);
8035 _c4dbgpf(
"runk: val tag! {}", _prs(tag));
8038 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8039 const size_t line = m_evt_handler->m_curr->pos.line;
8040 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// --- scalars; SSCL must not be set (asserted)
8044 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8045 const size_t startscalar = _handle_block_get_whitespace_mark();
8046 const size_t startline = m_evt_handler->m_curr->pos.line;
// beginmap: shared steps to start a block map whose first key is the scalar
// just scanned; rejects scalars that ended on a later line than they began
8047 auto beginmap = [&](
size_t startindent_){
8048 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8049 _c4err(
"multiline scalars cannot be used as implicit keys");
8050 _handle_block_check_leading_tabs(startcol, startscalar);
8051 _handle_annotations_before_start_mapblck(startline);
8053 m_evt_handler->begin_map_val_block();
8054 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// after_beginmap: shared steps run once the key has been set
8056 auto after_beginmap = [&](
size_t startindent_){
8057 _maybe_skip_whitespace_tokens();
8058 _set_indentation(startindent_);
// literal block scalar as value
8063 _c4dbgp(
"runk: block-literal scalar");
8064 _handle_unk_begin_doc();
8066 _scan_block(&sb, startindent);
8067 _handle_annotations_before_blck_val_scalar();
8068 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8069 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// folded block scalar as value
8071 else if(first ==
'>')
8073 _c4dbgp(
"runk: block-folded scalar");
8074 _handle_unk_begin_doc();
8076 _scan_block(&sb, startindent);
8077 _handle_annotations_before_blck_val_scalar();
8078 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8079 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// single-quoted scalar: value, or first key of a new block map when a colon
// follows; firsttoken/col are captured before scanning moves the position
8081 else if(first ==
'\'')
8083 _c4dbgp(
"runk: single-quoted scalar");
8084 _handle_unk_begin_doc();
8085 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8086 size_t col = m_evt_handler->m_curr->pos.col;
8087 ScannedScalar sc = _scan_scalar_squot();
8088 if(!_maybe_scan_following_colon())
8090 _c4dbgp(
"runk: set as val");
8091 _handle_annotations_before_blck_val_scalar();
8092 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8093 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8097 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8099 startindent = _handle_unk_check_left_tokens(startindent, col);
8100 beginmap(startindent);
8101 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8102 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8103 after_beginmap(startindent);
// double-quoted scalar: same two outcomes as the single-quoted case
8106 else if(first ==
'"')
8108 _c4dbgp(
"runk: double-quoted scalar");
8109 _handle_unk_begin_doc();
8110 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8111 size_t col = m_evt_handler->m_curr->pos.col;
8112 ScannedScalar sc = _scan_scalar_dquot();
8113 if(!_maybe_scan_following_colon())
8115 _c4dbgp(
"runk: set as val");
8116 _handle_annotations_before_blck_val_scalar();
8117 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8118 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8122 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8124 startindent = _handle_unk_check_left_tokens(startindent, col);
8125 beginmap(startindent);
8126 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8127 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8128 after_beginmap(startindent);
// plain scalar: same two outcomes as the quoted cases
8133 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8134 size_t col = m_evt_handler->m_curr->pos.col;
8136 if(_scan_scalar_plain_unk(&sc))
8138 _c4dbgp(
"runk: plain scalar");
8139 _handle_unk_begin_doc();
8140 if(!_maybe_scan_following_colon())
8142 _c4dbgp(
"runk: set as val");
8143 _handle_annotations_before_blck_val_scalar();
8144 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8145 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8149 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8151 startindent = _handle_unk_check_left_tokens(startindent, col);
8152 beginmap(startindent);
8153 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8154 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8155 after_beginmap(startindent);
// Common bookkeeping performed by the "unknown" handlers when the first
// content token of a document is met: runs _check_trailing_doc_token() and
// marks the document as no longer empty (m_doc_empty = false).
8166 template<
class EventHandler>
8167 void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8169 _c4dbgp(
"runk: begin doc");
8170 _check_trailing_doc_token();
8173 m_doc_empty =
false;
// Inspect the part of the current line to the left of column `col` (after
// any byte order mark) to work out the indentation to use when the current
// token is not the first one on its line.
//
// @param realindent       indentation assumed by the caller; may be replaced
// @param col              column of the current token
// @param skip_annotations when true, pending annotations on this line are
//                         skipped via
//                         _handle_unk_get_first_non_pending_token_pos()
// @return size_t; callers assign it to their startindent, so presumably the
//         (possibly updated) realindent -- TODO confirm at the return
8176 template<
class EventHandler>
8177 size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
// s: the line contents between the end of the BOM and the token's column
8182 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8184 _c4dbgpf(
"runk: check left tokens: s={}", _prs(s,
true));
8185 if(skip_annotations)
8187 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8188 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// first character at or after pos that is not a space
8190 size_t firstns = s.first_not_of(
' ', pos);
8193 _c4dbgpf(
"runk: check left tokens:\n"
8194 " tokens={} skipped={}\n"
8195 " bomlen={} first={} col={}\n"
8196 " (bomlen+first)={} vs {}=col\n"
8197 " startindent={} lineindent={}"
8198 , _prs(s,
true), _prs(s.sub(firstns),
true)
8199 , m_bom_len, firstns, col
8200 , m_bom_len+firstns, col,
8201 realindent, m_evt_handler->m_curr->line_contents.indentation);
// a mismatch means something other than spaces sits left of the token
8202 if(m_bom_len + firstns != col)
// without annotation skipping, the indentation is that first non-space
8204 if(!skip_annotations)
8205 realindent = firstns;
8206 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
// Walk over the leading whitespace and up to two pending annotations
// (as reported by _get_annotations_same_line()) at the start of `s`.
//
// @param s                   text to the left of the current token
// @param indent              [out] receives the position of the first
//                            character that is not a space or tab
// @param first_non_token_pos [out] receives the position reached after the
//                            skipped whitespace and annotations
8212 template<
class EventHandler>
8213 void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s,
size_t *indent,
size_t *first_non_token_pos)
8215 csubstr first, second;
8216 uint32_t total = _get_annotations_same_line(s, &first, &second);
8217 _c4dbgpf(
"runk: before skip: {}", _prs(s,
true));
// skip leading spaces/tabs; both outputs start at this position
8218 size_t pos = s.first_not_of(
" \t");
8223 *indent = *first_non_token_pos = pos;
8226 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8227 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos),
true));
// the first annotation is expected right at pos (asserted)
8228 _c4dbgpf(
"runk: first annotation: {}", first);
8232 _c4assert(s.sub(pos).begins_with(first));
8235 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos),
true));
// the second annotation is expected after the whitespace that follows
8238 _c4dbgpf(
"runk: second annotation: {}", second);
8243 csubstr spos = s.sub(pos);
8244 size_t more = spos.first_not_of(
" \t");
8246 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8248 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos),
true));
8249 _c4assert(s.sub(pos).begins_with(second));
8251 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos),
true));
8253 *first_non_token_pos = pos;
// Find the pending annotations (anchors and tags) that were recorded on the
// current line and report the text of up to two of them.
//
// @param token_soup text of the current line to the left of the token
// @param first_     [out] text of the first annotation, or null when none
// @param second_    [out] text of the second annotation, or null when none
// @return uint32_t count of annotations; NOTE(review): the counting logic
//         between the visible statements is partly elided -- confirm
//         whether it counts all pending entries or only same-line ones.
//
// Must not be called while at the first token of the line (asserted).
8257 template<
class EventHandler>
8258 uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_)
const
8260 _c4assert(!m_evt_handler->m_curr->at_first_token());
8262 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8263 EntryPtr first =
nullptr;
8264 EntryPtr second =
nullptr;
8265 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8268 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// returns the entry when it was recorded on the current line, else nullptr
8269 auto valid_if_same_line = [
this](EntryPtr entry){
8270 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8271 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// count the same-line entries among pending anchors, then pending tags
8275 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8276 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8277 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8278 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8279 _c4dbgpf(
"{} annotations on same line", total);
// returns the first same-line entry other than not_this_one, searching
// anchors before tags; nullptr when there is none
8284 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8285 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8286 if(&m_pending_anchors.annotations[i] != not_this_one
8287 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8288 return &m_pending_anchors.annotations[i];
8289 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8290 if(&m_pending_tags.annotations[i] != not_this_one
8291 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8292 return &m_pending_tags.annotations[i];
8293 return (EntryPtr)
nullptr;
8297 first = get_first_on_same_line(
nullptr);
8299 _c4dbgpf(
"first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
// passing `first` excludes it, so this yields a different entry
8304 second = get_first_on_same_line(first);
8306 _c4dbgpf(
"second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
// maps an entry to its text: entries with a null str, or whose str begins
// with '!' or '<', are treated as tags and start from their original text
// (e->orig); other entries are treated as anchors and start from e->str
8308 auto extract_string = [&](EntryPtr e){
8310 if(!e->str.str || e->str.begins_with_any(
"!<"))
8312 csubstr tag = e->orig;
8316 _c4dbgpf(
"tag: {} -> {}", _maybe_null_str(e->str), tag);
8319 csubstr anchor = e->str;
// the anchor text must lie strictly after the start of token_soup
8324 _c4assert(anchor.str - token_soup.str > 0);
8329 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8332 *first_ = first ? extract_string(first) : nullptr;
8333 *second_ = second ? extract_string(second) : nullptr;
// keep the outputs ordered by their position in the buffer
8334 if(total > 1 && (first_->str > second_->str))
8336 csubstr tmp = *first_;
8339 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
// Handler for the "unknown style" parser state (debug tag "usty").
//
// Reads the first non-blank token of the current line and, based on the
// current state flags, either merges a container into the existing node
// (sections tagged usty[RSEQ] / usty[RMAP]) or begins a new container or
// value (section tagged usty[UNK]). Anchors ('&') and tags ('!') are not
// applied immediately: they are recorded as pending annotations together
// with the column and line where they were seen.
//
// NOTE(review): in this view several original lines (braces and some branch
// conditions) are not visible; comments below describe only what is shown.
8348 template<
class EventHandler>
8349 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// NOTE(review): the format string has a single {} placeholder but two
// arguments (indref and node_id) are passed -- confirm the intended message.
8351 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Precondition: neither block nor flow mode has been decided yet.
8353 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
8355 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8358 _c4dbgp(
"usty[RNXT]: finishing!");
8363 _maybe_skip_comment();
8364 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces/tabs; if the remainder is all blank, consume all of it.
8368 size_t pos = rem.first_not_of(
" \t");
8371 pos = pos !=
npos ? pos : rem.len;
8372 _c4dbgpf(
"skipping indentation of {}", pos);
8373 _line_progressed(pos);
8374 rem = m_evt_handler->m_curr->line_contents.rem;
8377 _c4dbgpf(
"rem is now {}", _prs(rem));
// From here on there must be at least one character left to inspect.
8380 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8381 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8382 char first = rem.str[0];
// ---- usty[RSEQ] section: merging into an existing sequence ----
8385 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8386 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow-seq branch (its condition is not visible in this view): push a
// handler level, record the indentation, and consume the opening character.
8389 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8391 m_evt_handler->_push();
8393 _set_indentation(startindent);
8394 _line_progressed(1);
8395 _maybe_skip_whitespace_tokens();
// Block-seq branch: '-' counts only when _is_blck_token() accepts it.
8397 else if(first ==
'-' && _is_blck_token(rem))
8399 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8401 m_evt_handler->_push();
8403 _set_indentation(startindent);
8404 _line_progressed(1);
8405 _maybe_skip_whitespace_tokens();
// Anything else cannot be merged into a sequence.
8409 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] section: merging into an existing map ----
8412 else if(has_any(
RMAP))
8414 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8415 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow-map branch (its condition is not visible in this view).
8418 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8420 _handle_annotations_before_blck_val_scalar();
8421 m_evt_handler->_push();
8423 _set_indentation(startindent);
8424 _line_progressed(1);
8425 _maybe_skip_whitespace_tokens();
// '?' block token: block map whose key is complex.
8427 else if(first ==
'?' && _is_blck_token(rem))
8429 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8431 _handle_annotations_before_blck_val_scalar();
8432 m_evt_handler->_push();
8434 _save_indentation();
8435 _line_progressed(1);
8436 _maybe_skip_whitespace_tokens();
// ':' block token: the key is empty, so an empty plain key scalar is set.
8438 else if(first ==
':' && _is_blck_token(rem))
8440 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8442 _handle_annotations_before_blck_val_scalar();
8443 m_evt_handler->_push();
8444 m_evt_handler->set_key_scalar_plain_empty();
8446 _save_indentation();
8447 _line_progressed(1);
8448 _maybe_skip_whitespace_tokens();
// Anchor: scan it and store it as a pending anchor with its column and line.
8450 else if(rem.begins_with(
'&'))
8452 csubstr anchor = _scan_anchor();
8453 _c4dbgpf(
"usty[RMAP]: anchor! {}", _prs(anchor));
8454 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8455 const size_t line = m_evt_handler->m_curr->pos.line;
8456 _add_annotation(&m_pending_anchors, anchor, indentation, line);
// NOTE(review): same expression as the `indentation` local computed above.
8457 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: in a map it is only accepted as a key, so a colon must follow.
8459 else if(first ==
'*')
8461 csubstr ref = _scan_ref_map();
8462 _c4dbgpf(
"usty[RMAP]: ref! {}", _prs(ref));
8463 if(!_maybe_scan_following_colon())
8465 _c4err(
"cannot read a VAL to a map");
8469 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8470 const size_t startline = m_evt_handler->m_curr->pos.line;
8472 _handle_annotations_before_start_mapblck(startline);
8473 m_evt_handler->_push();
8474 _handle_keyref(ref);
8475 _maybe_skip_whitespace_tokens();
8476 _set_indentation(startindent);
// Tag: scan it and store it as a pending tag with its column and line.
8480 else if(first ==
'!')
8482 csubstr tag = _scan_tag();
8483 _c4dbgpf(
"usty[RMAP]: val tag! {}", _prs(tag));
8486 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8487 const size_t line = m_evt_handler->m_curr->pos.line;
8488 _add_annotation(&m_pending_tags, tag, indentation, line);
// Sequence openers are rejected when merging into a map.
8490 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8492 _c4err(
"cannot parse a seq into an existing map");
// Scalar candidates: every accepted scalar must be followed by a colon
// (it becomes a key); otherwise an error is raised.
8496 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8497 startindent = m_evt_handler->m_curr->line_contents.indentation;
8498 const size_t startline = m_evt_handler->m_curr->pos.line;
8500 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8503 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8504 sc = _scan_scalar_squot();
8505 if(!_maybe_scan_following_colon())
8507 _c4err(
"cannot read a VAL to a map");
8511 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8513 _handle_annotations_before_start_mapblck(startline);
8514 m_evt_handler->_push();
8515 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8516 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8517 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8518 _set_indentation(startindent);
8520 _maybe_skip_whitespace_tokens();
8523 else if(first ==
'"')
8525 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8526 sc = _scan_scalar_dquot();
8527 if(!_maybe_scan_following_colon())
8529 _c4err(
"cannot read a VAL to a map");
8533 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8535 _handle_annotations_before_start_mapblck(startline);
8536 m_evt_handler->_push();
8537 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8538 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8539 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8540 _set_indentation(startindent);
8542 _maybe_skip_whitespace_tokens();
// Block scalar indicators are rejected as bare keys.
8545 else if(first ==
'|')
8547 _c4err(
"block literal keys must be enclosed in '?'");
// NOTE(review): the usty[UNK] section below calls '>' "block-folded", yet
// this message says "block literal" -- confirm whether that is intended.
8549 else if(first ==
'>')
8551 _c4err(
"block literal keys must be enclosed in '?'");
8553 else if(_scan_scalar_plain_unk(&sc))
8555 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8556 if(!_maybe_scan_following_colon())
8558 _c4err(
"cannot read a VAL to a map");
8562 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8564 _handle_annotations_before_start_mapblck(startline);
8565 m_evt_handler->_push();
8566 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8567 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8568 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8569 _set_indentation(startindent);
8571 _maybe_skip_whitespace_tokens();
// ---- usty[UNK] section: no existing container; begin a new one or a value ----
8582 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8583 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow-seq branch (its condition is not visible in this view).
8586 _c4dbgp(
"usty[UNK]: it's a flow seq");
8588 _handle_annotations_before_blck_val_scalar();
8589 m_evt_handler->begin_seq_val_flow();
8591 _set_indentation(startindent);
8592 _line_progressed(1);
8593 _maybe_skip_whitespace_tokens();
8595 else if(first ==
'-' && _is_blck_token(rem))
8597 _c4dbgp(
"usty[UNK]: it's a block seq");
8599 _handle_annotations_before_blck_val_scalar();
8600 m_evt_handler->begin_seq_val_block();
8602 _set_indentation(startindent);
8603 _line_progressed(1);
8604 _maybe_skip_whitespace_tokens();
8606 else if(first ==
'{')
8608 _c4dbgp(
"usty[UNK]: it's a flow map");
8610 _handle_annotations_before_blck_val_scalar();
8611 m_evt_handler->begin_map_val_flow();
8613 _set_indentation(startindent);
8614 _line_progressed(1);
8615 _maybe_skip_whitespace_tokens();
8617 else if(first ==
'?' && _is_blck_token(rem))
8619 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8621 _handle_annotations_before_blck_val_scalar();
8622 m_evt_handler->begin_map_val_block();
8624 _save_indentation();
8625 _line_progressed(1);
8626 _maybe_skip_whitespace_tokens();
8628 else if(first ==
':' && _is_blck_token(rem))
8630 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8632 _handle_annotations_before_blck_val_scalar();
8633 m_evt_handler->begin_map_val_block();
8634 m_evt_handler->set_key_scalar_plain_empty();
8636 _save_indentation();
8637 _line_progressed(1);
8638 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation, as in the RMAP section.
8640 else if(first ==
'&')
8642 csubstr anchor = _scan_anchor();
8643 _c4dbgpf(
"usty[UNK]: anchor! {}", _prs(anchor));
8644 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8645 const size_t line = m_evt_handler->m_curr->pos.line;
8646 _add_annotation(&m_pending_anchors, anchor, indentation, line);
// NOTE(review): same expression as the `indentation` local computed above.
8647 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: without a following colon it is a value; with one it becomes
// the key of a newly started block map.
8649 else if(first ==
'*')
8651 csubstr ref = _scan_ref_map();
8652 _c4dbgpf(
"usty[UNK]: ref! {}", _prs(ref));
8653 if(!_maybe_scan_following_colon())
8655 _c4dbgp(
"usty[UNK]: set val ref");
8656 _handle_valref(ref);
8660 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8661 const size_t startline = m_evt_handler->m_curr->pos.line;
8663 _handle_annotations_before_start_mapblck(startline);
8664 m_evt_handler->begin_map_val_block();
8665 _handle_keyref(ref);
8666 _maybe_skip_whitespace_tokens();
8667 _set_indentation(startindent);
8671 else if(first ==
'!')
8673 csubstr tag = _scan_tag();
8674 _c4dbgpf(
"usty[UNK]: val tag! {}", _prs(tag));
8677 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8678 const size_t line = m_evt_handler->m_curr->pos.line;
8679 _add_annotation(&m_pending_tags, tag, indentation, line);
// Scalar candidates: a scalar followed by a colon starts a block map with
// the scalar as key; otherwise the scalar is set as the value.
8683 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8684 startindent = m_evt_handler->m_curr->line_contents.indentation;
8685 const size_t startline = m_evt_handler->m_curr->pos.line;
8688 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8691 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8692 sc = _scan_scalar_squot();
8693 if(!_maybe_scan_following_colon())
8695 _c4dbgp(
"usty[UNK]: set as val");
8696 _handle_annotations_before_blck_val_scalar();
8697 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8698 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8703 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8705 _handle_annotations_before_start_mapblck(startline);
8706 m_evt_handler->begin_map_val_block();
8707 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8708 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8709 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8710 _set_indentation(startindent);
8712 _maybe_skip_whitespace_tokens();
8715 else if(first ==
'"')
8717 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8718 sc = _scan_scalar_dquot();
8719 if(!_maybe_scan_following_colon())
8721 _c4dbgp(
"usty[UNK]: set as val");
8722 _handle_annotations_before_blck_val_scalar();
8723 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8724 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8729 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8731 _handle_annotations_before_start_mapblck(startline);
8732 m_evt_handler->begin_map_val_block();
8733 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8734 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8735 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8736 _set_indentation(startindent);
8738 _maybe_skip_whitespace_tokens();
// Block scalars ('|' literal, '>' folded) are always set as values here.
8741 else if(first ==
'|')
8743 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8745 _scan_block(&sb, startindent);
8746 _c4dbgp(
"usty[UNK]: set as val");
8747 _handle_annotations_before_blck_val_scalar();
8748 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8749 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8752 else if(first ==
'>')
8754 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8756 _scan_block(&sb, startindent);
8757 _c4dbgp(
"usty[UNK]: set as val");
8758 _handle_annotations_before_blck_val_scalar();
8759 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8760 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8763 else if(_scan_scalar_plain_unk(&sc))
8765 _c4dbgp(
"usty[UNK]: got a plain scalar");
8766 if(!_maybe_scan_following_colon())
8768 _c4dbgp(
"usty[UNK]: set as val");
8769 _handle_annotations_before_blck_val_scalar();
8770 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8771 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8776 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8778 _handle_annotations_before_start_mapblck(startline);
8779 m_evt_handler->begin_map_val_block();
8780 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8781 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8782 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8783 _set_indentation(startindent);
8785 _maybe_skip_whitespace_tokens();
// Event-emitting parse driver. The signature line is not visible in this
// view; NOTE(review): presumably ParseEngine::parse_json_in_place_ev(csubstr
// filename, substr src), since only RMAP/RUNK dispatch is shown -- confirm.
//
// Visible flow: start the parse on the event handler, begin the stream, then
// consume the source line by line, dispatching on the current state flags
// until the file is finished, and finally call finish_parse().
8799 template<
class EventHandler>
// The handler must already have at least one entry on its state stack.
8802 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8804 m_evt_handler->start_parse(filename.str, src);
8805 m_evt_handler->begin_stream();
// Outer loop: one iteration per line; inner loop: until the line is consumed.
8807 while( ! _finished_file())
8810 while( ! _finished_line())
// A line that is not finished must still have content to handle.
8813 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// Dispatch on state flags (the handler calls themselves are not visible here).
8818 else if(has_any(
RMAP))
8822 else if(has_any(
RUNK))
// No known state flag matched.
8828 _c4err(
"internal error");
8831 if(_finished_file())
8836 m_evt_handler->finish_parse();
// Event-emitting parse driver. The signature line is not visible in this
// view; NOTE(review): presumably ParseEngine::parse_in_place_ev(csubstr
// filename, substr src), since it dispatches to the block handlers and to
// the USTY state -- confirm.
//
// Visible flow: start the parse on the event handler, begin the stream, then
// consume the source line by line, dispatching on the current state flags
// until the file is finished, and finally call finish_parse().
8842 template<
class EventHandler>
// The handler must already have at least one entry on its state stack.
8845 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8847 m_evt_handler->start_parse(filename.str, src);
8848 m_evt_handler->begin_stream();
// Outer loop: one iteration per line; inner loop: until the line is consumed.
8850 while( ! _finished_file())
8853 while( ! _finished_line())
// A line that is not finished must still have content to handle.
8856 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
// First branch (condition not visible here): the non-seq case must be a map.
8867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// Block mode: sequence handler or, when RMAP is set, the map handler.
8876 else if(has_any(
RBLCK))
8880 _handle_seq_block();
8884 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8885 _handle_map_block();
8888 else if(has_any(
RUNK))
8892 else if(has_any(
USTY))
// No known state flag matched.
8898 _c4err(
"internal error");
8901 if(_finished_file())
8906 m_evt_handler->finish_parse();
8915 #undef _c4dbgnextline
8919 #if defined(_MSC_VER)
8920 # pragma warning(pop)
8921 #elif defined(__clang__)
8922 # pragma clang diagnostic pop
8923 #elif defined(__GNUC__)
8924 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
bool is_valid_tag_handle(csubstr handle)
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _ryml_relocate(s)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
csubstr name
name of the file
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark